From 4435aad09ddb1ec2a3a9d2d90e3c62253338fd1e Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Sat, 30 Jul 2011 19:25:24 +0000 Subject: [PATCH 001/177] Branching for HDFS-1623 - High Availability Framework for HDFS NN git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1152502 13f79535-47bb-0310-9956-ffa450edef68 From 57213dbcb54ad8fee7f651c8efea00d9518cf13f Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 4 Aug 2011 17:23:57 +0000 Subject: [PATCH 002/177] Add a CHANGES.txt file for HDFS-1623 branch git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1153937 13f79535-47bb-0310-9956-ffa450edef68 --- hdfs/CHANGES.HDFS-1623.txt | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 hdfs/CHANGES.HDFS-1623.txt diff --git a/hdfs/CHANGES.HDFS-1623.txt b/hdfs/CHANGES.HDFS-1623.txt new file mode 100644 index 00000000000..56c9086b69e --- /dev/null +++ b/hdfs/CHANGES.HDFS-1623.txt @@ -0,0 +1,5 @@ +Changes for HDFS-1623 branch. + +This change list will be merged into the trunk CHANGES.txt when the HDFS-1623 +branch is merged. +------------------------------ From 05c24937cfdfc19afbcf50face41447144796dfc Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 4 Aug 2011 17:24:57 +0000 Subject: [PATCH 003/177] HDFS-2179. Add fencing framework and mechanisms for NameNode HA. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1153939 13f79535-47bb-0310-9956-ffa450edef68 --- hdfs/CHANGES.HDFS-1623.txt | 2 + hdfs/ivy.xml | 1 + hdfs/ivy/libraries.properties | 1 + .../ha/BadFencingConfigurationException.java | 36 ++ .../hdfs/server/namenode/ha/FenceMethod.java | 63 ++++ .../hdfs/server/namenode/ha/NodeFencer.java | 186 +++++++++ .../namenode/ha/ShellCommandFencer.java | 173 +++++++++ .../server/namenode/ha/SshFenceByTcpPort.java | 352 ++++++++++++++++++ .../hdfs/server/namenode/ha/StreamPumper.java | 73 ++++ .../server/namenode/ha/TestNodeFencer.java | 142 +++++++ .../namenode/ha/TestShellCommandFencer.java | 133 +++++++ .../namenode/ha/TestSshFenceByTcpPort.java | 102 +++++ 12 files changed, 1264 insertions(+) create mode 100644 hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/BadFencingConfigurationException.java create mode 100644 hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/FenceMethod.java create mode 100644 hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/NodeFencer.java create mode 100644 hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/ShellCommandFencer.java create mode 100644 hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/SshFenceByTcpPort.java create mode 100644 hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StreamPumper.java create mode 100644 hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestNodeFencer.java create mode 100644 hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestShellCommandFencer.java create mode 100644 hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestSshFenceByTcpPort.java diff --git a/hdfs/CHANGES.HDFS-1623.txt b/hdfs/CHANGES.HDFS-1623.txt index 56c9086b69e..eeb80edc5a6 100644 --- a/hdfs/CHANGES.HDFS-1623.txt +++ b/hdfs/CHANGES.HDFS-1623.txt @@ -3,3 +3,5 @@ Changes for HDFS-1623 branch. This change list will be merged into the trunk CHANGES.txt when the HDFS-1623 branch is merged. ------------------------------ + +HDFS-2179. Add fencing framework and mechanisms for NameNode HA. 
(todd) diff --git a/hdfs/ivy.xml b/hdfs/ivy.xml index e63c44029d5..2a67b39e1ab 100644 --- a/hdfs/ivy.xml +++ b/hdfs/ivy.xml @@ -74,6 +74,7 @@ + diff --git a/hdfs/ivy/libraries.properties b/hdfs/ivy/libraries.properties index 8fc14d9d2da..5468d3780b4 100644 --- a/hdfs/ivy/libraries.properties +++ b/hdfs/ivy/libraries.properties @@ -45,6 +45,7 @@ ivy.version=2.1.0 jasper.version=5.5.12 jdeb.version=0.8 +jsch.version=0.1.42 jsp.version=2.1 jsp-api.version=5.5.12 jetty.version=6.1.14 diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/BadFencingConfigurationException.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/BadFencingConfigurationException.java new file mode 100644 index 00000000000..4540d9994de --- /dev/null +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/BadFencingConfigurationException.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.IOException; + +/** + * Indicates that the operator has specified an invalid configuration + * for fencing methods. + */ +class BadFencingConfigurationException extends IOException { + private static final long serialVersionUID = 1L; + + public BadFencingConfigurationException(String msg) { + super(msg); + } + + public BadFencingConfigurationException(String msg, Throwable cause) { + super(msg, cause); + } +} \ No newline at end of file diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/FenceMethod.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/FenceMethod.java new file mode 100644 index 00000000000..e4c79a229ee --- /dev/null +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/FenceMethod.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configurable; + +/** + * A fencing method is a method by which one node can forcibly prevent + * another node from making continued progress. This might be implemented + * by killing a process on the other node, by denying the other node's + * access to shared storage, or by accessing a PDU to cut the other node's + * power. + *

+ * Since these methods are often vendor- or device-specific, operators + * may implement this interface in order to achieve fencing. + *

+ * Fencing is configured by the operator as an ordered list of methods to + * attempt. Each method will be tried in turn, and the next in the list + * will only be attempted if the previous one fails. See {@link NodeFencer} + * for more information. + *
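+ * <p>
+ * A minimal sketch of an implementation (the class name, argument check, and
+ * fencing action are illustrative):
+ * <pre>
+ * public class MyFenceMethod extends Configured implements FenceMethod {
+ *   public void checkArgs(String args) throws BadFencingConfigurationException {
+ *     if (args == null || args.isEmpty()) {
+ *       throw new BadFencingConfigurationException("an argument is required");
+ *     }
+ *   }
+ *   public boolean tryFence(String args) {
+ *     // illustrative only: contact the other node here and try to stop it,
+ *     // returning true only if fencing definitely succeeded
+ *     return false;
+ *   }
+ * }
+ * </pre>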

+ * If an implementation also implements {@link Configurable} then its + * setConf method will be called upon instantiation. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public interface FenceMethod { + /** + * Verify that the given fencing method's arguments are valid. + * @param args the arguments provided in the configuration. This may + * be null if the operator did not configure any arguments. + * @throws BadFencingConfigurationException if the arguments are invalid + */ + public void checkArgs(String args) throws BadFencingConfigurationException; + + /** + * Attempt to fence the target node. + * @param args the configured arguments, which were checked at startup by + * {@link #checkArgs(String)} + * @return true if fencing was successful, false if unsuccessful or + * indeterminate + * @throws BadFencingConfigurationException if the configuration was + * determined to be invalid only at runtime + */ + public boolean tryFence(String args) throws BadFencingConfigurationException; +} \ No newline at end of file diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/NodeFencer.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/NodeFencer.java new file mode 100644 index 00000000000..279a26acd9c --- /dev/null +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/NodeFencer.java @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ReflectionUtils; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; + +/** + * This class parses the configured list of fencing methods, and + * is responsible for trying each one in turn while logging informative + * output.

+ * + * The fencing methods are configured as a carriage-return separated list. + * Each line in the list is of the form:

+ * com.example.foo.MyMethod(arg string) + * or + * com.example.foo.MyMethod + * The class provided must implement the {@link FenceMethod} interface. + * The fencing methods that ship with Hadoop may also be referred to + * by shortened names:

+ * <ul>
+ * <li><code>shell(/path/to/some/script.sh args...)</code></li>
+ * <li><code>sshfence(...)</code> (see {@link SshFenceByTcpPort})</li>
+ * </ul>
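+ * <p>
+ * A minimal usage sketch (the host name, target port, and script path are
+ * illustrative only):
+ * <pre>
+ * Configuration conf = new Configuration();
+ * conf.set("dfs.namenode.ha.fencing.methods",
+ *     "sshfence(other-nn, 8020)\n" +
+ *     "shell(/path/to/some/script.sh args...)");
+ * // the constructor parses each line and calls checkArgs() on every method
+ * NodeFencer fencer = new NodeFencer(conf);
+ * // tries each configured method in turn until one reports success
+ * boolean fenced = fencer.fence();
+ * </pre>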
+ */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class NodeFencer { + static final String CONF_METHODS_KEY = + "dfs.namenode.ha.fencing.methods"; + + private static final String CLASS_RE = "([a-zA-Z0-9\\.\\$]+)"; + private static final Pattern CLASS_WITH_ARGUMENT = + Pattern.compile(CLASS_RE + "\\((.+?)\\)"); + private static final Pattern CLASS_WITHOUT_ARGUMENT = + Pattern.compile(CLASS_RE); + private static final Pattern HASH_COMMENT_RE = + Pattern.compile("#.*$"); + + private static final Log LOG = LogFactory.getLog(NodeFencer.class); + + /** + * Standard fencing methods included with HDFS. + */ + private static final Map> STANDARD_METHODS = + ImmutableMap.>of( + "shell", ShellCommandFencer.class, + "sshfence", SshFenceByTcpPort.class); + + private final List methods; + + public NodeFencer(Configuration conf) + throws BadFencingConfigurationException { + this.methods = parseMethods(conf); + } + + public boolean fence() { + LOG.info("====== Beginning NameNode Fencing Process... ======"); + int i = 0; + for (FenceMethodWithArg method : methods) { + LOG.info("Trying method " + (++i) + "/" + methods.size() +": " + method); + + try { + if (method.method.tryFence(method.arg)) { + LOG.info("====== Fencing successful by method " + method + " ======"); + return true; + } + } catch (BadFencingConfigurationException e) { + LOG.error("Fencing method " + method + " misconfigured", e); + continue; + } catch (Throwable t) { + LOG.error("Fencing method " + method + " failed with an unexpected error.", t); + continue; + } + LOG.warn("Fencing method " + method + " was unsuccessful."); + } + + LOG.error("Unable to fence NameNode by any configured method."); + return false; + } + + private static List parseMethods(Configuration conf) + throws BadFencingConfigurationException { + String confStr = conf.get(CONF_METHODS_KEY); + String[] lines = confStr.split("\\s*\n\\s*"); + + List methods = Lists.newArrayList(); + for (String line : lines) { + line = HASH_COMMENT_RE.matcher(line).replaceAll(""); + line = line.trim(); + if (!line.isEmpty()) { + methods.add(parseMethod(conf, line)); + } + } + + return methods; + } + + private static FenceMethodWithArg parseMethod(Configuration conf, String line) + throws BadFencingConfigurationException { + Matcher m; + if ((m = CLASS_WITH_ARGUMENT.matcher(line)).matches()) { + String className = m.group(1); + String arg = m.group(2); + + return createFenceMethod(conf, className, arg); + } else if ((m = CLASS_WITHOUT_ARGUMENT.matcher(line)).matches()) { + String className = m.group(1); + return createFenceMethod(conf, className, null); + } else { + throw new BadFencingConfigurationException( + "Unable to parse line: '" + line + "'"); + } + } + + private static FenceMethodWithArg createFenceMethod( + Configuration conf, String clazzName, String arg) + throws BadFencingConfigurationException { + + Class clazz; + try { + // See if it's a short name for one of the built-in methods + clazz = STANDARD_METHODS.get(clazzName); + if (clazz == null) { + // Try to instantiate the user's custom method + clazz = Class.forName(clazzName); + } + } catch (Exception e) { + throw new BadFencingConfigurationException( + "Could not find configured fencing method " + clazzName, + e); + } + + // Check that it implements the right interface + if (!FenceMethod.class.isAssignableFrom(clazz)) { + throw new BadFencingConfigurationException("Class " + clazzName + + " does not implement FenceMethod"); + } + + FenceMethod method = (FenceMethod)ReflectionUtils.newInstance( + clazz, 
conf); + method.checkArgs(arg); + return new FenceMethodWithArg(method, arg); + } + + private static class FenceMethodWithArg { + private final FenceMethod method; + private final String arg; + + private FenceMethodWithArg(FenceMethod method, String arg) { + this.method = method; + this.arg = arg; + } + + public String toString() { + return method.getClass().getCanonicalName() + "(" + arg + ")"; + } + } +} diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/ShellCommandFencer.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/ShellCommandFencer.java new file mode 100644 index 00000000000..96e12287466 --- /dev/null +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/ShellCommandFencer.java @@ -0,0 +1,173 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configured; + +import com.google.common.annotations.VisibleForTesting; + +/** + * Fencing method that runs a shell command. It should be specified + * in the fencing configuration like:
+ * + * shell(/path/to/my/script.sh arg1 arg2 ...) + *
+ * The string between '(' and ')' is passed directly to a bash shell and + * may not include any closing parentheses.

+ * + * The shell command will be run with an environment set up to contain + * all of the current Hadoop configuration variables, with the '_' character + * replacing any '.' characters in the configuration keys.
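+ * <p>
+ * For example, a configuration entry such as <code>in.fencing.tests=yessir</code>
+ * (the key exercised by TestShellCommandFencer in this patch) is exposed to the
+ * command as <code>$in_fencing_tests</code>, so a method such as
+ * <code>shell(echo $in_fencing_tests)</code> would log "yessir" at INFO level.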

+ * + * If the shell command returns an exit code of 0, the fencing is + * determined to be successful. If it returns any other exit code, the + * fencing was not successful and the next fencing method in the list + * will be attempted.

+ * + * Note: this fencing method does not implement any timeout. + * If timeouts are necessary, they should be implemented in the shell + * script itself (eg by forking a subshell to kill its parent in + * some number of seconds). + */ +public class ShellCommandFencer + extends Configured implements FenceMethod { + + /** Length at which to abbreviate command in long messages */ + private static final int ABBREV_LENGTH = 20; + + @VisibleForTesting + static Log LOG = LogFactory.getLog( + ShellCommandFencer.class); + + @Override + public void checkArgs(String args) throws BadFencingConfigurationException { + if (args == null || args.isEmpty()) { + throw new BadFencingConfigurationException( + "No argument passed to 'shell' fencing method"); + } + // Nothing else we can really check without actually running the command + } + + @Override + public boolean tryFence(String cmd) { + ProcessBuilder builder = new ProcessBuilder( + "bash", "-e", "-c", cmd); + setConfAsEnvVars(builder.environment()); + + Process p; + try { + p = builder.start(); + p.getOutputStream().close(); + } catch (IOException e) { + LOG.warn("Unable to execute " + cmd, e); + return false; + } + + String pid = tryGetPid(p); + LOG.info("Launched fencing command '" + cmd + "' with " + + ((pid != null) ? ("pid " + pid) : "unknown pid")); + + String logPrefix = abbreviate(cmd, ABBREV_LENGTH); + if (pid != null) { + logPrefix = "[PID " + pid + "] " + logPrefix; + } + + // Pump logs to stderr + StreamPumper errPumper = new StreamPumper( + LOG, logPrefix, p.getErrorStream(), + StreamPumper.StreamType.STDERR); + errPumper.start(); + + StreamPumper outPumper = new StreamPumper( + LOG, logPrefix, p.getInputStream(), + StreamPumper.StreamType.STDOUT); + outPumper.start(); + + int rc; + try { + rc = p.waitFor(); + errPumper.join(); + outPumper.join(); + } catch (InterruptedException ie) { + LOG.warn("Interrupted while waiting for fencing command: " + cmd); + return false; + } + + return rc == 0; + } + + /** + * Abbreviate a string by putting '...' in the middle of it, + * in an attempt to keep logs from getting too messy. + * @param cmd the string to abbreviate + * @param len maximum length to abbreviate to + * @return abbreviated string + */ + static String abbreviate(String cmd, int len) { + if (cmd.length() > len && len >= 5) { + int firstHalf = (len - 3) / 2; + int rem = len - firstHalf - 3; + + return cmd.substring(0, firstHalf) + + "..." + cmd.substring(cmd.length() - rem); + } else { + return cmd; + } + } + + /** + * Attempt to use evil reflection tricks to determine the + * pid of a launched process. This is helpful to ops + * if debugging a fencing process that might have gone + * wrong. If running on a system or JVM where this doesn't + * work, it will simply return null. + */ + private static String tryGetPid(Process p) { + try { + Class clazz = p.getClass(); + if (clazz.getName().equals("java.lang.UNIXProcess")) { + Field f = clazz.getDeclaredField("pid"); + f.setAccessible(true); + return String.valueOf(f.getInt(p)); + } else { + LOG.trace("Unable to determine pid for " + p + + " since it is not a UNIXProcess"); + return null; + } + } catch (Throwable t) { + LOG.trace("Unable to determine pid for " + p, t); + return null; + } + } + + /** + * Set the environment of the subprocess to be the Configuration, + * with '.'s replaced by '_'s. 
+ */ + private void setConfAsEnvVars(Map env) { + for (Map.Entry pair : getConf()) { + env.put(pair.getKey().replace('.', '_'), pair.getValue()); + } + } +} diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/SshFenceByTcpPort.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/SshFenceByTcpPort.java new file mode 100644 index 00000000000..bb01d53d035 --- /dev/null +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/SshFenceByTcpPort.java @@ -0,0 +1,352 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.IOException; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Collection; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.hdfs.server.namenode.NameNode; + +import com.google.common.annotations.VisibleForTesting; +import com.jcraft.jsch.ChannelExec; +import com.jcraft.jsch.JSch; +import com.jcraft.jsch.JSchException; +import com.jcraft.jsch.Session; + +/** + * This fencing implementation sshes to the target node and uses fuser + * to kill the process listening on the NameNode's TCP port. This is + * more accurate than using "jps" since it doesn't require parsing, + * and will work even if there are multiple NameNodes running on the + * same machine.

+ * It returns a successful status code if:
+ * <ul>
+ * <li><code>fuser</code> indicates it successfully killed a process, or</li>
+ * <li><code>nc -z</code> indicates that nothing is listening on the target port</li>
+ * </ul>
+ * This fencing mechanism is configured in the fencing method list as:
+ * sshfence([username@]nnhost[:ssh-port][, target-nn-port])
+ * where the first argument specifies the username, host, and port to ssh
+ * into, and the second argument specifies the port on which the target
+ * NN process is listening.
+ *

+ * For example, sshfence(other-nn, 8020) will SSH into + * other-nn as the current user on the standard SSH port, + * then kill whatever process is listening on port 8020. + *

+ * If no target-nn-port is specified, it is assumed that the + * target NameNode is listening on the same port as the local NameNode. + *
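+ * <p>
+ * For instance (the host name is illustrative), sshfence(other-nn) with no
+ * second argument would kill whatever process on other-nn is listening on
+ * the RPC port configured for the local NameNode.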

+ * In order to achieve passwordless SSH, the operator must also configure + * dfs.namenode.ha.fencing.ssh.private-key-files to point to an + * SSH key that has passphrase-less access to the given username and host. + */ +public class SshFenceByTcpPort extends Configured + implements FenceMethod { + + static final Log LOG = LogFactory.getLog( + SshFenceByTcpPort.class); + + static final String CONF_CONNECT_TIMEOUT_KEY = + "dfs.namenode.ha.fencing.ssh.connect-timeout"; + private static final int CONF_CONNECT_TIMEOUT_DEFAULT = + 30*1000; + static final String CONF_IDENTITIES_KEY = + "dfs.namenode.ha.fencing.ssh.private-key-files"; + + /** + * Verify that the arguments are parseable and that the host + * can be resolved. + */ + @Override + public void checkArgs(String argStr) throws BadFencingConfigurationException { + Args args = new Args(argStr); + try { + InetAddress.getByName(args.host); + } catch (UnknownHostException e) { + throw new BadFencingConfigurationException( + "Unknown host: " + args.host); + } + } + + @Override + public boolean tryFence(String argsStr) + throws BadFencingConfigurationException { + Args args = new Args(argsStr); + + Session session; + try { + session = createSession(args); + } catch (JSchException e) { + LOG.warn("Unable to create SSH session", e); + return false; + } + + LOG.info("Connecting to " + args.host + "..."); + + try { + session.connect(getSshConnectTimeout()); + } catch (JSchException e) { + LOG.warn("Unable to connect to " + args.host + + " as user " + args.user, e); + return false; + } + LOG.info("Connected to " + args.host); + + int targetPort = args.targetPort != null ? + args.targetPort : getDefaultNNPort(); + try { + return doFence(session, targetPort); + } catch (JSchException e) { + LOG.warn("Unable to achieve fencing on remote host", e); + return false; + } finally { + session.disconnect(); + } + } + + + private Session createSession(Args args) throws JSchException { + JSch jsch = new JSch(); + for (String keyFile : getKeyFiles()) { + jsch.addIdentity(keyFile); + } + JSch.setLogger(new LogAdapter()); + + Session session = jsch.getSession(args.user, args.host, args.sshPort); + session.setConfig("StrictHostKeyChecking", "no"); + return session; + } + + private boolean doFence(Session session, int nnPort) throws JSchException { + try { + LOG.info("Looking for process running on port " + nnPort); + int rc = execCommand(session, + "PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp " + nnPort); + if (rc == 0) { + LOG.info("Successfully killed process that was " + + "listening on port " + nnPort); + // exit code 0 indicates the process was successfully killed. + return true; + } else if (rc == 1) { + // exit code 1 indicates either that the process was not running + // or that fuser didn't have root privileges in order to find it + // (eg running as a different user) + LOG.info( + "Indeterminate response from trying to kill NameNode. 
" + + "Verifying whether it is running using nc..."); + rc = execCommand(session, "nc -z localhost 8020"); + if (rc == 0) { + // the NN is still listening - we are unable to fence + LOG.warn("Unable to fence NN - it is running but we cannot kill it"); + return false; + } else { + LOG.info("Verified that the NN is down."); + return true; + } + } else { + // other + } + LOG.info("rc: " + rc); + return rc == 0; + } catch (InterruptedException e) { + LOG.warn("Interrupted while trying to fence via ssh", e); + return false; + } catch (IOException e) { + LOG.warn("Unknown failure while trying to fence via ssh", e); + return false; + } + } + + /** + * Execute a command through the ssh session, pumping its + * stderr and stdout to our own logs. + */ + private int execCommand(Session session, String cmd) + throws JSchException, InterruptedException, IOException { + LOG.debug("Running cmd: " + cmd); + ChannelExec exec = null; + try { + exec = (ChannelExec)session.openChannel("exec"); + exec.setCommand(cmd); + exec.setInputStream(null); + exec.connect(); + + + // Pump stdout of the command to our WARN logs + StreamPumper outPumper = new StreamPumper(LOG, cmd + " via ssh", + exec.getInputStream(), StreamPumper.StreamType.STDOUT); + outPumper.start(); + + // Pump stderr of the command to our WARN logs + StreamPumper errPumper = new StreamPumper(LOG, cmd + " via ssh", + exec.getErrStream(), StreamPumper.StreamType.STDERR); + errPumper.start(); + + outPumper.join(); + errPumper.join(); + return exec.getExitStatus(); + } finally { + cleanup(exec); + } + } + + private static void cleanup(ChannelExec exec) { + if (exec != null) { + try { + exec.disconnect(); + } catch (Throwable t) { + LOG.warn("Couldn't disconnect ssh channel", t); + } + } + } + + private int getSshConnectTimeout() { + return getConf().getInt( + CONF_CONNECT_TIMEOUT_KEY, CONF_CONNECT_TIMEOUT_DEFAULT); + } + + private Collection getKeyFiles() { + return getConf().getTrimmedStringCollection(CONF_IDENTITIES_KEY); + } + + private int getDefaultNNPort() { + return NameNode.getAddress(getConf()).getPort(); + } + + /** + * Container for the parsed arg line for this fencing method. + */ + @VisibleForTesting + static class Args { + private static final Pattern USER_HOST_PORT_RE = Pattern.compile( + "(?:(.+?)@)?([^:]+?)(?:\\:(\\d+))?"); + + private static final int DEFAULT_SSH_PORT = 22; + + final String user; + final String host; + final int sshPort; + + final Integer targetPort; + + public Args(String args) throws BadFencingConfigurationException { + if (args == null) { + throw new BadFencingConfigurationException( + "Must specify args for ssh fencing configuration"); + } + String[] argList = args.split(",\\s*"); + if (argList.length > 2 || argList.length == 0) { + throw new BadFencingConfigurationException( + "Incorrect number of arguments: " + args); + } + + // Parse SSH destination. + String sshDestArg = argList[0]; + Matcher m = USER_HOST_PORT_RE.matcher(sshDestArg); + if (!m.matches()) { + throw new BadFencingConfigurationException( + "Unable to parse SSH destination: "+ sshDestArg); + } + if (m.group(1) != null) { + user = m.group(1); + } else { + user = System.getProperty("user.name"); + } + + host = m.group(2); + + if (m.group(3) != null) { + sshPort = parseConfiggedPort(m.group(3)); + } else { + sshPort = DEFAULT_SSH_PORT; + } + + // Parse target port. 
+ if (argList.length > 1) { + targetPort = parseConfiggedPort(argList[1]); + } else { + targetPort = null; + } + } + + private Integer parseConfiggedPort(String portStr) + throws BadFencingConfigurationException { + try { + return Integer.valueOf(portStr); + } catch (NumberFormatException nfe) { + throw new BadFencingConfigurationException( + "Port number '" + portStr + "' invalid"); + } + } + } + + /** + * Adapter from JSch's logger interface to our log4j + */ + private static class LogAdapter implements com.jcraft.jsch.Logger { + static final Log LOG = LogFactory.getLog( + SshFenceByTcpPort.class.getName() + ".jsch"); + + public boolean isEnabled(int level) { + switch (level) { + case com.jcraft.jsch.Logger.DEBUG: + return LOG.isDebugEnabled(); + case com.jcraft.jsch.Logger.INFO: + return LOG.isInfoEnabled(); + case com.jcraft.jsch.Logger.WARN: + return LOG.isWarnEnabled(); + case com.jcraft.jsch.Logger.ERROR: + return LOG.isErrorEnabled(); + case com.jcraft.jsch.Logger.FATAL: + return LOG.isFatalEnabled(); + default: + return false; + } + } + + public void log(int level, String message) { + switch (level) { + case com.jcraft.jsch.Logger.DEBUG: + LOG.debug(message); + break; + case com.jcraft.jsch.Logger.INFO: + LOG.info(message); + break; + case com.jcraft.jsch.Logger.WARN: + LOG.warn(message); + break; + case com.jcraft.jsch.Logger.ERROR: + LOG.error(message); + break; + case com.jcraft.jsch.Logger.FATAL: + LOG.fatal(message); + break; + } + } + } +} diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StreamPumper.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StreamPumper.java new file mode 100644 index 00000000000..c375da3bcd4 --- /dev/null +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StreamPumper.java @@ -0,0 +1,73 @@ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; + +import org.apache.commons.logging.Log; + +/** + * Class responsible for pumping the streams of the subprocess + * out to log4j. 
stderr is pumped to WARN level and stdout is + * pumped to INFO level + */ +class StreamPumper { + enum StreamType { + STDOUT, STDERR; + } + + private final Log log; + + final Thread thread; + final String logPrefix; + final StreamPumper.StreamType type; + private final InputStream stream; + private boolean started = false; + + StreamPumper(final Log log, final String logPrefix, + final InputStream stream, final StreamType type) { + this.log = log; + this.logPrefix = logPrefix; + this.stream = stream; + this.type = type; + + thread = new Thread(new Runnable() { + @Override + public void run() { + try { + pump(); + } catch (Throwable t) { + ShellCommandFencer.LOG.warn(logPrefix + + ": Unable to pump output from " + type, + t); + } + } + }, logPrefix + ": StreamPumper for " + type); + thread.setDaemon(true); + } + + void join() throws InterruptedException { + assert started; + thread.join(); + } + + void start() { + assert !started; + thread.start(); + started = true; + } + + protected void pump() throws IOException { + InputStreamReader inputStreamReader = new InputStreamReader(stream); + BufferedReader br = new BufferedReader(inputStreamReader); + String line = null; + while ((line = br.readLine()) != null) { + if (type == StreamType.STDOUT) { + log.info(logPrefix + ": " + line); + } else { + log.warn(logPrefix + ": " + line); + } + } + } +} \ No newline at end of file diff --git a/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestNodeFencer.java b/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestNodeFencer.java new file mode 100644 index 00000000000..5481ea23228 --- /dev/null +++ b/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestNodeFencer.java @@ -0,0 +1,142 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Before; +import org.junit.Test; + +import com.google.common.collect.Lists; + +public class TestNodeFencer { + + @Before + public void clearMockState() { + AlwaysSucceedFencer.fenceCalled = 0; + AlwaysSucceedFencer.callArgs.clear(); + AlwaysFailFencer.fenceCalled = 0; + AlwaysFailFencer.callArgs.clear(); + } + + @Test + public void testSingleFencer() throws BadFencingConfigurationException { + NodeFencer fencer = setupFencer( + AlwaysSucceedFencer.class.getName() + "(foo)"); + assertTrue(fencer.fence()); + assertEquals(1, AlwaysSucceedFencer.fenceCalled); + assertEquals("foo", AlwaysSucceedFencer.callArgs.get(0)); + } + + @Test + public void testMultipleFencers() throws BadFencingConfigurationException { + NodeFencer fencer = setupFencer( + AlwaysSucceedFencer.class.getName() + "(foo)\n" + + AlwaysSucceedFencer.class.getName() + "(bar)\n"); + assertTrue(fencer.fence()); + // Only one call, since the first fencer succeeds + assertEquals(1, AlwaysSucceedFencer.fenceCalled); + assertEquals("foo", AlwaysSucceedFencer.callArgs.get(0)); + } + + @Test + public void testWhitespaceAndCommentsInConfig() + throws BadFencingConfigurationException { + NodeFencer fencer = setupFencer( + "\n" + + " # the next one will always fail\n" + + " " + AlwaysFailFencer.class.getName() + "(foo) # <- fails\n" + + AlwaysSucceedFencer.class.getName() + "(bar) \n"); + assertTrue(fencer.fence()); + // One call to each, since top fencer fails + assertEquals(1, AlwaysFailFencer.fenceCalled); + assertEquals(1, AlwaysSucceedFencer.fenceCalled); + assertEquals("foo", AlwaysFailFencer.callArgs.get(0)); + assertEquals("bar", AlwaysSucceedFencer.callArgs.get(0)); + } + + @Test + public void testArglessFencer() throws BadFencingConfigurationException { + NodeFencer fencer = setupFencer( + AlwaysSucceedFencer.class.getName()); + assertTrue(fencer.fence()); + // One call to each, since top fencer fails + assertEquals(1, AlwaysSucceedFencer.fenceCalled); + assertEquals(null, AlwaysSucceedFencer.callArgs.get(0)); + } + + @Test + public void testShortName() throws BadFencingConfigurationException { + NodeFencer fencer = setupFencer("shell(true)"); + assertTrue(fencer.fence()); + } + + private NodeFencer setupFencer(String confStr) + throws BadFencingConfigurationException { + System.err.println("Testing configuration:\n" + confStr); + Configuration conf = new Configuration(); + conf.set(NodeFencer.CONF_METHODS_KEY, + confStr); + return new NodeFencer(conf); + } + + /** + * Mock fencing method that always returns true + */ + public static class AlwaysSucceedFencer extends Configured + implements FenceMethod { + static int fenceCalled = 0; + static List callArgs = Lists.newArrayList(); + + @Override + public boolean tryFence(String args) { + callArgs.add(args); + fenceCalled++; + return true; + } + + @Override + public void checkArgs(String args) { + } + } + + /** + * Identical mock to above, except always returns false + */ + public static class AlwaysFailFencer extends Configured + implements FenceMethod { + static int fenceCalled = 0; + static List callArgs = Lists.newArrayList(); + + @Override + public boolean tryFence(String args) { + callArgs.add(args); + fenceCalled++; + return false; + } + + @Override + public void checkArgs(String args) { + } + } +} diff 
--git a/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestShellCommandFencer.java b/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestShellCommandFencer.java new file mode 100644 index 00000000000..3b942560cb6 --- /dev/null +++ b/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestShellCommandFencer.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.mockito.Mockito; + +import static org.mockito.Mockito.spy; + +public class TestShellCommandFencer { + private ShellCommandFencer fencer = createFencer(); + + @BeforeClass + public static void setupLogSpy() { + ShellCommandFencer.LOG = spy(ShellCommandFencer.LOG); + } + + @Before + public void resetLogSpy() { + Mockito.reset(ShellCommandFencer.LOG); + } + + private static ShellCommandFencer createFencer() { + Configuration conf = new Configuration(); + conf.set("in.fencing.tests", "yessir"); + ShellCommandFencer fencer = new ShellCommandFencer(); + fencer.setConf(conf); + return fencer; + } + + /** + * Test that the exit code of the script determines + * whether the fencer succeeded or failed + */ + @Test + public void testBasicSuccessFailure() { + assertTrue(fencer.tryFence("exit 0")); + assertFalse(fencer.tryFence("exit 1")); + // bad path should also fail + assertFalse(fencer.tryFence("xxxxxxxxxxxx")); + } + + + @Test + public void testCheckArgs() { + try { + Configuration conf = new Configuration(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell"); + new NodeFencer(conf); + fail("Didn't throw when passing no args to shell"); + } catch (BadFencingConfigurationException confe) { + GenericTestUtils.assertExceptionContains( + "No argument passed", confe); + } + } + + /** + * Test that lines on stdout get passed as INFO + * level messages + */ + @Test + public void testStdoutLogging() { + assertTrue(fencer.tryFence("echo hello")); + Mockito.verify(ShellCommandFencer.LOG).info( + Mockito.endsWith("echo hello: hello")); + } + + /** + * Test that lines on stderr get passed as + * WARN level log messages + */ + @Test + public void testStderrLogging() { + assertTrue(fencer.tryFence("echo hello >&2")); + Mockito.verify(ShellCommandFencer.LOG).warn( + Mockito.endsWith("echo hello >&2: hello")); + } + + /** + * Verify that the Configuration gets passed as + * environment variables to the fencer. 
+ */ + @Test + public void testConfAsEnvironment() { + fencer.tryFence("echo $in_fencing_tests"); + Mockito.verify(ShellCommandFencer.LOG).info( + Mockito.endsWith("echo $in...ing_tests: yessir")); + } + + /** + * Test that we properly close off our input to the subprocess + * such that it knows there's no tty connected. This is important + * so that, if we use 'ssh', it won't try to prompt for a password + * and block forever, for example. + */ + @Test(timeout=10000) + public void testSubprocessInputIsClosed() { + assertFalse(fencer.tryFence("read")); + } + + @Test + public void testCommandAbbreviation() { + assertEquals("a...f", ShellCommandFencer.abbreviate("abcdef", 5)); + assertEquals("abcdef", ShellCommandFencer.abbreviate("abcdef", 6)); + assertEquals("abcdef", ShellCommandFencer.abbreviate("abcdef", 7)); + + assertEquals("a...g", ShellCommandFencer.abbreviate("abcdefg", 5)); + assertEquals("a...h", ShellCommandFencer.abbreviate("abcdefgh", 5)); + assertEquals("a...gh", ShellCommandFencer.abbreviate("abcdefgh", 6)); + assertEquals("ab...gh", ShellCommandFencer.abbreviate("abcdefgh", 7)); + } +} diff --git a/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestSshFenceByTcpPort.java b/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestSshFenceByTcpPort.java new file mode 100644 index 00000000000..e407d4c928c --- /dev/null +++ b/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestSshFenceByTcpPort.java @@ -0,0 +1,102 @@ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.server.namenode.ha.SshFenceByTcpPort.Args; +import org.apache.log4j.Level; +import org.junit.Assume; +import org.junit.Test; + +public class TestSshFenceByTcpPort { + + static { + ((Log4JLogger)SshFenceByTcpPort.LOG).getLogger().setLevel(Level.ALL); + } + + private String TEST_FENCING_ARG = System.getProperty( + "test.TestSshFenceByTcpPort.arg", "localhost"); + private final String TEST_KEYFILE = System.getProperty( + "test.TestSshFenceByTcpPort.key"); + + @Test(timeout=20000) + public void testFence() throws BadFencingConfigurationException { + Assume.assumeTrue(isConfigured()); + Configuration conf = new Configuration(); + conf.set(SshFenceByTcpPort.CONF_IDENTITIES_KEY, TEST_KEYFILE); + FileSystem.setDefaultUri(conf, "localhost:8020"); + SshFenceByTcpPort fence = new SshFenceByTcpPort(); + fence.setConf(conf); + assertTrue(fence.tryFence(TEST_FENCING_ARG)); + } + + /** + * Test connecting to a host which definitely won't respond. + * Make sure that it times out and returns false, but doesn't throw + * any exception + */ + @Test(timeout=20000) + public void testConnectTimeout() throws BadFencingConfigurationException { + Configuration conf = new Configuration(); + conf.setInt(SshFenceByTcpPort.CONF_CONNECT_TIMEOUT_KEY, 3000); + SshFenceByTcpPort fence = new SshFenceByTcpPort(); + fence.setConf(conf); + // Connect to Google's DNS server - not running ssh! 
+ assertFalse(fence.tryFence("8.8.8.8")); + } + + @Test + public void testArgsParsing() throws BadFencingConfigurationException { + Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234"); + assertEquals("foo", args.user); + assertEquals("bar.com", args.host); + assertEquals(1234, args.sshPort); + assertNull(args.targetPort); + + args = new SshFenceByTcpPort.Args("foo@bar.com"); + assertEquals("foo", args.user); + assertEquals("bar.com", args.host); + assertEquals(22, args.sshPort); + assertNull(args.targetPort); + + args = new SshFenceByTcpPort.Args("bar.com"); + assertEquals(System.getProperty("user.name"), args.user); + assertEquals("bar.com", args.host); + assertEquals(22, args.sshPort); + assertNull(args.targetPort); + + args = new SshFenceByTcpPort.Args("bar.com:1234, 12345"); + assertEquals(System.getProperty("user.name"), args.user); + assertEquals("bar.com", args.host); + assertEquals(1234, args.sshPort); + assertEquals(Integer.valueOf(12345), args.targetPort); + + args = new SshFenceByTcpPort.Args("bar, 8020"); + assertEquals(Integer.valueOf(8020), args.targetPort); + } + + @Test + public void testBadArgsParsing() throws BadFencingConfigurationException { + assertBadArgs(null); + assertBadArgs(""); + assertBadArgs("bar.com:"); + assertBadArgs("bar.com:x"); + assertBadArgs("foo.com, x"); + } + + private void assertBadArgs(String argStr) { + try { + new Args(argStr); + fail("Did not fail on bad args: " + argStr); + } catch (BadFencingConfigurationException e) { + // expected + } + } + + private boolean isConfigured() { + return (TEST_FENCING_ARG != null && !TEST_FENCING_ARG.isEmpty()) && + (TEST_KEYFILE != null && !TEST_KEYFILE.isEmpty()); + } +} From 3f85da7aa9f649c874d71f4d742a1e412bb98855 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 4 Aug 2011 17:26:12 +0000 Subject: [PATCH 004/177] Add Apache license header to StreamPumper.java missing in HDFS-2179 commit. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1153940 13f79535-47bb-0310-9956-ffa450edef68 --- .../hdfs/server/namenode/ha/StreamPumper.java | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StreamPumper.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StreamPumper.java index c375da3bcd4..94802214ef3 100644 --- a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StreamPumper.java +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StreamPumper.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.hadoop.hdfs.server.namenode.ha; import java.io.BufferedReader; @@ -70,4 +87,4 @@ class StreamPumper { } } } -} \ No newline at end of file +} From bed349526350d20443cc505977cd8eebe7fe03da Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 4 Aug 2011 18:58:44 +0000 Subject: [PATCH 005/177] Add another license header missing from HDFS-2179 commit. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1153969 13f79535-47bb-0310-9956-ffa450edef68 --- .../namenode/ha/TestSshFenceByTcpPort.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestSshFenceByTcpPort.java b/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestSshFenceByTcpPort.java index e407d4c928c..a249a45d5c9 100644 --- a/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestSshFenceByTcpPort.java +++ b/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/namenode/ha/TestSshFenceByTcpPort.java @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.junit.Assert.*; From 9145ab5887e7528c0428fabca4208a1786e408f4 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Fri, 5 Aug 2011 01:34:32 +0000 Subject: [PATCH 006/177] HADOOP-7455. HA: Introduce HA Service Protocol Interface. Contributed by Suresh Srinivas. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1154063 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common/CHANGES.txt | 2 + .../apache/hadoop/ha/HAServiceProtocol.java | 72 +++++++++++++++++++ .../hadoop/ha/HealthCheckFailedException.java | 55 ++++++++++++++ .../hadoop/ha/ServiceFailedException.java | 56 +++++++++++++++ 4 files changed, 185 insertions(+) create mode 100644 hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java create mode 100644 hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java create mode 100644 hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java diff --git a/hadoop-common/CHANGES.txt b/hadoop-common/CHANGES.txt index c499c750f09..5435ea6814c 100644 --- a/hadoop-common/CHANGES.txt +++ b/hadoop-common/CHANGES.txt @@ -63,6 +63,8 @@ Trunk (unreleased changes) HADOOP-6385. dfs should support -rmdir (was HDFS-639). (Daryn Sharp via mattf) + HADOOP-7455. HA: Introduce HA Service Protocol Interface. (suresh) + IMPROVEMENTS HADOOP-7042. 
Updates to test-patch.sh to include failed test names and diff --git a/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java b/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java new file mode 100644 index 00000000000..20f0d04bc21 --- /dev/null +++ b/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ha; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.ipc.VersionedProtocol; + +/** + * Protocol interface that provides High Availability related primitives to + * monitor and fail-over the service. + * + * This interface could be used by HA frameworks to manage the service. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface HAServiceProtocol extends VersionedProtocol { + /** + * Initial version of the protocol + */ + public static final long versionID = 1L; + + /** + * Monitor the health of service. This periodically called by the HA + * frameworks to monitor the health of the service. + * + * Service is expected to perform checks to ensure it is functional. + * If the service is not healthy due to failure or partial failure, + * it is expected to throw {@link HealthCheckFailedException}. + * The definition of service not healthy is left to the service. + * + * Note that when health check of an Active service fails, + * failover to standby may be done. + * + * @throws HealthCheckFailedException + * if the health check of a service fails. + */ + public void monitorHealth() throws HealthCheckFailedException; + + /** + * Request service to transition to active state. No operation, if the + * service is already in active state. + * + * @throws ServiceFailedException + * if transition from standby to active fails. + */ + public void transitionToActive() throws ServiceFailedException; + + /** + * Request service to transition to standby state. No operation, if the + * service is already in standby state. + * + * @throws ServiceFailedException + * if transition from active to standby fails. + */ + public void transitionToStandby() throws ServiceFailedException; +} diff --git a/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java b/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java new file mode 100644 index 00000000000..a73e4ef3c2f --- /dev/null +++ b/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ha; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Exception thrown to indicate that health check of a service + * failed. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class HealthCheckFailedException extends Exception { + private static final long serialVersionUID = 1L; + + /** + * Constructs exception with the specified detail message. + * @param message the detail message (which is saved for later retrieval + * by the {@link #getMessage()} method). + */ + public HealthCheckFailedException(final String message) { + super(message); + } + + /** + * Constructs a new exception with the specified detail message and + * cause. + * + * @param message the detail message (which is saved for later retrieval + * by the {@link #getMessage()} method). + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) + */ + public HealthCheckFailedException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java b/hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java new file mode 100644 index 00000000000..e0f8cfc837c --- /dev/null +++ b/hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ha; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + + +/** + * Exception thrown to indicate that an operation performed + * to modify the state of a service or application failed. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class ServiceFailedException extends Exception { + private static final long serialVersionUID = 1L; + + /** + * Constructs exception with the specified detail message. + * @param message the detail message (which is saved for later retrieval + * by the {@link #getMessage()} method). + */ + public ServiceFailedException(final String message) { + super(message); + } + + /** + * Constructs a new exception with the specified detail message and + * cause. + * + * @param message the detail message (which is saved for later retrieval + * by the {@link #getMessage()} method). + * @param cause the cause (which is saved for later retrieval by the + * {@link #getCause()} method). (A null value is + * permitted, and indicates that the cause is nonexistent or + * unknown.) + */ + public ServiceFailedException(String message, Throwable cause) { + super(message, cause); + } +} From f0ecbd0043dcc7a437a24f422d76f37a7f8c2622 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Fri, 5 Aug 2011 23:04:24 +0000 Subject: [PATCH 007/177] Create CHANGES.HDFS-1623.txt for HDFS-1623 branch changes and move one change from CHANGES.txt into it. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1154410 13f79535-47bb-0310-9956-ffa450edef68 --- dev-support/test-patch.sh | 4 ++-- hadoop-common/CHANGES.HDFS-1623.txt | 7 +++++++ hadoop-common/CHANGES.txt | 2 -- 3 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 hadoop-common/CHANGES.HDFS-1623.txt diff --git a/dev-support/test-patch.sh b/dev-support/test-patch.sh index 2e220200505..c5ba1c3e826 100755 --- a/dev-support/test-patch.sh +++ b/dev-support/test-patch.sh @@ -298,8 +298,8 @@ checkJavadocWarnings () { echo "" echo "" echo "$ANT_HOME/bin/ant -Dversion="${VERSION}" -DHadoopPatchProcess= clean javadoc | tee $PATCH_DIR/patchJavadocWarnings.txt" - (cd root; mvn install) - (cd doclet; mvn install) + (cd root; mvn install -DskipTests) + (cd doclet; mvn install -DskipTests) #$ANT_HOME/bin/ant -Dversion="${VERSION}" -DHadoopPatchProcess= clean javadoc | tee $PATCH_DIR/patchJavadocWarnings.txt $MAVEN_HOME/bin/mvn clean compile javadoc:javadoc -DskipTests -Pdocs -D${PROJECT_NAME}PatchProcess > $PATCH_DIR/patchJavadocWarnings.txt 2>&1 javadocWarnings=`$GREP '\[WARNING\]' $PATCH_DIR/patchJavadocWarnings.txt | awk '/Javadoc Warnings/,EOF' | $GREP -v 'Javadoc Warnings' | awk 'BEGIN {total = 0} {total += 1} END {print total}'` diff --git a/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common/CHANGES.HDFS-1623.txt new file mode 100644 index 00000000000..f2073093752 --- /dev/null +++ b/hadoop-common/CHANGES.HDFS-1623.txt @@ -0,0 +1,7 @@ +Changes for HDFS-1623 branch. + +This change list will be merged into the trunk CHANGES.txt when the HDFS-1623 +branch is merged. +------------------------------ + +HADOOP-7455. HA: Introduce HA Service Protocol Interface. (suresh) diff --git a/hadoop-common/CHANGES.txt b/hadoop-common/CHANGES.txt index 5435ea6814c..c499c750f09 100644 --- a/hadoop-common/CHANGES.txt +++ b/hadoop-common/CHANGES.txt @@ -63,8 +63,6 @@ Trunk (unreleased changes) HADOOP-6385. dfs should support -rmdir (was HDFS-639). (Daryn Sharp via mattf) - HADOOP-7455. HA: Introduce HA Service Protocol Interface. (suresh) - IMPROVEMENTS HADOOP-7042. Updates to test-patch.sh to include failed test names and From 73f2092b7351b9cc13d0e12bc4ade6c470934ee8 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 11 Aug 2011 00:44:05 +0000 Subject: [PATCH 008/177] HDFS-1974. 
Introduce active and standy states to the namenode. Contributed by Suresh Srinivas. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1156418 13f79535-47bb-0310-9956-ffa450edef68 --- hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 10 ++ .../hdfs/server/namenode/BackupNode.java | 40 ++--- .../hadoop/hdfs/server/namenode/NameNode.java | 139 ++++++++++++++++-- .../namenode/UnsupportedActionException.java | 5 +- .../hdfs/server/namenode/ha/ActiveState.java | 59 ++++++++ .../hdfs/server/namenode/ha/HAState.java | 104 +++++++++++++ .../hdfs/server/namenode/ha/StandbyState.java | 58 ++++++++ .../server/protocol/NamenodeProtocols.java | 4 +- 9 files changed, 377 insertions(+), 44 deletions(-) create mode 100644 hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java create mode 100644 hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java create mode 100644 hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java diff --git a/hdfs/CHANGES.HDFS-1623.txt b/hdfs/CHANGES.HDFS-1623.txt index eeb80edc5a6..91d58c04a27 100644 --- a/hdfs/CHANGES.HDFS-1623.txt +++ b/hdfs/CHANGES.HDFS-1623.txt @@ -5,3 +5,5 @@ branch is merged. ------------------------------ HDFS-2179. Add fencing framework and mechanisms for NameNode HA. (todd) + +HDFS-1974. Introduce active and standy states to the namenode. (suresh) diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/DFSUtil.java b/hdfs/src/java/org/apache/hadoop/hdfs/DFSUtil.java index 8ccba1f8ae6..5d32c7a05ea 100644 --- a/hdfs/src/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hdfs/src/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -696,4 +696,14 @@ public class DFSUtil { ClientDatanodeProtocol.versionID, addr, ticket, confWithNoIpcIdle, NetUtils.getDefaultSocketFactory(conf), socketTimeout); } + + /** + * Returns true if HA for namenode is configured. + * @param conf Configuration + * @return true if HA is configured in the configuration; else false. + */ + public static boolean isHAEnabled(Configuration conf) { + // TODO:HA configuration changes pending + return false; + } } diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index ad575b9e27a..25667b65a2c 100644 --- a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -188,34 +188,6 @@ public class BackupNode extends NameNode implements JournalProtocol { } } - ///////////////////////////////////////////////////// - // NamenodeProtocol implementation for backup node. - ///////////////////////////////////////////////////// - @Override // NamenodeProtocol - public BlocksWithLocations getBlocks(DatanodeInfo datanode, long size) - throws IOException { - throw new UnsupportedActionException("getBlocks"); - } - - // Only active name-node can register other nodes. 
- @Override // NamenodeProtocol - public NamenodeRegistration register(NamenodeRegistration registration - ) throws IOException { - throw new UnsupportedActionException("register"); - } - - @Override // NamenodeProtocol - public NamenodeCommand startCheckpoint(NamenodeRegistration registration) - throws IOException { - throw new UnsupportedActionException("startCheckpoint"); - } - - @Override // NamenodeProtocol - public void endCheckpoint(NamenodeRegistration registration, - CheckpointSignature sig) throws IOException { - throw new UnsupportedActionException("endCheckpoint"); - } - ///////////////////////////////////////////////////// // BackupNodeProtocol implementation for backup node. ///////////////////////////////////////////////////// @@ -224,6 +196,7 @@ public class BackupNode extends NameNode implements JournalProtocol { public void journal(NamenodeRegistration nnReg, long firstTxId, int numTxns, byte[] records) throws IOException { + checkOperation(OperationCategory.JOURNAL); verifyRequest(nnReg); if(!nnRpcAddress.equals(nnReg.getAddress())) throw new IOException("Journal request from unexpected name-node: " @@ -234,6 +207,7 @@ public class BackupNode extends NameNode implements JournalProtocol { @Override public void startLogSegment(NamenodeRegistration registration, long txid) throws IOException { + checkOperation(OperationCategory.JOURNAL); verifyRequest(registration); getBNImage().namenodeStartedLogSegment(txid); @@ -369,4 +343,14 @@ public class BackupNode extends NameNode implements JournalProtocol { String getClusterId() { return clusterId; } + + @Override // NameNode + protected void checkOperation(OperationCategory op) + throws UnsupportedActionException { + if (OperationCategory.JOURNAL != op) { + String msg = "Operation category " + op + + " is not supported at the BackupNode"; + throw new UnsupportedActionException(msg); + } + } } diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 3c2f40bb65d..4b227225d49 100644 --- a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -31,6 +31,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.HealthCheckFailedException; +import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; @@ -67,11 +69,15 @@ import org.apache.hadoop.hdfs.server.common.HdfsConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.HdfsConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport; +import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; +import org.apache.hadoop.hdfs.server.namenode.ha.HAState; +import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import 
org.apache.hadoop.hdfs.server.protocol.JournalProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; @@ -145,6 +151,20 @@ public class NameNode implements NamenodeProtocols, FSConstants { HdfsConfiguration.init(); } + /** + * Categories of operations supported by the namenode. + */ + public static enum OperationCategory { + /** Read operation that does not change the namespace state */ + READ, + /** Write operation that changes the namespace state */ + WRITE, + /** Operations related to checkpointing */ + CHECKPOINT, + /** Operations related to {@link JournalProtocol} */ + JOURNAL + } + /** * HDFS federation configuration can have two types of parameters: *

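(Editorial aside, not part of the patch series: the OperationCategory enum added just above, together with the checkOperation() calls threaded through the hunks that follow, forms a per-state guard — each RPC names its category and the current HA state decides whether to serve it. Below is a minimal, self-contained Java sketch of that pattern. It is illustrative only: the class OperationGuardSketch and its two static methods are inventions for the example; the real classes in this patch are NameNode.OperationCategory, HAState, ActiveState and StandbyState.)

// Illustrative sketch only, not code from this patch: the same guard pattern
// expressed with plain static methods instead of the HAState class hierarchy.
import java.io.IOException;

public class OperationGuardSketch {

  enum OperationCategory { READ, WRITE, CHECKPOINT, JOURNAL }

  /** Active behaviour: every category is served (as ActiveState does in the patch). */
  static void checkOperationActive(OperationCategory op) {
    // no-op: all categories allowed in the active state
  }

  /** Standby/default behaviour: refuse, using the same message format as HAState.checkOperation. */
  static void checkOperationStandby(OperationCategory op) throws IOException {
    throw new IOException(
        "Operation category " + op + " is not supported in state standby");
  }

  public static void main(String[] args) {
    checkOperationActive(OperationCategory.WRITE);     // served
    try {
      checkOperationStandby(OperationCategory.READ);   // refused
    } catch (IOException expected) {
      System.out.println(expected.getMessage());
    }
  }
}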
    @@ -204,9 +224,15 @@ public class NameNode implements NamenodeProtocols, FSConstants { public static final Log LOG = LogFactory.getLog(NameNode.class.getName()); public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange"); + public static final HAState ACTIVE_STATE = new ActiveState(); + public static final HAState STANDBY_STATE = new StandbyState(); protected FSNamesystem namesystem; protected NamenodeRole role; + private HAState state; + private final boolean haEnabled; + + /** RPC server. Package-protected for use in tests. */ Server server; /** RPC server for HDFS Services communication. @@ -402,6 +428,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { * @param conf the configuration */ protected void initialize(Configuration conf) throws IOException { + initializeGenericKeys(conf); InetSocketAddress socAddr = getRpcServerAddress(conf); UserGroupInformation.setConfiguration(conf); loginAsNameNodeUser(conf); @@ -450,10 +477,6 @@ public class NameNode implements NamenodeProtocols, FSConstants { } activate(conf); - LOG.info(getRole() + " up at: " + rpcAddress); - if (serviceRPCAddress != null) { - LOG.info(getRole() + " service server is up at: " + serviceRPCAddress); - } } /** @@ -503,6 +526,10 @@ public class NameNode implements NamenodeProtocols, FSConstants { LOG.warn("ServicePlugin " + p + " could not be started", t); } } + LOG.info(getRole() + " up at: " + rpcAddress); + if (serviceRPCAddress != null) { + LOG.info(getRole() + " service server is up at: " + serviceRPCAddress); + } } private void startTrashEmptier(Configuration conf) throws IOException { @@ -556,8 +583,9 @@ public class NameNode implements NamenodeProtocols, FSConstants { protected NameNode(Configuration conf, NamenodeRole role) throws IOException { this.role = role; + this.haEnabled = DFSUtil.isHAEnabled(conf); + this.state = !haEnabled ? 
ACTIVE_STATE : STANDBY_STATE; try { - initializeGenericKeys(conf); initialize(conf); } catch (IOException e) { this.stop(); @@ -638,6 +666,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { public void errorReport(NamenodeRegistration registration, int errorCode, String msg) throws IOException { + checkOperation(OperationCategory.WRITE); verifyRequest(registration); LOG.info("Error report from " + registration + ": " + msg); if(errorCode == FATAL) @@ -665,27 +694,28 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // NamenodeProtocol public void endCheckpoint(NamenodeRegistration registration, CheckpointSignature sig) throws IOException { - verifyRequest(registration); - if(!isRole(NamenodeRole.NAMENODE)) - throw new IOException("Only an ACTIVE node can invoke endCheckpoint."); + checkOperation(OperationCategory.CHECKPOINT); namesystem.endCheckpoint(registration, sig); } @Override // ClientProtocol public Token getDelegationToken(Text renewer) throws IOException { + checkOperation(OperationCategory.WRITE); return namesystem.getDelegationToken(renewer); } @Override // ClientProtocol public long renewDelegationToken(Token token) throws InvalidToken, IOException { + checkOperation(OperationCategory.WRITE); return namesystem.renewDelegationToken(token); } @Override // ClientProtocol public void cancelDelegationToken(Token token) throws IOException { + checkOperation(OperationCategory.WRITE); namesystem.cancelDelegationToken(token); } @@ -694,6 +724,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { long offset, long length) throws IOException { + checkOperation(OperationCategory.READ); metrics.incrGetBlockLocations(); return namesystem.getBlockLocations(getClientMachine(), src, offset, length); @@ -712,6 +743,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { boolean createParent, short replication, long blockSize) throws IOException { + checkOperation(OperationCategory.WRITE); String clientMachine = getClientMachine(); if (stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.create: file " @@ -732,6 +764,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public LocatedBlock append(String src, String clientName) throws IOException { + checkOperation(OperationCategory.WRITE); String clientMachine = getClientMachine(); if (stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.append: file " @@ -744,6 +777,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public boolean recoverLease(String src, String clientName) throws IOException { + checkOperation(OperationCategory.WRITE); String clientMachine = getClientMachine(); return namesystem.recoverLease(src, clientName, clientMachine); } @@ -751,18 +785,21 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public boolean setReplication(String src, short replication) throws IOException { + checkOperation(OperationCategory.WRITE); return namesystem.setReplication(src, replication); } @Override // ClientProtocol public void setPermission(String src, FsPermission permissions) throws IOException { + checkOperation(OperationCategory.WRITE); namesystem.setPermission(src, permissions); } @Override // ClientProtocol public void setOwner(String src, String username, String groupname) throws IOException { + checkOperation(OperationCategory.WRITE); namesystem.setOwner(src, username, groupname); } 
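(Editorial aside, not part of the patch series: the NameNode implementations of monitorHealth(), transitionToActive() and transitionToStandby() appear at the end of this NameNode.java diff, and HAServiceProtocol itself was added in PATCH 006 above. The following is a hedged sketch of how an external HA framework might drive those methods; the failoverIfUnhealthy() helper and the idea of holding two pre-built proxies are assumptions made for illustration, not anything this patch provides.)

// Illustrative sketch only: one plausible monitoring/failover step built on the
// HAServiceProtocol methods introduced in this series. Obtaining the proxies
// (e.g. via RPC) is left out on purpose; it is not defined by this patch.
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.ha.ServiceFailedException;

public class FailoverSketch {
  public static void failoverIfUnhealthy(HAServiceProtocol active,
                                         HAServiceProtocol standby) {
    try {
      active.monitorHealth();           // throws if the active service is unhealthy
    } catch (HealthCheckFailedException e) {
      try {
        active.transitionToStandby();   // demote the unhealthy active first
        standby.transitionToActive();   // then promote the standby
      } catch (ServiceFailedException f) {
        // A real controller would retry and/or fence the old active here.
        throw new RuntimeException("Failover failed", f);
      }
    }
  }
}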
@@ -772,6 +809,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { ExtendedBlock previous, DatanodeInfo[] excludedNodes) throws IOException { + checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*BLOCK* NameNode.addBlock: file " +src+" for "+clientName); @@ -795,6 +833,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { final DatanodeInfo[] existings, final DatanodeInfo[] excludes, final int numAdditionalNodes, final String clientName ) throws IOException { + checkOperation(OperationCategory.WRITE); if (LOG.isDebugEnabled()) { LOG.debug("getAdditionalDatanode: src=" + src + ", blk=" + blk @@ -820,8 +859,10 @@ public class NameNode implements NamenodeProtocols, FSConstants { /** * The client needs to give up on the block. */ + @Override // ClientProtocol public void abandonBlock(ExtendedBlock b, String src, String holder) throws IOException { + checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*BLOCK* NameNode.abandonBlock: " +b+" of file "+src); @@ -834,6 +875,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public boolean complete(String src, String clientName, ExtendedBlock last) throws IOException { + checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.complete: " + src + " for " + clientName); @@ -847,8 +889,9 @@ public class NameNode implements NamenodeProtocols, FSConstants { * mark the block as corrupt. In the future we might * check the blocks are actually corrupt. */ - @Override + @Override // ClientProtocol, DatanodeProtocol public void reportBadBlocks(LocatedBlock[] blocks) throws IOException { + checkOperation(OperationCategory.WRITE); stateChangeLog.info("*DIR* NameNode.reportBadBlocks"); for (int i = 0; i < blocks.length; i++) { ExtendedBlock blk = blocks[i].getBlock(); @@ -863,6 +906,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public LocatedBlock updateBlockForPipeline(ExtendedBlock block, String clientName) throws IOException { + checkOperation(OperationCategory.WRITE); return namesystem.updateBlockForPipeline(block, clientName); } @@ -871,6 +915,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { public void updatePipeline(String clientName, ExtendedBlock oldBlock, ExtendedBlock newBlock, DatanodeID[] newNodes) throws IOException { + checkOperation(OperationCategory.WRITE); namesystem.updatePipeline(clientName, oldBlock, newBlock, newNodes); } @@ -879,6 +924,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { long newgenerationstamp, long newlength, boolean closeFile, boolean deleteblock, DatanodeID[] newtargets) throws IOException { + checkOperation(OperationCategory.WRITE); namesystem.commitBlockSynchronization(block, newgenerationstamp, newlength, closeFile, deleteblock, newtargets); } @@ -886,12 +932,14 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public long getPreferredBlockSize(String filename) throws IOException { + checkOperation(OperationCategory.READ); return namesystem.getPreferredBlockSize(filename); } @Deprecated @Override // ClientProtocol public boolean rename(String src, String dst) throws IOException { + checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.rename: " + src + " to " + dst); } @@ -908,12 
+956,14 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public void concat(String trg, String[] src) throws IOException { + checkOperation(OperationCategory.WRITE); namesystem.concat(trg, src); } @Override // ClientProtocol public void rename(String src, String dst, Options.Rename... options) throws IOException { + checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.rename: " + src + " to " + dst); } @@ -928,11 +978,13 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Deprecated @Override // ClientProtocol public boolean delete(String src) throws IOException { + checkOperation(OperationCategory.WRITE); return delete(src, true); } @Override // ClientProtocol public boolean delete(String src, boolean recursive) throws IOException { + checkOperation(OperationCategory.WRITE); if (stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* Namenode.delete: src=" + src + ", recursive=" + recursive); @@ -957,6 +1009,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public boolean mkdirs(String src, FsPermission masked, boolean createParent) throws IOException { + checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.mkdirs: " + src); } @@ -971,13 +1024,14 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public void renewLease(String clientName) throws IOException { + checkOperation(OperationCategory.WRITE); namesystem.renewLease(clientName); } @Override // ClientProtocol public DirectoryListing getListing(String src, byte[] startAfter, - boolean needLocation) - throws IOException { + boolean needLocation) throws IOException { + checkOperation(OperationCategory.READ); DirectoryListing files = namesystem.getListing( src, startAfter, needLocation); if (files != null) { @@ -989,12 +1043,14 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public HdfsFileStatus getFileInfo(String src) throws IOException { + checkOperation(OperationCategory.READ); metrics.incrFileInfoOps(); return namesystem.getFileInfo(src, true); } @Override // ClientProtocol public HdfsFileStatus getFileLinkInfo(String src) throws IOException { + checkOperation(OperationCategory.READ); metrics.incrFileInfoOps(); return namesystem.getFileInfo(src, false); } @@ -1007,6 +1063,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public DatanodeInfo[] getDatanodeReport(DatanodeReportType type) throws IOException { + checkOperation(OperationCategory.READ); DatanodeInfo results[] = namesystem.datanodeReport(type); if (results == null ) { throw new IOException("Cannot find datanode report"); @@ -1016,6 +1073,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public boolean setSafeMode(SafeModeAction action) throws IOException { + // TODO:HA decide on OperationCategory for this return namesystem.setSafeMode(action); } @@ -1029,54 +1087,64 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public boolean restoreFailedStorage(String arg) throws AccessControlException { + // TODO:HA decide on OperationCategory for this return namesystem.restoreFailedStorage(arg); } @Override // ClientProtocol public void saveNamespace() throws IOException { + // TODO:HA decide on 
OperationCategory for this namesystem.saveNamespace(); } @Override // ClientProtocol public void refreshNodes() throws IOException { + // TODO:HA decide on OperationCategory for this namesystem.refreshNodes(new HdfsConfiguration()); } @Override // NamenodeProtocol public long getTransactionID() { + // TODO:HA decide on OperationCategory for this return namesystem.getTransactionID(); } @Override // NamenodeProtocol public CheckpointSignature rollEditLog() throws IOException { + // TODO:HA decide on OperationCategory for this return namesystem.rollEditLog(); } - @Override + @Override // NamenodeProtocol public RemoteEditLogManifest getEditLogManifest(long sinceTxId) throws IOException { + // TODO:HA decide on OperationCategory for this return namesystem.getEditLogManifest(sinceTxId); } @Override // ClientProtocol public void finalizeUpgrade() throws IOException { + // TODO:HA decide on OperationCategory for this namesystem.finalizeUpgrade(); } @Override // ClientProtocol public UpgradeStatusReport distributedUpgradeProgress(UpgradeAction action) throws IOException { + // TODO:HA decide on OperationCategory for this return namesystem.distributedUpgradeProgress(action); } @Override // ClientProtocol public void metaSave(String filename) throws IOException { + // TODO:HA decide on OperationCategory for this namesystem.metaSave(filename); } @Override // ClientProtocol public CorruptFileBlocks listCorruptFileBlocks(String path, String cookie) throws IOException { + checkOperation(OperationCategory.READ); Collection fbs = namesystem.listCorruptFileBlocks(path, cookie); @@ -1096,35 +1164,42 @@ public class NameNode implements NamenodeProtocols, FSConstants { * @param bandwidth Blanacer bandwidth in bytes per second for all datanodes. * @throws IOException */ + @Override // ClientProtocol public void setBalancerBandwidth(long bandwidth) throws IOException { + // TODO:HA decide on OperationCategory for this namesystem.setBalancerBandwidth(bandwidth); } @Override // ClientProtocol public ContentSummary getContentSummary(String path) throws IOException { + checkOperation(OperationCategory.READ); return namesystem.getContentSummary(path); } @Override // ClientProtocol public void setQuota(String path, long namespaceQuota, long diskspaceQuota) throws IOException { + checkOperation(OperationCategory.WRITE); namesystem.setQuota(path, namespaceQuota, diskspaceQuota); } @Override // ClientProtocol public void fsync(String src, String clientName) throws IOException { + checkOperation(OperationCategory.WRITE); namesystem.fsync(src, clientName); } @Override // ClientProtocol public void setTimes(String src, long mtime, long atime) throws IOException { + checkOperation(OperationCategory.WRITE); namesystem.setTimes(src, mtime, atime); } @Override // ClientProtocol public void createSymlink(String target, String link, FsPermission dirPerms, boolean createParent) throws IOException { + checkOperation(OperationCategory.WRITE); metrics.incrCreateSymlinkOps(); /* We enforce the MAX_PATH_LENGTH limit even though a symlink target * URI may refer to a non-HDFS file system. 
@@ -1144,6 +1219,7 @@ public class NameNode implements NamenodeProtocols, FSConstants { @Override // ClientProtocol public String getLinkTarget(String path) throws IOException { + checkOperation(OperationCategory.READ); metrics.incrGetLinkTargetOps(); /* Resolves the first symlink in the given path, returning a * new path consisting of the target of the symlink and any @@ -1591,4 +1667,43 @@ public class NameNode implements NamenodeProtocols, FSConstants { } return clientMachine; } + + @Override // HAServiceProtocol + public synchronized void monitorHealth() throws HealthCheckFailedException { + if (!haEnabled) { + return; // no-op, if HA is not eanbled + } + // TODO:HA implement health check + return; + } + + @Override // HAServiceProtocol + public synchronized void transitionToActive() throws ServiceFailedException { + if (!haEnabled) { + throw new ServiceFailedException("HA for namenode is not enabled"); + } + state.setState(this, ACTIVE_STATE); + } + + @Override // HAServiceProtocol + public synchronized void transitionToStandby() throws ServiceFailedException { + if (!haEnabled) { + throw new ServiceFailedException("HA for namenode is not enabled"); + } + state.setState(this, STANDBY_STATE); + } + + /** Check if an operation of given category is allowed */ + protected synchronized void checkOperation(final OperationCategory op) + throws UnsupportedActionException { + state.checkOperation(this, op); + } + + public synchronized HAState getState() { + return state; + } + + public synchronized void setState(final HAState s) { + state = s; + } } diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/UnsupportedActionException.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/UnsupportedActionException.java index 9ac17fc57cd..ca7e1d77872 100644 --- a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/UnsupportedActionException.java +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/UnsupportedActionException.java @@ -32,8 +32,7 @@ public class UnsupportedActionException extends IOException { /** for java.io.Serializable */ private static final long serialVersionUID = 1L; - public UnsupportedActionException(String action) { - super("Action " + action + "() is not supported."); + public UnsupportedActionException(String msg) { + super(msg); } - } diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java new file mode 100644 index 00000000000..1cf24f7f23a --- /dev/null +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; +import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException; + +/** + * Active state of the namenode. In this state, namenode provides the namenode + * service and handles operations of type {@link OperationCategory#WRITE} and + * {@link OperationCategory#READ}. + */ +public class ActiveState extends HAState { + public ActiveState() { + super("active"); + } + + @Override + public void checkOperation(NameNode nn, OperationCategory op) + throws UnsupportedActionException { + return; // Other than journal all operations are allowed in active state + } + + @Override + public void setState(NameNode nn, HAState s) throws ServiceFailedException { + if (s == NameNode.STANDBY_STATE) { + setStateInternal(nn, s); + return; + } + super.setState(nn, s); + } + + @Override + protected void enterState(NameNode nn) throws ServiceFailedException { + // TODO:HA + } + + @Override + protected void exitState(NameNode nn) throws ServiceFailedException { + // TODO:HA + } +} diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java new file mode 100644 index 00000000000..1828f9c83db --- /dev/null +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; +import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException; + +/** + * Namenode base state to implement state machine pattern. + */ +@InterfaceAudience.Private +abstract public class HAState { + protected final String name; + + /** + * Constructor + * @param name Name of the state. + */ + public HAState(String name) { + this.name = name; + } + + /** + * Internal method to transition the state of a given namenode to a new state. + * @param nn Namenode + * @param s new state + * @throws ServiceFailedException on failure to transition to new state. + */ + protected final void setStateInternal(final NameNode nn, final HAState s) + throws ServiceFailedException { + exitState(nn); + nn.setState(s); + s.enterState(nn); + } + + /** + * Method to be overridden by subclasses to perform steps necessary for + * entering a state. 
+ * @param nn Namenode + * @throws ServiceFailedException on failure to enter the state. + */ + protected abstract void enterState(final NameNode nn) + throws ServiceFailedException; + + /** + * Method to be overridden by subclasses to perform steps necessary for + * exiting a state. + * @param nn Namenode + * @throws ServiceFailedException on failure to enter the state. + */ + protected abstract void exitState(final NameNode nn) + throws ServiceFailedException; + + /** + * Move from the existing state to a new state + * @param nn Namenode + * @param s new state + * @throws ServiceFailedException on failure to transition to new state. + */ + public void setState(NameNode nn, HAState s) throws ServiceFailedException { + if (this == s) { // Aleady in the new state + return; + } + throw new ServiceFailedException("Transtion from state " + this + " to " + + s + " is not allowed."); + } + + /** + * Check if an operation is supported in a given state. + * @param nn Namenode + * @param op Type of the operation. + * @throws UnsupportedActionException if a given type of operation is not + * supported in this state. + */ + public void checkOperation(final NameNode nn, final OperationCategory op) + throws UnsupportedActionException { + String msg = "Operation category " + op + " is not supported in state " + + nn.getState(); + throw new UnsupportedActionException(msg); + } + + @Override + public String toString() { + return super.toString(); + } +} \ No newline at end of file diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java new file mode 100644 index 00000000000..b63866dc713 --- /dev/null +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.server.namenode.NameNode; + +/** + * Namenode standby state. In this state the namenode acts as warm standby and + * keeps the following updated: + *
+ * <ul>
+ * <li>Namespace by getting the edits.</li>
+ * <li>Block location information by receiving block reports and blocks
+ * received from the datanodes.</li>
+ * </ul>
    + * + * It does not handle read/write/checkpoint operations. + */ +public class StandbyState extends HAState { + public StandbyState() { + super("standby"); + } + + @Override + public void setState(NameNode nn, HAState s) throws ServiceFailedException { + if (s == NameNode.ACTIVE_STATE) { + setStateInternal(nn, s); + return; + } + super.setState(nn, s); + } + + @Override + protected void enterState(NameNode nn) throws ServiceFailedException { + // TODO:HA + } + + @Override + protected void exitState(NameNode nn) throws ServiceFailedException { + // TODO:HA + } +} + diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocols.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocols.java index 4de386f368d..e05b8fef283 100644 --- a/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocols.java +++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocols.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.protocol; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; import org.apache.hadoop.security.RefreshUserMappingsProtocol; @@ -32,5 +33,6 @@ public interface NamenodeProtocols NamenodeProtocol, RefreshAuthorizationPolicyProtocol, RefreshUserMappingsProtocol, - GetUserMappingsProtocol { + GetUserMappingsProtocol, + HAServiceProtocol { } From 95ecc91463a87587d83f5c3159a1ff6565dc73cb Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 11 Aug 2011 00:48:16 +0000 Subject: [PATCH 009/177] Ignore CHANGES.HDFS-1623.txt from release audit warnings git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1156421 13f79535-47bb-0310-9956-ffa450edef68 --- hdfs/build.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/hdfs/build.xml b/hdfs/build.xml index 3660120d50f..7dd317bf85e 100644 --- a/hdfs/build.xml +++ b/hdfs/build.xml @@ -1370,6 +1370,7 @@ + From 9992cae54120d2742922745c1f513c6bfbde67a9 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 29 Sep 2011 00:33:34 +0000 Subject: [PATCH 010/177] Reverting the previous trunk merge since it added other unintended changes in addition git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1177127 13f79535-47bb-0310-9956-ffa450edef68 --- dev-support/test-patch.sh | 4 +- .../hadoop-common/CHANGES.txt | 42 +- .../content/xdocs/HttpAuthentication.xml | 8 +- .../org/apache/hadoop/conf/Configuration.java | 4 - .../apache/hadoop/fs/LocalDirAllocator.java | 12 +- .../org/apache/hadoop/http/HttpServer.java | 29 +- .../apache/hadoop/ipc/ProtocolSignature.java | 2 +- .../java/org/apache/hadoop/net/NetUtils.java | 21 - .../AuthenticationFilterInitializer.java | 26 +- .../src/main/packages/hadoop-setup-conf.sh | 5 +- .../templates/conf/hadoop-metrics2.properties | 20 - .../packages/templates/conf/hdfs-site.xml | 20 - .../packages/templates/conf/log4j.properties | 213 ---- .../src/main/resources/core-default.xml | 4 +- .../apache/hadoop/conf/TestConfiguration.java | 16 +- .../hadoop/fs/TestLocalDirAllocator.java | 221 ++-- .../java/org/apache/hadoop/fs/TestTrash.java | 5 +- .../hadoop/http/HttpServerFunctionalTest.java | 27 - .../apache/hadoop/http/TestPathFilter.java | 145 --- .../org/apache/hadoop/net/TestNetUtils.java | 32 - .../security/TestAuthenticationFilter.java | 16 +- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 36 - 
.../org/apache/hadoop/hdfs/DFSConfigKeys.java | 3 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 86 +- .../hadoop/hdfs/protocol/DatanodeInfo.java | 20 - .../server/blockmanagement/BlockManager.java | 9 +- .../hadoop/hdfs/server/common/JspHelper.java | 23 +- .../hadoop/hdfs/server/datanode/DataNode.java | 8 +- .../web/resources/DatanodeWebHdfsMethods.java | 4 - .../hdfs/server/namenode/BackupNode.java | 15 +- .../hdfs/server/namenode/FSDirectory.java | 2 +- .../hdfs/server/namenode/FSNamesystem.java | 126 +- .../hadoop/hdfs/server/namenode/NameNode.java | 176 +-- .../server/namenode/NameNodeRpcServer.java | 17 +- .../server/namenode/SecondaryNameNode.java | 9 +- .../hdfs/server/namenode/ha/ActiveState.java | 26 +- .../hdfs/server/namenode/ha/HAState.java | 27 +- .../hdfs/server/namenode/ha/StandbyState.java | 24 +- .../web/resources/NamenodeWebHdfsMethods.java | 63 +- .../hdfs/tools/DelegationTokenFetcher.java | 45 +- .../org/apache/hadoop/hdfs/web/JsonUtil.java | 286 +---- .../apache/hadoop/hdfs/web/ParamFilter.java | 85 -- .../hadoop/hdfs/web/WebHdfsFileSystem.java | 61 +- .../hdfs/web/resources/AccessTimeParam.java | 2 +- .../hdfs/web/resources/BlockSizeParam.java | 2 +- .../hdfs/web/resources/BufferSizeParam.java | 2 +- .../hdfs/web/resources/DelegationParam.java | 3 +- .../hdfs/web/resources/DeleteOpParam.java | 3 + .../hdfs/web/resources/DstPathParam.java | 2 +- .../hadoop/hdfs/web/resources/GetOpParam.java | 6 +- .../hdfs/web/resources/HttpOpParam.java | 3 - .../web/resources/ModificationTimeParam.java | 2 +- .../hdfs/web/resources/OverwriteParam.java | 2 +- .../hdfs/web/resources/PostOpParam.java | 3 + .../hadoop/hdfs/web/resources/PutOpParam.java | 3 + .../web/resources/RenameOptionSetParam.java | 2 +- .../hdfs/web/resources/RenewerParam.java | 41 - .../src/main/resources/hdfs-default.xml | 20 + .../apache/hadoop/hdfs/TestDFSPermission.java | 50 +- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 204 ++-- .../org/apache/hadoop/hdfs/TestQuota.java | 20 +- .../hdfs/security/TestDelegationToken.java | 36 +- .../blockmanagement/TestHost2NodesMap.java | 26 +- .../TestMulitipleNNDataBlockScanner.java | 5 +- .../hdfs/server/datanode/TestReplicasMap.java | 17 +- .../hdfs/server/namenode/FSImageTestUtil.java | 7 - .../namenode/TestProcessCorruptBlocks.java | 290 ----- .../web/TestWebHdfsFileSystemContract.java | 44 - hadoop-mapreduce-project/CHANGES.txt | 102 -- .../hadoop-mapreduce-client-app/pom.xml | 43 - .../hadoop/mapred/MapReduceChildJVM.java | 218 ++-- .../org/apache/hadoop/mapred/YarnChild.java | 7 +- .../hadoop/mapreduce/v2/app/MRAppMaster.java | 62 +- .../v2/app/client/MRClientService.java | 2 +- .../mapreduce/v2/app/job/impl/JobImpl.java | 56 +- .../v2/app/job/impl/TaskAttemptImpl.java | 85 +- .../app/launcher/ContainerLauncherImpl.java | 35 +- .../v2/app/local/LocalContainerAllocator.java | 21 +- .../v2/app/recover/RecoveryService.java | 17 +- .../mapreduce/v2/app/rm/RMCommunicator.java | 54 +- .../v2/app/rm/RMContainerAllocator.java | 44 +- .../v2/app/rm/RMContainerRequestor.java | 13 +- .../v2/app/speculate/DefaultSpeculator.java | 3 +- .../mapreduce/v2/app/webapp/JobConfPage.java | 1 - .../mapreduce/v2/app/webapp/NavBlock.java | 6 +- .../mapreduce/v2/app/webapp/TaskPage.java | 2 +- .../apache/hadoop/mapreduce/v2/app/MRApp.java | 26 +- .../v2/app/TestRMContainerAllocator.java | 1069 +++++++---------- .../hadoop/mapreduce/TypeConverter.java | 34 +- .../hadoop/mapreduce/v2/MRConstants.java | 50 + .../mapreduce/v2/api/records/JobReport.java | 4 - 
.../api/records/impl/pb/JobReportPBImpl.java | 24 - .../v2/jobhistory/JobHistoryUtils.java | 2 +- .../hadoop/mapreduce/v2/util/MRApps.java | 115 +- .../mapreduce/v2/util/MRBuilderUtils.java | 32 +- .../src/main/proto/mr_protos.proto | 2 - .../hadoop/mapreduce/TestTypeConverter.java | 13 - .../hadoop/mapreduce/v2/util/TestMRApps.java | 4 +- .../org/apache/hadoop/mapred/BackupStore.java | 3 +- .../org/apache/hadoop/mapred/Constants.java} | 23 +- .../org/apache/hadoop/mapred/JobConf.java | 1 - .../org/apache/hadoop/mapred/JobStatus.java | 4 - .../org/apache/hadoop/mapred/MRConstants.java | 8 +- .../apache/hadoop/mapred/MROutputFiles.java | 23 +- .../org/apache/hadoop/mapred/TaskLog.java | 13 +- .../hadoop/mapred/pipes/Application.java | 5 +- .../org/apache/hadoop/mapreduce/Cluster.java | 41 +- .../java/org/apache/hadoop/mapreduce/Job.java | 3 +- .../apache/hadoop/mapreduce/JobStatus.java | 18 - .../apache/hadoop/mapreduce/MRJobConfig.java | 79 +- .../mapreduce/v2/hs/HistoryClientService.java | 2 +- .../hadoop/mapreduce/v2/hs/JobHistory.java | 28 +- .../hadoop-mapreduce-client-jobclient/pom.xml | 6 - .../org/apache/hadoop/mapred/ClientCache.java | 58 +- .../hadoop/mapred/ClientServiceDelegate.java | 52 +- .../apache/hadoop/mapred/NotRunningJob.java | 45 +- .../hadoop/mapred/ResourceMgrDelegate.java | 30 +- .../org/apache/hadoop/mapred/YARNRunner.java | 106 +- .../hadoop/mapred/TestClientRedirect.java | 25 +- .../mapred/TestClientServiceDelegate.java | 9 +- .../TestYarnClientProtocolProvider.java | 59 - .../mapreduce/v2/MiniMRYarnCluster.java | 8 +- .../hadoop/mapreduce/v2/TestMRJobs.java | 2 +- .../hadoop/mapreduce/v2/TestYARNRunner.java | 121 +- .../hadoop-mapreduce-client/pom.xml | 6 - hadoop-mapreduce-project/hadoop-yarn/README | 14 +- .../dev-support/findbugs-exclude.xml | 4 - .../hadoop/yarn/api/ApplicationConstants.java | 120 +- .../hadoop/yarn/api/ClientRMProtocol.java | 26 +- ...est.java => FinishApplicationRequest.java} | 4 +- ...se.java => FinishApplicationResponse.java} | 4 +- ...t.java => GetNewApplicationIdRequest.java} | 4 +- ....java => GetNewApplicationIdResponse.java} | 31 +- ...va => FinishApplicationRequestPBImpl.java} | 24 +- ...a => FinishApplicationResponsePBImpl.java} | 20 +- ... 
=> GetNewApplicationIdRequestPBImpl.java} | 21 +- .../pb/GetNewApplicationIdResponsePBImpl.java | 109 ++ .../pb/GetNewApplicationResponsePBImpl.java | 173 --- .../yarn/api/records/ApplicationReport.java | 12 - .../hadoop/yarn/api/records/Container.java | 13 - .../impl/pb/ApplicationReportPBImpl.java | 36 +- .../api/records/impl/pb/ContainerPBImpl.java | 39 - .../src/main/proto/client_RM_protocol.proto | 4 +- .../src/main/proto/yarn_protos.proto | 16 +- .../src/main/proto/yarn_service_protos.proto | 10 +- .../client/ClientRMProtocolPBClientImpl.java | 39 +- .../ClientRMProtocolPBServiceImpl.java | 40 +- .../hadoop/yarn/conf/YarnConfiguration.java | 8 +- .../yarn/ipc/ProtoOverHadoopRpcEngine.java | 6 - .../yarn/state/StateMachineFactory.java | 39 - .../apache/hadoop/yarn/util/BuilderUtils.java | 43 +- .../hadoop/yarn/util/ConverterUtils.java | 52 +- .../org/apache/hadoop/yarn/util/Graph.java | 210 ---- .../yarn/util/VisualizeStateMachine.java | 73 -- .../apache/hadoop/yarn/webapp/Dispatcher.java | 9 - .../org/apache/hadoop/yarn/webapp/WebApp.java | 25 +- .../apache/hadoop/yarn/webapp/WebApps.java | 11 +- .../src/main/resources/webapps/cluster/.keep | 0 .../main/resources/webapps/jobhistory/.keep | 0 .../main/resources/webapps/mapreduce/.keep | 0 .../src/main/resources/webapps/node/.keep | 0 .../java/org/apache/hadoop/yarn/MockApps.java | 10 - .../java/org/apache/hadoop/yarn/TestRPC.java | 32 - .../yarn/conf/TestYarnConfiguration.java | 54 - .../apache/hadoop/yarn/webapp/TestWebApp.java | 27 - .../hadoop-yarn-server-nodemanager/pom.xml | 33 - .../nodemanager/DefaultContainerExecutor.java | 6 +- .../nodemanager/LinuxContainerExecutor.java | 6 +- .../nodemanager/NodeStatusUpdaterImpl.java | 2 +- .../container/ContainerImpl.java | 62 +- .../launcher/ContainerLaunch.java | 103 +- .../nodemanager/webapp/ContainerLogsPage.java | 14 +- .../nodemanager/webapp/ContainerPage.java | 31 +- .../server/nodemanager/webapp/WebServer.java | 2 +- .../main/resources/container-log4j.properties | 8 +- .../TestContainerManagerWithLCE.java | 24 - .../TestContainerManager.java | 88 -- .../container/TestContainer.java | 24 +- .../pom.xml | 51 - .../server/resourcemanager/AdminService.java | 2 +- .../resourcemanager/ClientRMService.java | 30 +- .../server/resourcemanager/RMAppManager.java | 12 +- .../server/resourcemanager/RMContextImpl.java | 2 + .../resourcemanager/ResourceManager.java | 22 +- .../amlauncher/AMLauncher.java | 29 +- .../amlauncher/ApplicationMasterLauncher.java | 11 +- .../server/resourcemanager/rmapp/RMApp.java | 3 +- .../resourcemanager/rmapp/RMAppImpl.java | 41 +- .../rmapp/attempt/RMAppAttempt.java | 4 +- .../rmapp/attempt/RMAppAttemptImpl.java | 104 +- .../resourcemanager/rmnode/RMNodeImpl.java | 25 +- .../scheduler/QueueMetrics.java | 54 +- .../scheduler/SchedulerApp.java | 15 +- .../scheduler/capacity/LeafQueue.java | 22 +- .../event/NodeUpdateSchedulerEvent.java | 3 + .../scheduler/fifo/FifoScheduler.java | 7 +- .../resourcemanager/webapp/NodesPage.java | 4 +- .../resourcemanager/webapp/RmController.java | 2 +- .../yarn/server/resourcemanager/MockAM.java | 10 +- .../yarn/server/resourcemanager/MockRM.java | 21 +- .../server/resourcemanager/NodeManager.java | 5 +- .../TestApplicationMasterLauncher.java | 159 --- .../yarn/server/resourcemanager/TestRM.java | 16 - .../resourcemanager/TestResourceManager.java | 21 - .../TestAMRMRPCResponseId.java | 13 +- .../TestApplicationMasterLauncher.java | 193 +++ .../resourcetracker/InlineDispatcher.java | 65 +- .../resourcemanager/rmapp/MockRMApp.java | 1 
- .../rmapp/TestRMAppTransitions.java | 191 +-- .../attempt/TestRMAppAttemptTransitions.java | 403 ------- .../scheduler/capacity/TestLeafQueue.java | 33 +- .../scheduler/capacity/TestUtils.java | 4 +- .../resourcemanager/webapp/TestNodesPage.java | 55 - .../TestContainerTokenSecretManager.java | 9 +- .../src/site/apt/SingleCluster.apt.vm | 180 --- .../hadoop-yarn/src/site/apt/index.apt.vm | 39 - .../hadoop-yarn/src/site/site.xml | 34 - .../src/contrib/fairscheduler/ivy.xml | 4 +- .../apache/hadoop/streaming/StreamJob.java | 25 +- .../java/org/apache/hadoop/vaidya/vaidya.sh | 73 +- .../JobTrackerClientProtocolProvider.java | 16 +- .../mapred/LocalClientProtocolProvider.java | 9 +- .../apache/hadoop/mapred/MiniMRCluster.java | 1 - .../hadoop/mapred/QueueManagerTestUtils.java | 2 - .../hadoop/mapred/TestMiniMRClasspath.java | 2 - .../TestSpecialCharactersInOutputPath.java | 4 +- .../TestClientProtocolProviderImpls.java | 99 -- hadoop-project/pom.xml | 22 - 228 files changed, 2294 insertions(+), 7365 deletions(-) delete mode 100644 hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-metrics2.properties delete mode 100644 hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties delete mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestPathFilter.java delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/ParamFilter.java delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenewerParam.java delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestProcessCorruptBlocks.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/MRConstants.java rename hadoop-mapreduce-project/{hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFailedAttemptEvent.java => hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Constants.java} (64%) delete mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/{KillApplicationRequest.java => FinishApplicationRequest.java} (94%) rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/{KillApplicationResponse.java => FinishApplicationResponse.java} (91%) rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/{GetNewApplicationRequest.java => GetNewApplicationIdRequest.java} (91%) rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/{GetNewApplicationResponse.java => GetNewApplicationIdResponse.java} (66%) rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/{KillApplicationRequestPBImpl.java => FinishApplicationRequestPBImpl.java} (74%) rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/{KillApplicationResponsePBImpl.java => FinishApplicationResponsePBImpl.java} (62%) rename 
hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/{GetNewApplicationRequestPBImpl.java => GetNewApplicationIdRequestPBImpl.java} (68%) create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationIdResponsePBImpl.java delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationResponsePBImpl.java delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Graph.java delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/VisualizeStateMachine.java delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/cluster/.keep delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/jobhistory/.keep delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/mapreduce/.keep delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/node/.keep delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterLauncher.java delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/src/site/apt/SingleCluster.apt.vm delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/src/site/site.xml delete mode 100644 hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java diff --git a/dev-support/test-patch.sh b/dev-support/test-patch.sh index 6325e6a193b..76b020a01e3 100755 --- a/dev-support/test-patch.sh +++ b/dev-support/test-patch.sh @@ -598,8 +598,8 @@ runTests () { echo "" echo "" - echo "$MVN clean install test -Pnative -D${PROJECT_NAME}PatchProcess" - $MVN clean install test -Pnative -D${PROJECT_NAME}PatchProcess + echo "$MVN clean test -Pnative -D${PROJECT_NAME}PatchProcess" + $MVN clean test -Pnative -D${PROJECT_NAME}PatchProcess if [[ $? != 0 ]] ; then ### Find and format names of failed tests failed_tests=`find . -name 'TEST*.xml' | xargs $GREP -l -E "36000
    .

-   hadoop.http.authentication.signature.secret.file: The signature secret
-   file for signing the authentication tokens. If not set a random secret is generated at
+   hadoop.http.authentication.signature.secret: The signature secret for
+   signing the authentication tokens. If not set a random secret is generated at
    startup time. The same secret should be used for all nodes in the cluster, JobTracker,
-   NameNode, DataNode and TastTracker. The default value is
-   ${user.home}/hadoop-http-auth-signature-secret.
-   IMPORTANT: This file should be readable only by the Unix user running the daemons.
+   NameNode, DataNode and TastTracker. The default value is a hadoop value.

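For reference, a minimal sketch of reading the property described above through the Configuration API (assumes hadoop-common on the classpath; the class name and secret value here are illustrative, not part of the patch):

import org.apache.hadoop.conf.Configuration;

public class HttpAuthSecretDemo {
  public static void main(String[] args) {
    // With this change the secret is read straight from configuration
    // instead of from a secret file on disk.
    Configuration conf = new Configuration();
    conf.set("hadoop.http.authentication.signature.secret", "hadoop"); // example value
    String secret = conf.get("hadoop.http.authentication.signature.secret");
    // If the property is unset, a random secret is generated at startup,
    // so every node in the cluster must share the same explicit value.
    System.out.println("secret configured: " + (secret != null));
  }
}
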
    hadoop.http.authentication.cookie.domain: The domain to use for the HTTP diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index c310aa65e6c..4fb1d190663 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -1632,10 +1632,6 @@ public class Configuration implements Iterable>, try { doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); - - // Allow a broader set of control characters to appear in job confs. - // cf https://issues.apache.org/jira/browse/MAPREDUCE-109 - doc.setXmlVersion( "1.1" ); } catch (ParserConfigurationException pe) { throw new IOException(pe); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java index d1eae086f90..71c82357577 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java @@ -264,15 +264,9 @@ public class LocalDirAllocator { Path tmpDir = new Path(localDirs[i]); if(localFS.mkdirs(tmpDir)|| localFS.exists(tmpDir)) { try { - - File tmpFile = tmpDir.isAbsolute() - ? new File(localFS.makeQualified(tmpDir).toUri()) - : new File(localDirs[i]); - - DiskChecker.checkDir(tmpFile); - dirs.add(tmpFile.getPath()); - dfList.add(new DF(tmpFile, 30000)); - + DiskChecker.checkDir(new File(localDirs[i])); + dirs.add(localDirs[i]); + dfList.add(new DF(new File(localDirs[i]), 30000)); } catch (DiskErrorException de) { LOG.warn( localDirs[i] + " is not writable\n", de); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java index c526e102865..00cdf32746f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java @@ -20,7 +20,6 @@ package org.apache.hadoop.http; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintWriter; -import java.io.InterruptedIOException; import java.net.BindException; import java.net.InetSocketAddress; import java.net.URL; @@ -125,29 +124,6 @@ public class HttpServer implements FilterContainer { boolean findPort, Configuration conf, Connector connector) throws IOException { this(name, bindAddress, port, findPort, conf, null, connector); } - - /** - * Create a status server on the given port. Allows you to specify the - * path specifications that this server will be serving so that they will be - * added to the filters properly. - * - * @param name The name of the server - * @param bindAddress The address for this server - * @param port The port to use on the server - * @param findPort whether the server should start at the given port and - * increment by 1 until it finds a free port. - * @param conf Configuration - * @param pathSpecs Path specifications that this httpserver will be serving. - * These will be added to any filters. 
- */ - public HttpServer(String name, String bindAddress, int port, - boolean findPort, Configuration conf, String[] pathSpecs) throws IOException { - this(name, bindAddress, port, findPort, conf, null, null); - for (String path : pathSpecs) { - LOG.info("adding path spec: " + path); - addFilterPathMapping(path, webAppContext); - } - } /** * Create a status server on the given port. @@ -283,7 +259,7 @@ public class HttpServer implements FilterContainer { if (logDir != null) { Context logContext = new Context(parent, "/logs"); logContext.setResourceBase(logDir); - logContext.addServlet(AdminAuthorizedServlet.class, "/*"); + logContext.addServlet(AdminAuthorizedServlet.class, "/"); logContext.setDisplayName("logs"); setContextAttributes(logContext, conf); defaultContexts.put(logContext, true); @@ -684,9 +660,6 @@ public class HttpServer implements FilterContainer { } } catch (IOException e) { throw e; - } catch (InterruptedException e) { - throw (IOException) new InterruptedIOException( - "Interrupted while starting HTTP server").initCause(e); } catch (Exception e) { throw new IOException("Problem starting http server", e); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java index 04d08c5142d..a055a7fd46a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java @@ -199,7 +199,7 @@ public class ProtocolSignature implements Writable { * @param protocol protocol * @return the server's protocol signature */ - public static ProtocolSignature getProtocolSignature( + static ProtocolSignature getProtocolSignature( int clientMethodsHashCode, long serverVersion, Class protocol) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java index d94b69f1836..b22aaa009c1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java @@ -516,25 +516,4 @@ public class NetUtils { } catch (UnknownHostException ignore) { } return addr; } - - /** - * Given an InetAddress, checks to see if the address is a local address, by - * comparing the address with all the interfaces on the node. 
- * @param addr address to check if it is local node's address - * @return true if the address corresponds to the local node - */ - public static boolean isLocalAddress(InetAddress addr) { - // Check if the address is any local or loop back - boolean local = addr.isAnyLocalAddress() || addr.isLoopbackAddress(); - - // Check if the address is defined on any interface - if (!local) { - try { - local = NetworkInterface.getByInetAddress(addr) != null; - } catch (SocketException e) { - local = false; - } - } - return local; - } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java index 666632d5bfa..cd6ab7b3260 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java @@ -22,9 +22,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.http.FilterContainer; import org.apache.hadoop.http.FilterInitializer; -import java.io.FileReader; -import java.io.IOException; -import java.io.Reader; import java.util.HashMap; import java.util.Map; @@ -43,10 +40,8 @@ import java.util.Map; */ public class AuthenticationFilterInitializer extends FilterInitializer { - static final String PREFIX = "hadoop.http.authentication."; + private static final String PREFIX = "hadoop.http.authentication."; - static final String SIGNATURE_SECRET_FILE = AuthenticationFilter.SIGNATURE_SECRET + ".file"; - /** * Initializes Alfredo AuthenticationFilter. *

    @@ -72,25 +67,6 @@ public class AuthenticationFilterInitializer extends FilterInitializer { } } - String signatureSecretFile = filterConfig.get(SIGNATURE_SECRET_FILE); - if (signatureSecretFile == null) { - throw new RuntimeException("Undefined property: " + SIGNATURE_SECRET_FILE); - } - - try { - StringBuilder secret = new StringBuilder(); - Reader reader = new FileReader(signatureSecretFile); - int c = reader.read(); - while (c > -1) { - secret.append((char)c); - c = reader.read(); - } - reader.close(); - filterConfig.put(AuthenticationFilter.SIGNATURE_SECRET, secret.toString()); - } catch (IOException ex) { - throw new RuntimeException("Could not read HTTP signature secret file: " + signatureSecretFile); - } - container.addFilter("authentication", AuthenticationFilter.class.getName(), filterConfig); diff --git a/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh b/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh index 96a989fc39f..8e903cf308d 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh +++ b/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh @@ -475,10 +475,7 @@ else template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties if [ ! -e ${HADOOP_CONF_DIR}/capacity-scheduler.xml ]; then - template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml - fi - if [ ! -e ${HADOOP_CONF_DIR}/hadoop-metrics2.properties ]; then - cp ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties + template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml fi if [ ! -e ${HADOOP_CONF_DIR}/log4j.properties ]; then cp ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/log4j.properties ${HADOOP_CONF_DIR}/log4j.properties diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-metrics2.properties b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-metrics2.properties deleted file mode 100644 index 4a1019385c0..00000000000 --- a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-metrics2.properties +++ /dev/null @@ -1,20 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# syntax: [prefix].[source|sink|jmx].[instance].[options] -# See package.html for org.apache.hadoop.metrics2 for details - -*.period=60 - diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml index ffec60355ae..69e078380c1 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml +++ b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml @@ -144,26 +144,6 @@ - - dfs.web.authentication.kerberos.principal - HTTP/_HOST@${local.realm} - - The HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint. - - The HTTP Kerberos principal MUST start with 'HTTP/' per Kerberos - HTTP SPENGO specification. - - - - - dfs.web.authentication.kerberos.keytab - /etc/security/keytabs/nn.service.keytab - - The Kerberos keytab file with the credentials for the - HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint. - - - dfs.namenode.keytab.file /etc/security/keytabs/nn.service.keytab diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties deleted file mode 100644 index 16c6aa6890e..00000000000 --- a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright 2011 The Apache Software Foundation -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Define some default values that can be overridden by system properties -hadoop.root.logger=INFO,console -hadoop.log.dir=. -hadoop.log.file=hadoop.log - -# -# Job Summary Appender -# -# Use following logger to send summary to separate file defined by -# hadoop.mapreduce.jobsummary.log.file rolled daily: -# hadoop.mapreduce.jobsummary.logger=INFO,JSA -# -hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} -hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log - -# Define the root logger to the system property "hadoop.root.logger". 
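As an illustration of how the hadoop.root.logger default above feeds the log4j.rootLogger line that follows (assuming log4j 1.2 on the classpath; the class name and fallback values here are illustrative only):

import java.util.Properties;
import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;

public class RootLoggerDemo {
  public static void main(String[] args) {
    // The template's log4j.rootLogger=${hadoop.root.logger} line is normally
    // resolved from a system property, e.g. -Dhadoop.root.logger=DEBUG,console.
    String rootLogger = System.getProperty("hadoop.root.logger", "INFO,console");
    Properties p = new Properties();
    p.setProperty("log4j.rootLogger", rootLogger);
    p.setProperty("log4j.appender.console", "org.apache.log4j.ConsoleAppender");
    p.setProperty("log4j.appender.console.layout", "org.apache.log4j.PatternLayout");
    p.setProperty("log4j.appender.console.layout.ConversionPattern",
        "%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n");
    PropertyConfigurator.configure(p);
    Logger.getLogger(RootLoggerDemo.class).info("root logger set to " + rootLogger);
  }
}
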
-log4j.rootLogger=${hadoop.root.logger}, EventCounter - -# Logging Threshold -log4j.threshold=ALL - -# -# Daily Rolling File Appender -# - -log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender -log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} - -# Rollver at midnight -log4j.appender.DRFA.DatePattern=.yyyy-MM-dd - -# 30-day backup -#log4j.appender.DRFA.MaxBackupIndex=30 -log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout - -# Pattern format: Date LogLevel LoggerName LogMessage -log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n -# Debugging Pattern format -#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n - - -# -# console -# Add "console" to rootlogger above if you want to use this -# - -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.target=System.err -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n - -# -# TaskLog Appender -# - -#Default values -hadoop.tasklog.taskid=null -hadoop.tasklog.iscleanup=false -hadoop.tasklog.noKeepSplits=4 -hadoop.tasklog.totalLogFileSize=100 -hadoop.tasklog.purgeLogSplits=true -hadoop.tasklog.logsRetainHours=12 - -log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender -log4j.appender.TLA.taskId=${hadoop.tasklog.taskid} -log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup} -log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize} - -log4j.appender.TLA.layout=org.apache.log4j.PatternLayout -log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n - -# -#Security appender -# -hadoop.security.log.file=SecurityAuth.audit -log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender -log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} - -log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout -log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n -#new logger -# Define some default values that can be overridden by system properties -hadoop.security.logger=INFO,console -log4j.category.SecurityLogger=${hadoop.security.logger} - -# hdfs audit logging - -hdfs.audit.logger=INFO,console -log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} -log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false -log4j.appender.DRFAAUDIT=org.apache.log4j.DailyRollingFileAppender -log4j.appender.DRFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log -log4j.appender.DRFAAUDIT.layout=org.apache.log4j.PatternLayout -log4j.appender.DRFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n -log4j.appender.DRFAAUDIT.DatePattern=.yyyy-MM-dd - -# mapred audit logging - -mapred.audit.logger=INFO,console -log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger} -log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false -log4j.appender.MRAUDIT=org.apache.log4j.DailyRollingFileAppender -log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log -log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout -log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n -log4j.appender.MRAUDIT.DatePattern=.yyyy-MM-dd - -# -# Rolling File Appender -# - -#log4j.appender.RFA=org.apache.log4j.RollingFileAppender -#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} - -# Logfile size and and 30-day backups -#log4j.appender.RFA.MaxFileSize=1MB -#log4j.appender.RFA.MaxBackupIndex=30 - 
-#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout -#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n -#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n - -# -# FSNamesystem Audit logging -# All audit events are logged at INFO level -# -log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN - -# Custom Logging levels - -#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG -#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG -#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG - -# Jets3t library -log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR - -# -# Event Counter Appender -# Sends counts of logging messages at different severity levels to Hadoop Metrics. -# -log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter - -# -# Job Summary Appender -# -log4j.appender.JSA=org.apache.log4j.DailyRollingFileAppender -log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file} -log4j.appender.JSA.layout=org.apache.log4j.PatternLayout -log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n -log4j.appender.JSA.DatePattern=.yyyy-MM-dd -log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger} -log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false - -# -# MapReduce Audit Log Appender -# - -# Set the MapReduce audit log filename -#hadoop.mapreduce.audit.log.file=hadoop-mapreduce.audit.log - -# Appender for AuditLogger. -# Requires the following system properties to be set -# - hadoop.log.dir (Hadoop Log directory) -# - hadoop.mapreduce.audit.log.file (MapReduce audit log filename) - -#log4j.logger.org.apache.hadoop.mapred.AuditLogger=INFO,MRAUDIT -#log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false -#log4j.appender.MRAUDIT=org.apache.log4j.DailyRollingFileAppender -#log4j.appender.MRAUDIT.File=${hadoop.log.dir}/${hadoop.mapreduce.audit.log.file} -#log4j.appender.MRAUDIT.DatePattern=.yyyy-MM-dd -#log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout -#log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n - -# -# Yarn ResourceManager Application Summary Log -# -# Set the ResourceManager summary log filename -#yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log -# Set the ResourceManager summary log level and appender -#yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY - -# Appender for ResourceManager Application Summary Log - rolled daily -# Requires the following properties to be set -# - hadoop.log.dir (Hadoop Log directory) -# - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename) -# - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender) - -#log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger} -#log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false -#log4j.appender.RMSUMMARY=org.apache.log4j.DailyRollingFileAppender -#log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file} -#log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout -#log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n -#log4j.appender.RMSUMMARY.DatePattern=.yyyy-MM-dd diff --git 
a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index e34c2023738..d4b40305592 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -808,8 +808,8 @@ - hadoop.http.authentication.signature.secret.file - ${user.home}/hadoop-http-auth-signature-secret + hadoop.http.authentication.signature.secret + hadoop The signature secret for signing the authentication tokens. If not set a random secret is generated at startup time. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index 5842db199de..f9f14fb8480 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -58,7 +58,7 @@ public class TestConfiguration extends TestCase { } private void startConfig() throws IOException{ - out.write("\n"); + out.write("\n"); out.write("\n"); } @@ -221,18 +221,6 @@ public class TestConfiguration extends TestCase { assertEquals("this contains a comment", conf.get("my.comment")); } - public void testControlAInValue() throws IOException { - out = new BufferedWriter(new FileWriter(CONFIG)); - startConfig(); - appendProperty("my.char", ""); - appendProperty("my.string", "somestring"); - endConfig(); - Path fileResource = new Path(CONFIG); - conf.addResource(fileResource); - assertEquals("\u0001", conf.get("my.char")); - assertEquals("some\u0001string", conf.get("my.string")); - } - public void testTrim() throws IOException { out=new BufferedWriter(new FileWriter(CONFIG)); startConfig(); @@ -310,7 +298,7 @@ public class TestConfiguration extends TestCase { conf.writeXml(baos); String result = baos.toString(); assertTrue("Result has proper header", result.startsWith( - "")); + "")); assertTrue("Result has proper footer", result.endsWith("")); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java index e87f2d122bf..1e22a73bbac 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java @@ -20,48 +20,40 @@ package org.apache.hadoop.fs; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.Shell; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; -import org.junit.runners.Parameterized.Parameters; -import org.junit.Test; - -import static org.junit.Assert.*; +import junit.framework.TestCase; /** This test LocalDirAllocator works correctly; - * Every test case uses different buffer dirs to + * Every test case uses different buffer dirs to * enforce the AllocatorPerContext initialization. * This test does not run on Cygwin because under Cygwin * a directory can be created in a read-only directory * which breaks this test. 
- */ -@RunWith(Parameterized.class) -public class TestLocalDirAllocator { + */ +public class TestLocalDirAllocator extends TestCase { final static private Configuration conf = new Configuration(); final static private String BUFFER_DIR_ROOT = "build/test/temp"; - final static private String ABSOLUTE_DIR_ROOT; - final static private String QUALIFIED_DIR_ROOT; final static private Path BUFFER_PATH_ROOT = new Path(BUFFER_DIR_ROOT); final static private File BUFFER_ROOT = new File(BUFFER_DIR_ROOT); - final static private String CONTEXT = "fs.client.buffer.dir"; + final static private String BUFFER_DIR[] = new String[] { + BUFFER_DIR_ROOT+"/tmp0", BUFFER_DIR_ROOT+"/tmp1", BUFFER_DIR_ROOT+"/tmp2", + BUFFER_DIR_ROOT+"/tmp3", BUFFER_DIR_ROOT+"/tmp4", BUFFER_DIR_ROOT+"/tmp5", + BUFFER_DIR_ROOT+"/tmp6"}; + final static private Path BUFFER_PATH[] = new Path[] { + new Path(BUFFER_DIR[0]), new Path(BUFFER_DIR[1]), new Path(BUFFER_DIR[2]), + new Path(BUFFER_DIR[3]), new Path(BUFFER_DIR[4]), new Path(BUFFER_DIR[5]), + new Path(BUFFER_DIR[6])}; + final static private String CONTEXT = "dfs.client.buffer.dir"; final static private String FILENAME = "block"; - final static private LocalDirAllocator dirAllocator = + final static private LocalDirAllocator dirAllocator = new LocalDirAllocator(CONTEXT); static LocalFileSystem localFs; final static private boolean isWindows = System.getProperty("os.name").startsWith("Windows"); final static int SMALL_FILE_SIZE = 100; - final static private String RELATIVE = "/RELATIVE"; - final static private String ABSOLUTE = "/ABSOLUTE"; - final static private String QUALIFIED = "/QUALIFIED"; - final private String ROOT; - final private String PREFIX; - static { try { localFs = FileSystem.getLocal(conf); @@ -71,214 +63,170 @@ public class TestLocalDirAllocator { e.printStackTrace(); System.exit(-1); } - - ABSOLUTE_DIR_ROOT = new Path(localFs.getWorkingDirectory(), - BUFFER_DIR_ROOT).toUri().getPath(); - QUALIFIED_DIR_ROOT = new Path(localFs.getWorkingDirectory(), - BUFFER_DIR_ROOT).toUri().toString(); - } - - public TestLocalDirAllocator(String root, String prefix) { - ROOT = root; - PREFIX = prefix; - } - - @Parameters - public static Collection params() { - Object [][] data = new Object[][] { - { BUFFER_DIR_ROOT, RELATIVE }, - { ABSOLUTE_DIR_ROOT, ABSOLUTE }, - { QUALIFIED_DIR_ROOT, QUALIFIED } - }; - - return Arrays.asList(data); } private static void rmBufferDirs() throws IOException { assertTrue(!localFs.exists(BUFFER_PATH_ROOT) || localFs.delete(BUFFER_PATH_ROOT, true)); } - - private static void validateTempDirCreation(String dir) throws IOException { + + private void validateTempDirCreation(int i) throws IOException { File result = createTempFile(SMALL_FILE_SIZE); - assertTrue("Checking for " + dir + " in " + result + " - FAILED!", - result.getPath().startsWith(new Path(dir, FILENAME).toUri().getPath())); + assertTrue("Checking for " + BUFFER_DIR[i] + " in " + result + " - FAILED!", + result.getPath().startsWith(new File(BUFFER_DIR[i], FILENAME).getPath())); } - - private static File createTempFile() throws IOException { - return createTempFile(-1); + + private File createTempFile() throws IOException { + File result = dirAllocator.createTmpFileForWrite(FILENAME, -1, conf); + result.delete(); + return result; } - - private static File createTempFile(long size) throws IOException { + + private File createTempFile(long size) throws IOException { File result = dirAllocator.createTmpFileForWrite(FILENAME, size, conf); result.delete(); return result; } - - private String 
buildBufferDir(String dir, int i) { - return dir + PREFIX + i; - } - - /** Two buffer dirs. The first dir does not exist & is on a read-only disk; + + /** Two buffer dirs. The first dir does not exist & is on a read-only disk; * The second dir exists & is RW * @throws Exception */ - @Test public void test0() throws Exception { if (isWindows) return; - String dir0 = buildBufferDir(ROOT, 0); - String dir1 = buildBufferDir(ROOT, 1); try { - conf.set(CONTEXT, dir0 + "," + dir1); - assertTrue(localFs.mkdirs(new Path(dir1))); + conf.set(CONTEXT, BUFFER_DIR[0]+","+BUFFER_DIR[1]); + assertTrue(localFs.mkdirs(BUFFER_PATH[1])); BUFFER_ROOT.setReadOnly(); - validateTempDirCreation(dir1); - validateTempDirCreation(dir1); + validateTempDirCreation(1); + validateTempDirCreation(1); } finally { Shell.execCommand(new String[]{"chmod", "u+w", BUFFER_DIR_ROOT}); rmBufferDirs(); } } - - /** Two buffer dirs. The first dir exists & is on a read-only disk; + + /** Two buffer dirs. The first dir exists & is on a read-only disk; * The second dir exists & is RW * @throws Exception */ - @Test public void test1() throws Exception { if (isWindows) return; - String dir1 = buildBufferDir(ROOT, 1); - String dir2 = buildBufferDir(ROOT, 2); try { - conf.set(CONTEXT, dir1 + "," + dir2); - assertTrue(localFs.mkdirs(new Path(dir2))); + conf.set(CONTEXT, BUFFER_DIR[1]+","+BUFFER_DIR[2]); + assertTrue(localFs.mkdirs(BUFFER_PATH[2])); BUFFER_ROOT.setReadOnly(); - validateTempDirCreation(dir2); - validateTempDirCreation(dir2); + validateTempDirCreation(2); + validateTempDirCreation(2); } finally { Shell.execCommand(new String[]{"chmod", "u+w", BUFFER_DIR_ROOT}); rmBufferDirs(); } } /** Two buffer dirs. Both do not exist but on a RW disk. - * Check if tmp dirs are allocated in a round-robin + * Check if tmp dirs are allocated in a round-robin */ - @Test public void test2() throws Exception { if (isWindows) return; - String dir2 = buildBufferDir(ROOT, 2); - String dir3 = buildBufferDir(ROOT, 3); try { - conf.set(CONTEXT, dir2 + "," + dir3); + conf.set(CONTEXT, BUFFER_DIR[2]+","+BUFFER_DIR[3]); // create the first file, and then figure the round-robin sequence createTempFile(SMALL_FILE_SIZE); int firstDirIdx = (dirAllocator.getCurrentDirectoryIndex() == 0) ? 2 : 3; int secondDirIdx = (firstDirIdx == 2) ? 3 : 2; - + // check if tmp dirs are allocated in a round-robin manner - validateTempDirCreation(buildBufferDir(ROOT, firstDirIdx)); - validateTempDirCreation(buildBufferDir(ROOT, secondDirIdx)); - validateTempDirCreation(buildBufferDir(ROOT, firstDirIdx)); + validateTempDirCreation(firstDirIdx); + validateTempDirCreation(secondDirIdx); + validateTempDirCreation(firstDirIdx); } finally { rmBufferDirs(); } } - /** Two buffer dirs. Both exists and on a R/W disk. + /** Two buffer dirs. Both exists and on a R/W disk. * Later disk1 becomes read-only. 
* @throws Exception */ - @Test public void test3() throws Exception { if (isWindows) return; - String dir3 = buildBufferDir(ROOT, 3); - String dir4 = buildBufferDir(ROOT, 4); try { - conf.set(CONTEXT, dir3 + "," + dir4); - assertTrue(localFs.mkdirs(new Path(dir3))); - assertTrue(localFs.mkdirs(new Path(dir4))); - - // Create the first small file + conf.set(CONTEXT, BUFFER_DIR[3]+","+BUFFER_DIR[4]); + assertTrue(localFs.mkdirs(BUFFER_PATH[3])); + assertTrue(localFs.mkdirs(BUFFER_PATH[4])); + + // create the first file with size, and then figure the round-robin sequence createTempFile(SMALL_FILE_SIZE); - // Determine the round-robin sequence int nextDirIdx = (dirAllocator.getCurrentDirectoryIndex() == 0) ? 3 : 4; - validateTempDirCreation(buildBufferDir(ROOT, nextDirIdx)); + validateTempDirCreation(nextDirIdx); // change buffer directory 2 to be read only - new File(new Path(dir4).toUri().getPath()).setReadOnly(); - validateTempDirCreation(dir3); - validateTempDirCreation(dir3); + new File(BUFFER_DIR[4]).setReadOnly(); + validateTempDirCreation(3); + validateTempDirCreation(3); } finally { rmBufferDirs(); } } - + /** * Two buffer dirs, on read-write disk. - * + * * Try to create a whole bunch of files. * Verify that they do indeed all get created where they should. - * + * * Would ideally check statistical properties of distribution, but * we don't have the nerve to risk false-positives here. - * + * * @throws Exception */ static final int TRIALS = 100; - @Test public void test4() throws Exception { if (isWindows) return; - String dir5 = buildBufferDir(ROOT, 5); - String dir6 = buildBufferDir(ROOT, 6); try { - conf.set(CONTEXT, dir5 + "," + dir6); - assertTrue(localFs.mkdirs(new Path(dir5))); - assertTrue(localFs.mkdirs(new Path(dir6))); - + conf.set(CONTEXT, BUFFER_DIR[5]+","+BUFFER_DIR[6]); + assertTrue(localFs.mkdirs(BUFFER_PATH[5])); + assertTrue(localFs.mkdirs(BUFFER_PATH[6])); + int inDir5=0, inDir6=0; for(int i = 0; i < TRIALS; ++i) { File result = createTempFile(); - if(result.getPath().startsWith( - new Path(dir5, FILENAME).toUri().getPath())) { + if(result.getPath().startsWith(new File(BUFFER_DIR[5], FILENAME).getPath())) { inDir5++; - } else if(result.getPath().startsWith( - new Path(dir6, FILENAME).toUri().getPath())) { + } else if(result.getPath().startsWith(new File(BUFFER_DIR[6], FILENAME).getPath())) { inDir6++; } result.delete(); } - - assertTrue(inDir5 + inDir6 == TRIALS); - + + assertTrue( inDir5 + inDir6 == TRIALS); + } finally { rmBufferDirs(); } } - - /** Two buffer dirs. The first dir does not exist & is on a read-only disk; + + /** Two buffer dirs. The first dir does not exist & is on a read-only disk; * The second dir exists & is RW * getLocalPathForWrite with checkAccess set to false should create a parent * directory. With checkAccess true, the directory should not be created. 
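A minimal usage sketch of the allocator behaviour this test exercises (assumes hadoop-common on the classpath; the class name and directory values are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.fs.Path;

public class LocalDirAllocatorDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Example directories; the allocator round-robins across the
    // comma-separated list configured under its context key.
    conf.set("dfs.client.buffer.dir", "/tmp/alloc0,/tmp/alloc1");
    LocalDirAllocator allocator = new LocalDirAllocator("dfs.client.buffer.dir");

    // Default overload: the chosen parent directory is created if needed.
    Path p1 = allocator.getLocalPathForWrite("p1/x", 100, conf);
    // Boolean overload with false: a path is picked but not pre-created.
    Path p2 = allocator.getLocalPathForWrite("p2/x", 100, conf, false);

    System.out.println(p1 + " , " + p2);
  }
}
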
* @throws Exception */ - @Test public void testLocalPathForWriteDirCreation() throws IOException { - String dir0 = buildBufferDir(ROOT, 0); - String dir1 = buildBufferDir(ROOT, 1); try { - conf.set(CONTEXT, dir0 + "," + dir1); - assertTrue(localFs.mkdirs(new Path(dir1))); + conf.set(CONTEXT, BUFFER_DIR[0] + "," + BUFFER_DIR[1]); + assertTrue(localFs.mkdirs(BUFFER_PATH[1])); BUFFER_ROOT.setReadOnly(); Path p1 = - dirAllocator.getLocalPathForWrite("p1/x", SMALL_FILE_SIZE, conf); + dirAllocator.getLocalPathForWrite("p1/x", SMALL_FILE_SIZE, conf); assertTrue(localFs.getFileStatus(p1.getParent()).isDirectory()); Path p2 = - dirAllocator.getLocalPathForWrite("p2/x", SMALL_FILE_SIZE, conf, - false); + dirAllocator.getLocalPathForWrite("p2/x", SMALL_FILE_SIZE, conf, + false); try { localFs.getFileStatus(p2.getParent()); } catch (Exception e) { @@ -289,26 +237,5 @@ public class TestLocalDirAllocator { rmBufferDirs(); } } - - /** Test no side effect files are left over. After creating a temp - * temp file, remove both the temp file and its parent. Verify that - * no files or directories are left over as can happen when File objects - * are mistakenly created from fully qualified path strings. - * @throws IOException - */ - @Test - public void testNoSideEffects() throws IOException { - if (isWindows) return; - String dir = buildBufferDir(ROOT, 0); - try { - conf.set(CONTEXT, dir); - File result = dirAllocator.createTmpFileForWrite(FILENAME, -1, conf); - assertTrue(result.delete()); - assertTrue(result.getParentFile().delete()); - assertFalse(new File(dir).exists()); - } finally { - Shell.execCommand(new String[]{"chmod", "u+w", BUFFER_DIR_ROOT}); - rmBufferDirs(); - } - } + } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java index 782e4e41674..3d739a07d8b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java @@ -486,9 +486,6 @@ public class TestTrash extends TestCase { conf.set(FS_TRASH_INTERVAL_KEY, "0.2"); // 12 seconds conf.setClass("fs.file.impl", TestLFS.class, FileSystem.class); conf.set(FS_TRASH_CHECKPOINT_INTERVAL_KEY, "0.1"); // 6 seconds - FileSystem fs = FileSystem.getLocal(conf); - conf.set("fs.default.name", fs.getUri().toString()); - Trash trash = new Trash(conf); // Start Emptier in background @@ -496,6 +493,8 @@ public class TestTrash extends TestCase { Thread emptierThread = new Thread(emptier); emptierThread.start(); + FileSystem fs = FileSystem.getLocal(conf); + conf.set("fs.defaultFS", fs.getUri().toString()); FsShell shell = new FsShell(); shell.setConf(conf); shell.init(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/HttpServerFunctionalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/HttpServerFunctionalTest.java index aff74b573b0..07688137d5e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/HttpServerFunctionalTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/HttpServerFunctionalTest.java @@ -70,21 +70,6 @@ public class HttpServerFunctionalTest extends Assert { return createServer(TEST, conf); } - /** - * Create but do not start the test webapp server. The test webapp dir is - * prepared/checked in advance. 
- * @param conf the server configuration to use - * @return the server instance - * - * @throws IOException if a problem occurs - * @throws AssertionError if a condition was not met - */ - public static HttpServer createTestServer(Configuration conf, - String[] pathSpecs) throws IOException { - prepareTestWebapp(); - return createServer(TEST, conf, pathSpecs); - } - /** * Prepare the test webapp by creating the directory from the test properties * fail if the directory cannot be created. @@ -119,18 +104,6 @@ public class HttpServerFunctionalTest extends Assert { throws IOException { return new HttpServer(webapp, "0.0.0.0", 0, true, conf); } - /** - * Create an HttpServer instance for the given webapp - * @param webapp the webapp to work with - * @param conf the configuration to use for the server - * @param pathSpecs the paths specifications the server will service - * @return the server - * @throws IOException if it could not be created - */ - public static HttpServer createServer(String webapp, Configuration conf, - String[] pathSpecs) throws IOException { - return new HttpServer(webapp, "0.0.0.0", 0, true, conf, pathSpecs); - } /** * Create and start a server with the test webapp diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestPathFilter.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestPathFilter.java deleted file mode 100644 index 73aebea486f..00000000000 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestPathFilter.java +++ /dev/null @@ -1,145 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.http; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.URL; -import java.net.URLConnection; -import java.util.Set; -import java.util.TreeSet; - -import javax.servlet.Filter; -import javax.servlet.FilterChain; -import javax.servlet.FilterConfig; -import javax.servlet.ServletException; -import javax.servlet.ServletRequest; -import javax.servlet.ServletResponse; -import javax.servlet.http.HttpServletRequest; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.junit.Test; - -public class TestPathFilter extends HttpServerFunctionalTest { - static final Log LOG = LogFactory.getLog(HttpServer.class); - static final Set RECORDS = new TreeSet(); - - /** A very simple filter that records accessed uri's */ - static public class RecordingFilter implements Filter { - private FilterConfig filterConfig = null; - - public void init(FilterConfig filterConfig) { - this.filterConfig = filterConfig; - } - - public void destroy() { - this.filterConfig = null; - } - - public void doFilter(ServletRequest request, ServletResponse response, - FilterChain chain) throws IOException, ServletException { - if (filterConfig == null) - return; - - String uri = ((HttpServletRequest)request).getRequestURI(); - LOG.info("filtering " + uri); - RECORDS.add(uri); - chain.doFilter(request, response); - } - - /** Configuration for RecordingFilter */ - static public class Initializer extends FilterInitializer { - public Initializer() {} - - public void initFilter(FilterContainer container, Configuration conf) { - container.addFilter("recording", RecordingFilter.class.getName(), null); - } - } - } - - - /** access a url, ignoring some IOException such as the page does not exist */ - static void access(String urlstring) throws IOException { - LOG.warn("access " + urlstring); - URL url = new URL(urlstring); - - URLConnection connection = url.openConnection(); - connection.connect(); - - try { - BufferedReader in = new BufferedReader(new InputStreamReader( - connection.getInputStream())); - try { - for(; in.readLine() != null; ); - } finally { - in.close(); - } - } catch(IOException ioe) { - LOG.warn("urlstring=" + urlstring, ioe); - } - } - - @Test - public void testPathSpecFilters() throws Exception { - Configuration conf = new Configuration(); - - //start a http server with CountingFilter - conf.set(HttpServer.FILTER_INITIALIZER_PROPERTY, - RecordingFilter.Initializer.class.getName()); - String[] pathSpecs = { "/path", "/path/*" }; - HttpServer http = createTestServer(conf, pathSpecs); - http.start(); - - final String baseURL = "/path"; - final String baseSlashURL = "/path/"; - final String addedURL = "/path/nodes"; - final String addedSlashURL = "/path/nodes/"; - final String longURL = "/path/nodes/foo/job"; - final String rootURL = "/"; - final String allURL = "/*"; - - final String[] filteredUrls = {baseURL, baseSlashURL, addedURL, - addedSlashURL, longURL}; - final String[] notFilteredUrls = {rootURL, allURL}; - - // access the urls and verify our paths specs got added to the - // filters - final String prefix = "http://localhost:" + http.getPort(); - try { - for(int i = 0; i < filteredUrls.length; i++) { - access(prefix + filteredUrls[i]); - } - for(int i = 0; i < notFilteredUrls.length; i++) { - access(prefix + notFilteredUrls[i]); - } - } finally { - http.stop(); - } - - LOG.info("RECORDS = " + RECORDS); - - //verify records - for(int 
i = 0; i < filteredUrls.length; i++) { - assertTrue(RECORDS.remove(filteredUrls[i])); - } - assertTrue(RECORDS.isEmpty()); - } -} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java index 7cc6f4d5213..f49d4c886ec 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java @@ -18,17 +18,13 @@ package org.apache.hadoop.net; import org.junit.Test; - import static org.junit.Assert.*; -import java.net.InetAddress; -import java.net.NetworkInterface; import java.net.Socket; import java.net.ConnectException; import java.net.SocketException; import java.net.InetSocketAddress; import java.net.UnknownHostException; -import java.util.Enumeration; import org.apache.hadoop.conf.Configuration; @@ -92,32 +88,4 @@ public class TestNetUtils { fail("NetUtils.verifyHostnames threw unexpected UnknownHostException"); } } - - /** - * Test for {@link NetUtils#isLocalAddress(java.net.InetAddress)} - */ - @Test - public void testIsLocalAddress() throws Exception { - // Test - local host is local address - assertTrue(NetUtils.isLocalAddress(InetAddress.getLocalHost())); - - // Test - all addresses bound network interface is local address - Enumeration interfaces = NetworkInterface - .getNetworkInterfaces(); - if (interfaces != null) { // Iterate through all network interfaces - while (interfaces.hasMoreElements()) { - NetworkInterface i = interfaces.nextElement(); - Enumeration addrs = i.getInetAddresses(); - if (addrs == null) { - continue; - } - // Iterate through all the addresses of a network interface - while (addrs.hasMoreElements()) { - InetAddress addr = addrs.nextElement(); - assertTrue(NetUtils.isLocalAddress(addr)); - } - } - } - assertFalse(NetUtils.isLocalAddress(InetAddress.getByName("8.8.8.8"))); - } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestAuthenticationFilter.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestAuthenticationFilter.java index 2d699ddcf1f..7a21e4c6b87 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestAuthenticationFilter.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestAuthenticationFilter.java @@ -25,28 +25,14 @@ import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; -import java.io.File; -import java.io.FileWriter; -import java.io.Writer; import java.util.Map; public class TestAuthenticationFilter extends TestCase { @SuppressWarnings("unchecked") - public void testConfiguration() throws Exception { + public void testConfiguration() { Configuration conf = new Configuration(); conf.set("hadoop.http.authentication.foo", "bar"); - - File testDir = new File(System.getProperty("test.build.data", - "target/test-dir")); - testDir.mkdirs(); - File secretFile = new File(testDir, "http-secret.txt"); - Writer writer = new FileWriter(new File(testDir, "http-secret.txt")); - writer.write("hadoop"); - writer.close(); - conf.set(AuthenticationFilterInitializer.PREFIX + - AuthenticationFilterInitializer.SIGNATURE_SECRET_FILE, - secretFile.getAbsolutePath()); FilterContainer container = Mockito.mock(FilterContainer.class); Mockito.doAnswer( diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 43c360fcb0c..459d2325d20 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -16,9 +16,6 @@ Trunk (unreleased changes) HDFS-2318. Provide authentication to webhdfs using SPNEGO and delegation tokens. (szetszwo) - HDFS-2340. Support getFileBlockLocations and getDelegationToken in webhdfs. - (szetszwo) - IMPROVEMENTS HADOOP-7524 Change RPC to allow multiple protocols including multuple versions of the same protocol (sanjay Radia) @@ -38,18 +35,6 @@ Trunk (unreleased changes) not use ArrayWritable for writing non-array items. (Uma Maheswara Rao G via szetszwo) - HDFS-2351 Change Namenode and Datanode to register each of their protocols - seperately. (Sanjay Radia) - - HDFS-2356. Support case insensitive query parameter names in webhdfs. - (szetszwo) - - HDFS-2368. Move SPNEGO conf properties from hdfs-default.xml to - hdfs-site.xml. (szetszwo) - - HDFS-2355. Federation: enable using the same configuration file across - all the nodes in the cluster. (suresh) - BUG FIXES HDFS-2287. TestParallelRead has a small off-by-one bug. (todd) @@ -72,17 +57,6 @@ Trunk (unreleased changes) IOExceptions of stream closures can mask root exceptions. (Uma Maheswara Rao G via szetszwo) - HDFS-46. Change default namespace quota of root directory from - Integer.MAX_VALUE to Long.MAX_VALUE. (Uma Maheswara Rao G via szetszwo) - - HDFS-2366. Initialize WebHdfsFileSystem.ugi in object construction. - (szetszwo) - - HDFS-2373. Commands using webhdfs and hftp print unnecessary debug - info on the console with security enabled. (Arpit Gupta via suresh) - - HDFS-2361. hftp is broken, fixed username checks in JspHelper. (jitendra) - Release 0.23.0 - Unreleased INCOMPATIBLE CHANGES @@ -765,12 +739,6 @@ Release 0.23.0 - Unreleased HDFS-1217. Change some NameNode methods from public to package private. (Laxman via szetszwo) - HDFS-2332. Add test for HADOOP-7629 (using an immutable FsPermission - object as an RPC parameter fails). (todd) - - HDFS-2363. Move datanodes size printing from FSNamesystem.metasave(..) - to BlockManager. (Uma Maheswara Rao G via szetszwo) - OPTIMIZATIONS HDFS-1458. Improve checkpoint performance by avoiding unnecessary image @@ -1639,11 +1607,7 @@ Release 0.22.0 - Unreleased HDFS-2232. Generalize regular expressions in TestHDFSCLI. (Plamen Jeliazkov via shv) - HDFS-2290. Block with corrupt replica is not getting replicated. - (Benoy Antony via shv) - Release 0.21.1 - Unreleased - HDFS-1466. TestFcHdfsSymlink relies on /tmp/test not existing. (eli) HDFS-874. 
TestHDFSFileContextMainOperations fails on weirdly diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index c10d185acfc..f92064239c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -244,6 +244,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_DEFAULT_MAX_CORRUPT_FILES_RETURNED_KEY = "dfs.corruptfilesreturned.max"; public static final int DFS_DEFAULT_MAX_CORRUPT_FILES_RETURNED = 500; + // HA related configuration + public static final String DFS_HA_NAMENODE_IDS_KEY = "dfs.ha.namenode.ids"; + public static final String DFS_HA_NAMENODE_IDS_DEFAULT = ""; // property for fsimage compression public static final String DFS_IMAGE_COMPRESS_KEY = "dfs.image.compress"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 34ea9e5697c..fea81f3d04e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -38,7 +38,6 @@ import java.util.Random; import java.util.StringTokenizer; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; @@ -64,6 +63,7 @@ import org.apache.hadoop.security.UserGroupInformation; @InterfaceAudience.Private public class DFSUtil { + private DFSUtil() { /* Hidden constructor */ } private static final ThreadLocal RANDOM = new ThreadLocal() { @Override protected Random initialValue() { @@ -577,6 +577,17 @@ public class DFSUtil { } } + /** + * Returns the configured nameservice Id + * + * @param conf + * Configuration object to lookup the nameserviceId + * @return nameserviceId string from conf + */ + public static String getNameServiceId(Configuration conf) { + return conf.get(DFS_FEDERATION_NAMESERVICE_ID); + } + /** Return used as percentage of capacity */ public static float getPercentUsed(long used, long capacity) { return capacity <= 0 ? 100 : ((float)used * 100.0f)/(float)capacity; @@ -696,77 +707,4 @@ public class DFSUtil { // TODO:HA configuration changes pending return false; } - - /** - * Get name service Id for the {@link NameNode} based on namenode RPC address - * matching the local node address. - */ - public static String getNamenodeNameServiceId(Configuration conf) { - return getNameServiceId(conf, DFS_NAMENODE_RPC_ADDRESS_KEY); - } - - /** - * Get name service Id for the BackupNode based on backup node RPC address - * matching the local node address. - */ - public static String getBackupNameServiceId(Configuration conf) { - return getNameServiceId(conf, DFS_NAMENODE_BACKUP_ADDRESS_KEY); - } - - /** - * Get name service Id for the secondary node based on secondary http address - * matching the local node address. - */ - public static String getSecondaryNameServiceId(Configuration conf) { - return getNameServiceId(conf, DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY); - } - - /** - * Get the nameservice Id by matching the {@code addressKey} with the - * the address of the local node. 
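For context, a small sketch of the federation configuration this lookup operates on, together with the simplified getNameServiceId(conf) added above (the key names are those used on this branch; the id and host values are made up):

import org.apache.hadoop.conf.Configuration;

public class NameServiceIdDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    // Example federation settings: per-nameservice keys carry the id suffix.
    conf.set("dfs.federation.nameservice.id", "ns1");
    conf.set("dfs.namenode.rpc-address.ns1", "nn1.example.com:8020");

    // The simplified helper added by this hunk just reads the id directly.
    String id = conf.get("dfs.federation.nameservice.id");
    System.out.println("nameservice id = " + id);
    System.out.println("rpc address    = " + conf.get("dfs.namenode.rpc-address." + id));
  }
}
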
- * - * If {@link DFSConfigKeys#DFS_FEDERATION_NAMESERVICE_ID} is not specifically - * configured, this method determines the nameservice Id by matching the local - * nodes address with the configured addresses. When a match is found, it - * returns the nameservice Id from the corresponding configuration key. - * - * @param conf Configuration - * @param addressKey configuration key to get the address. - * @return name service Id on success, null on failure. - * @throws HadoopIllegalArgumentException on error - */ - private static String getNameServiceId(Configuration conf, String addressKey) { - String nameserviceId = conf.get(DFS_FEDERATION_NAMESERVICE_ID); - if (nameserviceId != null) { - return nameserviceId; - } - - Collection ids = getNameServiceIds(conf); - if (ids == null || ids.size() == 0) { - // Not federation configuration, hence no nameservice Id - return null; - } - - // Match the rpc address with that of local address - int found = 0; - for (String id : ids) { - String addr = conf.get(getNameServiceIdKey(addressKey, id)); - InetSocketAddress s = NetUtils.createSocketAddr(addr); - if (NetUtils.isLocalAddress(s.getAddress())) { - nameserviceId = id; - found++; - } - } - if (found > 1) { // Only one address must match the local address - throw new HadoopIllegalArgumentException( - "Configuration has multiple RPC addresses that matches " - + "the local node's address. Please configure the system with " - + "the parameter " + DFS_FEDERATION_NAMESERVICE_ID); - } - if (found == 0) { - throw new HadoopIllegalArgumentException("Configuration address " - + addressKey + " is missing in configuration with name service Id"); - } - return nameserviceId; - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java index af3283ee718..17a09f695ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java @@ -115,26 +115,6 @@ public class DatanodeInfo extends DatanodeID implements Node { this.location = location; this.hostName = hostName; } - - /** Constructor */ - public DatanodeInfo(final String name, final String storageID, - final int infoPort, final int ipcPort, - final long capacity, final long dfsUsed, final long remaining, - final long blockPoolUsed, final long lastUpdate, final int xceiverCount, - final String networkLocation, final String hostName, - final AdminStates adminState) { - super(name, storageID, infoPort, ipcPort); - - this.capacity = capacity; - this.dfsUsed = dfsUsed; - this.remaining = remaining; - this.blockPoolUsed = blockPoolUsed; - this.lastUpdate = lastUpdate; - this.xceiverCount = xceiverCount; - this.location = networkLocation; - this.hostName = hostName; - this.adminState = adminState; - } /** The raw capacity. 
*/ public long getCapacity() { return capacity; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 994275aec06..682d272922b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -308,11 +308,6 @@ public class BlockManager { /** Dump meta data to out. */ public void metaSave(PrintWriter out) { assert namesystem.hasWriteLock(); - final List live = new ArrayList(); - final List dead = new ArrayList(); - datanodeManager.fetchDatanodes(live, dead, false); - out.println("Live Datanodes: " + live.size()); - out.println("Dead Datanodes: " + dead.size()); // // Dump contents of neededReplication // @@ -847,7 +842,7 @@ public class BlockManager { // Add this replica to corruptReplicas Map corruptReplicas.addToCorruptReplicasMap(storedBlock, node); - if (countNodes(storedBlock).liveReplicas() >= inode.getReplication()) { + if (countNodes(storedBlock).liveReplicas() > inode.getReplication()) { // the block is over-replicated so invalidate the replicas immediately invalidateBlock(storedBlock, node); } else if (namesystem.isPopulatingReplQueues()) { @@ -872,7 +867,7 @@ public class BlockManager { // Check how many copies we have of the block. If we have at least one // copy on a live node, then we can delete it. int count = countNodes(blk).liveReplicas(); - if (count >= 1) { + if (count > 1) { addToInvalidates(blk, dn); removeStoredBlock(blk, node); if(NameNode.stateChangeLog.isDebugEnabled()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java index 67f67c03958..e2ce26df6b9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java @@ -54,13 +54,11 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.namenode.NameNodeHttpServer; -import org.apache.hadoop.hdfs.web.resources.DelegationParam; import org.apache.hadoop.hdfs.web.resources.UserParam; import org.apache.hadoop.http.HtmlQuoting; import org.apache.hadoop.io.Text; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; -import org.apache.hadoop.security.authentication.util.KerberosName; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.token.Token; @@ -70,7 +68,7 @@ import org.apache.hadoop.util.VersionInfo; public class JspHelper { public static final String CURRENT_CONF = "current.conf"; final static public String WEB_UGI_PROPERTY_NAME = DFSConfigKeys.DFS_WEB_UGI_KEY; - public static final String DELEGATION_PARAMETER_NAME = DelegationParam.NAME; + public static final String DELEGATION_PARAMETER_NAME = "delegation"; public static final String NAMENODE_ADDRESS = "nnaddr"; static final String 
SET_DELEGATION = "&" + DELEGATION_PARAMETER_NAME + "="; @@ -553,8 +551,7 @@ public class JspHelper { DelegationTokenIdentifier id = new DelegationTokenIdentifier(); id.readFields(in); ugi = id.getUser(); - checkUsername(ugi.getShortUserName(), usernameFromQuery); - checkUsername(ugi.getShortUserName(), user); + checkUsername(ugi.getUserName(), user); ugi.addToken(token); ugi.setAuthenticationMethod(AuthenticationMethod.TOKEN); } else { @@ -563,11 +560,13 @@ public class JspHelper { "authenticated by filter"); } ugi = UserGroupInformation.createRemoteUser(user); - checkUsername(ugi.getShortUserName(), usernameFromQuery); // This is not necessarily true, could have been auth'ed by user-facing // filter ugi.setAuthenticationMethod(secureAuthMethod); } + + checkUsername(user, usernameFromQuery); + } else { // Security's not on, pull from url ugi = usernameFromQuery == null? getDefaultWebUser(conf) // not specified in request @@ -580,18 +579,10 @@ public class JspHelper { return ugi; } - /** - * Expected user name should be a short name. - */ private static void checkUsername(final String expected, final String name ) throws IOException { - if (name == null) { - return; - } - KerberosName u = new KerberosName(name); - String shortName = u.getShortName(); - if (!shortName.equals(expected)) { - throw new IOException("Usernames not matched: name=" + shortName + if (name != null && !name.equals(expected)) { + throw new IOException("Usernames not matched: name=" + name + " != expected=" + expected); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index b6ec0c05b4d..73fbe50e535 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -425,7 +425,7 @@ public class DataNode extends Configured private List plugins; // For InterDataNodeProtocol - public RPC.Server ipcServer; + public Server ipcServer; private SecureResources secureResources = null; private AbstractList dataDirs; @@ -575,15 +575,11 @@ public class DataNode extends Configured private void initIpcServer(Configuration conf) throws IOException { InetSocketAddress ipcAddr = NetUtils.createSocketAddr( conf.get("dfs.datanode.ipc.address")); - - // Add all the RPC protocols that the Datanode implements - ipcServer = RPC.getServer(ClientDatanodeProtocol.class, this, ipcAddr.getHostName(), + ipcServer = RPC.getServer(DataNode.class, this, ipcAddr.getHostName(), ipcAddr.getPort(), conf.getInt(DFS_DATANODE_HANDLER_COUNT_KEY, DFS_DATANODE_HANDLER_COUNT_DEFAULT), false, conf, blockPoolTokenSecretManager); - ipcServer.addProtocol(InterDatanodeProtocol.class, this); - // set service-level authorization security policy if (conf.getBoolean( CommonConfigurationKeys.HADOOP_SECURITY_AUTHORIZATION, false)) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java index 0305024e4f0..4c5c61aac7c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java @@ -50,7 +50,6 @@ import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSClient.DFSDataInputStream; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.namenode.NameNode; -import org.apache.hadoop.hdfs.web.ParamFilter; import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.hdfs.web.resources.BlockSizeParam; import org.apache.hadoop.hdfs.web.resources.BufferSizeParam; @@ -67,11 +66,8 @@ import org.apache.hadoop.hdfs.web.resources.UriFsPathParam; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.UserGroupInformation; -import com.sun.jersey.spi.container.ResourceFilters; - /** Web-hdfs DataNode implementation. */ @Path("") -@ResourceFilters(ParamFilter.class) public class DatanodeWebHdfsMethods { public static final Log LOG = LogFactory.getLog(DatanodeWebHdfsMethods.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index 1e8be5b7075..d8f68a0aaab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -25,7 +25,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; @@ -81,13 +80,13 @@ public class BackupNode extends NameNode { // Common NameNode methods implementation for backup node. ///////////////////////////////////////////////////// @Override // NameNode - protected InetSocketAddress getRpcServerAddress(Configuration conf) { + protected InetSocketAddress getRpcServerAddress(Configuration conf) throws IOException { String addr = conf.get(BN_ADDRESS_NAME_KEY, BN_ADDRESS_DEFAULT); return NetUtils.createSocketAddr(addr); } @Override - protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) { + protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) throws IOException { String addr = conf.get(BN_SERVICE_RPC_ADDRESS_KEY); if (addr == null || addr.isEmpty()) { return null; @@ -135,6 +134,11 @@ public class BackupNode extends NameNode { CommonConfigurationKeys.FS_TRASH_INTERVAL_DEFAULT); NamespaceInfo nsInfo = handshake(conf); super.initialize(conf); + // Backup node should never do lease recovery, + // therefore lease hard limit should never expire. 
+ namesystem.leaseManager.setLeasePeriod( + HdfsConstants.LEASE_SOFTLIMIT_PERIOD, Long.MAX_VALUE); + clusterId = nsInfo.getClusterID(); blockPoolId = nsInfo.getBlockPoolID(); @@ -368,9 +372,4 @@ public class BackupNode extends NameNode { throw new UnsupportedActionException(msg); } } - - @Override - protected String getNameServiceId(Configuration conf) { - return DFSUtil.getBackupNameServiceId(conf); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 654c3a231d4..4d7f2b9ca6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -120,7 +120,7 @@ public class FSDirectory implements Closeable { this.cond = dirLock.writeLock().newCondition(); rootDir = new INodeDirectoryWithQuota(INodeDirectory.ROOT_NAME, ns.createFsOwnerPermissions(new FsPermission((short)0755)), - Long.MAX_VALUE, UNKNOWN_DISK_SPACE); + Integer.MAX_VALUE, UNKNOWN_DISK_SPACE); this.fsImage = fsImage; int configuredLimit = conf.getInt( DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 9fd01446ba6..116fa4826ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -130,6 +130,7 @@ import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.DelegationKey; import org.apache.hadoop.util.Daemon; +import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.VersionInfo; import org.mortbay.util.ajax.JSON; @@ -346,30 +347,28 @@ public class FSNamesystem implements Namesystem, FSClusterStats, dir.imageLoadComplete(); } - void startSecrectManager() throws IOException { + void activateSecretManager() throws IOException { if (dtSecretManager != null) { dtSecretManager.startThreads(); } } - void stopSecretManager() { - if (dtSecretManager != null) { - dtSecretManager.stopThreads(); - } - } - - /** - * Start services common to both active and standby states - * @throws IOException + /** + * Activate FSNamesystem daemons. 
*/ - void startCommonServices(Configuration conf) throws IOException { + void activate(Configuration conf) throws IOException { this.registerMBean(); // register the MBean for the FSNamesystemState + writeLock(); try { nnResourceChecker = new NameNodeResourceChecker(conf); checkAvailableResources(); + setBlockTotal(); blockManager.activate(conf); + + this.lmthread = new Daemon(leaseManager.new Monitor()); + lmthread.start(); this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); nnrmthread.start(); } finally { @@ -379,70 +378,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, registerMXBean(); DefaultMetricsSystem.instance().register(this); } - - /** - * Stop services common to both active and standby states - * @throws IOException - */ - void stopCommonServices() { - writeLock(); - try { - if (blockManager != null) blockManager.close(); - if (nnrmthread != null) nnrmthread.interrupt(); - } finally { - writeUnlock(); - } - } - - /** - * Start services required in active state - * @throws IOException - */ - void startActiveServices() throws IOException { - LOG.info("Starting services required for active state"); - writeLock(); - try { - startSecrectManager(); - lmthread = new Daemon(leaseManager.new Monitor()); - lmthread.start(); - } finally { - writeUnlock(); - } - } - - /** - * Start services required in active state - * @throws InterruptedException - */ - void stopActiveServices() { - LOG.info("Stopping services started for active state"); - writeLock(); - try { - stopSecretManager(); - if (lmthread != null) { - try { - lmthread.interrupt(); - lmthread.join(3000); - } catch (InterruptedException ie) { - LOG.warn("Encountered exception ", ie); - } - lmthread = null; - } - } finally { - writeUnlock(); - } - } - - /** Start services required in standby state */ - void startStandbyServices() { - LOG.info("Starting services required for standby state"); - } - /** Stop services required in standby state */ - void stopStandbyServices() { - LOG.info("Stopping services started for standby state"); - } - public static Collection getNamespaceDirs(Configuration conf) { return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY); } @@ -566,7 +502,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } /** - * Version of @see #getNamespaceInfo() that is not protected by a lock. + * Version of {@see #getNamespaceInfo()} that is not protected by a lock. 
*/ NamespaceInfo unprotectedGetNamespaceInfo() { return new NamespaceInfo(dir.fsImage.getStorage().getNamespaceID(), @@ -583,16 +519,23 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void close() { fsRunning = false; try { - stopCommonServices(); + if (blockManager != null) blockManager.close(); if (smmthread != null) smmthread.interrupt(); + if (dtSecretManager != null) dtSecretManager.stopThreads(); + if (nnrmthread != null) nnrmthread.interrupt(); + } catch (Exception e) { + LOG.warn("Exception shutting down FSNamesystem", e); } finally { // using finally to ensure we also wait for lease daemon try { - stopActiveServices(); - stopStandbyServices(); + if (lmthread != null) { + lmthread.interrupt(); + lmthread.join(3000); + } if (dir != null) { dir.close(); } + } catch (InterruptedException ie) { } catch (IOException ie) { LOG.error("Error closing FSDirectory", ie); IOUtils.cleanup(LOG, dir); @@ -621,6 +564,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, out.println(totalInodes + " files and directories, " + totalBlocks + " blocks = " + (totalInodes + totalBlocks) + " total"); + final List live = new ArrayList(); + final List dead = new ArrayList(); + blockManager.getDatanodeManager().fetchDatanodes(live, dead, false); + out.println("Live Datanodes: "+live.size()); + out.println("Dead Datanodes: "+dead.size()); blockManager.metaSave(out); out.flush(); @@ -1443,7 +1391,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { lb = startFileInternal(src, null, holder, clientMachine, EnumSet.of(CreateFlag.APPEND), - false, blockManager.maxReplication, 0); + false, blockManager.maxReplication, (long)0); } finally { writeUnlock(); } @@ -1526,7 +1474,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, fileLength = pendingFile.computeContentSummary().getLength(); blockSize = pendingFile.getPreferredBlockSize(); clientNode = pendingFile.getClientNode(); - replication = pendingFile.getReplication(); + replication = (int)pendingFile.getReplication(); } finally { writeUnlock(); } @@ -2321,7 +2269,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } Lease reassignLeaseInternal(Lease lease, String src, String newHolder, - INodeFileUnderConstruction pendingFile) { + INodeFileUnderConstruction pendingFile) throws IOException { assert hasWriteLock(); pendingFile.setClientName(newHolder); return leaseManager.reassignLease(lease, src, newHolder); @@ -2926,9 +2874,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @return true if in safe mode */ private synchronized boolean isOn() { - assert isConsistent() : " SafeMode: Inconsistent filesystem state: " - + "Total num of blocks, active blocks, or " - + "total safe blocks don't match."; + try { + assert isConsistent() : " SafeMode: Inconsistent filesystem state: " + + "Total num of blocks, active blocks, or " + + "total safe blocks don't match."; + } catch(IOException e) { + System.err.print(StringUtils.stringifyException(e)); + } return this.reached >= 0; } @@ -3082,7 +3034,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, this.blockTotal = total; this.blockThreshold = (int) (blockTotal * threshold); this.blockReplQueueThreshold = - (int) (blockTotal * replQueueThreshold); + (int) (((double) blockTotal) * replQueueThreshold); checkMode(); } @@ -3092,7 +3044,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @param replication current replication */ private synchronized void 
incrementSafeBlockCount(short replication) { - if (replication == safeReplication) + if ((int)replication == safeReplication) this.blockSafe++; checkMode(); } @@ -3225,7 +3177,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Checks consistency of the class state. * This is costly and currently called only in assert. */ - private boolean isConsistent() { + private boolean isConsistent() throws IOException { if (blockTotal == -1 && blockSafe == -1) { return true; // manual safe mode } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 9f41ad54b12..6db96924c24 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -27,7 +27,6 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HealthCheckFailedException; @@ -38,13 +37,15 @@ import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Trash; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.HDFSPolicyProvider; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.UnregisteredNodeException; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; +import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; -import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; import org.apache.hadoop.hdfs.server.namenode.ha.HAState; import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; @@ -53,6 +54,9 @@ import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; +import org.apache.hadoop.hdfs.server.protocol.NodeRegistration; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.Server; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.RefreshUserMappingsProtocol; @@ -167,18 +171,19 @@ public class NameNode { } } + + public static final int DEFAULT_PORT = 8020; + public static final Log LOG = LogFactory.getLog(NameNode.class.getName()); public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange"); public static final HAState ACTIVE_STATE = new ActiveState(); public static final HAState STANDBY_STATE = new StandbyState(); protected FSNamesystem namesystem; - protected final Configuration conf; protected NamenodeRole role; private HAState state; private final boolean haEnabled; - private final HAContext haContext; /** httpServer */ @@ -307,11 +312,12 @@ public class 
NameNode { * Given a configuration get the address of the service rpc server * If the service rpc is not configured returns null */ - protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) { + protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) + throws IOException { return NameNode.getServiceAddress(conf, false); } - protected InetSocketAddress getRpcServerAddress(Configuration conf) { + protected InetSocketAddress getRpcServerAddress(Configuration conf) throws IOException { return getAddress(conf); } @@ -374,6 +380,7 @@ public class NameNode { * @param conf the configuration */ protected void initialize(Configuration conf) throws IOException { + initializeGenericKeys(conf); UserGroupInformation.setConfiguration(conf); loginAsNameNodeUser(conf); @@ -389,7 +396,7 @@ public class NameNode { throw e; } - startCommonServices(conf); + activate(conf); } /** @@ -423,10 +430,19 @@ public class NameNode { } } - /** Start the services common to active and standby states */ - private void startCommonServices(Configuration conf) throws IOException { - namesystem.startCommonServices(conf); + /** + * Activate name-node servers and threads. + */ + void activate(Configuration conf) throws IOException { + if ((isRole(NamenodeRole.NAMENODE)) + && (UserGroupInformation.isSecurityEnabled())) { + namesystem.activateSecretManager(); + } + namesystem.activate(conf); + startHttpServer(conf); rpcServer.start(); + startTrashEmptier(conf); + plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY, ServicePlugin.class); for (ServicePlugin p: plugins) { @@ -436,29 +452,13 @@ public class NameNode { LOG.warn("ServicePlugin " + p + " could not be started", t); } } + LOG.info(getRole() + " up at: " + rpcServer.getRpcAddress()); if (rpcServer.getServiceRpcAddress() != null) { - LOG.info(getRole() + " service server is up at: " - + rpcServer.getServiceRpcAddress()); + LOG.info(getRole() + " service server is up at: " + rpcServer.getServiceRpcAddress()); } - startHttpServer(conf); } - - private void stopCommonServices() { - if(namesystem != null) namesystem.close(); - if(rpcServer != null) rpcServer.stop(); - if (plugins != null) { - for (ServicePlugin p : plugins) { - try { - p.stop(); - } catch (Throwable t) { - LOG.warn("ServicePlugin " + p + " could not be stopped", t); - } - } - } - stopHttpServer(); - } - + private void startTrashEmptier(Configuration conf) throws IOException { long trashInterval = conf.getLong(CommonConfigurationKeys.FS_TRASH_INTERVAL_KEY, @@ -470,26 +470,11 @@ public class NameNode { this.emptier.start(); } - private void stopTrashEmptier() { - if (this.emptier != null) { - emptier.interrupt(); - emptier = null; - } - } - private void startHttpServer(final Configuration conf) throws IOException { httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf)); httpServer.start(); setHttpServerAddress(conf); } - - private void stopHttpServer() { - try { - if (httpServer != null) httpServer.stop(); - } catch (Exception e) { - LOG.error("Exception while stopping httpserver", e); - } - } /** * Start NameNode. @@ -516,36 +501,22 @@ public class NameNode { * zero in the conf. 
* * @param conf confirguration - * @throws IOException on error + * @throws IOException */ public NameNode(Configuration conf) throws IOException { this(conf, NamenodeRole.NAMENODE); } protected NameNode(Configuration conf, NamenodeRole role) - throws IOException { - this.conf = conf; + throws IOException { this.role = role; this.haEnabled = DFSUtil.isHAEnabled(conf); - this.haContext = new NameNodeHAContext(); + this.state = !haEnabled ? ACTIVE_STATE : STANDBY_STATE; try { - initializeGenericKeys(conf, getNameServiceId(conf)); initialize(conf); - if (!haEnabled) { - state = ACTIVE_STATE; - } else { - state = STANDBY_STATE;; - } - state.enterState(haContext); } catch (IOException e) { this.stop(); throw e; - } catch (ServiceFailedException e) { - this.stop(); - throw new IOException("Service failed to start", e); - } catch (HadoopIllegalArgumentException e) { - this.stop(); - throw e; } } @@ -557,7 +528,6 @@ public class NameNode { try { this.rpcServer.join(); } catch (InterruptedException ie) { - LOG.info("Caught interrupted exception " + ie); } } @@ -570,12 +540,23 @@ public class NameNode { return; stopRequested = true; } - try { - state.exitState(haContext); - } catch (ServiceFailedException e) { - LOG.info("Encountered exception while exiting state " + e); + if (plugins != null) { + for (ServicePlugin p : plugins) { + try { + p.stop(); + } catch (Throwable t) { + LOG.warn("ServicePlugin " + p + " could not be stopped", t); + } + } } - stopCommonServices(); + try { + if (httpServer != null) httpServer.stop(); + } catch (Exception e) { + LOG.error("Exception while stopping httpserver", e); + } + if(namesystem != null) namesystem.close(); + if(emptier != null) emptier.interrupt(); + if(rpcServer != null) rpcServer.stop(); if (metrics != null) { metrics.shutdown(); } @@ -840,16 +821,16 @@ public class NameNode { * @param conf * Configuration object to lookup specific key and to set the value * to the key passed. Note the conf object is modified - * @param nameserviceId name service Id * @see DFSUtil#setGenericConf(Configuration, String, String...) 
*/ - public static void initializeGenericKeys(Configuration conf, String - nameserviceId) { + public static void initializeGenericKeys(Configuration conf) { + final String nameserviceId = DFSUtil.getNameServiceId(conf); if ((nameserviceId == null) || nameserviceId.isEmpty()) { return; } DFSUtil.setGenericConf(conf, nameserviceId, NAMESERVICE_SPECIFIC_KEYS); + if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) { URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY)); @@ -857,14 +838,6 @@ public class NameNode { } } - /** - * Get the name service Id for the node - * @return name service Id or null if federation is not configured - */ - protected String getNameServiceId(Configuration conf) { - return DFSUtil.getNamenodeNameServiceId(conf); - } - /** */ public static void main(String argv[]) throws Exception { @@ -891,56 +864,27 @@ public class NameNode { if (!haEnabled) { throw new ServiceFailedException("HA for namenode is not enabled"); } - state.setState(haContext, ACTIVE_STATE); + state.setState(this, ACTIVE_STATE); } synchronized void transitionToStandby() throws ServiceFailedException { if (!haEnabled) { throw new ServiceFailedException("HA for namenode is not enabled"); } - state.setState(haContext, STANDBY_STATE); + state.setState(this, STANDBY_STATE); } /** Check if an operation of given category is allowed */ protected synchronized void checkOperation(final OperationCategory op) throws UnsupportedActionException { - state.checkOperation(haContext, op); + state.checkOperation(this, op); } - /** - * Class used as expose {@link NameNode} as context to {@link HAState} - */ - private class NameNodeHAContext implements HAContext { - @Override - public void setState(HAState s) { - state = s; - } - - @Override - public HAState getState() { - return state; - } - - @Override - public void startActiveServices() throws IOException { - namesystem.startActiveServices(); - startTrashEmptier(conf); - } - - @Override - public void stopActiveServices() throws IOException { - namesystem.stopActiveServices(); - stopTrashEmptier(); - } - - @Override - public void startStandbyServices() throws IOException { - // TODO:HA Start reading editlog from active - } - - @Override - public void stopStandbyServices() throws IOException { - // TODO:HA Stop reading editlog from active - } + public synchronized HAState getState() { + return state; + } + + public synchronized void setState(final HAState s) { + state = s; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 3e4eaf5f9dc..7fdf3e60d39 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -66,7 +66,6 @@ import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; -import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import 
org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; @@ -146,17 +145,10 @@ class NameNodeRpcServer implements NamenodeProtocols { serviceRpcServer = null; serviceRPCAddress = null; } - // Add all the RPC protocols that the namenode implements - this.server = RPC.getServer(ClientProtocol.class, this, + this.server = RPC.getServer(NamenodeProtocols.class, this, socAddr.getHostName(), socAddr.getPort(), handlerCount, false, conf, namesystem.getDelegationTokenSecretManager()); - this.server.addProtocol(DatanodeProtocol.class, this); - this.server.addProtocol(NamenodeProtocol.class, this); - this.server.addProtocol(RefreshAuthorizationPolicyProtocol.class, this); - this.server.addProtocol(RefreshUserMappingsProtocol.class, this); - this.server.addProtocol(GetUserMappingsProtocol.class, this); - // set service-level authorization security policy if (serviceAuthEnabled = @@ -979,11 +971,8 @@ class NameNodeRpcServer implements NamenodeProtocols { } private static String getClientMachine() { - String clientMachine = NamenodeWebHdfsMethods.getRemoteAddress(); - if (clientMachine == null) { //not a web client - clientMachine = Server.getRemoteAddress(); - } - if (clientMachine == null) { //not a RPC client + String clientMachine = Server.getRemoteAddress(); + if (clientMachine == null) { clientMachine = ""; } return clientMachine; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index d403629146f..9c5ef6f2c36 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -38,12 +38,10 @@ import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; - import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DFSUtil.ErrorSimulator; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -175,17 +173,12 @@ public class SecondaryNameNode implements Runnable { public SecondaryNameNode(Configuration conf, CommandLineOpts commandLineOpts) throws IOException { try { - NameNode.initializeGenericKeys(conf, - DFSUtil.getSecondaryNameServiceId(conf)); + NameNode.initializeGenericKeys(conf); initialize(conf, commandLineOpts); } catch(IOException e) { shutdown(); LOG.fatal("Failed to start secondary namenode. ", e); throw e; - } catch(HadoopIllegalArgumentException e) { - shutdown(); - LOG.fatal("Failed to start secondary namenode. 
", e); - throw e; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java index 76e0f6a7151..1cf24f7f23a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java @@ -17,8 +17,6 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; -import java.io.IOException; - import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; @@ -35,35 +33,27 @@ public class ActiveState extends HAState { } @Override - public void checkOperation(HAContext context, OperationCategory op) + public void checkOperation(NameNode nn, OperationCategory op) throws UnsupportedActionException { return; // Other than journal all operations are allowed in active state } @Override - public void setState(HAContext context, HAState s) throws ServiceFailedException { + public void setState(NameNode nn, HAState s) throws ServiceFailedException { if (s == NameNode.STANDBY_STATE) { - setStateInternal(context, s); + setStateInternal(nn, s); return; } - super.setState(context, s); + super.setState(nn, s); } @Override - public void enterState(HAContext context) throws ServiceFailedException { - try { - context.startActiveServices(); - } catch (IOException e) { - throw new ServiceFailedException("Failed to start active services", e); - } + protected void enterState(NameNode nn) throws ServiceFailedException { + // TODO:HA } @Override - public void exitState(HAContext context) throws ServiceFailedException { - try { - context.stopActiveServices(); - } catch (IOException e) { - throw new ServiceFailedException("Failed to stop active services", e); - } + protected void exitState(NameNode nn) throws ServiceFailedException { + // TODO:HA } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java index 6ee516c4cab..1828f9c83db 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException; @@ -43,38 +44,38 @@ abstract public class HAState { * @param s new state * @throws ServiceFailedException on failure to transition to new state. */ - protected final void setStateInternal(final HAContext context, final HAState s) + protected final void setStateInternal(final NameNode nn, final HAState s) throws ServiceFailedException { - exitState(context); - context.setState(s); - s.enterState(context); + exitState(nn); + nn.setState(s); + s.enterState(nn); } /** * Method to be overridden by subclasses to perform steps necessary for * entering a state. 
- * @param context HA context + * @param nn Namenode * @throws ServiceFailedException on failure to enter the state. */ - public abstract void enterState(final HAContext context) + protected abstract void enterState(final NameNode nn) throws ServiceFailedException; /** * Method to be overridden by subclasses to perform steps necessary for * exiting a state. - * @param context HA context + * @param nn Namenode * @throws ServiceFailedException on failure to enter the state. */ - public abstract void exitState(final HAContext context) + protected abstract void exitState(final NameNode nn) throws ServiceFailedException; /** * Move from the existing state to a new state - * @param context HA context + * @param nn Namenode * @param s new state * @throws ServiceFailedException on failure to transition to new state. */ - public void setState(HAContext context, HAState s) throws ServiceFailedException { + public void setState(NameNode nn, HAState s) throws ServiceFailedException { if (this == s) { // Aleady in the new state return; } @@ -84,15 +85,15 @@ abstract public class HAState { /** * Check if an operation is supported in a given state. - * @param context HA context + * @param nn Namenode * @param op Type of the operation. * @throws UnsupportedActionException if a given type of operation is not * supported in this state. */ - public void checkOperation(final HAContext context, final OperationCategory op) + public void checkOperation(final NameNode nn, final OperationCategory op) throws UnsupportedActionException { String msg = "Operation category " + op + " is not supported in state " - + context.getState(); + + nn.getState(); throw new UnsupportedActionException(msg); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java index e0dc9af5919..b63866dc713 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java @@ -17,8 +17,6 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; -import java.io.IOException; - import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.server.namenode.NameNode; @@ -39,30 +37,22 @@ public class StandbyState extends HAState { } @Override - public void setState(HAContext context, HAState s) throws ServiceFailedException { + public void setState(NameNode nn, HAState s) throws ServiceFailedException { if (s == NameNode.ACTIVE_STATE) { - setStateInternal(context, s); + setStateInternal(nn, s); return; } - super.setState(context, s); + super.setState(nn, s); } @Override - public void enterState(HAContext context) throws ServiceFailedException { - try { - context.startStandbyServices(); - } catch (IOException e) { - throw new ServiceFailedException("Failed to start standby services", e); - } + protected void enterState(NameNode nn) throws ServiceFailedException { + // TODO:HA } @Override - public void exitState(HAContext context) throws ServiceFailedException { - try { - context.stopStandbyServices(); - } catch (IOException e) { - throw new ServiceFailedException("Failed to stop standby services", e); - } + protected void exitState(NameNode nn) throws ServiceFailedException { + // TODO:HA } } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index 2dd1db33410..948466f638f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -57,7 +57,6 @@ import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.web.JsonUtil; -import org.apache.hadoop.hdfs.web.ParamFilter; import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.hdfs.web.resources.AccessTimeParam; import org.apache.hadoop.hdfs.web.resources.BlockSizeParam; @@ -79,7 +78,6 @@ import org.apache.hadoop.hdfs.web.resources.PostOpParam; import org.apache.hadoop.hdfs.web.resources.PutOpParam; import org.apache.hadoop.hdfs.web.resources.RecursiveParam; import org.apache.hadoop.hdfs.web.resources.RenameOptionSetParam; -import org.apache.hadoop.hdfs.web.resources.RenewerParam; import org.apache.hadoop.hdfs.web.resources.ReplicationParam; import org.apache.hadoop.hdfs.web.resources.UriFsPathParam; import org.apache.hadoop.hdfs.web.resources.UserParam; @@ -91,20 +89,10 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; -import com.sun.jersey.spi.container.ResourceFilters; - /** Web-hdfs NameNode implementation. */ @Path("") -@ResourceFilters(ParamFilter.class) public class NamenodeWebHdfsMethods { - public static final Log LOG = LogFactory.getLog(NamenodeWebHdfsMethods.class); - - private static final ThreadLocal REMOTE_ADDRESS = new ThreadLocal(); - - /** @return the remote client address. 
*/ - public static String getRemoteAddress() { - return REMOTE_ADDRESS.get(); - } + private static final Log LOG = LogFactory.getLog(NamenodeWebHdfsMethods.class); private @Context ServletContext context; private @Context HttpServletRequest request; @@ -227,8 +215,6 @@ public class NamenodeWebHdfsMethods { return ugi.doAs(new PrivilegedExceptionAction() { @Override public Response run() throws IOException, URISyntaxException { - REMOTE_ADDRESS.set(request.getRemoteAddr()); - try { final String fullpath = path.getAbsolutePath(); final NameNode namenode = (NameNode)context.getAttribute("name.node"); @@ -286,10 +272,6 @@ public class NamenodeWebHdfsMethods { default: throw new UnsupportedOperationException(op + " is not supported"); } - - } finally { - REMOTE_ADDRESS.set(null); - } } }); } @@ -319,8 +301,6 @@ public class NamenodeWebHdfsMethods { return ugi.doAs(new PrivilegedExceptionAction() { @Override public Response run() throws IOException, URISyntaxException { - REMOTE_ADDRESS.set(request.getRemoteAddr()); - try { final String fullpath = path.getAbsolutePath(); final NameNode namenode = (NameNode)context.getAttribute("name.node"); @@ -335,10 +315,6 @@ public class NamenodeWebHdfsMethods { default: throw new UnsupportedOperationException(op + " is not supported"); } - - } finally { - REMOTE_ADDRESS.set(null); - } } }); } @@ -359,12 +335,10 @@ public class NamenodeWebHdfsMethods { final OffsetParam offset, @QueryParam(LengthParam.NAME) @DefaultValue(LengthParam.DEFAULT) final LengthParam length, - @QueryParam(RenewerParam.NAME) @DefaultValue(RenewerParam.DEFAULT) - final RenewerParam renewer, @QueryParam(BufferSizeParam.NAME) @DefaultValue(BufferSizeParam.DEFAULT) final BufferSizeParam bufferSize ) throws IOException, URISyntaxException, InterruptedException { - return get(ugi, delegation, ROOT, op, offset, length, renewer, bufferSize); + return get(ugi, delegation, ROOT, op, offset, length, bufferSize); } /** Handle HTTP GET request. */ @@ -382,23 +356,19 @@ public class NamenodeWebHdfsMethods { final OffsetParam offset, @QueryParam(LengthParam.NAME) @DefaultValue(LengthParam.DEFAULT) final LengthParam length, - @QueryParam(RenewerParam.NAME) @DefaultValue(RenewerParam.DEFAULT) - final RenewerParam renewer, @QueryParam(BufferSizeParam.NAME) @DefaultValue(BufferSizeParam.DEFAULT) final BufferSizeParam bufferSize ) throws IOException, URISyntaxException, InterruptedException { if (LOG.isTraceEnabled()) { LOG.trace(op + ": " + path + ", ugi=" + ugi - + Param.toSortedString(", ", offset, length, renewer, bufferSize)); + + Param.toSortedString(", ", offset, length, bufferSize)); } return ugi.doAs(new PrivilegedExceptionAction() { @Override public Response run() throws IOException, URISyntaxException { - REMOTE_ADDRESS.set(request.getRemoteAddr()); - try { final NameNode namenode = (NameNode)context.getAttribute("name.node"); final String fullpath = path.getAbsolutePath(); @@ -411,15 +381,6 @@ public class NamenodeWebHdfsMethods { op.getValue(), offset.getValue(), offset, length, bufferSize); return Response.temporaryRedirect(uri).build(); } - case GETFILEBLOCKLOCATIONS: - { - final long offsetValue = offset.getValue(); - final Long lengthValue = length.getValue(); - final LocatedBlocks locatedblocks = np.getBlockLocations(fullpath, - offsetValue, lengthValue != null? 
lengthValue: offsetValue + 1); - final String js = JsonUtil.toJsonString(locatedblocks); - return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); - } case GETFILESTATUS: { final HdfsFileStatus status = np.getFileInfo(fullpath); @@ -431,20 +392,9 @@ public class NamenodeWebHdfsMethods { final StreamingOutput streaming = getListingStream(np, fullpath); return Response.ok(streaming).type(MediaType.APPLICATION_JSON).build(); } - case GETDELEGATIONTOKEN: - { - final Token token = generateDelegationToken( - namenode, ugi, renewer.getValue()); - final String js = JsonUtil.toJsonString(token); - return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); - } default: throw new UnsupportedOperationException(op + " is not supported"); } - - } finally { - REMOTE_ADDRESS.set(null); - } } }); } @@ -512,9 +462,6 @@ public class NamenodeWebHdfsMethods { return ugi.doAs(new PrivilegedExceptionAction() { @Override public Response run() throws IOException { - REMOTE_ADDRESS.set(request.getRemoteAddr()); - try { - final NameNode namenode = (NameNode)context.getAttribute("name.node"); final String fullpath = path.getAbsolutePath(); @@ -528,10 +475,6 @@ public class NamenodeWebHdfsMethods { default: throw new UnsupportedOperationException(op + " is not supported"); } - - } finally { - REMOTE_ADDRESS.set(null); - } } }); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java index 1e853933433..d085534e110 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java @@ -149,9 +149,7 @@ public class DelegationTokenFetcher { DataInputStream in = new DataInputStream( new ByteArrayInputStream(token.getIdentifier())); id.readFields(in); - if(LOG.isDebugEnabled()) { - LOG.debug("Token (" + id + ") for " + token.getService()); - } + System.out.println("Token (" + id + ") for " + token.getService()); } return null; } @@ -162,28 +160,22 @@ public class DelegationTokenFetcher { for (Token token : readTokens(tokenFile, conf)) { result = renewDelegationToken(webUrl, (Token) token); - if(LOG.isDebugEnabled()) { - LOG.debug("Renewed token via " + webUrl + " for " - + token.getService() + " until: " + new Date(result)); - } + System.out.println("Renewed token via " + webUrl + " for " + + token.getService() + " until: " + new Date(result)); } } else if (cancel) { for (Token token : readTokens(tokenFile, conf)) { cancelDelegationToken(webUrl, (Token) token); - if(LOG.isDebugEnabled()) { - LOG.debug("Cancelled token via " + webUrl + " for " - + token.getService()); - } + System.out.println("Cancelled token via " + webUrl + " for " + + token.getService()); } } else { Credentials creds = getDTfromRemote(webUrl, renewer); creds.writeTokenStorageFile(tokenFile, conf); for (Token token : creds.getAllTokens()) { - if(LOG.isDebugEnabled()) { - LOG.debug("Fetched token via " + webUrl + " for " - + token.getService() + " into " + tokenFile); - } + System.out.println("Fetched token via " + webUrl + " for " + + token.getService() + " into " + tokenFile); } } } else { @@ -192,30 +184,24 @@ public class DelegationTokenFetcher { for (Token token : readTokens(tokenFile, conf)) { ((DistributedFileSystem) fs) .cancelDelegationToken((Token) token); - if(LOG.isDebugEnabled()) { - LOG.debug("Cancelled token 
for " - + token.getService()); - } + System.out.println("Cancelled token for " + + token.getService()); } } else if (renew) { long result; for (Token token : readTokens(tokenFile, conf)) { result = ((DistributedFileSystem) fs) .renewDelegationToken((Token) token); - if(LOG.isDebugEnabled()) { - LOG.debug("Renewed token for " + token.getService() - + " until: " + new Date(result)); - } + System.out.println("Renewed token for " + token.getService() + + " until: " + new Date(result)); } } else { Token token = fs.getDelegationToken(renewer); Credentials cred = new Credentials(); cred.addToken(token.getService(), token); cred.writeTokenStorageFile(tokenFile, conf); - if(LOG.isDebugEnabled()) { - LOG.debug("Fetched token for " + token.getService() - + " into " + tokenFile); - } + System.out.println("Fetched token for " + token.getService() + + " into " + tokenFile); } } return null; @@ -235,11 +221,6 @@ public class DelegationTokenFetcher { } else { url.append(nnAddr).append(GetDelegationTokenServlet.PATH_SPEC); } - - if(LOG.isDebugEnabled()) { - LOG.debug("Retrieving token from: " + url); - } - URL remoteURL = new URL(url.toString()); SecurityUtil.fetchServiceTicket(remoteURL); URLConnection connection = remoteURL.openConnection(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java index 314d53b38f2..1c18dc334e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java @@ -17,31 +17,19 @@ */ package org.apache.hadoop.hdfs.web; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; import java.util.Map; import java.util.TreeMap; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSUtil; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo; -import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates; -import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.hdfs.protocol.LocatedBlocks; -import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; -import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.ipc.RemoteException; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenIdentifier; import org.mortbay.util.ajax.JSON; /** JSON Utilities */ public class JsonUtil { - private static class ThreadLocalMap extends ThreadLocal> { + private static final ThreadLocal> jsonMap + = new ThreadLocal>() { @Override protected Map initialValue() { return new TreeMap(); @@ -53,54 +41,7 @@ public class JsonUtil { m.clear(); return m; } - } - - private static final ThreadLocalMap jsonMap = new ThreadLocalMap(); - private static final ThreadLocalMap tokenMap = new ThreadLocalMap(); - private static final ThreadLocalMap datanodeInfoMap = new ThreadLocalMap(); - private static final ThreadLocalMap extendedBlockMap = new ThreadLocalMap(); - private static final ThreadLocalMap locatedBlockMap = new ThreadLocalMap(); - - private static final DatanodeInfo[] EMPTY_DATANODE_INFO_ARRAY = {}; - - /** Convert a token object to a Json string. 
*/ - public static String toJsonString(final Token token - ) throws IOException { - if (token == null) { - return null; - } - - final Map m = tokenMap.get(); - m.put("urlString", token.encodeToUrlString()); - return JSON.toString(m); - } - - /** Convert a Json map to a Token. */ - public static Token toToken( - final Map m) throws IOException { - if (m == null) { - return null; - } - - final Token token - = new Token(); - token.decodeFromUrlString((String)m.get("urlString")); - return token; - } - - /** Convert a Json map to a Token of DelegationTokenIdentifier. */ - @SuppressWarnings("unchecked") - public static Token toDelegationToken( - final Map m) throws IOException { - return (Token)toToken(m); - } - - /** Convert a Json map to a Token of BlockTokenIdentifier. */ - @SuppressWarnings("unchecked") - public static Token toBlockToken( - final Map m) throws IOException { - return (Token)toToken(m); - } + }; /** Convert an exception object to a Json string. */ public static String toJsonString(final Exception e) { @@ -136,10 +77,11 @@ public class JsonUtil { /** Convert a HdfsFileStatus object to a Json string. */ public static String toJsonString(final HdfsFileStatus status) { + final Map m = jsonMap.get(); if (status == null) { - return null; + m.put("isNull", true); } else { - final Map m = jsonMap.get(); + m.put("isNull", false); m.put("localName", status.getLocalName()); m.put("isDir", status.isDir()); m.put("isSymlink", status.isSymlink()); @@ -155,8 +97,8 @@ public class JsonUtil { m.put("modificationTime", status.getModificationTime()); m.put("blockSize", status.getBlockSize()); m.put("replication", status.getReplication()); - return JSON.toString(m); } + return JSON.toString(m); } @SuppressWarnings("unchecked") @@ -164,9 +106,9 @@ public class JsonUtil { return (Map) JSON.parse(jsonString); } - /** Convert a Json map to a HdfsFileStatus object. */ + /** Convert a Json string to a HdfsFileStatus object. */ public static HdfsFileStatus toFileStatus(final Map m) { - if (m == null) { + if ((Boolean)m.get("isNull")) { return null; } @@ -188,214 +130,4 @@ public class JsonUtil { permission, owner, group, symlink, DFSUtil.string2Bytes(localName)); } - - /** Convert a LocatedBlock to a Json string. */ - public static String toJsonString(final ExtendedBlock extendedblock) { - if (extendedblock == null) { - return null; - } - - final Map m = extendedBlockMap.get(); - m.put("blockPoolId", extendedblock.getBlockPoolId()); - m.put("blockId", extendedblock.getBlockId()); - m.put("numBytes", extendedblock.getNumBytes()); - m.put("generationStamp", extendedblock.getGenerationStamp()); - return JSON.toString(m); - } - - /** Convert a Json map to an ExtendedBlock object. */ - public static ExtendedBlock toExtendedBlock(final Map m) { - if (m == null) { - return null; - } - - final String blockPoolId = (String)m.get("blockPoolId"); - final long blockId = (Long)m.get("blockId"); - final long numBytes = (Long)m.get("numBytes"); - final long generationStamp = (Long)m.get("generationStamp"); - return new ExtendedBlock(blockPoolId, blockId, numBytes, generationStamp); - } - - /** Convert a DatanodeInfo to a Json string. 
*/ - public static String toJsonString(final DatanodeInfo datanodeinfo) { - if (datanodeinfo == null) { - return null; - } - - final Map m = datanodeInfoMap.get(); - m.put("name", datanodeinfo.getName()); - m.put("storageID", datanodeinfo.getStorageID()); - m.put("infoPort", datanodeinfo.getInfoPort()); - - m.put("ipcPort", datanodeinfo.getIpcPort()); - - m.put("capacity", datanodeinfo.getCapacity()); - m.put("dfsUsed", datanodeinfo.getDfsUsed()); - m.put("remaining", datanodeinfo.getRemaining()); - m.put("blockPoolUsed", datanodeinfo.getBlockPoolUsed()); - m.put("lastUpdate", datanodeinfo.getLastUpdate()); - m.put("xceiverCount", datanodeinfo.getXceiverCount()); - m.put("networkLocation", datanodeinfo.getNetworkLocation()); - m.put("hostName", datanodeinfo.getHostName()); - m.put("adminState", datanodeinfo.getAdminState().name()); - return JSON.toString(m); - } - - /** Convert a Json map to an DatanodeInfo object. */ - public static DatanodeInfo toDatanodeInfo(final Map m) { - if (m == null) { - return null; - } - - return new DatanodeInfo( - (String)m.get("name"), - (String)m.get("storageID"), - (int)(long)(Long)m.get("infoPort"), - (int)(long)(Long)m.get("ipcPort"), - - (Long)m.get("capacity"), - (Long)m.get("dfsUsed"), - (Long)m.get("remaining"), - (Long)m.get("blockPoolUsed"), - (Long)m.get("lastUpdate"), - (int)(long)(Long)m.get("xceiverCount"), - (String)m.get("networkLocation"), - (String)m.get("hostName"), - AdminStates.valueOf((String)m.get("adminState"))); - } - - /** Convert a DatanodeInfo[] to a Json string. */ - public static String toJsonString(final DatanodeInfo[] array - ) throws IOException { - if (array == null) { - return null; - } else if (array.length == 0) { - return "[]"; - } else { - final StringBuilder b = new StringBuilder().append('[').append( - toJsonString(array[0])); - for(int i = 1; i < array.length; i++) { - b.append(", ").append(toJsonString(array[i])); - } - return b.append(']').toString(); - } - } - - /** Convert an Object[] to a DatanodeInfo[]. */ - public static DatanodeInfo[] toDatanodeInfoArray(final Object[] objects) { - if (objects == null) { - return null; - } else if (objects.length == 0) { - return EMPTY_DATANODE_INFO_ARRAY; - } else { - final DatanodeInfo[] array = new DatanodeInfo[objects.length]; - for(int i = 0; i < array.length; i++) { - array[i] = (DatanodeInfo)toDatanodeInfo((Map) objects[i]); - } - return array; - } - } - - /** Convert a LocatedBlock to a Json string. */ - public static String toJsonString(final LocatedBlock locatedblock - ) throws IOException { - if (locatedblock == null) { - return null; - } - - final Map m = locatedBlockMap.get(); - m.put("blockToken", toJsonString(locatedblock.getBlockToken())); - m.put("isCorrupt", locatedblock.isCorrupt()); - m.put("startOffset", locatedblock.getStartOffset()); - m.put("block", toJsonString(locatedblock.getBlock())); - - m.put("locations", toJsonString(locatedblock.getLocations())); - return JSON.toString(m); - } - - /** Convert a Json map to LocatedBlock. 
*/ - public static LocatedBlock toLocatedBlock(final Map m) throws IOException { - if (m == null) { - return null; - } - - final ExtendedBlock b = toExtendedBlock((Map)JSON.parse((String)m.get("block"))); - final DatanodeInfo[] locations = toDatanodeInfoArray( - (Object[])JSON.parse((String)m.get("locations"))); - final long startOffset = (Long)m.get("startOffset"); - final boolean isCorrupt = (Boolean)m.get("isCorrupt"); - - final LocatedBlock locatedblock = new LocatedBlock(b, locations, startOffset, isCorrupt); - locatedblock.setBlockToken(toBlockToken((Map)JSON.parse((String)m.get("blockToken")))); - return locatedblock; - } - - /** Convert a LocatedBlock[] to a Json string. */ - public static String toJsonString(final List array - ) throws IOException { - if (array == null) { - return null; - } else if (array.size() == 0) { - return "[]"; - } else { - final StringBuilder b = new StringBuilder().append('[').append( - toJsonString(array.get(0))); - for(int i = 1; i < array.size(); i++) { - b.append(",\n ").append(toJsonString(array.get(i))); - } - return b.append(']').toString(); - } - } - - /** Convert an Object[] to a List of LocatedBlock. - * @throws IOException */ - public static List toLocatedBlockList(final Object[] objects - ) throws IOException { - if (objects == null) { - return null; - } else if (objects.length == 0) { - return Collections.emptyList(); - } else { - final List list = new ArrayList(objects.length); - for(int i = 0; i < objects.length; i++) { - list.add((LocatedBlock)toLocatedBlock((Map)objects[i])); - } - return list; - } - } - - /** Convert LocatedBlocks to a Json string. */ - public static String toJsonString(final LocatedBlocks locatedblocks - ) throws IOException { - if (locatedblocks == null) { - return null; - } - - final Map m = jsonMap.get(); - m.put("fileLength", locatedblocks.getFileLength()); - m.put("isUnderConstruction", locatedblocks.isUnderConstruction()); - - m.put("locatedBlocks", toJsonString(locatedblocks.getLocatedBlocks())); - m.put("lastLocatedBlock", toJsonString(locatedblocks.getLastLocatedBlock())); - m.put("isLastBlockComplete", locatedblocks.isLastBlockComplete()); - return JSON.toString(m); - } - - /** Convert a Json map to LocatedBlock. */ - public static LocatedBlocks toLocatedBlocks(final Map m - ) throws IOException { - if (m == null) { - return null; - } - - final long fileLength = (Long)m.get("fileLength"); - final boolean isUnderConstruction = (Boolean)m.get("isUnderConstruction"); - final List locatedBlocks = toLocatedBlockList( - (Object[])JSON.parse((String) m.get("locatedBlocks"))); - final LocatedBlock lastLocatedBlock = toLocatedBlock( - (Map)JSON.parse((String)m.get("lastLocatedBlock"))); - final boolean isLastBlockComplete = (Boolean)m.get("isLastBlockComplete"); - return new LocatedBlocks(fileLength, isUnderConstruction, locatedBlocks, - lastLocatedBlock, isLastBlockComplete); - } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/ParamFilter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/ParamFilter.java deleted file mode 100644 index 687b8747673..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/ParamFilter.java +++ /dev/null @@ -1,85 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdfs.web; - -import java.net.URI; -import java.util.List; -import java.util.Map; - -import javax.ws.rs.core.MultivaluedMap; -import javax.ws.rs.core.UriBuilder; - -import com.sun.jersey.spi.container.ContainerRequest; -import com.sun.jersey.spi.container.ContainerRequestFilter; -import com.sun.jersey.spi.container.ContainerResponseFilter; -import com.sun.jersey.spi.container.ResourceFilter; - -/** - * A filter to change parameter names to lower cases - * so that parameter names are considered as case insensitive. - */ -public class ParamFilter implements ResourceFilter { - private static final ContainerRequestFilter LOWER_CASE - = new ContainerRequestFilter() { - @Override - public ContainerRequest filter(final ContainerRequest request) { - final MultivaluedMap parameters = request.getQueryParameters(); - if (containsUpperCase(parameters.keySet())) { - //rebuild URI - final URI lower = rebuildQuery(request.getRequestUri(), parameters); - request.setUris(request.getBaseUri(), lower); - } - return request; - } - }; - - @Override - public ContainerRequestFilter getRequestFilter() { - return LOWER_CASE; - } - - @Override - public ContainerResponseFilter getResponseFilter() { - return null; - } - - /** Do the strings contain upper case letters? */ - private static boolean containsUpperCase(final Iterable strings) { - for(String s : strings) { - for(int i = 0; i < s.length(); i++) { - if (Character.isUpperCase(s.charAt(i))) { - return true; - } - } - } - return false; - } - - /** Rebuild the URI query with lower case parameter names. 
*/ - private static URI rebuildQuery(final URI uri, - final MultivaluedMap parameters) { - UriBuilder b = UriBuilder.fromUri(uri).replaceQuery(""); - for(Map.Entry> e : parameters.entrySet()) { - final String key = e.getKey().toLowerCase(); - for(String v : e.getValue()) { - b = b.queryParam(key, v); - } - } - return b.build(); - } -} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index 35c325281b5..df86456e899 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -27,12 +27,9 @@ import java.net.HttpURLConnection; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; -import java.util.Arrays; -import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileAlreadyExistsException; @@ -48,7 +45,6 @@ import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException; import org.apache.hadoop.hdfs.protocol.UnresolvedPathException; -import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.namenode.SafeModeException; import org.apache.hadoop.hdfs.web.resources.AccessTimeParam; import org.apache.hadoop.hdfs.web.resources.BlockSizeParam; @@ -58,9 +54,7 @@ import org.apache.hadoop.hdfs.web.resources.DstPathParam; import org.apache.hadoop.hdfs.web.resources.GetOpParam; import org.apache.hadoop.hdfs.web.resources.GroupParam; import org.apache.hadoop.hdfs.web.resources.HttpOpParam; -import org.apache.hadoop.hdfs.web.resources.LengthParam; import org.apache.hadoop.hdfs.web.resources.ModificationTimeParam; -import org.apache.hadoop.hdfs.web.resources.OffsetParam; import org.apache.hadoop.hdfs.web.resources.OverwriteParam; import org.apache.hadoop.hdfs.web.resources.OwnerParam; import org.apache.hadoop.hdfs.web.resources.Param; @@ -69,16 +63,13 @@ import org.apache.hadoop.hdfs.web.resources.PostOpParam; import org.apache.hadoop.hdfs.web.resources.PutOpParam; import org.apache.hadoop.hdfs.web.resources.RecursiveParam; import org.apache.hadoop.hdfs.web.resources.RenameOptionSetParam; -import org.apache.hadoop.hdfs.web.resources.RenewerParam; import org.apache.hadoop.hdfs.web.resources.ReplicationParam; import org.apache.hadoop.hdfs.web.resources.UserParam; -import org.apache.hadoop.io.Text; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.client.AuthenticatedURL; import org.apache.hadoop.security.authentication.client.AuthenticationException; -import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Progressable; import org.mortbay.util.ajax.JSON; @@ -91,24 +82,17 @@ public class WebHdfsFileSystem extends HftpFileSystem { private static final KerberosUgiAuthenticator AUTH = new KerberosUgiAuthenticator(); - private final UserGroupInformation ugi; + private UserGroupInformation ugi; private final 
AuthenticatedURL.Token authToken = new AuthenticatedURL.Token(); protected Path workingDir; - { - try { - ugi = UserGroupInformation.getCurrentUser(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - @Override public synchronized void initialize(URI uri, Configuration conf ) throws IOException { super.initialize(uri, conf); setConf(conf); + ugi = UserGroupInformation.getCurrentUser(); this.workingDir = getHomeDirectory(); } @@ -179,11 +163,11 @@ public class WebHdfsFileSystem extends HftpFileSystem { } } - URL toUrl(final HttpOpParam.Op op, final Path fspath, + private URL toUrl(final HttpOpParam.Op op, final Path fspath, final Param... parameters) throws IOException { //initialize URI path and query final String path = "/" + PATH_PREFIX - + (fspath == null? "/": makeQualified(fspath).toUri().getPath()); + + makeQualified(fspath).toUri().getPath(); final String query = op.toQueryString() + '&' + new UserParam(ugi) + Param.toSortedString("&", parameters); @@ -412,41 +396,4 @@ public class WebHdfsFileSystem extends HftpFileSystem { } return statuses; } - - @Override - public Token getDelegationToken(final String renewer - ) throws IOException { - final HttpOpParam.Op op = GetOpParam.Op.GETDELEGATIONTOKEN; - final Map m = run(op, null, new RenewerParam(renewer)); - final Token token = JsonUtil.toDelegationToken(m); - token.setService(new Text(getCanonicalServiceName())); - return token; - } - - @Override - public List> getDelegationTokens(final String renewer - ) throws IOException { - final Token[] t = {getDelegationToken(renewer)}; - return Arrays.asList(t); - } - - @Override - public BlockLocation[] getFileBlockLocations(final FileStatus status, - final long offset, final long length) throws IOException { - if (status == null) { - return null; - } - return getFileBlockLocations(status.getPath(), offset, length); - } - - @Override - public BlockLocation[] getFileBlockLocations(final Path p, - final long offset, final long length) throws IOException { - statistics.incrementReadOps(1); - - final HttpOpParam.Op op = GetOpParam.Op.GETFILEBLOCKLOCATIONS; - final Map m = run(op, p, new OffsetParam(offset), - new LengthParam(length)); - return DFSUtil.locatedBlocks2Locations(JsonUtil.toLocatedBlocks(m)); - } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/AccessTimeParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/AccessTimeParam.java index 8d82131c703..830e5cd32dd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/AccessTimeParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/AccessTimeParam.java @@ -20,7 +20,7 @@ package org.apache.hadoop.hdfs.web.resources; /** Access time parameter. */ public class AccessTimeParam extends LongParam { /** Parameter name. */ - public static final String NAME = "accesstime"; + public static final String NAME = "accessTime"; /** Default parameter value. 
*/ public static final String DEFAULT = "-1"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BlockSizeParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BlockSizeParam.java index 96114968074..e50b282f33b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BlockSizeParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BlockSizeParam.java @@ -25,7 +25,7 @@ import org.apache.hadoop.conf.Configuration; /** Block size parameter. */ public class BlockSizeParam extends LongParam { /** Parameter name. */ - public static final String NAME = "blocksize"; + public static final String NAME = "blockSize"; /** Default parameter value. */ public static final String DEFAULT = NULL; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BufferSizeParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BufferSizeParam.java index 148834b1024..424e5ba2533 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BufferSizeParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BufferSizeParam.java @@ -23,7 +23,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic; /** Buffer size parameter. */ public class BufferSizeParam extends IntegerParam { /** Parameter name. */ - public static final String NAME = "buffersize"; + public static final String NAME = "bufferSize"; /** Default parameter value. */ public static final String DEFAULT = NULL; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DelegationParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DelegationParam.java index ad08773ea24..80f0c4b0b33 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DelegationParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DelegationParam.java @@ -17,12 +17,13 @@ */ package org.apache.hadoop.hdfs.web.resources; +import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.security.UserGroupInformation; /** Delegation token parameter. */ public class DelegationParam extends StringParam { /** Parameter name. */ - public static final String NAME = "delegation"; + public static final String NAME = JspHelper.DELEGATION_PARAMETER_NAME; /** Default parameter value. */ public static final String DEFAULT = ""; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java index 12962b4a4ee..e61e858ee49 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java @@ -21,6 +21,9 @@ import java.net.HttpURLConnection; /** Http DELETE operation parameter. */ public class DeleteOpParam extends HttpOpParam { + /** Parameter name. */ + public static final String NAME = "deleteOp"; + /** Delete operations. 
*/ public static enum Op implements HttpOpParam.Op { DELETE(HttpURLConnection.HTTP_OK), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DstPathParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DstPathParam.java index 5fa52456f92..7d522a38770 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DstPathParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DstPathParam.java @@ -22,7 +22,7 @@ import org.apache.hadoop.fs.Path; /** Destination path parameter. */ public class DstPathParam extends StringParam { /** Parameter name. */ - public static final String NAME = "dstpath"; + public static final String NAME = "dstPath"; /** Default parameter value. */ public static final String DEFAULT = ""; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java index d547f1b1b4d..6f11871ebb8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java @@ -21,16 +21,16 @@ import java.net.HttpURLConnection; /** Http GET operation parameter. */ public class GetOpParam extends HttpOpParam { + /** Parameter name. */ + public static final String NAME = "getOp"; + /** Get operations. */ public static enum Op implements HttpOpParam.Op { OPEN(HttpURLConnection.HTTP_OK), - GETFILEBLOCKLOCATIONS(HttpURLConnection.HTTP_OK), GETFILESTATUS(HttpURLConnection.HTTP_OK), LISTSTATUS(HttpURLConnection.HTTP_OK), - GETDELEGATIONTOKEN(HttpURLConnection.HTTP_OK), - NULL(HttpURLConnection.HTTP_NOT_IMPLEMENTED); final int expectedHttpResponseCode; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java index 422ec0f2f2f..644c4032dbc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java @@ -20,9 +20,6 @@ package org.apache.hadoop.hdfs.web.resources; /** Http operation parameter. */ public abstract class HttpOpParam & HttpOpParam.Op> extends EnumParam { - /** Parameter name. */ - public static final String NAME = "op"; - /** Default parameter value. */ public static final String DEFAULT = NULL; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ModificationTimeParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ModificationTimeParam.java index a0e38a97e7d..d43da073280 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ModificationTimeParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ModificationTimeParam.java @@ -20,7 +20,7 @@ package org.apache.hadoop.hdfs.web.resources; /** Modification time parameter. */ public class ModificationTimeParam extends LongParam { /** Parameter name. */ - public static final String NAME = "modificationtime"; + public static final String NAME = "modificationTime"; /** Default parameter value. 
*/ public static final String DEFAULT = "-1"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/OverwriteParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/OverwriteParam.java index f6945bb4351..6639ece7b25 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/OverwriteParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/OverwriteParam.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.web.resources; -/** Overwrite parameter. */ +/** Recursive parameter. */ public class OverwriteParam extends BooleanParam { /** Parameter name. */ public static final String NAME = "overwrite"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java index b553ecc6701..116d6af8b36 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java @@ -21,6 +21,9 @@ import java.net.HttpURLConnection; /** Http POST operation parameter. */ public class PostOpParam extends HttpOpParam { + /** Parameter name. */ + public static final String NAME = "postOp"; + /** Post operations. */ public static enum Op implements HttpOpParam.Op { APPEND(HttpURLConnection.HTTP_OK), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java index dcfaa6f06cd..00703fefbc7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java @@ -21,6 +21,9 @@ import java.net.HttpURLConnection; /** Http POST operation parameter. */ public class PutOpParam extends HttpOpParam { + /** Parameter name. */ + public static final String NAME = "putOp"; + /** Put operations. */ public static enum Op implements HttpOpParam.Op { CREATE(true, HttpURLConnection.HTTP_CREATED), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenameOptionSetParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenameOptionSetParam.java index d7c157d5086..ec66a51c788 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenameOptionSetParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenameOptionSetParam.java @@ -22,7 +22,7 @@ import org.apache.hadoop.fs.Options; /** Rename option set parameter. */ public class RenameOptionSetParam extends EnumSetParam { /** Parameter name. */ - public static final String NAME = "renameoptions"; + public static final String NAME = "renameOptions"; /** Default parameter value. 
*/ public static final String DEFAULT = ""; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenewerParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenewerParam.java deleted file mode 100644 index 750e8bc91b1..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenewerParam.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdfs.web.resources; - -/** Renewer parameter. */ -public class RenewerParam extends StringParam { - /** Parameter name. */ - public static final String NAME = "renewer"; - /** Default parameter value. */ - public static final String DEFAULT = NULL; - - private static final Domain DOMAIN = new Domain(NAME, null); - - /** - * Constructor. - * @param str a string representation of the parameter value. - */ - public RenewerParam(final String str) { - super(DOMAIN, str == null || str.equals(DEFAULT)? null: str); - } - - @Override - public String getName() { - return NAME; - } -} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 59a8ff645ff..9fae462f04f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -683,4 +683,24 @@ creations/deletions), or "all". + + dfs.web.authentication.kerberos.principal + HTTP/${dfs.web.hostname}@${kerberos.realm} + + The HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint. + + The HTTP Kerberos principal MUST start with 'HTTP/' per Kerberos + HTTP SPENGO specification. + + + + + dfs.web.authentication.kerberos.keytab + ${user.home}/dfs.web.keytab + + The Kerberos keytab file with the credentials for the + HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint. 
+ + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java index 495e8e191a3..2d50ce440f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java @@ -72,7 +72,6 @@ public class TestDFSPermission extends TestCase { final private static Path NON_EXISTENT_FILE = new Path("/NonExistentFile"); private FileSystem fs; - private MiniDFSCluster cluster; private static Random r; static { @@ -106,25 +105,18 @@ public class TestDFSPermission extends TestCase { } } - @Override - public void setUp() throws IOException { - cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); - cluster.waitActive(); - } - - @Override - public void tearDown() throws IOException { - if (cluster != null) { - cluster.shutdown(); - } - } - /** This tests if permission setting in create, mkdir, and * setPermission works correctly */ public void testPermissionSetting() throws Exception { - testPermissionSetting(OpType.CREATE); // test file creation - testPermissionSetting(OpType.MKDIRS); // test directory creation + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + try { + cluster.waitActive(); + testPermissionSetting(OpType.CREATE); // test file creation + testPermissionSetting(OpType.MKDIRS); // test directory creation + } finally { + cluster.shutdown(); + } } private void initFileSystem(short umask) throws Exception { @@ -253,22 +245,17 @@ public class TestDFSPermission extends TestCase { } } - /** - * check that ImmutableFsPermission can be used as the argument - * to setPermission - */ - public void testImmutableFsPermission() throws IOException { - fs = FileSystem.get(conf); - - // set the permission of the root to be world-wide rwx - fs.setPermission(new Path("/"), - FsPermission.createImmutable((short)0777)); - } - /* check if the ownership of a file/directory is set correctly */ public void testOwnership() throws Exception { - testOwnership(OpType.CREATE); // test file creation - testOwnership(OpType.MKDIRS); // test directory creation + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + try { + cluster.waitActive(); + testOwnership(OpType.CREATE); // test file creation + testOwnership(OpType.MKDIRS); // test directory creation + } finally { + fs.close(); + cluster.shutdown(); + } } /* change a file/directory's owner and group. 
@@ -355,7 +342,9 @@ public class TestDFSPermission extends TestCase { /* Check if namenode performs permission checking correctly for * superuser, file owner, group owner, and other users */ public void testPermissionChecking() throws Exception { + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); try { + cluster.waitActive(); fs = FileSystem.get(conf); // set the permission of the root to be world-wide rwx @@ -412,6 +401,7 @@ public class TestDFSPermission extends TestCase { parentPermissions, permissions, parentPaths, filePaths, dirPaths); } finally { fs.close(); + cluster.shutdown(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index f154ff7d203..fc883118f82 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -29,7 +29,8 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; -import org.apache.hadoop.HadoopIllegalArgumentException; +import junit.framework.Assert; + import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -39,7 +40,8 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; -import static org.apache.hadoop.hdfs.DFSConfigKeys.*; +import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION; + public class TestDFSUtil { /** @@ -74,141 +76,79 @@ public class TestDFSUtil { } } - assertTrue("expected 1 corrupt files but got " + corruptCount, - corruptCount == 1); - + assertTrue("expected 1 corrupt files but got " + corruptCount, + corruptCount == 1); + // test an empty location bs = DFSUtil.locatedBlocks2Locations(new LocatedBlocks()); assertEquals(0, bs.length); } - - private Configuration setupAddress(String key) { + /** + * Test for + * {@link DFSUtil#getNameServiceIds(Configuration)} + * {@link DFSUtil#getNameServiceId(Configuration)} + * {@link DFSUtil#getNNServiceRpcAddresses(Configuration)} + */ + @Test + public void testMultipleNamenodes() throws IOException { HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFS_FEDERATION_NAMESERVICES, "nn1"); - conf.set(DFSUtil.getNameServiceIdKey(key, "nn1"), "localhost:9000"); - return conf; - } - - /** - * Test {@link DFSUtil#getNamenodeNameServiceId(Configuration)} to ensure - * nameserviceId from the configuration returned - */ - @Test - public void getNameServiceId() { - HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFS_FEDERATION_NAMESERVICE_ID, "nn1"); - assertEquals("nn1", DFSUtil.getNamenodeNameServiceId(conf)); - } - - /** - * Test {@link DFSUtil#getNameNodeNameServiceId(Configuration)} to ensure - * nameserviceId for namenode is determined based on matching the address with - * local node's address - */ - @Test - public void getNameNodeNameServiceId() { - Configuration conf = setupAddress(DFS_NAMENODE_RPC_ADDRESS_KEY); - assertEquals("nn1", DFSUtil.getNamenodeNameServiceId(conf)); - } - - /** - * Test {@link DFSUtil#getBackupNameServiceId(Configuration)} to ensure - * nameserviceId for backup node is determined based on matching the address - * with local node's address - */ - @Test - public 
void getBackupNameServiceId() { - Configuration conf = setupAddress(DFS_NAMENODE_BACKUP_ADDRESS_KEY); - assertEquals("nn1", DFSUtil.getBackupNameServiceId(conf)); - } - - /** - * Test {@link DFSUtil#getSecondaryNameServiceId(Configuration)} to ensure - * nameserviceId for backup node is determined based on matching the address - * with local node's address - */ - @Test - public void getSecondaryNameServiceId() { - Configuration conf = setupAddress(DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY); - assertEquals("nn1", DFSUtil.getSecondaryNameServiceId(conf)); - } - - /** - * Test {@link DFSUtil#getNameServiceId(Configuration, String))} to ensure - * exception is thrown when multiple rpc addresses match the local node's - * address - */ - @Test(expected = HadoopIllegalArgumentException.class) - public void testGetNameServiceIdException() { - HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFS_FEDERATION_NAMESERVICES, "nn1,nn2"); - conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), - "localhost:9000"); - conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), - "localhost:9001"); - DFSUtil.getNamenodeNameServiceId(conf); - fail("Expected exception is not thrown"); - } - - /** - * Test {@link DFSUtil#getNameServiceIds(Configuration)} - */ - @Test - public void testGetNameServiceIds() { - HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFS_FEDERATION_NAMESERVICES, "nn1,nn2"); + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, "nn1,nn2"); + + // Test - The configured nameserviceIds are returned Collection nameserviceIds = DFSUtil.getNameServiceIds(conf); Iterator it = nameserviceIds.iterator(); assertEquals(2, nameserviceIds.size()); assertEquals("nn1", it.next().toString()); assertEquals("nn2", it.next().toString()); - } - - /** - * Test for {@link DFSUtil#getNNServiceRpcAddresses(Configuration)} - * {@link DFSUtil#getNameServiceIdFromAddress(Configuration, InetSocketAddress, String...) 
- * (Configuration)} - */ - @Test - public void testMultipleNamenodes() throws IOException { - HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFS_FEDERATION_NAMESERVICES, "nn1,nn2"); + + // Tests default nameserviceId is returned + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID, "nn1"); + assertEquals("nn1", DFSUtil.getNameServiceId(conf)); + // Test - configured list of namenodes are returned final String NN1_ADDRESS = "localhost:9000"; final String NN2_ADDRESS = "localhost:9001"; final String NN3_ADDRESS = "localhost:9002"; - conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), - NN1_ADDRESS); - conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), - NN2_ADDRESS); - - Collection nnAddresses = DFSUtil - .getNNServiceRpcAddresses(conf); + conf.set(DFSUtil.getNameServiceIdKey( + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), NN1_ADDRESS); + conf.set(DFSUtil.getNameServiceIdKey( + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), NN2_ADDRESS); + + Collection nnAddresses = + DFSUtil.getNNServiceRpcAddresses(conf); assertEquals(2, nnAddresses.size()); Iterator iterator = nnAddresses.iterator(); + assertEquals(2, nameserviceIds.size()); InetSocketAddress addr = iterator.next(); assertEquals("localhost", addr.getHostName()); assertEquals(9000, addr.getPort()); addr = iterator.next(); assertEquals("localhost", addr.getHostName()); assertEquals(9001, addr.getPort()); - + // Test - can look up nameservice ID from service address - checkNameServiceId(conf, NN1_ADDRESS, "nn1"); - checkNameServiceId(conf, NN2_ADDRESS, "nn2"); - checkNameServiceId(conf, NN3_ADDRESS, null); + InetSocketAddress testAddress1 = NetUtils.createSocketAddr(NN1_ADDRESS); + String nameserviceId = DFSUtil.getNameServiceIdFromAddress( + conf, testAddress1, + DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); + assertEquals("nn1", nameserviceId); + InetSocketAddress testAddress2 = NetUtils.createSocketAddr(NN2_ADDRESS); + nameserviceId = DFSUtil.getNameServiceIdFromAddress( + conf, testAddress2, + DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); + assertEquals("nn2", nameserviceId); + InetSocketAddress testAddress3 = NetUtils.createSocketAddr(NN3_ADDRESS); + nameserviceId = DFSUtil.getNameServiceIdFromAddress( + conf, testAddress3, + DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); + assertNull(nameserviceId); } - - public void checkNameServiceId(Configuration conf, String addr, - String expectedNameServiceId) { - InetSocketAddress s = NetUtils.createSocketAddr(addr); - String nameserviceId = DFSUtil.getNameServiceIdFromAddress(conf, s, - DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); - assertEquals(expectedNameServiceId, nameserviceId); - } - - /** + + /** * Test for * {@link DFSUtil#isDefaultNamenodeAddress(Configuration, InetSocketAddress, String...)} */ @@ -217,25 +157,27 @@ public class TestDFSUtil { HdfsConfiguration conf = new HdfsConfiguration(); final String DEFAULT_ADDRESS = "localhost:9000"; final String NN2_ADDRESS = "localhost:9001"; - conf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, DEFAULT_ADDRESS); - + conf.set(DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, DEFAULT_ADDRESS); + InetSocketAddress testAddress1 = NetUtils.createSocketAddr(DEFAULT_ADDRESS); boolean isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress1, - DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, 
DFS_NAMENODE_RPC_ADDRESS_KEY); + DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); assertTrue(isDefault); InetSocketAddress testAddress2 = NetUtils.createSocketAddr(NN2_ADDRESS); isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress2, - DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); + DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); assertFalse(isDefault); } - + /** Tests to ensure default namenode is used as fallback */ @Test public void testDefaultNamenode() throws IOException { HdfsConfiguration conf = new HdfsConfiguration(); final String hdfs_default = "hdfs://localhost:9999/"; - conf.set(FS_DEFAULT_NAME_KEY, hdfs_default); - // If DFS_FEDERATION_NAMESERVICES is not set, verify that + conf.set(DFSConfigKeys.FS_DEFAULT_NAME_KEY, hdfs_default); + // If DFSConfigKeys.DFS_FEDERATION_NAMESERVICES is not set, verify that // default namenode address is returned. List addrList = DFSUtil.getNNServiceRpcAddresses(conf); assertEquals(1, addrList.size()); @@ -249,26 +191,26 @@ public class TestDFSUtil { @Test public void testConfModification() throws IOException { final HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFS_FEDERATION_NAMESERVICES, "nn1"); - conf.set(DFS_FEDERATION_NAMESERVICE_ID, "nn1"); - final String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); - + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, "nn1"); + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID, "nn1"); + final String nameserviceId = DFSUtil.getNameServiceId(conf); + // Set the nameservice specific keys with nameserviceId in the config key for (String key : NameNode.NAMESERVICE_SPECIFIC_KEYS) { // Note: value is same as the key conf.set(DFSUtil.getNameServiceIdKey(key, nameserviceId), key); } - + // Initialize generic keys from specific keys - NameNode.initializeGenericKeys(conf, nameserviceId); - + NameNode.initializeGenericKeys(conf); + // Retrieve the keys without nameserviceId and Ensure generic keys are set // to the correct value for (String key : NameNode.NAMESERVICE_SPECIFIC_KEYS) { assertEquals(key, conf.get(key)); } } - + /** * Tests for empty configuration, an exception is thrown from * {@link DFSUtil#getNNServiceRpcAddresses(Configuration)} @@ -296,16 +238,16 @@ public class TestDFSUtil { } catch (IOException expected) { } } - + @Test - public void testGetServerInfo() { + public void testGetServerInfo(){ HdfsConfiguration conf = new HdfsConfiguration(); conf.set(HADOOP_SECURITY_AUTHENTICATION, "kerberos"); UserGroupInformation.setConfiguration(conf); String httpsport = DFSUtil.getInfoServer(null, conf, true); - assertEquals("0.0.0.0:50470", httpsport); + Assert.assertEquals("0.0.0.0:50470", httpsport); String httpport = DFSUtil.getInfoServer(null, conf, false); - assertEquals("0.0.0.0:50070", httpport); + Assert.assertEquals("0.0.0.0:50070", httpport); } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java index d7ee516b0a8..a0727a6c90b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java @@ -17,10 +17,6 @@ */ package org.apache.hadoop.hdfs; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static 
org.junit.Assert.assertTrue; - import java.io.OutputStream; import java.security.PrivilegedExceptionAction; @@ -28,15 +24,17 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; +import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException; import org.apache.hadoop.hdfs.protocol.QuotaExceededException; -import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; import org.apache.hadoop.hdfs.tools.DFSAdmin; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException; +import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; + import org.junit.Test; +import static org.junit.Assert.*; /** A class for testing quota-related commands */ public class TestQuota { @@ -843,14 +841,6 @@ public class TestQuota { DFSAdmin admin = new DFSAdmin(conf); try { - - //Test for deafult NameSpace Quota - long nsQuota = FSImageTestUtil.getNSQuota(cluster.getNameNode() - .getNamesystem()); - assertTrue( - "Default namespace quota expected as long max. But the value is :" - + nsQuota, nsQuota == Long.MAX_VALUE); - Path dir = new Path("/test"); boolean exceededQuota = false; ContentSummary c; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java index 9c577f740ee..d6397b6a2ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java @@ -23,12 +23,12 @@ package org.apache.hadoop.hdfs.security; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; -import java.net.URI; import java.security.PrivilegedExceptionAction; +import junit.framework.Assert; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -38,16 +38,12 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; -import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; -import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; -import org.apache.log4j.Level; import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -60,13 +56,12 @@ public class TestDelegationToken { @Before public void setUp() throws Exception { config = new HdfsConfiguration(); - config.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true); 
config.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 10000); config.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 5000); config.set("hadoop.security.auth_to_local", "RULE:[2:$1@$0](JobTracker@.*FOO.COM)s/@.*//" + "DEFAULT"); FileSystem.setDefaultUri(config, "hdfs://localhost:" + "0"); - cluster = new MiniDFSCluster.Builder(config).numDataNodes(0).build(); + cluster = new MiniDFSCluster.Builder(config).build(); cluster.waitActive(); dtSecretManager = NameNodeAdapter.getDtSecretManager( cluster.getNamesystem()); @@ -158,31 +153,6 @@ public class TestDelegationToken { dtSecretManager.renewToken(token, "JobTracker"); } - @Test - public void testDelegationTokenWebHdfsApi() throws Exception { - ((Log4JLogger)NamenodeWebHdfsMethods.LOG).getLogger().setLevel(Level.ALL); - final String uri = WebHdfsFileSystem.SCHEME + "://" - + config.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY); - //get file system as JobTracker - final UserGroupInformation ugi = UserGroupInformation.createUserForTesting( - "JobTracker", new String[]{"user"}); - final WebHdfsFileSystem webhdfs = ugi.doAs( - new PrivilegedExceptionAction() { - @Override - public WebHdfsFileSystem run() throws Exception { - return (WebHdfsFileSystem)FileSystem.get(new URI(uri), config); - } - }); - - final Token token = webhdfs.getDelegationToken("JobTracker"); - DelegationTokenIdentifier identifier = new DelegationTokenIdentifier(); - byte[] tokenId = token.getIdentifier(); - identifier.readFields(new DataInputStream(new ByteArrayInputStream(tokenId))); - LOG.info("A valid token should have non-null password, and should be renewed successfully"); - Assert.assertTrue(null != dtSecretManager.retrievePassword(identifier)); - dtSecretManager.renewToken(token, "JobTracker"); - } - @Test public void testDelegationTokenWithDoAs() throws Exception { final DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHost2NodesMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHost2NodesMap.java index d34cf1c4c6e..7b9126f7de8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHost2NodesMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHost2NodesMap.java @@ -18,34 +18,31 @@ package org.apache.hadoop.hdfs.server.blockmanagement; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import junit.framework.TestCase; import org.apache.hadoop.hdfs.protocol.DatanodeID; -import org.junit.Before; -import org.junit.Test; +import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import org.apache.hadoop.hdfs.server.blockmanagement.Host2NodesMap; -public class TestHost2NodesMap { - private Host2NodesMap map = new Host2NodesMap(); - private final DatanodeDescriptor dataNodes[] = new DatanodeDescriptor[] { +public class TestHost2NodesMap extends TestCase { + static private Host2NodesMap map = new Host2NodesMap(); + private final static DatanodeDescriptor dataNodes[] = new DatanodeDescriptor[] { new DatanodeDescriptor(new DatanodeID("h1:5020"), "/d1/r1"), new DatanodeDescriptor(new DatanodeID("h2:5020"), "/d1/r1"), new DatanodeDescriptor(new DatanodeID("h3:5020"), "/d1/r2"), new DatanodeDescriptor(new DatanodeID("h3:5030"), "/d1/r2"), }; - private final DatanodeDescriptor 
NULL_NODE = null; - private final DatanodeDescriptor NODE = new DatanodeDescriptor(new DatanodeID("h3:5040"), - "/d1/r4"); + private final static DatanodeDescriptor NULL_NODE = null; + private final static DatanodeDescriptor NODE = + new DatanodeDescriptor(new DatanodeID("h3:5040"), "/d1/r4"); - @Before - public void setup() { + static { for(DatanodeDescriptor node:dataNodes) { map.add(node); } map.add(NULL_NODE); } - @Test public void testContains() throws Exception { for(int i=0; i " + line); - } - - //check if the command successes. - assertTrue(fs.getFileStatus(p).isDirectory()); - } } diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 43fed6baa1c..1d637419eb5 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -29,8 +29,6 @@ Trunk (unreleased changes) findBugs, correct links to findBugs artifacts and no links to the artifacts when there are no warnings. (Tom White via vinodkv). - MAPREDUCE-3081. Fix vaidya startup script. (gkesavan via suhas). - Release 0.23.0 - Unreleased INCOMPATIBLE CHANGES @@ -72,9 +70,6 @@ Release 0.23.0 - Unreleased MAPREDUCE-2037. Capture intermediate progress, CPU and memory usage for tasks. (Dick King via acmurthy) - MAPREDUCE-2930. Added the ability to be able to generate graphs from the - state-machine definitions. (Binglin Chang via vinodkv) - IMPROVEMENTS MAPREDUCE-2187. Reporter sends progress during sort/merge. (Anupam Seth via @@ -312,15 +307,6 @@ Release 0.23.0 - Unreleased MAPREDUCE-2726. Added job-file to the AM and JobHistoryServer web interfaces. (Jeffrey Naisbitt via vinodkv) - MAPREDUCE-3055. Simplified ApplicationAttemptId passing to - ApplicationMaster via environment variable. (vinodkv) - - MAPREDUCE-3092. Removed a special comparator for JobIDs in JobHistory as - JobIDs are already comparable. (Devaraj K via vinodkv) - - MAPREDUCE-3099. Add docs for setting up a single node MRv2 cluster. - (mahadev) - OPTIMIZATIONS MAPREDUCE-2026. Make JobTracker.getJobCounters() and @@ -332,9 +318,6 @@ Release 0.23.0 - Unreleased MAPREDUCE-901. Efficient framework counters. (llu via acmurthy) - MAPREDUCE-2880. Improve classpath-construction for mapreduce AM and - containers. (Arun C Murthy via vinodkv) - BUG FIXES MAPREDUCE-2603. Disable High-Ram emulation in system tests. @@ -1387,91 +1370,6 @@ Release 0.23.0 - Unreleased YarnClientProtocolProvider and ensured MiniMRYarnCluster sets JobHistory configuration for tests. (acmurthy) - MAPREDUCE-3018. Fixed -file option for streaming. (mahadev via acmurthy) - - MAPREDUCE-3036. Fixed metrics for reserved resources in CS. (Robert Evans - via acmurthy) - - MAPREDUCE-2998. Fixed a bug in TaskAttemptImpl which caused it to fork - bin/mapred too many times. (vinodkv via acmurthy) - - MAPREDUCE-3023. Fixed clients to display queue state correctly. (Ravi - Prakash via acmurthy) - - MAPREDUCE-2970. Fixed NPEs in corner cases with different configurations - for mapreduce.framework.name. (Venu Gopala Rao via vinodkv) - - MAPREDUCE-3062. Fixed default RMAdmin address. (Chris Riccomini - via acmurthy) - - MAPREDUCE-3066. Fixed default ResourceTracker address for the NodeManager. - (Chris Riccomini via acmurthy) - - MAPREDUCE-3044. Pipes jobs stuck without making progress. (mahadev) - - MAPREDUCE-2754. Fixed MR AM stdout, stderr and syslog to redirect to - correct log-files. (Ravi Teja Ch N V via vinodkv) - - MAPREDUCE-3073. Fixed build issues in MR1. (mahadev via acmurthy) - - MAPREDUCE-2691. 
Increase threadpool size for launching containers in - MapReduce ApplicationMaster. (vinodkv via acmurthy) - - - MAPREDUCE-2990. Fixed display of NodeHealthStatus. (Subroto Sanyal via - acmurthy) - - MAPREDUCE-3053. Better diagnostic message for unknown methods in ProtoBuf - RPCs. (vinodkv via acmurthy) - - MAPREDUCE-2952. Fixed ResourceManager/MR-client to consume diagnostics - for AM failures in a couple of corner cases. (Arun C Murthy via vinodkv) - - MAPREDUCE-3064. 27 unit test failures with Invalid - "mapreduce.jobtracker.address" configuration value for - JobTracker: "local" (Venu Gopala Rao via mahadev) - - MAPREDUCE-3090. Fix MR AM to use ApplicationAttemptId rather than - (ApplicationId, startCount) consistently. (acmurthy) - - MAPREDUCE-2646. Fixed AMRMProtocol to return containers based on - priority. (Sharad Agarwal and Arun C Murthy via vinodkv) - - MAPREDUCE-3031. Proper handling of killed containers to prevent stuck - containers/AMs on an external kill signal. (Siddharth Seth via vinodkv) - - MAPREDUCE-2984. Better error message for displaying completed containers. - (Devaraj K via acmurthy) - - MAPREDUCE-3071. app master configuration web UI link under the Job menu - opens up application menu. (thomas graves via mahadev) - - MAPREDUCE-3067. Ensure exit-code is set correctly for containers. (Hitesh - Shah via acmurthy) - - MAPREDUCE-2999. Fix YARN webapp framework to properly filter servlet - paths. (Thomas Graves via vinodkv) - - MAPREDUCE-3095. fairscheduler ivy including wrong version for hdfs. - (John George via mahadev) - - MAPREDUCE-3054. Unable to kill submitted jobs. (mahadev) - - MAPREDUCE-3021. Change base urls for RM web-ui. (Thomas Graves via - acmurthy) - - MAPREDUCE-3041. Fixed ClientRMProtocol to provide min/max resource - capabilities along-with new ApplicationId for application submission. - (Hitesh Shah via acmurthy) - - MAPREDUCE-2843. Fixed the node-table to be completely displayed and making - node entries on RM UI to be sortable. (Abhijit Suresh Shingate via vinodkv) - - MAPREDUCE-3110. Fixed TestRPC failure. (vinodkv) - - MAPREDUCE-3078. Ensure MapReduce AM reports progress correctly for - displaying on the RM Web-UI. 
(vinodkv via acmurthy) - Release 0.22.0 - Unreleased INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml index 0f12598fc17..17d4b8b0e54 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml @@ -55,12 +55,6 @@ hadoop-yarn-server-resourcemanager test - - org.apache.hadoop - hadoop-yarn-server-resourcemanager - test-jar - test - org.apache.hadoop hadoop-mapreduce-client-shuffle @@ -119,41 +113,4 @@ - - - - visualize - - false - - - - - org.codehaus.mojo - exec-maven-plugin - 1.2 - - - compile - - java - - - test - org.apache.hadoop.yarn.util.VisualizeStateMachine - - MapReduce - org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl, - org.apache.hadoop.mapreduce.v2.app.job.impl.TaskImpl, - org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl - MapReduce.gv - - - - - - - - - diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java index ce6557abd03..d9d5b1f3076 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java @@ -18,27 +18,27 @@ package org.apache.hadoop.mapred; +import java.io.File; import java.net.InetSocketAddress; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Vector; -import org.apache.hadoop.fs.Path; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.mapred.TaskLog.LogName; import org.apache.hadoop.mapreduce.ID; -import org.apache.hadoop.mapreduce.MRJobConfig; -import org.apache.hadoop.mapreduce.v2.util.MRApps; -import org.apache.hadoop.yarn.api.ApplicationConstants; -import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.yarn.conf.YarnConfiguration; public class MapReduceChildJVM { + private static final String SYSTEM_PATH_SEPARATOR = + System.getProperty("path.separator"); - private static String getTaskLogFile(LogName filter) { - return ApplicationConstants.LOG_DIR_EXPANSION_VAR + Path.SEPARATOR + - filter.toString(); + private static final Log LOG = LogFactory.getLog(MapReduceChildJVM.class); + + private static File getTaskLogFile(String logDir, LogName filter) { + return new File(logDir, filter.toString()); } private static String getChildEnv(JobConf jobConf, boolean isMap) { @@ -50,53 +50,32 @@ public class MapReduceChildJVM { jobConf.get(jobConf.MAPRED_TASK_ENV)); } - private static String getChildLogLevel(JobConf conf, boolean isMap) { - if (isMap) { - return conf.get( - MRJobConfig.MAP_LOG_LEVEL, - JobConf.DEFAULT_LOG_LEVEL.toString() - ); - } else { - return conf.get( - MRJobConfig.REDUCE_LOG_LEVEL, - JobConf.DEFAULT_LOG_LEVEL.toString() - ); - } - } - - public static void setVMEnv(Map environment, - Task task) { + public static void setVMEnv(Map env, + List classPaths, String pwd, String containerLogDir, + String nmLdLibraryPath, 
Task task, CharSequence applicationTokensFile) { JobConf conf = task.conf; - // Shell - environment.put( - Environment.SHELL.name(), - conf.get( - MRJobConfig.MAPRED_ADMIN_USER_SHELL, - MRJobConfig.DEFAULT_SHELL) - ); - - // Add pwd to LD_LIBRARY_PATH, add this before adding anything else - MRApps.addToEnvironment( - environment, - Environment.LD_LIBRARY_PATH.name(), - Environment.PWD.$()); + // Add classpath. + CharSequence cp = env.get("CLASSPATH"); + String classpath = StringUtils.join(SYSTEM_PATH_SEPARATOR, classPaths); + if (null == cp) { + env.put("CLASSPATH", classpath); + } else { + env.put("CLASSPATH", classpath + SYSTEM_PATH_SEPARATOR + cp); + } - // Add the env variables passed by the user & admin - String mapredChildEnv = getChildEnv(conf, task.isMapTask()); - MRApps.setEnvFromInputString(environment, mapredChildEnv); - MRApps.setEnvFromInputString( - environment, - conf.get( - MRJobConfig.MAPRED_ADMIN_USER_ENV, - MRJobConfig.DEFAULT_MAPRED_ADMIN_USER_ENV) - ); + /////// Environmental variable LD_LIBRARY_PATH + StringBuilder ldLibraryPath = new StringBuilder(); - // Set logging level - environment.put( - "HADOOP_ROOT_LOGGER", - getChildLogLevel(conf, task.isMapTask()) + ",CLA"); + ldLibraryPath.append(nmLdLibraryPath); + ldLibraryPath.append(SYSTEM_PATH_SEPARATOR); + ldLibraryPath.append(pwd); + env.put("LD_LIBRARY_PATH", ldLibraryPath.toString()); + /////// Environmental variable LD_LIBRARY_PATH + + // for the child of task jvm, set hadoop.root.logger + env.put("HADOOP_ROOT_LOGGER", "DEBUG,CLA"); // TODO: Debug // TODO: The following is useful for instance in streaming tasks. Should be // set in ApplicationMaster's env by the RM. @@ -110,69 +89,76 @@ public class MapReduceChildJVM { // properties. long logSize = TaskLog.getTaskLogLength(conf); Vector logProps = new Vector(4); - setupLog4jProperties(logProps, logSize); + setupLog4jProperties(logProps, logSize, containerLogDir); Iterator it = logProps.iterator(); StringBuffer buffer = new StringBuffer(); while (it.hasNext()) { buffer.append(" " + it.next()); } hadoopClientOpts = hadoopClientOpts + buffer.toString(); - environment.put("HADOOP_CLIENT_OPTS", hadoopClientOpts); + + env.put("HADOOP_CLIENT_OPTS", hadoopClientOpts); - // Add stdout/stderr env - environment.put( - MRJobConfig.STDOUT_LOGFILE_ENV, - getTaskLogFile(TaskLog.LogName.STDOUT) - ); - environment.put( - MRJobConfig.STDERR_LOGFILE_ENV, - getTaskLogFile(TaskLog.LogName.STDERR) - ); + // add the env variables passed by the user + String mapredChildEnv = getChildEnv(conf, task.isMapTask()); + if (mapredChildEnv != null && mapredChildEnv.length() > 0) { + String childEnvs[] = mapredChildEnv.split(","); + for (String cEnv : childEnvs) { + String[] parts = cEnv.split("="); // split on '=' + String value = (String) env.get(parts[0]); + if (value != null) { + // replace $env with the child's env constructed by tt's + // example LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp + value = parts[1].replace("$" + parts[0], value); + } else { + // this key is not configured by the tt for the child .. get it + // from the tt's env + // example PATH=$PATH:/tmp + value = System.getenv(parts[0]); // Get from NM? + if (value != null) { + // the env key is present in the tt's env + value = parts[1].replace("$" + parts[0], value); + } else { + // the env key is note present anywhere .. simply set it + // example X=$X:/tmp or X=/tmp + value = parts[1].replace("$" + parts[0], ""); + } + } + env.put(parts[0], value); + } + } + + //This should not be set here (If an OS check is requied. 
moved to ContainerLuanch) + // env.put("JVM_PID", "`echo $$`"); + + env.put(Constants.STDOUT_LOGFILE_ENV, + getTaskLogFile(containerLogDir, TaskLog.LogName.STDOUT).toString()); + env.put(Constants.STDERR_LOGFILE_ENV, + getTaskLogFile(containerLogDir, TaskLog.LogName.STDERR).toString()); } private static String getChildJavaOpts(JobConf jobConf, boolean isMapTask) { - String userClasspath = ""; - String adminClasspath = ""; if (isMapTask) { - userClasspath = - jobConf.get( - JobConf.MAPRED_MAP_TASK_JAVA_OPTS, - jobConf.get( - JobConf.MAPRED_TASK_JAVA_OPTS, - JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS) - ); - adminClasspath = - jobConf.get( - MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS, - MRJobConfig.DEFAULT_MAPRED_ADMIN_JAVA_OPTS); - } else { - userClasspath = - jobConf.get( - JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, - jobConf.get( - JobConf.MAPRED_TASK_JAVA_OPTS, - JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS) - ); - adminClasspath = - jobConf.get( - MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS, - MRJobConfig.DEFAULT_MAPRED_ADMIN_JAVA_OPTS); + return jobConf.get(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, jobConf.get( + JobConf.MAPRED_TASK_JAVA_OPTS, + JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS)); } - - // Add admin classpath first so it can be overridden by user. - return adminClasspath + " " + userClasspath; + return jobConf + .get(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, jobConf.get( + JobConf.MAPRED_TASK_JAVA_OPTS, + JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS)); } private static void setupLog4jProperties(Vector vargs, - long logSize) { + long logSize, String containerLogDir) { vargs.add("-Dlog4j.configuration=container-log4j.properties"); - vargs.add("-D" + MRJobConfig.TASK_LOG_DIR + "=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR); - vargs.add("-D" + MRJobConfig.TASK_LOG_SIZE + "=" + logSize); + vargs.add("-Dhadoop.yarn.mr.containerLogDir=" + containerLogDir); + vargs.add("-Dhadoop.yarn.mr.totalLogFileSize=" + logSize); } public static List getVMCommand( - InetSocketAddress taskAttemptListenerAddr, Task task, - ID jvmID) { + InetSocketAddress taskAttemptListenerAddr, Task task, String javaHome, + String workDir, String logDir, String childTmpDir, ID jvmID) { TaskAttemptID attemptID = task.getTaskID(); JobConf conf = task.conf; @@ -180,7 +166,7 @@ public class MapReduceChildJVM { Vector vargs = new Vector(8); vargs.add("exec"); - vargs.add(Environment.JAVA_HOME.$() + "/bin/java"); + vargs.add(javaHome + "/bin/java"); // Add child (task) java-vm options. // @@ -213,26 +199,44 @@ public class MapReduceChildJVM { String javaOpts = getChildJavaOpts(conf, task.isMapTask()); javaOpts = javaOpts.replace("@taskid@", attemptID.toString()); String [] javaOptsSplit = javaOpts.split(" "); + + // Add java.library.path; necessary for loading native libraries. + // + // 1. We add the 'cwd' of the task to it's java.library.path to help + // users distribute native libraries via the DistributedCache. + // 2. The user can also specify extra paths to be added to the + // java.library.path via mapred.{map|reduce}.child.java.opts. 
+ // + String libraryPath = workDir; + boolean hasUserLDPath = false; + for(int i=0; i" + getTaskLogFile(TaskLog.LogName.STDOUT)); - vargs.add("2>" + getTaskLogFile(TaskLog.LogName.STDERR)); + vargs.add("1>" + getTaskLogFile(logDir, TaskLog.LogName.STDERR)); + vargs.add("2>" + getTaskLogFile(logDir, TaskLog.LogName.STDOUT)); // Final commmand StringBuilder mergedCommand = new StringBuilder(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java index 0ab220bf383..3021004f9dd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java @@ -47,6 +47,7 @@ import org.apache.hadoop.mapreduce.filecache.DistributedCache; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; +import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.security.Credentials; @@ -70,7 +71,7 @@ class YarnChild { LOG.debug("Child starting"); final JobConf defaultConf = new JobConf(); - defaultConf.addResource(MRJobConfig.JOB_CONF_FILE); + defaultConf.addResource(MRConstants.JOB_CONF_FILE); UserGroupInformation.setConfiguration(defaultConf); String host = args[0]; @@ -237,7 +238,7 @@ class YarnChild { private static JobConf configureTask(Task task, Credentials credentials, Token jt) throws IOException { - final JobConf job = new JobConf(MRJobConfig.JOB_CONF_FILE); + final JobConf job = new JobConf(MRConstants.JOB_CONF_FILE); job.setCredentials(credentials); // set tcp nodelay job.setBoolean("ipc.client.tcpnodelay", true); @@ -259,7 +260,7 @@ class YarnChild { // Overwrite the localized task jobconf which is linked to in the current // work-dir. 
- Path localTaskFile = new Path(MRJobConfig.JOB_CONF_FILE); + Path localTaskFile = new Path(Constants.JOBFILE); writeLocalJobFile(localTaskFile, job); task.setJobFile(localTaskFile.toString()); task.setConf(job); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 8b7d578fc9b..20c7e9779e8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -39,6 +39,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; +import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; @@ -77,7 +78,6 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.SystemClock; import org.apache.hadoop.yarn.YarnException; -import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -88,7 +88,6 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.service.AbstractService; import org.apache.hadoop.yarn.service.CompositeService; import org.apache.hadoop.yarn.service.Service; -import org.apache.hadoop.yarn.util.ConverterUtils; /** * The Map-Reduce Application Master. 
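The mapred.child.env handling in MapReduceChildJVM.setVMEnv above splits the user-supplied string on ',' and then on '=', and expands a leading reference to the same variable ($NAME) first against the environment already built for the child and then against the NodeManager process environment, falling back to the empty string. The sketch below restates that expansion rule on its own as a rough reading of the patch; the class and method names and the use of a plain java.util.Map are illustrative assumptions, not MapReduce API.

    import java.util.HashMap;
    import java.util.Map;

    // Illustrative sketch (not MapReduce API): mirrors the three cases handled
    // above for entries of the form "NAME=value" in mapred.child.env.
    public class ChildEnvExpansionSketch {
      static void expandChildEnv(String mapredChildEnv, Map<String, String> env) {
        if (mapredChildEnv == null || mapredChildEnv.isEmpty()) {
          return;
        }
        for (String entry : mapredChildEnv.split(",")) {
          String[] parts = entry.split("=", 2);        // "NAME=value"
          String name = parts[0];
          String value = parts.length > 1 ? parts[1] : "";
          String existing = env.get(name);             // case 1: already set for the child
          if (existing == null) {
            existing = System.getenv(name);            // case 2: inherit from the NM process
          }
          // case 3: not set anywhere -> "$NAME" expands to the empty string
          env.put(name, value.replace("$" + name, existing == null ? "" : existing));
        }
      }

      public static void main(String[] args) {
        Map<String, String> env = new HashMap<>();
        env.put("LD_LIBRARY_PATH", "/opt/hadoop/lib/native");
        expandChildEnv("LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp,X=/tmp", env);
        System.out.println(env);   // both entries are now expanded
      }
    }

For example, LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp appends /tmp to whatever LD_LIBRARY_PATH the child environment already carries, which is the case the patch comments call out.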
@@ -115,6 +114,8 @@ public class MRAppMaster extends CompositeService { private Clock clock; private final long startTime = System.currentTimeMillis(); private String appName; + private final int startCount; + private final ApplicationId appID; private final ApplicationAttemptId appAttemptID; protected final MRAppMetrics metrics; private Set completedTasksFromPreviousRun; @@ -132,16 +133,21 @@ public class MRAppMaster extends CompositeService { private Job job; - public MRAppMaster(ApplicationAttemptId applicationAttemptId) { - this(applicationAttemptId, new SystemClock()); + public MRAppMaster(ApplicationId applicationId, int startCount) { + this(applicationId, new SystemClock(), startCount); } - public MRAppMaster(ApplicationAttemptId applicationAttemptId, Clock clock) { + public MRAppMaster(ApplicationId applicationId, Clock clock, int startCount) { super(MRAppMaster.class.getName()); this.clock = clock; - this.appAttemptID = applicationAttemptId; + this.appID = applicationId; + this.appAttemptID = RecordFactoryProvider.getRecordFactory(null) + .newRecordInstance(ApplicationAttemptId.class); + this.appAttemptID.setApplicationId(appID); + this.appAttemptID.setAttemptId(startCount); + this.startCount = startCount; this.metrics = MRAppMetrics.create(); - LOG.info("Created MRAppMaster for application " + applicationAttemptId); + LOG.info("Created MRAppMaster for application " + applicationId); } @Override @@ -153,9 +159,9 @@ public class MRAppMaster extends CompositeService { appName = conf.get(MRJobConfig.JOB_NAME, ""); if (conf.getBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, false) - && appAttemptID.getAttemptId() > 1) { + && startCount > 1) { LOG.info("Recovery is enabled. Will try to recover from previous life."); - Recovery recoveryServ = new RecoveryService(appAttemptID, clock); + Recovery recoveryServ = new RecoveryService(appID, clock, startCount); addIfService(recoveryServ); dispatcher = recoveryServ.getDispatcher(); clock = recoveryServ.getClock(); @@ -237,10 +243,10 @@ public class MRAppMaster extends CompositeService { // Read the file-system tokens from the localized tokens-file. Path jobSubmitDir = FileContext.getLocalFSFileContext().makeQualified( - new Path(new File(MRJobConfig.JOB_SUBMIT_DIR) + new Path(new File(MRConstants.JOB_SUBMIT_DIR) .getAbsolutePath())); Path jobTokenFile = - new Path(jobSubmitDir, MRJobConfig.APPLICATION_TOKENS_FILE); + new Path(jobSubmitDir, MRConstants.APPLICATION_TOKENS_FILE); fsTokens.addAll(Credentials.readTokenStorageFile(jobTokenFile, conf)); LOG.info("jobSubmitDir=" + jobSubmitDir + " jobTokenFile=" + jobTokenFile); @@ -258,8 +264,8 @@ public class MRAppMaster extends CompositeService { // ////////// End of obtaining the tokens needed by the job. 
////////// // create single job - Job newJob = new JobImpl(appAttemptID, conf, dispatcher.getEventHandler(), - taskAttemptListener, jobTokenSecretManager, fsTokens, clock, + Job newJob = new JobImpl(appID, conf, dispatcher.getEventHandler(), + taskAttemptListener, jobTokenSecretManager, fsTokens, clock, startCount, completedTasksFromPreviousRun, metrics, currentUser.getUserName()); ((RunningAppContext) context).jobs.put(newJob.getID(), newJob); @@ -370,11 +376,11 @@ public class MRAppMaster extends CompositeService { } public ApplicationId getAppID() { - return appAttemptID.getApplicationId(); + return appID; } public int getStartCount() { - return appAttemptID.getAttemptId(); + return startCount; } public AppContext getContext() { @@ -499,7 +505,7 @@ public class MRAppMaster extends CompositeService { @Override public ApplicationId getApplicationID() { - return appAttemptID.getApplicationId(); + return appID; } @Override @@ -549,9 +555,9 @@ public class MRAppMaster extends CompositeService { // It's more test friendly to put it here. DefaultMetricsSystem.initialize("MRAppMaster"); - // create a job event for job intialization + /** create a job event for job intialization */ JobEvent initJobEvent = new JobEvent(job.getID(), JobEventType.JOB_INIT); - // Send init to the job (this does NOT trigger job execution) + /** send init to the job (this does NOT trigger job execution) */ // This is a synchronous call, not an event through dispatcher. We want // job-init to be done completely here. jobEventDispatcher.handle(initJobEvent); @@ -642,21 +648,17 @@ public class MRAppMaster extends CompositeService { public static void main(String[] args) { try { - String applicationAttemptIdStr = System - .getenv(ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV); - if (applicationAttemptIdStr == null) { - String msg = ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV - + " is null"; - LOG.error(msg); - throw new IOException(msg); - } - ApplicationAttemptId applicationAttemptId = ConverterUtils - .toApplicationAttemptId(applicationAttemptIdStr); - MRAppMaster appMaster = new MRAppMaster(applicationAttemptId); + //Configuration.addDefaultResource("job.xml"); + ApplicationId applicationId = RecordFactoryProvider + .getRecordFactory(null).newRecordInstance(ApplicationId.class); + applicationId.setClusterTimestamp(Long.valueOf(args[0])); + applicationId.setId(Integer.valueOf(args[1])); + int failCount = Integer.valueOf(args[2]); + MRAppMaster appMaster = new MRAppMaster(applicationId, failCount); Runtime.getRuntime().addShutdownHook( new CompositeServiceShutdownHook(appMaster)); YarnConfiguration conf = new YarnConfiguration(new JobConf()); - conf.addResource(new Path(MRJobConfig.JOB_CONF_FILE)); + conf.addResource(new Path(MRConstants.JOB_CONF_FILE)); conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name")); UserGroupInformation.setConfiguration(conf); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java index f84a4d9dbe4..73359bb12a5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java @@ -149,7 +149,7 @@ public class MRClientService extends AbstractService + ":" + server.getPort()); LOG.info("Instantiated MRClientService at " + this.bindAddress); try { - webApp = WebApps.$for("mapreduce", AppContext.class, appContext).with(conf). + webApp = WebApps.$for("yarn", AppContext.class, appContext).with(conf). start(new AMWebApp()); } catch (Exception e) { LOG.error("Webapps failed to start. Ignoring for now:", e); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java index c26bc24695c..69de493b16b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java @@ -64,6 +64,7 @@ import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader; import org.apache.hadoop.mapreduce.task.JobContextImpl; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; +import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.api.records.Counter; import org.apache.hadoop.mapreduce.v2.api.records.CounterGroup; import org.apache.hadoop.mapreduce.v2.api.records.Counters; @@ -92,7 +93,6 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; import org.apache.hadoop.mapreduce.v2.util.MRApps; -import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; @@ -101,7 +101,6 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.YarnException; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -130,11 +129,11 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, RecordFactoryProvider.getRecordFactory(null); //final fields - private final ApplicationAttemptId applicationAttemptId; private final Clock clock; private final JobACLsManager aclsManager; private final String username; private final Map jobACLs; + private final int startCount; private final Set completedTasksFromPreviousRun; private final Lock readLock; private final Lock writeLock; @@ -366,26 +365,26 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, private Token jobToken; private JobTokenSecretManager jobTokenSecretManager; - public JobImpl(ApplicationAttemptId applicationAttemptId, Configuration conf, + public JobImpl(ApplicationId appID, Configuration conf, EventHandler eventHandler, TaskAttemptListener taskAttemptListener, JobTokenSecretManager 
jobTokenSecretManager, - Credentials fsTokenCredentials, Clock clock, + Credentials fsTokenCredentials, Clock clock, int startCount, Set completedTasksFromPreviousRun, MRAppMetrics metrics, String userName) { - this.applicationAttemptId = applicationAttemptId; + this.jobId = recordFactory.newRecordInstance(JobId.class); this.jobName = conf.get(JobContext.JOB_NAME, ""); this.conf = conf; this.metrics = metrics; this.clock = clock; this.completedTasksFromPreviousRun = completedTasksFromPreviousRun; + this.startCount = startCount; this.userName = userName; - ApplicationId applicationId = applicationAttemptId.getApplicationId(); - jobId.setAppId(applicationId); - jobId.setId(applicationId.getId()); + jobId.setAppId(appID); + jobId.setId(appID.getId()); oldJobId = TypeConverter.fromYarn(jobId); LOG.info("Job created" + - " appId=" + applicationId + + " appId=" + appID + " jobId=" + jobId + " oldJobId=" + oldJobId); @@ -585,17 +584,25 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, public JobReport getReport() { readLock.lock(); try { - JobState state = getState(); - - if (getState() == JobState.NEW) { - return MRBuilderUtils.newJobReport(jobId, jobName, username, state, - startTime, finishTime, setupProgress, 0.0f, - 0.0f, cleanupProgress); + JobReport report = recordFactory.newRecordInstance(JobReport.class); + report.setJobId(jobId); + report.setJobState(getState()); + + // TODO - Fix to correctly setup report and to check state + if (report.getJobState() == JobState.NEW) { + return report; } + + report.setStartTime(startTime); + report.setFinishTime(finishTime); + report.setSetupProgress(setupProgress); + report.setCleanupProgress(cleanupProgress); + report.setMapProgress(computeProgress(mapTasks)); + report.setReduceProgress(computeProgress(reduceTasks)); + report.setJobName(jobName); + report.setUser(username); - return MRBuilderUtils.newJobReport(jobId, jobName, username, state, - startTime, finishTime, setupProgress, computeProgress(mapTasks), - computeProgress(reduceTasks), cleanupProgress); + return report; } finally { readLock.unlock(); } @@ -1000,7 +1007,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, FileSystem.get(job.conf).makeQualified( new Path(path, oldJobIDString)); job.remoteJobConfFile = - new Path(job.remoteJobSubmitDir, MRJobConfig.JOB_CONF_FILE); + new Path(job.remoteJobSubmitDir, MRConstants.JOB_CONF_FILE); // Prepare the TaskAttemptListener server for authentication of Containers // TaskAttemptListener gets the information via jobTokenSecretManager. 
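In the reverted JobImpl.getReport() above, map and reduce progress come from computeProgress(mapTasks) and computeProgress(reduceTasks), whose body is not part of this hunk. The sketch below shows the obvious reading, an average of per-task progress; the simplified Task interface and the averaging rule are assumptions made for illustration rather than the verbatim MapReduce implementation.

    import java.util.Collection;

    // Illustrative sketch only: a computeProgress(tasks) helper assumed to
    // average per-task progress for the JobReport fields built above.
    public class ProgressSketch {
      interface Task {
        float getProgress();   // 0.0f .. 1.0f
      }

      static float computeProgress(Collection<? extends Task> tasks) {
        if (tasks == null || tasks.isEmpty()) {
          return 0.0f;         // no tasks of this type -> report zero progress
        }
        float sum = 0.0f;
        for (Task t : tasks) {
          sum += t.getProgress();
        }
        return sum / tasks.size();
      }
    }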
@@ -1026,7 +1033,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, Path remoteJobTokenFile = new Path(job.remoteJobSubmitDir, - MRJobConfig.APPLICATION_TOKENS_FILE); + MRConstants.APPLICATION_TOKENS_FILE); tokenStorage.writeTokenStorageFile(remoteJobTokenFile, job.conf); LOG.info("Writing back the job-token file on the remote file system:" + remoteJobTokenFile.toString()); @@ -1071,8 +1078,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.conf, splits[i], job.taskAttemptListener, job.committer, job.jobToken, job.fsTokens.getAllTokens(), - job.clock, job.completedTasksFromPreviousRun, - job.applicationAttemptId.getAttemptId(), + job.clock, job.completedTasksFromPreviousRun, job.startCount, job.metrics); job.addTask(task); } @@ -1089,9 +1095,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.conf, job.numMapTasks, job.taskAttemptListener, job.committer, job.jobToken, job.fsTokens.getAllTokens(), job.clock, - job.completedTasksFromPreviousRun, - job.applicationAttemptId.getAttemptId(), - job.metrics); + job.completedTasksFromPreviousRun, job.startCount, job.metrics); job.addTask(task); } LOG.info("Number of reduces for job " + job.jobId + " = " diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index 495d00e22c9..cc9f6bddf06 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.app.job.impl; import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; +import java.net.URI; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; @@ -61,6 +62,7 @@ import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStartedEvent; import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; +import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.api.records.Counter; import org.apache.hadoop.mapreduce.v2.api.records.Counters; import org.apache.hadoop.mapreduce.v2.api.records.Phase; @@ -101,7 +103,6 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.ApplicationConstants; -import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerToken; @@ -116,6 +117,7 @@ import org.apache.hadoop.yarn.state.InvalidStateTransitonException; import org.apache.hadoop.yarn.state.SingleArcTransition; import org.apache.hadoop.yarn.state.StateMachine; import org.apache.hadoop.yarn.state.StateMachineFactory; +import org.apache.hadoop.yarn.util.BuilderUtils; import 
org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.RackResolver; @@ -151,7 +153,7 @@ public abstract class TaskAttemptImpl implements private Token jobToken; private static AtomicBoolean initialClasspathFlag = new AtomicBoolean(); private static String initialClasspath = null; - private static final Object classpathLock = new Object(); + private final Object classpathLock = new Object(); private long launchTime; private long finishTime; private WrappedProgressSplitsBlock progressSplitBlock; @@ -516,8 +518,8 @@ public abstract class TaskAttemptImpl implements return initialClasspath; } Map env = new HashMap(); - MRApps.setClasspath(env); - initialClasspath = env.get(Environment.CLASSPATH.name()); + MRApps.setInitialClasspath(env); + initialClasspath = env.get(MRApps.CLASSPATH); initialClasspathFlag.set(true); return initialClasspath; } @@ -529,18 +531,16 @@ public abstract class TaskAttemptImpl implements */ private ContainerLaunchContext createContainerLaunchContext() { + ContainerLaunchContext container = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + // Application resources Map localResources = new HashMap(); // Application environment Map environment = new HashMap(); - - // Service data - Map serviceData = new HashMap(); - - // Tokens - ByteBuffer tokens = ByteBuffer.wrap(new byte[]{}); + try { FileSystem remoteFS = FileSystem.get(conf); @@ -550,7 +550,7 @@ public abstract class TaskAttemptImpl implements MRJobConfig.JAR))).makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()); localResources.put( - MRJobConfig.JOB_JAR, + MRConstants.JOB_JAR, createLocalResource(remoteFS, recordFactory, remoteJobJar, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION)); LOG.info("The job-jar file on the remote FS is " @@ -570,9 +570,9 @@ public abstract class TaskAttemptImpl implements Path remoteJobSubmitDir = new Path(path, oldJobId.toString()); Path remoteJobConfPath = - new Path(remoteJobSubmitDir, MRJobConfig.JOB_CONF_FILE); + new Path(remoteJobSubmitDir, MRConstants.JOB_CONF_FILE); localResources.put( - MRJobConfig.JOB_CONF_FILE, + MRConstants.JOB_CONF_FILE, createLocalResource(remoteFS, recordFactory, remoteJobConfPath, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION)); LOG.info("The job-conf file on the remote FS is " @@ -580,8 +580,12 @@ public abstract class TaskAttemptImpl implements // //////////// End of JobConf setup // Setup DistributedCache - MRApps.setupDistributedCache(conf, localResources); + MRApps.setupDistributedCache(conf, localResources, environment); + // Set local-resources and environment + container.setLocalResources(localResources); + container.setEnvironment(environment); + // Setup up tokens Credentials taskCredentials = new Credentials(); @@ -602,43 +606,52 @@ public abstract class TaskAttemptImpl implements LOG.info("Size of containertokens_dob is " + taskCredentials.numberOfTokens()); taskCredentials.writeTokenStorageToStream(containerTokens_dob); - tokens = + container.setContainerTokens( ByteBuffer.wrap(containerTokens_dob.getData(), 0, - containerTokens_dob.getLength()); + containerTokens_dob.getLength())); // Add shuffle token LOG.info("Putting shuffle token in serviceData"); + Map serviceData = new HashMap(); serviceData.put(ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID, ShuffleHandler.serializeServiceData(jobToken)); + container.setServiceData(serviceData); - MRApps.addToEnvironment( - environment, - Environment.CLASSPATH.name(), - getInitialClasspath()); + 
MRApps.addToClassPath(container.getEnvironment(), getInitialClasspath()); } catch (IOException e) { throw new YarnException(e); } - - // Setup environment - MapReduceChildJVM.setVMEnv(environment, remoteTask); - - // Set up the launch command - List commands = MapReduceChildJVM.getVMCommand( - taskAttemptListener.getAddress(), remoteTask, - jvmID); + container.setContainerId(containerID); + container.setUser(conf.get(MRJobConfig.USER_NAME)); // TODO: Fix + + File workDir = new File("$PWD"); // Will be expanded by the shell. + String containerLogDir = + new File(ApplicationConstants.LOG_DIR_EXPANSION_VAR).toString(); + String childTmpDir = new File(workDir, "tmp").toString(); + String javaHome = "${JAVA_HOME}"; // Will be expanded by the shell. + String nmLdLibraryPath = "{LD_LIBRARY_PATH}"; // Expanded by the shell? + List classPaths = new ArrayList(); + + String localizedApplicationTokensFile = + new File(workDir, MRConstants.APPLICATION_TOKENS_FILE).toString(); + classPaths.add(MRConstants.JOB_JAR); + classPaths.add(MRConstants.YARN_MAPREDUCE_APP_JAR_PATH); + classPaths.add(workDir.toString()); // TODO + + // Construct the actual Container + container.setCommands(MapReduceChildJVM.getVMCommand( + taskAttemptListener.getAddress(), remoteTask, javaHome, + workDir.toString(), containerLogDir, childTmpDir, jvmID)); + + MapReduceChildJVM.setVMEnv(container.getEnvironment(), classPaths, + workDir.toString(), containerLogDir, nmLdLibraryPath, remoteTask, + localizedApplicationTokensFile); + // Construct the actual Container - ContainerLaunchContext container = - recordFactory.newRecordInstance(ContainerLaunchContext.class); container.setContainerId(containerID); container.setUser(conf.get(MRJobConfig.USER_NAME)); container.setResource(assignedCapability); - container.setLocalResources(localResources); - container.setEnvironment(environment); - container.setCommands(commands); - container.setServiceData(serviceData); - container.setContainerTokens(tokens); - return container; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java index 95e17d8f4f6..982f7d334ae 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java @@ -73,8 +73,6 @@ public class ContainerLauncherImpl extends AbstractService implements private AppContext context; private ThreadPoolExecutor launcherPool; - private static final int INITIAL_POOL_SIZE = 10; - private int limitOnPoolSize; private Thread eventHandlingThread; private BlockingQueue eventQueue = new LinkedBlockingQueue(); @@ -98,17 +96,16 @@ public class ContainerLauncherImpl extends AbstractService implements YarnConfiguration.YARN_SECURITY_INFO, ContainerManagerSecurityInfo.class, SecurityInfo.class); this.recordFactory = RecordFactoryProvider.getRecordFactory(conf); - this.limitOnPoolSize = conf.getInt( - MRJobConfig.MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT, - MRJobConfig.DEFAULT_MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT); super.init(myLocalConfig); } public void start() { - // Start with a default 
core-pool size of 10 and change it dynamically. - launcherPool = new ThreadPoolExecutor(INITIAL_POOL_SIZE, - Integer.MAX_VALUE, 1, TimeUnit.HOURS, - new LinkedBlockingQueue()); + launcherPool = + new ThreadPoolExecutor(getConfig().getInt( + MRJobConfig.MR_AM_CONTAINERLAUNCHER_THREAD_COUNT, 10), + Integer.MAX_VALUE, 1, TimeUnit.HOURS, + new LinkedBlockingQueue()); + launcherPool.prestartAllCoreThreads(); // Wait for work. eventHandlingThread = new Thread(new Runnable() { @Override public void run() { @@ -120,26 +117,6 @@ public class ContainerLauncherImpl extends AbstractService implements LOG.error("Returning, interrupted : " + e); return; } - - int poolSize = launcherPool.getCorePoolSize(); - - // See if we need up the pool size only if haven't reached the - // maximum limit yet. - if (poolSize != limitOnPoolSize) { - - // nodes where containers will run at *this* point of time. This is - // *not* the cluster size and doesn't need to be. - int numNodes = ugiMap.size(); - int idealPoolSize = Math.min(limitOnPoolSize, numNodes); - - if (poolSize <= idealPoolSize) { - // Bump up the pool size to idealPoolSize+INITIAL_POOL_SIZE, the - // later is just a buffer so we are not always increasing the - // pool-size - launcherPool.setCorePoolSize(idealPoolSize + INITIAL_POOL_SIZE); - } - } - // the events from the queue are handled in parallel // using a thread pool launcherPool.execute(new EventProcessor(event)); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java index 0261e18b56f..18a0f2d5a6a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java @@ -18,7 +18,6 @@ package org.apache.hadoop.mapreduce.v2.app.local; -import java.util.ArrayList; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; @@ -31,19 +30,15 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; +import org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent; import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator; -import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; -import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; -import org.apache.hadoop.yarn.api.records.AMResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.Records; /** @@ -70,20 +65,6 @@ public class 
LocalContainerAllocator extends RMCommunicator this.appID = context.getApplicationID(); } - @Override - protected synchronized void heartbeat() throws Exception { - AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest( - this.applicationAttemptId, this.lastResponseID, super - .getApplicationProgress(), new ArrayList(), - new ArrayList()); - AllocateResponse allocateResponse = scheduler.allocate(allocateRequest); - AMResponse response = allocateResponse.getAMResponse(); - if (response.getReboot()) { - // TODO - LOG.info("Event from RM: shutting down Application Master"); - } - } - @Override public void handle(ContainerAllocatorEvent event) { if (event.getType() == ContainerAllocator.EventType.CONTAINER_REQ) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java index ca213f17f86..073411c9b47 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java @@ -58,7 +58,7 @@ import org.apache.hadoop.mapreduce.v2.app.taskclean.TaskCleaner; import org.apache.hadoop.mapreduce.v2.app.taskclean.TaskCleanupEvent; import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.yarn.Clock; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; @@ -92,9 +92,10 @@ public class RecoveryService extends CompositeService implements Recovery { private static final Log LOG = LogFactory.getLog(RecoveryService.class); - private final ApplicationAttemptId applicationAttemptId; + private final ApplicationId appID; private final Dispatcher dispatcher; private final ControlledClock clock; + private final int startCount; private JobInfo jobInfo = null; private final Map completedTasks = @@ -105,10 +106,10 @@ public class RecoveryService extends CompositeService implements Recovery { private volatile boolean recoveryMode = false; - public RecoveryService(ApplicationAttemptId applicationAttemptId, - Clock clock) { + public RecoveryService(ApplicationId appID, Clock clock, int startCount) { super("RecoveringDispatcher"); - this.applicationAttemptId = applicationAttemptId; + this.appID = appID; + this.startCount = startCount; this.dispatcher = new RecoveryDispatcher(); this.clock = new ControlledClock(clock); addService((Service) dispatcher); @@ -151,8 +152,7 @@ public class RecoveryService extends CompositeService implements Recovery { private void parse() throws IOException { // TODO: parse history file based on startCount - String jobName = - TypeConverter.fromYarn(applicationAttemptId.getApplicationId()).toString(); + String jobName = TypeConverter.fromYarn(appID).toString(); String jobhistoryDir = JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(getConfig()); FSDataInputStream in = null; Path historyFile = null; @@ -160,9 +160,8 @@ public class RecoveryService extends CompositeService implements Recovery { new 
Path(jobhistoryDir)); FileContext fc = FileContext.getFileContext(histDirPath.toUri(), getConfig()); - //read the previous history file historyFile = fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile( - histDirPath, jobName, (applicationAttemptId.getAttemptId() - 1))); + histDirPath, jobName, startCount - 1)); //read the previous history file in = fc.open(historyFile); JobHistoryParser parser = new JobHistoryParser(in); jobInfo = parser.parse(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java index 15a7e3f6a5a..db4a60b1dcc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java @@ -20,6 +20,7 @@ package org.apache.hadoop.mapreduce.v2.app.rm; import java.io.IOException; import java.security.PrivilegedAction; +import java.util.ArrayList; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -28,7 +29,6 @@ import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.v2.api.records.JobId; -import org.apache.hadoop.mapreduce.v2.api.records.JobReport; import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; @@ -42,12 +42,17 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.AMRMProtocol; import org.apache.hadoop.yarn.api.ApplicationConstants; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; +import org.apache.hadoop.yarn.api.records.AMResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -59,7 +64,7 @@ import org.apache.hadoop.yarn.service.AbstractService; /** * Registers/unregisters to RM and sends heartbeats to RM. 
*/ -public abstract class RMCommunicator extends AbstractService { +public class RMCommunicator extends AbstractService { private static final Log LOG = LogFactory.getLog(RMContainerAllocator.class); private int rmPollInterval;//millis protected ApplicationId applicationId; @@ -69,7 +74,7 @@ public abstract class RMCommunicator extends AbstractService { protected EventHandler eventHandler; protected AMRMProtocol scheduler; private final ClientService clientService; - protected int lastResponseID; + private int lastResponseID; private Resource minContainerCapability; private Resource maxContainerCapability; @@ -116,34 +121,6 @@ public abstract class RMCommunicator extends AbstractService { return job; } - /** - * Get the appProgress. Can be used only after this component is started. - * @return the appProgress. - */ - protected float getApplicationProgress() { - // For now just a single job. In future when we have a DAG, we need an - // aggregate progress. - JobReport report = this.job.getReport(); - float setupWeight = 0.05f; - float cleanupWeight = 0.05f; - float mapWeight = 0.0f; - float reduceWeight = 0.0f; - int numMaps = this.job.getTotalMaps(); - int numReduces = this.job.getTotalReduces(); - if (numMaps == 0 && numReduces == 0) { - } else if (numMaps == 0) { - reduceWeight = 0.9f; - } else if (numReduces == 0) { - mapWeight = 0.9f; - } else { - mapWeight = reduceWeight = 0.45f; - } - return (report.getSetupProgress() * setupWeight - + report.getCleanupProgress() * cleanupWeight - + report.getMapProgress() * mapWeight + report.getReduceProgress() - * reduceWeight); - } - protected void register() { //Register String host = @@ -285,5 +262,18 @@ public abstract class RMCommunicator extends AbstractService { }); } - protected abstract void heartbeat() throws Exception; + protected synchronized void heartbeat() throws Exception { + AllocateRequest allocateRequest = + recordFactory.newRecordInstance(AllocateRequest.class); + allocateRequest.setApplicationAttemptId(applicationAttemptId); + allocateRequest.setResponseId(lastResponseID); + allocateRequest.addAllAsks(new ArrayList()); + allocateRequest.addAllReleases(new ArrayList()); + AllocateResponse allocateResponse = scheduler.allocate(allocateRequest); + AMResponse response = allocateResponse.getAMResponse(); + if (response.getReboot()) { + LOG.info("Event from RM: shutting down Application Master"); + } + } + } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java index 7b75cd1fbd7..ff232104bd4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java @@ -586,21 +586,37 @@ public class RMContainerAllocator extends RMContainerRequestor private ContainerRequest assign(Container allocated) { ContainerRequest assigned = null; - Priority priority = allocated.getPriority(); - if (PRIORITY_FAST_FAIL_MAP.equals(priority)) { - LOG.info("Assigning container " + allocated + " to fast fail map"); - assigned = assignToFailedMap(allocated); - } else if (PRIORITY_REDUCE.equals(priority)) { - LOG.info("Assigning 
container " + allocated + " to reduce"); - assigned = assignToReduce(allocated); - } else if (PRIORITY_MAP.equals(priority)) { - LOG.info("Assigning container " + allocated + " to map"); - assigned = assignToMap(allocated); - } else { - LOG.warn("Container allocated at unwanted priority: " + priority + - ". Returning to RM..."); - } + if (mapResourceReqt != reduceResourceReqt) { + //assign based on size + LOG.info("Assigning based on container size"); + if (allocated.getResource().getMemory() == mapResourceReqt) { + assigned = assignToFailedMap(allocated); + if (assigned == null) { + assigned = assignToMap(allocated); + } + } else if (allocated.getResource().getMemory() == reduceResourceReqt) { + assigned = assignToReduce(allocated); + } + return assigned; + } + + //container can be given to either map or reduce + //assign based on priority + + //try to assign to earlierFailedMaps if present + assigned = assignToFailedMap(allocated); + + //Assign to reduces before assigning to maps ? + if (assigned == null) { + assigned = assignToReduce(allocated); + } + + //try to assign to maps if present + if (assigned == null) { + assigned = assignToMap(allocated); + } + return assigned; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java index cda2ed678af..b9f0c6ee45e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java @@ -43,7 +43,6 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.util.BuilderUtils; /** * Keeps the data structures to send container requests to RM. 
@@ -108,11 +107,15 @@ public abstract class RMContainerRequestor extends RMCommunicator { LOG.info("maxTaskFailuresPerNode is " + maxTaskFailuresPerNode); } + protected abstract void heartbeat() throws Exception; + protected AMResponse makeRemoteRequest() throws YarnRemoteException { - AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest( - applicationAttemptId, lastResponseID, super.getApplicationProgress(), - new ArrayList(ask), new ArrayList( - release)); + AllocateRequest allocateRequest = recordFactory + .newRecordInstance(AllocateRequest.class); + allocateRequest.setApplicationAttemptId(applicationAttemptId); + allocateRequest.setResponseId(lastResponseID); + allocateRequest.addAllAsks(new ArrayList(ask)); + allocateRequest.addAllReleases(new ArrayList(release)); AllocateResponse allocateResponse = scheduler.allocate(allocateRequest); AMResponse response = allocateResponse.getAMResponse(); lastResponseID = response.getResponseId(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java index ab7d23ef9dc..feb019fe162 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java @@ -35,6 +35,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; @@ -86,7 +87,7 @@ public class DefaultSpeculator extends AbstractService implements private final ConcurrentMap reduceContainerNeeds = new ConcurrentHashMap(); - private final Set mayHaveSpeculated = new HashSet(); + private final Set mayHaveSpeculated = new HashSet(); private final Configuration conf; private AppContext context; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobConfPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobConfPage.java index 983859e7d67..8bf2ce1955f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobConfPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobConfPage.java @@ -44,7 +44,6 @@ public class JobConfPage extends AppView { set(TITLE, jobID.isEmpty() ? 
"Bad request: missing job ID" : join("Configuration for MapReduce Job ", $(JOB_ID))); commonPreHead(html); - set(initID(ACCORDION, "nav"), "{autoHeight:false, active:2}"); set(DATATABLES_ID, "conf"); set(initID(DATATABLES, "conf"), confTableInit()); set(postInitID(DATATABLES, "conf"), confPostTableInit()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java index 8b4524ad117..bb4e2390a75 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java @@ -38,9 +38,9 @@ public class NavBlock extends HtmlBlock { div("#nav"). h3("Cluster"). ul(). - li().a(url(rmweb, "cluster", "cluster"), "About")._(). - li().a(url(rmweb, "cluster", "apps"), "Applications")._(). - li().a(url(rmweb, "cluster", "scheduler"), "Scheduler")._()._(). + li().a(url(rmweb, prefix(), "cluster"), "About")._(). + li().a(url(rmweb, prefix(), "apps"), "Applications")._(). + li().a(url(rmweb, prefix(), "scheduler"), "Scheduler")._()._(). h3("Application"). ul(). li().a(url("app/info"), "About")._(). diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java index 736bef639e0..d9884d146a6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java @@ -85,7 +85,7 @@ public class TaskPage extends AppView { if (containerId != null) { String containerIdStr = ConverterUtils.toString(containerId); nodeTd._(" "). - a(".logslink", url("http://", nodeHttpAddr, "node", "containerlogs", + a(".logslink", url("http://", nodeHttpAddr, "yarn", "containerlogs", containerIdStr), "logs"); } nodeTd._(). 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java index d6e2d968173..548d754a6c6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java @@ -66,7 +66,6 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.YarnException; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -92,7 +91,7 @@ public class MRApp extends MRAppMaster { private File testWorkDir; private Path testAbsPath; - private static final RecordFactory recordFactory = + private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); //if true, tasks complete automatically as soon as they are launched @@ -101,7 +100,7 @@ public class MRApp extends MRAppMaster { static ApplicationId applicationId; static { - applicationId = recordFactory.newRecordInstance(ApplicationId.class); + applicationId = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(ApplicationId.class); applicationId.setClusterTimestamp(0); applicationId.setId(0); } @@ -109,19 +108,9 @@ public class MRApp extends MRAppMaster { public MRApp(int maps, int reduces, boolean autoComplete, String testName, boolean cleanOnStart) { this(maps, reduces, autoComplete, testName, cleanOnStart, 1); } - - private static ApplicationAttemptId getApplicationAttemptId( - ApplicationId applicationId, int startCount) { - ApplicationAttemptId applicationAttemptId = - recordFactory.newRecordInstance(ApplicationAttemptId.class); - applicationAttemptId.setApplicationId(applicationId); - applicationAttemptId.setAttemptId(startCount); - return applicationAttemptId; - } - public MRApp(int maps, int reduces, boolean autoComplete, String testName, - boolean cleanOnStart, int startCount) { - super(getApplicationAttemptId(applicationId, startCount)); + public MRApp(int maps, int reduces, boolean autoComplete, String testName, boolean cleanOnStart, int startCount) { + super(applicationId, startCount); this.testWorkDir = new File("target", testName); testAbsPath = new Path(testWorkDir.getAbsolutePath()); LOG.info("PathUsed: " + testAbsPath); @@ -402,12 +391,11 @@ public class MRApp extends MRAppMaster { return localStateMachine; } - public TestJob(Configuration conf, ApplicationId applicationId, + public TestJob(Configuration conf, ApplicationId appID, EventHandler eventHandler, TaskAttemptListener taskAttemptListener, Clock clock, String user) { - super(getApplicationAttemptId(applicationId, getStartCount()), - conf, eventHandler, taskAttemptListener, - new JobTokenSecretManager(), new Credentials(), clock, + super(appID, conf, eventHandler, taskAttemptListener, + new JobTokenSecretManager(), new Credentials(), clock, getStartCount(), getCompletedTaskFromPreviousRun(), metrics, user); // This "this leak" is okay because the retained pointer is in an diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java index a1eb928919e..cbf3ab0a658 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java @@ -18,15 +18,12 @@ package org.apache.hadoop.mapreduce.v2.app; -import static org.mockito.Matchers.isA; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import junit.framework.Assert; @@ -35,651 +32,475 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.v2.api.records.JobId; -import org.apache.hadoop.mapreduce.v2.api.records.JobReport; -import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; -import org.apache.hadoop.mapreduce.v2.app.client.ClientService; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent; -import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent; import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator; -import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; -import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.AMRMProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; +import org.apache.hadoop.yarn.api.records.AMResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationMaster; +import org.apache.hadoop.yarn.api.records.ApplicationStatus; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; -import org.apache.hadoop.yarn.event.Dispatcher; -import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.Event; import 
org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.server.resourcemanager.MockNM; -import org.apache.hadoop.yarn.server.resourcemanager.MockRM; -import org.apache.hadoop.yarn.server.resourcemanager.RMContext; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.ipc.RPCUtil; +import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager; -import org.apache.hadoop.yarn.util.BuilderUtils; -import org.junit.After; +import org.junit.BeforeClass; import org.junit.Test; public class TestRMContainerAllocator { - - static final Log LOG = LogFactory - .getLog(TestRMContainerAllocator.class); - static final RecordFactory recordFactory = RecordFactoryProvider - .getRecordFactory(null); - - @After - public void tearDown() { - DefaultMetricsSystem.shutdown(); - } - - @Test - public void testSimple() throws Exception { - - LOG.info("Running testSimple"); - - Configuration conf = new Configuration(); - MyResourceManager rm = new MyResourceManager(conf); - rm.start(); - DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext() - .getDispatcher(); - - // Submit the application - RMApp app = rm.submitApp(1024); - dispatcher.await(); - - MockNM amNodeManager = rm.registerNode("amNM:1234", 2048); - amNodeManager.nodeHeartbeat(true); - dispatcher.await(); - - ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() - .getAppAttemptId(); - rm.sendAMLaunched(appAttemptId); - dispatcher.await(); - - JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0); - Job mockJob = mock(Job.class); - when(mockJob.getReport()).thenReturn( - MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, - 0, 0, 0, 0, 0, 0)); - MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, - appAttemptId, mockJob); - - // add resources to scheduler - MockNM nodeManager1 = rm.registerNode("h1:1234", 10240); - MockNM nodeManager2 = rm.registerNode("h2:1234", 10240); - MockNM nodeManager3 = rm.registerNode("h3:1234", 10240); - dispatcher.await(); - - // create the container request - ContainerRequestEvent event1 = createReq(jobId, 1, 1024, - new String[] { "h1" }); - allocator.sendRequest(event1); - - // send 1 more request with different resource req - ContainerRequestEvent event2 = createReq(jobId, 2, 1024, - new String[] { "h2" }); - allocator.sendRequest(event2); - - // this tells the scheduler about the requests - // as nodes are not added, no allocations - List assigned = allocator.schedule(); - dispatcher.await(); - Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); - - // send another request with different resource and priority - ContainerRequestEvent event3 = createReq(jobId, 3, 1024, - 
new String[] { "h3" }); - allocator.sendRequest(event3); - - // this tells the scheduler about the requests - // as nodes are not added, no allocations - assigned = allocator.schedule(); - dispatcher.await(); - Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); - - // update resources in scheduler - nodeManager1.nodeHeartbeat(true); // Node heartbeat - nodeManager2.nodeHeartbeat(true); // Node heartbeat - nodeManager3.nodeHeartbeat(true); // Node heartbeat - dispatcher.await(); - - assigned = allocator.schedule(); - dispatcher.await(); - checkAssignments(new ContainerRequestEvent[] { event1, event2, event3 }, - assigned, false); - } - - @Test - public void testResource() throws Exception { - - LOG.info("Running testResource"); - - Configuration conf = new Configuration(); - MyResourceManager rm = new MyResourceManager(conf); - rm.start(); - DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext() - .getDispatcher(); - - // Submit the application - RMApp app = rm.submitApp(1024); - dispatcher.await(); - - MockNM amNodeManager = rm.registerNode("amNM:1234", 2048); - amNodeManager.nodeHeartbeat(true); - dispatcher.await(); - - ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() - .getAppAttemptId(); - rm.sendAMLaunched(appAttemptId); - dispatcher.await(); - - JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0); - Job mockJob = mock(Job.class); - when(mockJob.getReport()).thenReturn( - MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, - 0, 0, 0, 0, 0, 0)); - MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, - appAttemptId, mockJob); - - // add resources to scheduler - MockNM nodeManager1 = rm.registerNode("h1:1234", 10240); - MockNM nodeManager2 = rm.registerNode("h2:1234", 10240); - MockNM nodeManager3 = rm.registerNode("h3:1234", 10240); - dispatcher.await(); - - // create the container request - ContainerRequestEvent event1 = createReq(jobId, 1, 1024, - new String[] { "h1" }); - allocator.sendRequest(event1); - - // send 1 more request with different resource req - ContainerRequestEvent event2 = createReq(jobId, 2, 2048, - new String[] { "h2" }); - allocator.sendRequest(event2); - - // this tells the scheduler about the requests - // as nodes are not added, no allocations - List assigned = allocator.schedule(); - dispatcher.await(); - Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); - - // update resources in scheduler - nodeManager1.nodeHeartbeat(true); // Node heartbeat - nodeManager2.nodeHeartbeat(true); // Node heartbeat - nodeManager3.nodeHeartbeat(true); // Node heartbeat - dispatcher.await(); - - assigned = allocator.schedule(); - dispatcher.await(); - checkAssignments(new ContainerRequestEvent[] { event1, event2 }, - assigned, false); - } - - @Test - public void testMapReduceScheduling() throws Exception { - - LOG.info("Running testMapReduceScheduling"); - - Configuration conf = new Configuration(); - MyResourceManager rm = new MyResourceManager(conf); - rm.start(); - DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext() - .getDispatcher(); - - // Submit the application - RMApp app = rm.submitApp(1024); - dispatcher.await(); - - MockNM amNodeManager = rm.registerNode("amNM:1234", 2048); - amNodeManager.nodeHeartbeat(true); - dispatcher.await(); - - ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() - .getAppAttemptId(); - rm.sendAMLaunched(appAttemptId); - dispatcher.await(); - - JobId jobId = 
MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0); - Job mockJob = mock(Job.class); - when(mockJob.getReport()).thenReturn( - MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, - 0, 0, 0, 0, 0, 0)); - MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, - appAttemptId, mockJob); - - // add resources to scheduler - MockNM nodeManager1 = rm.registerNode("h1:1234", 1024); - MockNM nodeManager2 = rm.registerNode("h2:1234", 10240); - MockNM nodeManager3 = rm.registerNode("h3:1234", 10240); - dispatcher.await(); - - // create the container request - // send MAP request - ContainerRequestEvent event1 = createReq(jobId, 1, 2048, new String[] { - "h1", "h2" }, true, false); - allocator.sendRequest(event1); - - // send REDUCE request - ContainerRequestEvent event2 = createReq(jobId, 2, 3000, - new String[] { "h1" }, false, true); - allocator.sendRequest(event2); - - // send MAP request - ContainerRequestEvent event3 = createReq(jobId, 3, 2048, - new String[] { "h3" }, false, false); - allocator.sendRequest(event3); - - // this tells the scheduler about the requests - // as nodes are not added, no allocations - List assigned = allocator.schedule(); - dispatcher.await(); - Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); - - // update resources in scheduler - nodeManager1.nodeHeartbeat(true); // Node heartbeat - nodeManager2.nodeHeartbeat(true); // Node heartbeat - nodeManager3.nodeHeartbeat(true); // Node heartbeat - dispatcher.await(); - - assigned = allocator.schedule(); - dispatcher.await(); - checkAssignments(new ContainerRequestEvent[] { event1, event3 }, - assigned, false); - - // validate that no container is assigned to h1 as it doesn't have 2048 - for (TaskAttemptContainerAssignedEvent assig : assigned) { - Assert.assertFalse("Assigned count not correct", "h1".equals(assig - .getContainer().getNodeId().getHost())); - } - } - - private static class MyResourceManager extends MockRM { - - public MyResourceManager(Configuration conf) { - super(conf); - } - - @Override - protected Dispatcher createDispatcher() { - return new DrainDispatcher(); - } - - @Override - protected EventHandler createSchedulerEventDispatcher() { - // Dispatch inline for test sanity - return new EventHandler() { - @Override - public void handle(SchedulerEvent event) { - scheduler.handle(event); - } - }; - } - @Override - protected ResourceScheduler createScheduler() { - return new MyFifoScheduler(getRMContext()); - } - } - - private static class FakeJob extends JobImpl { - - public FakeJob(ApplicationAttemptId appAttemptID, Configuration conf, - int numMaps, int numReduces) { - super(appAttemptID, conf, null, null, null, null, null, null, null, - null); - this.jobId = MRBuilderUtils - .newJobId(appAttemptID.getApplicationId(), 0); - this.numMaps = numMaps; - this.numReduces = numReduces; - } - - private float setupProgress; - private float mapProgress; - private float reduceProgress; - private float cleanupProgress; - private final int numMaps; - private final int numReduces; - private JobId jobId; - - void setProgress(float setupProgress, float mapProgress, - float reduceProgress, float cleanupProgress) { - this.setupProgress = setupProgress; - this.mapProgress = mapProgress; - this.reduceProgress = reduceProgress; - this.cleanupProgress = cleanupProgress; - } - - @Override - public int getTotalMaps() { return this.numMaps; } - @Override - public int getTotalReduces() { return this.numReduces;} - - @Override - public JobReport getReport() { - return 
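// The reported-progress assertions in testReportedAppProgress / testReportedAppProgressWithOnlyMaps
// below (0, 9.5, 41, 59, 100 with reduces; 0, 14, 59, 100 with maps only) are consistent with a
// weighting of roughly 5% setup + 5% cleanup + 90% task work, where the 90% is split evenly between
// the map and reduce phases when reduces exist and goes entirely to maps otherwise. A minimal sketch
// of that formula, assuming progress values in the same 0-100 range the tests pass to
// FakeJob.setProgress(); the helper class and method are illustrative only, not part of this patch:
final class ProgressWeightingSketch {
  static float expectedAppProgress(float setup, float map, float reduce,
      float cleanup, boolean hasReduces) {
    // 5% setup, 5% cleanup, 90% task work (split across map/reduce only if reduces exist)
    float taskWork = hasReduces ? 0.45f * (map + reduce) : 0.90f * map;
    return 0.05f * setup + taskWork + 0.05f * cleanup;
  }
}
// e.g. expectedAppProgress(100, 10, 0, 0, true)  -> 9.5  (first non-zero assert with reduces)
//      expectedAppProgress(100, 10, 0, 0, false) -> 14.0 (first non-zero assert, maps only)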
MRBuilderUtils.newJobReport(this.jobId, "job", "user", - JobState.RUNNING, 0, 0, this.setupProgress, this.mapProgress, - this.reduceProgress, this.cleanupProgress); - } - } - - @Test - public void testReportedAppProgress() throws Exception { - - LOG.info("Running testReportedAppProgress"); - - Configuration conf = new Configuration(); - MyResourceManager rm = new MyResourceManager(conf); - rm.start(); - DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext() - .getDispatcher(); - - // Submit the application - RMApp app = rm.submitApp(1024); - dispatcher.await(); - - MockNM amNodeManager = rm.registerNode("amNM:1234", 2048); - amNodeManager.nodeHeartbeat(true); - dispatcher.await(); - - ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() - .getAppAttemptId(); - rm.sendAMLaunched(appAttemptId); - dispatcher.await(); - - FakeJob job = new FakeJob(appAttemptId, conf, 2, 2); - MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, - appAttemptId, job); - - allocator.schedule(); // Send heartbeat - dispatcher.await(); - Assert.assertEquals(0.0, app.getProgress(), 0.0); - - job.setProgress(100, 10, 0, 0); - allocator.schedule(); - dispatcher.await(); - Assert.assertEquals(9.5f, app.getProgress(), 0.0); - - job.setProgress(100, 80, 0, 0); - allocator.schedule(); - dispatcher.await(); - Assert.assertEquals(41.0f, app.getProgress(), 0.0); - - job.setProgress(100, 100, 20, 0); - allocator.schedule(); - dispatcher.await(); - Assert.assertEquals(59.0f, app.getProgress(), 0.0); - - job.setProgress(100, 100, 100, 100); - allocator.schedule(); - dispatcher.await(); - Assert.assertEquals(100.0f, app.getProgress(), 0.0); - } - - @Test - public void testReportedAppProgressWithOnlyMaps() throws Exception { - - LOG.info("Running testReportedAppProgressWithOnlyMaps"); - - Configuration conf = new Configuration(); - MyResourceManager rm = new MyResourceManager(conf); - rm.start(); - DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext() - .getDispatcher(); - - // Submit the application - RMApp app = rm.submitApp(1024); - dispatcher.await(); - - MockNM amNodeManager = rm.registerNode("amNM:1234", 2048); - amNodeManager.nodeHeartbeat(true); - dispatcher.await(); - - ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() - .getAppAttemptId(); - rm.sendAMLaunched(appAttemptId); - dispatcher.await(); - - FakeJob job = new FakeJob(appAttemptId, conf, 2, 0); - MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, - appAttemptId, job); - - allocator.schedule(); // Send heartbeat - dispatcher.await(); - Assert.assertEquals(0.0, app.getProgress(), 0.0); - - job.setProgress(100, 10, 0, 0); - allocator.schedule(); - dispatcher.await(); - Assert.assertEquals(14f, app.getProgress(), 0.0); - - job.setProgress(100, 60, 0, 0); - allocator.schedule(); - dispatcher.await(); - Assert.assertEquals(59.0f, app.getProgress(), 0.0); - - job.setProgress(100, 100, 0, 100); - allocator.schedule(); - dispatcher.await(); - Assert.assertEquals(100.0f, app.getProgress(), 0.0); - } - - private static class MyFifoScheduler extends FifoScheduler { - - public MyFifoScheduler(RMContext rmContext) { - super(); - try { - reinitialize(new Configuration(), new ContainerTokenSecretManager(), - rmContext); - } catch (IOException ie) { - LOG.info("add application failed with ", ie); - assert (false); - } - } - - // override this to copy the objects otherwise FifoScheduler updates the - // numContainers in same objects as kept by RMContainerAllocator - @Override - public synchronized 
Allocation allocate( - ApplicationAttemptId applicationAttemptId, List ask, - List release) { - List askCopy = new ArrayList(); - for (ResourceRequest req : ask) { - ResourceRequest reqCopy = BuilderUtils.newResourceRequest(req - .getPriority(), req.getHostName(), req.getCapability(), req - .getNumContainers()); - askCopy.add(reqCopy); - } - return super.allocate(applicationAttemptId, askCopy, release); - } - } - - private ContainerRequestEvent createReq(JobId jobId, int taskAttemptId, - int memory, String[] hosts) { - return createReq(jobId, taskAttemptId, memory, hosts, false, false); - } - - private ContainerRequestEvent - createReq(JobId jobId, int taskAttemptId, int memory, String[] hosts, - boolean earlierFailedAttempt, boolean reduce) { - TaskId taskId; - if (reduce) { - taskId = MRBuilderUtils.newTaskId(jobId, 0, TaskType.REDUCE); - } else { - taskId = MRBuilderUtils.newTaskId(jobId, 0, TaskType.MAP); - } - TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, - taskAttemptId); - Resource containerNeed = BuilderUtils.newResource(memory); - if (earlierFailedAttempt) { - return ContainerRequestEvent - .createContainerRequestEventForFailedContainer(attemptId, - containerNeed); - } - return new ContainerRequestEvent(attemptId, containerNeed, hosts, - new String[] { NetworkTopology.DEFAULT_RACK }); - } - - private void checkAssignments(ContainerRequestEvent[] requests, - List assignments, - boolean checkHostMatch) { - Assert.assertNotNull("Container not assigned", assignments); - Assert.assertEquals("Assigned count not correct", requests.length, - assignments.size()); - - // check for uniqueness of containerIDs - Set containerIds = new HashSet(); - for (TaskAttemptContainerAssignedEvent assigned : assignments) { - containerIds.add(assigned.getContainer().getId()); - } - Assert.assertEquals("Assigned containers must be different", assignments - .size(), containerIds.size()); - - // check for all assignment - for (ContainerRequestEvent req : requests) { - TaskAttemptContainerAssignedEvent assigned = null; - for (TaskAttemptContainerAssignedEvent ass : assignments) { - if (ass.getTaskAttemptID().equals(req.getAttemptID())) { - assigned = ass; - break; - } - } - checkAssignment(req, assigned, checkHostMatch); - } - } - - private void checkAssignment(ContainerRequestEvent request, - TaskAttemptContainerAssignedEvent assigned, boolean checkHostMatch) { - Assert.assertNotNull("Nothing assigned to attempt " - + request.getAttemptID(), assigned); - Assert.assertEquals("assigned to wrong attempt", request.getAttemptID(), - assigned.getTaskAttemptID()); - if (checkHostMatch) { - Assert.assertTrue("Not assigned to requested host", Arrays.asList( - request.getHosts()).contains( - assigned.getContainer().getNodeId().toString())); - } - } - - // Mock RMContainerAllocator - // Instead of talking to remote Scheduler,uses the local Scheduler - private static class MyContainerAllocator extends RMContainerAllocator { - static final List events - = new ArrayList(); - - private MyResourceManager rm; - - @SuppressWarnings("rawtypes") - private static AppContext createAppContext( - ApplicationAttemptId appAttemptId, Job job) { - AppContext context = mock(AppContext.class); - ApplicationId appId = appAttemptId.getApplicationId(); - when(context.getApplicationID()).thenReturn(appId); - when(context.getApplicationAttemptId()).thenReturn(appAttemptId); - when(context.getJob(isA(JobId.class))).thenReturn(job); - when(context.getEventHandler()).thenReturn(new EventHandler() { - @Override - public void 
handle(Event event) { - // Only capture interesting events. - if (event instanceof TaskAttemptContainerAssignedEvent) { - events.add((TaskAttemptContainerAssignedEvent) event); - } - } - }); - return context; - } - - private static ClientService createMockClientService() { - ClientService service = mock(ClientService.class); - when(service.getBindAddress()).thenReturn( - NetUtils.createSocketAddr("localhost:4567")); - when(service.getHttpPort()).thenReturn(890); - return service; - } - - MyContainerAllocator(MyResourceManager rm, Configuration conf, - ApplicationAttemptId appAttemptId, Job job) { - super(createMockClientService(), createAppContext(appAttemptId, job)); - this.rm = rm; - super.init(conf); - super.start(); - } - - @Override - protected AMRMProtocol createSchedulerProxy() { - return this.rm.getApplicationMasterService(); - } - - @Override - protected void register() { - super.register(); - } - - @Override - protected void unregister() { - } - - @Override - protected Resource getMinContainerCapability() { - return BuilderUtils.newResource(1024); - } - - @Override - protected Resource getMaxContainerCapability() { - return BuilderUtils.newResource(10240); - } - - public void sendRequest(ContainerRequestEvent req) { - sendRequests(Arrays.asList(new ContainerRequestEvent[] { req })); - } - - public void sendRequests(List reqs) { - for (ContainerRequestEvent req : reqs) { - super.handle(req); - } - } - - // API to be used by tests - public List schedule() { - // run the scheduler - try { - super.heartbeat(); - } catch (Exception e) { - LOG.error("error in heartbeat ", e); - throw new YarnException(e); - } - - List result - = new ArrayList(events); - events.clear(); - return result; - } - - protected void startAllocatorThread() { - // override to NOT start thread - } - } - - public static void main(String[] args) throws Exception { - TestRMContainerAllocator t = new TestRMContainerAllocator(); - t.testSimple(); - t.testResource(); - t.testMapReduceScheduling(); - t.testReportedAppProgress(); - t.testReportedAppProgressWithOnlyMaps(); - } +// private static final Log LOG = LogFactory.getLog(TestRMContainerAllocator.class); +// private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); +// +// @BeforeClass +// public static void preTests() { +// DefaultMetricsSystem.shutdown(); +// } +// +// @Test +// public void testSimple() throws Exception { +// FifoScheduler scheduler = createScheduler(); +// LocalRMContainerAllocator allocator = new LocalRMContainerAllocator( +// scheduler, new Configuration()); +// +// //add resources to scheduler +// RMNode nodeManager1 = addNode(scheduler, "h1", 10240); +// RMNode nodeManager2 = addNode(scheduler, "h2", 10240); +// RMNode nodeManager3 = addNode(scheduler, "h3", 10240); +// +// //create the container request +// ContainerRequestEvent event1 = +// createReq(1, 1024, new String[]{"h1"}); +// allocator.sendRequest(event1); +// +// //send 1 more request with different resource req +// ContainerRequestEvent event2 = createReq(2, 1024, new String[]{"h2"}); +// allocator.sendRequest(event2); +// +// //this tells the scheduler about the requests +// //as nodes are not added, no allocations +// List assigned = allocator.schedule(); +// Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); +// +// //send another request with different resource and priority +// ContainerRequestEvent event3 = createReq(3, 1024, new String[]{"h3"}); +// allocator.sendRequest(event3); +// +// //this tells the scheduler 
about the requests +// //as nodes are not added, no allocations +// assigned = allocator.schedule(); +// Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); +// +// //update resources in scheduler +// scheduler.nodeUpdate(nodeManager1); // Node heartbeat +// scheduler.nodeUpdate(nodeManager2); // Node heartbeat +// scheduler.nodeUpdate(nodeManager3); // Node heartbeat +// +// +// assigned = allocator.schedule(); +// checkAssignments( +// new ContainerRequestEvent[]{event1, event2, event3}, assigned, false); +// } +// +// //TODO: Currently Scheduler seems to have bug where it does not work +// //for Application asking for containers with different capabilities. +// //@Test +// public void testResource() throws Exception { +// FifoScheduler scheduler = createScheduler(); +// LocalRMContainerAllocator allocator = new LocalRMContainerAllocator( +// scheduler, new Configuration()); +// +// //add resources to scheduler +// RMNode nodeManager1 = addNode(scheduler, "h1", 10240); +// RMNode nodeManager2 = addNode(scheduler, "h2", 10240); +// RMNode nodeManager3 = addNode(scheduler, "h3", 10240); +// +// //create the container request +// ContainerRequestEvent event1 = +// createReq(1, 1024, new String[]{"h1"}); +// allocator.sendRequest(event1); +// +// //send 1 more request with different resource req +// ContainerRequestEvent event2 = createReq(2, 2048, new String[]{"h2"}); +// allocator.sendRequest(event2); +// +// //this tells the scheduler about the requests +// //as nodes are not added, no allocations +// List assigned = allocator.schedule(); +// Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); +// +// //update resources in scheduler +// scheduler.nodeUpdate(nodeManager1); // Node heartbeat +// scheduler.nodeUpdate(nodeManager2); // Node heartbeat +// scheduler.nodeUpdate(nodeManager3); // Node heartbeat +// +// assigned = allocator.schedule(); +// checkAssignments( +// new ContainerRequestEvent[]{event1, event2}, assigned, false); +// } +// +// @Test +// public void testMapReduceScheduling() throws Exception { +// FifoScheduler scheduler = createScheduler(); +// Configuration conf = new Configuration(); +// LocalRMContainerAllocator allocator = new LocalRMContainerAllocator( +// scheduler, conf); +// +// //add resources to scheduler +// RMNode nodeManager1 = addNode(scheduler, "h1", 1024); +// RMNode nodeManager2 = addNode(scheduler, "h2", 10240); +// RMNode nodeManager3 = addNode(scheduler, "h3", 10240); +// +// //create the container request +// //send MAP request +// ContainerRequestEvent event1 = +// createReq(1, 2048, new String[]{"h1", "h2"}, true, false); +// allocator.sendRequest(event1); +// +// //send REDUCE request +// ContainerRequestEvent event2 = createReq(2, 3000, new String[]{"h1"}, false, true); +// allocator.sendRequest(event2); +// +// //send MAP request +// ContainerRequestEvent event3 = createReq(3, 2048, new String[]{"h3"}, false, false); +// allocator.sendRequest(event3); +// +// //this tells the scheduler about the requests +// //as nodes are not added, no allocations +// List assigned = allocator.schedule(); +// Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); +// +// //update resources in scheduler +// scheduler.nodeUpdate(nodeManager1); // Node heartbeat +// scheduler.nodeUpdate(nodeManager2); // Node heartbeat +// scheduler.nodeUpdate(nodeManager3); // Node heartbeat +// +// assigned = allocator.schedule(); +// checkAssignments( +// new ContainerRequestEvent[]{event1, event3}, assigned, false); +// 
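// Both the MockRM-based tests above and these commented-out originals drive allocation the same
// way: requests handed to the allocator are only registered with the scheduler on schedule(), and
// the FifoScheduler assigns containers when a node update (heartbeat) reaches it, hence the
// "No of assignments must be 0" asserts before the heartbeats and the non-empty result after.
// Condensed sketch of that sequence, reusing variable names from the rewritten testSimple() above:
//
//   allocator.sendRequest(event1);       // queue a ContainerRequestEvent with the allocator
//   assigned = allocator.schedule();     // AM heartbeat: ask is registered, nothing granted yet
//   nodeManager1.nodeHeartbeat(true);    // node update lets the scheduler place the container
//   dispatcher.await();                  // drain the MockRM's async dispatcher
//   assigned = allocator.schedule();     // next AM heartbeat returns the new assignment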
+// //validate that no container is assigned to h1 as it doesn't have 2048 +// for (TaskAttemptContainerAssignedEvent assig : assigned) { +// Assert.assertFalse("Assigned count not correct", +// "h1".equals(assig.getContainer().getNodeId().getHost())); +// } +// } +// +// +// +// private RMNode addNode(FifoScheduler scheduler, +// String nodeName, int memory) { +// NodeId nodeId = recordFactory.newRecordInstance(NodeId.class); +// nodeId.setHost(nodeName); +// nodeId.setPort(1234); +// Resource resource = recordFactory.newRecordInstance(Resource.class); +// resource.setMemory(memory); +// RMNode nodeManager = new RMNodeImpl(nodeId, null, nodeName, 0, 0, +// ResourceTrackerService.resolve(nodeName), resource); +// scheduler.addNode(nodeManager); // Node registration +// return nodeManager; +// } +// +// private FifoScheduler createScheduler() throws YarnRemoteException { +// FifoScheduler fsc = new FifoScheduler() { +// //override this to copy the objects +// //otherwise FifoScheduler updates the numContainers in same objects as kept by +// //RMContainerAllocator +// +// @Override +// public synchronized void allocate(ApplicationAttemptId applicationId, +// List ask) { +// List askCopy = new ArrayList(); +// for (ResourceRequest req : ask) { +// ResourceRequest reqCopy = recordFactory.newRecordInstance(ResourceRequest.class); +// reqCopy.setPriority(req.getPriority()); +// reqCopy.setHostName(req.getHostName()); +// reqCopy.setCapability(req.getCapability()); +// reqCopy.setNumContainers(req.getNumContainers()); +// askCopy.add(reqCopy); +// } +// super.allocate(applicationId, askCopy); +// } +// }; +// try { +// fsc.reinitialize(new Configuration(), new ContainerTokenSecretManager(), null); +// fsc.addApplication(recordFactory.newRecordInstance(ApplicationId.class), +// recordFactory.newRecordInstance(ApplicationMaster.class), +// "test", null, null, StoreFactory.createVoidAppStore()); +// } catch(IOException ie) { +// LOG.info("add application failed with ", ie); +// assert(false); +// } +// return fsc; +// } +// +// private ContainerRequestEvent createReq( +// int attemptid, int memory, String[] hosts) { +// return createReq(attemptid, memory, hosts, false, false); +// } +// +// private ContainerRequestEvent createReq( +// int attemptid, int memory, String[] hosts, boolean earlierFailedAttempt, boolean reduce) { +// ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); +// appId.setClusterTimestamp(0); +// appId.setId(0); +// JobId jobId = recordFactory.newRecordInstance(JobId.class); +// jobId.setAppId(appId); +// jobId.setId(0); +// TaskId taskId = recordFactory.newRecordInstance(TaskId.class); +// taskId.setId(0); +// taskId.setJobId(jobId); +// if (reduce) { +// taskId.setTaskType(TaskType.REDUCE); +// } else { +// taskId.setTaskType(TaskType.MAP); +// } +// TaskAttemptId attemptId = recordFactory.newRecordInstance(TaskAttemptId.class); +// attemptId.setId(attemptid); +// attemptId.setTaskId(taskId); +// Resource containerNeed = recordFactory.newRecordInstance(Resource.class); +// containerNeed.setMemory(memory); +// if (earlierFailedAttempt) { +// return ContainerRequestEvent. 
+// createContainerRequestEventForFailedContainer(attemptId, containerNeed); +// } +// return new ContainerRequestEvent(attemptId, +// containerNeed, +// hosts, new String[] {NetworkTopology.DEFAULT_RACK}); +// } +// +// private void checkAssignments(ContainerRequestEvent[] requests, +// List assignments, +// boolean checkHostMatch) { +// Assert.assertNotNull("Container not assigned", assignments); +// Assert.assertEquals("Assigned count not correct", +// requests.length, assignments.size()); +// +// //check for uniqueness of containerIDs +// Set containerIds = new HashSet(); +// for (TaskAttemptContainerAssignedEvent assigned : assignments) { +// containerIds.add(assigned.getContainer().getId()); +// } +// Assert.assertEquals("Assigned containers must be different", +// assignments.size(), containerIds.size()); +// +// //check for all assignment +// for (ContainerRequestEvent req : requests) { +// TaskAttemptContainerAssignedEvent assigned = null; +// for (TaskAttemptContainerAssignedEvent ass : assignments) { +// if (ass.getTaskAttemptID().equals(req.getAttemptID())){ +// assigned = ass; +// break; +// } +// } +// checkAssignment(req, assigned, checkHostMatch); +// } +// } +// +// private void checkAssignment(ContainerRequestEvent request, +// TaskAttemptContainerAssignedEvent assigned, boolean checkHostMatch) { +// Assert.assertNotNull("Nothing assigned to attempt " + request.getAttemptID(), +// assigned); +// Assert.assertEquals("assigned to wrong attempt", request.getAttemptID(), +// assigned.getTaskAttemptID()); +// if (checkHostMatch) { +// Assert.assertTrue("Not assigned to requested host", Arrays.asList( +// request.getHosts()).contains( +// assigned.getContainer().getNodeId().toString())); +// } +// +// } +// +// //Mock RMContainerAllocator +// //Instead of talking to remote Scheduler,uses the local Scheduler +// public static class LocalRMContainerAllocator extends RMContainerAllocator { +// private static final List events = +// new ArrayList(); +// +// public static class AMRMProtocolImpl implements AMRMProtocol { +// +// private ResourceScheduler resourceScheduler; +// +// public AMRMProtocolImpl(ResourceScheduler resourceScheduler) { +// this.resourceScheduler = resourceScheduler; +// } +// +// @Override +// public RegisterApplicationMasterResponse registerApplicationMaster(RegisterApplicationMasterRequest request) throws YarnRemoteException { +// RegisterApplicationMasterResponse response = recordFactory.newRecordInstance(RegisterApplicationMasterResponse.class); +// return response; +// } +// +// public AllocateResponse allocate(AllocateRequest request) throws YarnRemoteException { +// List ask = request.getAskList(); +// List release = request.getReleaseList(); +// try { +// AMResponse response = recordFactory.newRecordInstance(AMResponse.class); +// Allocation allocation = resourceScheduler.allocate(request.getApplicationAttemptId(), ask); +// response.addAllNewContainers(allocation.getContainers()); +// response.setAvailableResources(allocation.getResourceLimit()); +// AllocateResponse allocateResponse = recordFactory.newRecordInstance(AllocateResponse.class); +// allocateResponse.setAMResponse(response); +// return allocateResponse; +// } catch(IOException ie) { +// throw RPCUtil.getRemoteException(ie); +// } +// } +// +// @Override +// public FinishApplicationMasterResponse finishApplicationMaster(FinishApplicationMasterRequest request) throws YarnRemoteException { +// FinishApplicationMasterResponse response = 
recordFactory.newRecordInstance(FinishApplicationMasterResponse.class); +// return response; +// } +// +// } +// +// private ResourceScheduler scheduler; +// LocalRMContainerAllocator(ResourceScheduler scheduler, Configuration conf) { +// super(null, new TestContext(events)); +// this.scheduler = scheduler; +// super.init(conf); +// super.start(); +// } +// +// protected AMRMProtocol createSchedulerProxy() { +// return new AMRMProtocolImpl(scheduler); +// } +// +// @Override +// protected void register() {} +// @Override +// protected void unregister() {} +// +// @Override +// protected Resource getMinContainerCapability() { +// Resource res = recordFactory.newRecordInstance(Resource.class); +// res.setMemory(1024); +// return res; +// } +// +// @Override +// protected Resource getMaxContainerCapability() { +// Resource res = recordFactory.newRecordInstance(Resource.class); +// res.setMemory(10240); +// return res; +// } +// +// public void sendRequest(ContainerRequestEvent req) { +// sendRequests(Arrays.asList(new ContainerRequestEvent[]{req})); +// } +// +// public void sendRequests(List reqs) { +// for (ContainerRequestEvent req : reqs) { +// handle(req); +// } +// } +// +// //API to be used by tests +// public List schedule() { +// //run the scheduler +// try { +// heartbeat(); +// } catch (Exception e) { +// LOG.error("error in heartbeat ", e); +// throw new YarnException(e); +// } +// +// List result = new ArrayList(events); +// events.clear(); +// return result; +// } +// +// protected void startAllocatorThread() { +// //override to NOT start thread +// } +// +// static class TestContext implements AppContext { +// private List events; +// TestContext(List events) { +// this.events = events; +// } +// @Override +// public Map getAllJobs() { +// return null; +// } +// @Override +// public ApplicationAttemptId getApplicationAttemptId() { +// return recordFactory.newRecordInstance(ApplicationAttemptId.class); +// } +// @Override +// public ApplicationId getApplicationID() { +// return recordFactory.newRecordInstance(ApplicationId.class); +// } +// @Override +// public EventHandler getEventHandler() { +// return new EventHandler() { +// @Override +// public void handle(Event event) { +// events.add((TaskAttemptContainerAssignedEvent) event); +// } +// }; +// } +// @Override +// public Job getJob(JobId jobID) { +// return null; +// } +// +// @Override +// public String getUser() { +// return null; +// } +// +// @Override +// public Clock getClock() { +// return null; +// } +// +// @Override +// public String getApplicationName() { +// return null; +// } +// +// @Override +// public long getStartTime() { +// return 0; +// } +// } +// } +// +// public static void main(String[] args) throws Exception { +// TestRMContainerAllocator t = new TestRMContainerAllocator(); +// t.testSimple(); +// //t.testResource(); +// t.testMapReduceScheduling(); +// } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java index 9f221e6354a..a678e4660e7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java @@ -47,7 +47,6 @@ import 
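// In the TypeConverter hunk that follows, fromYarn(JobReport, String) gains an explicit
// trackingUrl parameter, matching the removal of trackingUrl/diagnostics from JobReport and
// mr_protos.proto further down in this patch, and the QueueState conversion is dropped in favor
// of reporting QueueState.RUNNING. A minimal, hypothetical caller of the new signature (the
// report variable and both string arguments are illustrative, not taken from this patch):
//
//   JobReport report = ...;  // e.g. obtained from Job.getReport() in the MR AM
//   org.apache.hadoop.mapred.JobStatus status = TypeConverter.fromYarn(
//       report, "/staging/job.xml", "http://history-host:19888/yarn/job/job_0_0000");
//   // the tracking URL now comes from the caller rather than from the report itself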
org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.QueueACL; -import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -281,28 +280,16 @@ public class TypeConverter { } public static org.apache.hadoop.mapred.JobStatus fromYarn( - JobReport jobreport, String jobFile) { + JobReport jobreport, String jobFile, String trackingUrl) { JobPriority jobPriority = JobPriority.NORMAL; - org.apache.hadoop.mapred.JobStatus jobStatus = - new org.apache.hadoop.mapred.JobStatus(fromYarn(jobreport.getJobId()), - jobreport.getSetupProgress(), jobreport.getMapProgress(), - jobreport.getReduceProgress(), jobreport.getCleanupProgress(), - fromYarn(jobreport.getJobState()), - jobPriority, jobreport.getUser(), jobreport.getJobName(), - jobFile, jobreport.getTrackingUrl()); - jobStatus.setFailureInfo(jobreport.getDiagnostics()); - return jobStatus; + return new org.apache.hadoop.mapred.JobStatus(fromYarn(jobreport.getJobId()), + jobreport.getSetupProgress(), jobreport.getMapProgress(), + jobreport.getReduceProgress(), jobreport.getCleanupProgress(), + fromYarn(jobreport.getJobState()), + jobPriority, jobreport.getUser(), jobreport.getJobName(), + jobFile, trackingUrl); } - public static org.apache.hadoop.mapreduce.QueueState fromYarn( - QueueState state) { - org.apache.hadoop.mapreduce.QueueState qState = - org.apache.hadoop.mapreduce.QueueState.getState( - state.toString().toLowerCase()); - return qState; - } - - public static int fromYarn(JobState state) { switch (state) { case NEW: @@ -425,7 +412,6 @@ public class TypeConverter { ); jobStatus.setSchedulingInfo(trackingUrl); // Set AM tracking url jobStatus.setStartTime(application.getStartTime()); - jobStatus.setFailureInfo(application.getDiagnostics()); return jobStatus; } @@ -445,9 +431,9 @@ public class TypeConverter { public static QueueInfo fromYarn(org.apache.hadoop.yarn.api.records.QueueInfo queueInfo, Configuration conf) { - return new QueueInfo(queueInfo.getQueueName(),queueInfo.toString(), - fromYarn(queueInfo.getQueueState()), TypeConverter.fromYarnApps( - queueInfo.getApplications(), conf)); + return new QueueInfo(queueInfo.getQueueName(), + queueInfo.toString(), QueueState.RUNNING, + TypeConverter.fromYarnApps(queueInfo.getApplications(), conf)); } public static QueueInfo[] fromYarnQueueInfo( diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/MRConstants.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/MRConstants.java new file mode 100644 index 00000000000..6ac05361dce --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/MRConstants.java @@ -0,0 +1,50 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.mapreduce.v2; + + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface MRConstants { + // This should be the directory where splits file gets localized on the node + // running ApplicationMaster. + public static final String JOB_SUBMIT_DIR = "jobSubmitDir"; + + // This should be the name of the localized job-configuration file on the node + // running ApplicationMaster and Task + public static final String JOB_CONF_FILE = "job.xml"; + // This should be the name of the localized job-jar file on the node running + // individual containers/tasks. + public static final String JOB_JAR = "job.jar"; + + public static final String HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME = + "hadoop-mapreduce-client-app-0.24.0-SNAPSHOT.jar"; + + public static final String YARN_MAPREDUCE_APP_JAR_PATH = + "$YARN_HOME/modules/" + HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME; + + // The token file for the application. Should contain tokens for access to + // remote file system and may optionally contain application specific tokens. + // For now, generated by the AppManagers and used by NodeManagers and the + // Containers. + public static final String APPLICATION_TOKENS_FILE = "appTokens"; +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java index 0bfc9db3ed4..fb585e8dd27 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java @@ -29,8 +29,6 @@ public interface JobReport { public abstract long getFinishTime(); public abstract String getUser(); public abstract String getJobName(); - public abstract String getTrackingUrl(); - public abstract String getDiagnostics(); public abstract void setJobId(JobId jobId); public abstract void setJobState(JobState jobState); @@ -42,6 +40,4 @@ public interface JobReport { public abstract void setFinishTime(long finishTime); public abstract void setUser(String user); public abstract void setJobName(String jobName); - public abstract void setTrackingUrl(String trackingUrl); - public abstract void setDiagnostics(String diagnostics); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java index c5d2527a9da..a4033e695f2 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java @@ -206,30 +206,6 @@ public class JobReportPBImpl extends ProtoBase implements JobRep builder.setJobName((jobName)); } - @Override - public String getTrackingUrl() { - JobReportProtoOrBuilder p = viaProto ? proto : builder; - return (p.getTrackingUrl()); - } - - @Override - public void setTrackingUrl(String trackingUrl) { - maybeInitBuilder(); - builder.setTrackingUrl(trackingUrl); - } - - @Override - public String getDiagnostics() { - JobReportProtoOrBuilder p = viaProto ? proto : builder; - return p.getDiagnostics(); - } - - @Override - public void setDiagnostics(String diagnostics) { - maybeInitBuilder(); - builder.setDiagnostics(diagnostics); - } - private JobIdPBImpl convertFromProtoFormat(JobIdProto p) { return new JobIdPBImpl(p); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java index e57cf8d3c63..dcddd126cc2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java @@ -489,7 +489,7 @@ public class JobHistoryUtils { sb.append(address.getHostName()); } sb.append(":").append(address.getPort()); - sb.append("/jobhistory/job/"); // TODO This will change when the history server + sb.append("/yarn/job/"); // TODO This will change when the history server // understands apps. 
// TOOD Use JobId toString once UI stops using _id_id sb.append("job_").append(appId.getClusterTimestamp()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java index 9094da39ba3..68499497ac3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java @@ -39,14 +39,14 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.filecache.DistributedCache; +import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; +import org.apache.hadoop.util.Shell.ShellCommandExecutor; import org.apache.hadoop.yarn.YarnException; -import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; -import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; @@ -167,7 +167,7 @@ public class MRApps extends Apps { return TaskAttemptStateUI.valueOf(attemptStateStr); } - private static void setMRFrameworkClasspath( + public static void setInitialClasspath( Map environment) throws IOException { InputStream classpathFileStream = null; BufferedReader reader = null; @@ -182,17 +182,30 @@ public class MRApps extends Apps { reader = new BufferedReader(new InputStreamReader(classpathFileStream)); String cp = reader.readLine(); if (cp != null) { - addToEnvironment(environment, Environment.CLASSPATH.name(), cp.trim()); + addToClassPath(environment, cp.trim()); } // Put the file itself on classpath for tasks. - addToEnvironment( - environment, - Environment.CLASSPATH.name(), + addToClassPath(environment, thisClassLoader.getResource(mrAppGeneratedClasspathFile).getFile()); - // Add standard Hadoop classes - for (String c : ApplicationConstants.APPLICATION_CLASSPATH) { - addToEnvironment(environment, Environment.CLASSPATH.name(), c); + // If runtime env is different. + if (System.getenv().get("YARN_HOME") != null) { + ShellCommandExecutor exec = + new ShellCommandExecutor(new String[] { + System.getenv().get("YARN_HOME") + "/bin/yarn", + "classpath" }); + exec.execute(); + addToClassPath(environment, exec.getOutput().trim()); + } + + // Get yarn mapreduce-app classpath + if (System.getenv().get("HADOOP_MAPRED_HOME")!= null) { + ShellCommandExecutor exec = + new ShellCommandExecutor(new String[] { + System.getenv().get("HADOOP_MAPRED_HOME") + "/bin/mapred", + "classpath" }); + exec.execute(); + addToClassPath(environment, exec.getOutput().trim()); } } finally { if (classpathFileStream != null) { @@ -204,35 +217,20 @@ public class MRApps extends Apps { } // TODO: Remove duplicates. 
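// The classpath helpers in this hunk build the container CLASSPATH by shelling out to
// "$YARN_HOME/bin/yarn classpath" and "$HADOOP_MAPRED_HOME/bin/mapred classpath" when those
// environment variables are set, appending each result through addToClassPath(), which joins
// entries with a hard-coded ":" rather than the platform path separator. Minimal usage sketch
// of addToClassPath(), assuming java.util.HashMap/Map plus the MRApps class from this hunk;
// the two file names are illustrative:
Map<String, String> env = new HashMap<String, String>();
MRApps.addToClassPath(env, "job.jar");
MRApps.addToClassPath(env, "/opt/hadoop/mapreduce/*");
// env.get(MRApps.CLASSPATH) is now "job.jar:/opt/hadoop/mapreduce/*"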
} - - private static final String SYSTEM_PATH_SEPARATOR = - System.getProperty("path.separator"); - public static void addToEnvironment( - Map environment, - String variable, String value) { - String val = environment.get(variable); - if (val == null) { - val = value; + public static void addToClassPath( + Map environment, String fileName) { + String classpath = environment.get(CLASSPATH); + if (classpath == null) { + classpath = fileName; } else { - val = val + SYSTEM_PATH_SEPARATOR + value; + classpath = classpath + ":" + fileName; } - environment.put(variable, val); + environment.put(CLASSPATH, classpath); } - public static void setClasspath(Map environment) - throws IOException { - MRApps.addToEnvironment( - environment, - Environment.CLASSPATH.name(), - MRJobConfig.JOB_JAR); - MRApps.addToEnvironment( - environment, - Environment.CLASSPATH.name(), - Environment.PWD.$() + Path.SEPARATOR + "*"); - MRApps.setMRFrameworkClasspath(environment); - } - + public static final String CLASSPATH = "CLASSPATH"; + private static final String STAGING_CONSTANT = ".staging"; public static Path getStagingAreaDir(Configuration conf, String user) { return new Path( @@ -243,7 +241,7 @@ public class MRApps extends Apps { public static String getJobFile(Configuration conf, String user, org.apache.hadoop.mapreduce.JobID jobId) { Path jobFile = new Path(MRApps.getStagingAreaDir(conf, user), - jobId.toString() + Path.SEPARATOR + MRJobConfig.JOB_CONF_FILE); + jobId.toString() + Path.SEPARATOR + MRConstants.JOB_CONF_FILE); return jobFile.toString(); } @@ -262,11 +260,12 @@ public class MRApps extends Apps { public static void setupDistributedCache( Configuration conf, - Map localResources) + Map localResources, + Map env) throws IOException { // Cache archives - parseDistributedCacheArtifacts(conf, localResources, + parseDistributedCacheArtifacts(conf, localResources, env, LocalResourceType.ARCHIVE, DistributedCache.getCacheArchives(conf), parseTimeStamps(DistributedCache.getArchiveTimestamps(conf)), @@ -276,7 +275,7 @@ public class MRApps extends Apps { // Cache files parseDistributedCacheArtifacts(conf, - localResources, + localResources, env, LocalResourceType.FILE, DistributedCache.getCacheFiles(conf), parseTimeStamps(DistributedCache.getFileTimestamps(conf)), @@ -291,6 +290,7 @@ public class MRApps extends Apps { private static void parseDistributedCacheArtifacts( Configuration conf, Map localResources, + Map env, LocalResourceType type, URI[] uris, long[] timestamps, long[] sizes, boolean visibilities[], Path[] pathsToPutOnClasspath) throws IOException { @@ -339,6 +339,9 @@ public class MRApps extends Apps { : LocalResourceVisibility.PRIVATE, sizes[i], timestamps[i]) ); + if (classPaths.containsKey(u.getPath())) { + MRApps.addToClassPath(env, linkName); + } } } } @@ -355,42 +358,6 @@ public class MRApps extends Apps { } return result; } - - public static void setEnvFromInputString(Map env, - String envString) { - if (envString != null && envString.length() > 0) { - String childEnvs[] = envString.split(","); - for (String cEnv : childEnvs) { - String[] parts = cEnv.split("="); // split on '=' - String value = env.get(parts[0]); - - if (value != null) { - // Replace $env with the child's env constructed by NM's - // For example: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp - value = parts[1].replace("$" + parts[0], value); - } else { - // example PATH=$PATH:/tmp - value = System.getenv(parts[0]); - if (value != null) { - // the env key is present in the tt's env - value = parts[1].replace("$" + parts[0], value); - 
} else { - // check for simple variable substitution - // for e.g. ROOT=$HOME - String envValue = System.getenv(parts[1].substring(1)); - if (envValue != null) { - value = envValue; - } else { - // the env key is note present anywhere .. simply set it - // example X=$X:/tmp or X=/tmp - value = parts[1].replace("$" + parts[0], ""); - } - } - } - addToEnvironment(env, parts[0], value); - } - } - } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java index d710a6f7b88..c429ca55b51 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java @@ -19,25 +19,27 @@ package org.apache.hadoop.mapreduce.v2.util; import org.apache.hadoop.mapreduce.v2.api.records.JobId; -import org.apache.hadoop.mapreduce.v2.api.records.JobReport; -import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.util.Records; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; public class MRBuilderUtils { + private static final RecordFactory recordFactory = RecordFactoryProvider + .getRecordFactory(null); + public static JobId newJobId(ApplicationId appId, int id) { - JobId jobId = Records.newRecord(JobId.class); + JobId jobId = recordFactory.newRecordInstance(JobId.class); jobId.setAppId(appId); jobId.setId(id); return jobId; } public static TaskId newTaskId(JobId jobId, int id, TaskType taskType) { - TaskId taskId = Records.newRecord(TaskId.class); + TaskId taskId = recordFactory.newRecordInstance(TaskId.class); taskId.setJobId(jobId); taskId.setId(id); taskId.setTaskType(taskType); @@ -46,27 +48,9 @@ public class MRBuilderUtils { public static TaskAttemptId newTaskAttemptId(TaskId taskId, int attemptId) { TaskAttemptId taskAttemptId = - Records.newRecord(TaskAttemptId.class); + recordFactory.newRecordInstance(TaskAttemptId.class); taskAttemptId.setTaskId(taskId); taskAttemptId.setId(attemptId); return taskAttemptId; } - - public static JobReport newJobReport(JobId jobId, String jobName, - String userName, JobState state, long startTime, long finishTime, - float setupProgress, float mapProgress, float reduceProgress, - float cleanupProgress) { - JobReport report = Records.newRecord(JobReport.class); - report.setJobId(jobId); - report.setJobName(jobName); - report.setUser(userName); - report.setJobState(state); - report.setStartTime(startTime); - report.setFinishTime(finishTime); - report.setSetupProgress(setupProgress); - report.setCleanupProgress(cleanupProgress); - report.setMapProgress(mapProgress); - report.setReduceProgress(reduceProgress); - return report; - } } \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto index 29184da4868..7d8d1b2e0b8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto @@ -143,8 +143,6 @@ message JobReportProto { optional int64 finish_time = 8; optional string user = 9; optional string jobName = 10; - optional string trackingUrl = 11; - optional string diagnostics = 12; } enum TaskAttemptCompletionEventStatusProto { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java index 1aeae987c80..bda7fb9d658 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java @@ -19,14 +19,11 @@ package org.apache.hadoop.mapreduce; import junit.framework.Assert; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationReportPBImpl; -import org.apache.hadoop.yarn.api.records.impl.pb.QueueInfoPBImpl; - import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import org.junit.Test; @@ -70,14 +67,4 @@ public class TestTypeConverter { Assert.assertEquals("jobId set incorrectly", 6789, status.getJobID().getId()); Assert.assertEquals("state set incorrectly", JobStatus.State.KILLED, status.getState()); } - - @Test - public void testFromYarnQueueInfo() { - org.apache.hadoop.yarn.api.records.QueueInfo queueInfo = new QueueInfoPBImpl(); - queueInfo.setQueueState(org.apache.hadoop.yarn.api.records.QueueState.STOPPED); - org.apache.hadoop.mapreduce.QueueInfo returned = - TypeConverter.fromYarn(queueInfo, new Configuration()); - Assert.assertEquals("queueInfo translation didn't work.", - returned.getState().toString(), queueInfo.getQueueState().toString().toLowerCase()); - } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java index 11589980625..7a2ee00a92d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java @@ -25,6 +25,7 @@ import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; +import 
org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -114,8 +115,7 @@ public class TestMRApps { @Test public void testGetJobFileWithUser() { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_AM_STAGING_DIR, "/my/path/to/staging"); - String jobFile = MRApps.getJobFile(conf, "dummy-user", - new JobID("dummy-job", 12345)); + String jobFile = MRApps.getJobFile(conf, "dummy-user", new JobID("dummy-job", 12345)); assertNotNull("getJobFile results in null.", jobFile); assertEquals("jobFile with specified user is not as expected.", "/my/path/to/staging/dummy-user/.staging/job_dummy-job_12345/job.xml", jobFile); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/BackupStore.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/BackupStore.java index f409d2298eb..026793c5374 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/BackupStore.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/BackupStore.java @@ -41,7 +41,6 @@ import org.apache.hadoop.mapred.IFile.Reader; import org.apache.hadoop.mapred.IFile.Writer; import org.apache.hadoop.mapred.Merger.Segment; import org.apache.hadoop.mapreduce.MRConfig; -import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskAttemptID; /** @@ -561,7 +560,7 @@ public class BackupStore { private Writer createSpillFile() throws IOException { Path tmp = - new Path(MRJobConfig.OUTPUT + "/backup_" + tid.getId() + "_" + new Path(Constants.OUTPUT + "/backup_" + tid.getId() + "_" + (spillNumber++) + ".out"); LOG.info("Created file: " + tmp); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFailedAttemptEvent.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Constants.java similarity index 64% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFailedAttemptEvent.java rename to hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Constants.java index 111c6acc41b..e8a202ed44b 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFailedAttemptEvent.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Constants.java @@ -16,21 +16,12 @@ * limitations under the License. 
*/ -package org.apache.hadoop.yarn.server.resourcemanager.rmapp; +package org.apache.hadoop.mapred; -import org.apache.hadoop.yarn.api.records.ApplicationId; - -public class RMAppFailedAttemptEvent extends RMAppEvent { - - private final String diagnostics; - - public RMAppFailedAttemptEvent(ApplicationId appId, RMAppEventType event, - String diagnostics) { - super(appId, event); - this.diagnostics = diagnostics; - } - - public String getDiagnostics() { - return this.diagnostics; - } +public class Constants { + static final String OUTPUT = "output"; + public static final String HADOOP_WORK_DIR = "HADOOP_WORK_DIR"; + public static final String JOBFILE = "job.xml"; + public static final String STDOUT_LOGFILE_ENV = "STDOUT_LOGFILE_ENV"; + public static final String STDERR_LOGFILE_ENV = "STDERR_LOGFILE_ENV"; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java index b489d41b17c..49d12d764d5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java @@ -348,7 +348,6 @@ public class JobConf extends Configuration { */ public static final Level DEFAULT_LOG_LEVEL = Level.INFO; - /** * Construct a map/reduce job configuration. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobStatus.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobStatus.java index e5add2139f5..90b68872ff4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobStatus.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobStatus.java @@ -321,10 +321,6 @@ public class JobStatus extends org.apache.hadoop.mapreduce.JobStatus { super.setJobACLs(acls); } - public synchronized void setFailureInfo(String failureInfo) { - super.setFailureInfo(failureInfo); - } - /** * Set the priority of the job, defaulting to NORMAL. 
* @param jp new job priority diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MRConstants.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MRConstants.java index 3d7363e5faa..e2c16fbfac1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MRConstants.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MRConstants.java @@ -17,16 +17,11 @@ */ package org.apache.hadoop.mapred; -import org.apache.hadoop.classification.InterfaceAudience.Private; -import org.apache.hadoop.classification.InterfaceStability.Unstable; - /******************************* * Some handy constants * *******************************/ -@Private -@Unstable -public interface MRConstants { +interface MRConstants { // // Timeouts, constants // @@ -58,6 +53,5 @@ public interface MRConstants { */ public static final String FOR_REDUCE_TASK = "for-reduce-task"; - /** Used in MRv1, mostly in TaskTracker code **/ public static final String WORKDIR = "work"; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MROutputFiles.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MROutputFiles.java index a9e25f287d4..e81e11d3fb6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MROutputFiles.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MROutputFiles.java @@ -27,7 +27,6 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.MRConfig; -import org.apache.hadoop.mapreduce.MRJobConfig; /** * Manipulate the working area for the transient store for maps and reduces. 
@@ -55,7 +54,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getOutputFile() throws IOException { - return lDirAlloc.getLocalPathToRead(MRJobConfig.OUTPUT + Path.SEPARATOR + return lDirAlloc.getLocalPathToRead(Constants.OUTPUT + Path.SEPARATOR + MAP_OUTPUT_FILENAME_STRING, getConf()); } @@ -69,7 +68,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getOutputFileForWrite(long size) throws IOException { - return lDirAlloc.getLocalPathForWrite(MRJobConfig.OUTPUT + Path.SEPARATOR + return lDirAlloc.getLocalPathForWrite(Constants.OUTPUT + Path.SEPARATOR + MAP_OUTPUT_FILENAME_STRING, size, getConf()); } @@ -90,7 +89,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getOutputIndexFile() throws IOException { - return lDirAlloc.getLocalPathToRead(MRJobConfig.OUTPUT + Path.SEPARATOR + return lDirAlloc.getLocalPathToRead(Constants.OUTPUT + Path.SEPARATOR + MAP_OUTPUT_FILENAME_STRING + MAP_OUTPUT_INDEX_SUFFIX_STRING, getConf()); } @@ -105,7 +104,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getOutputIndexFileForWrite(long size) throws IOException { - return lDirAlloc.getLocalPathForWrite(MRJobConfig.OUTPUT + Path.SEPARATOR + return lDirAlloc.getLocalPathForWrite(Constants.OUTPUT + Path.SEPARATOR + MAP_OUTPUT_FILENAME_STRING + MAP_OUTPUT_INDEX_SUFFIX_STRING, size, getConf()); } @@ -129,7 +128,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getSpillFile(int spillNumber) throws IOException { - return lDirAlloc.getLocalPathToRead(MRJobConfig.OUTPUT + "/spill" + return lDirAlloc.getLocalPathToRead(Constants.OUTPUT + "/spill" + spillNumber + ".out", getConf()); } @@ -144,7 +143,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getSpillFileForWrite(int spillNumber, long size) throws IOException { - return lDirAlloc.getLocalPathForWrite(MRJobConfig.OUTPUT + "/spill" + return lDirAlloc.getLocalPathForWrite(Constants.OUTPUT + "/spill" + spillNumber + ".out", size, getConf()); } @@ -158,7 +157,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getSpillIndexFile(int spillNumber) throws IOException { - return lDirAlloc.getLocalPathToRead(MRJobConfig.OUTPUT + "/spill" + return lDirAlloc.getLocalPathToRead(Constants.OUTPUT + "/spill" + spillNumber + ".out.index", getConf()); } @@ -173,7 +172,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getSpillIndexFileForWrite(int spillNumber, long size) throws IOException { - return lDirAlloc.getLocalPathForWrite(MRJobConfig.OUTPUT + "/spill" + return lDirAlloc.getLocalPathForWrite(Constants.OUTPUT + "/spill" + spillNumber + ".out.index", size, getConf()); } @@ -188,7 +187,7 @@ public class MROutputFiles extends MapOutputFile { public Path getInputFile(int mapId) throws IOException { return lDirAlloc.getLocalPathToRead(String.format( - REDUCE_INPUT_FILE_FORMAT_STRING, MRJobConfig.OUTPUT, Integer + REDUCE_INPUT_FILE_FORMAT_STRING, Constants.OUTPUT, Integer .valueOf(mapId)), getConf()); } @@ -205,7 +204,7 @@ public class MROutputFiles extends MapOutputFile { long size) throws IOException { return lDirAlloc.getLocalPathForWrite(String.format( - REDUCE_INPUT_FILE_FORMAT_STRING, MRJobConfig.OUTPUT, mapId.getId()), + REDUCE_INPUT_FILE_FORMAT_STRING, Constants.OUTPUT, mapId.getId()), size, getConf()); } @@ -213,7 +212,7 @@ public class MROutputFiles extends MapOutputFile { @Override public void removeAll() throws IOException { - 
((JobConf)getConf()).deleteLocalFiles(MRJobConfig.OUTPUT); + ((JobConf)getConf()).deleteLocalFiles(Constants.OUTPUT); } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java index 597b2edaa39..7e978e9cf94 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java @@ -44,7 +44,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SecureIOUtils; import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.util.ProcessTree; import org.apache.hadoop.util.Shell; import org.apache.log4j.Appender; @@ -76,18 +75,10 @@ public class TaskLog { } } } - - public static String getMRv2LogDir() { - return System.getProperty(MRJobConfig.TASK_LOG_DIR); - } - + public static File getTaskLogFile(TaskAttemptID taskid, boolean isCleanup, LogName filter) { - if (getMRv2LogDir() != null) { - return new File(getMRv2LogDir(), filter.toString()); - } else { - return new File(getAttemptDir(taskid, isCleanup), filter.toString()); - } + return new File(getAttemptDir(taskid, isCleanup), filter.toString()); } static File getRealTaskLogFileLocation(TaskAttemptID taskid, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Application.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Application.java index 0a108d73b63..cb8b476ac75 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Application.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Application.java @@ -18,7 +18,6 @@ package org.apache.hadoop.mapred.pipes; -import java.io.BufferedInputStream; import java.io.File; import java.io.IOException; import java.net.ServerSocket; @@ -27,7 +26,6 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Random; import javax.crypto.SecretKey; @@ -113,6 +111,7 @@ class Application jobACLs = new HashMap(); @@ -279,14 +278,6 @@ public class JobStatus implements Writable, Cloneable { this.queue = queue; } - /** - * Set diagnostic information. - * @param failureInfo diagnostic information - */ - protected synchronized void setFailureInfo(String failureInfo) { - this.failureInfo = failureInfo; - } - /** * Get queue name * @return queue name @@ -368,15 +359,6 @@ public class JobStatus implements Writable, Cloneable { */ public synchronized JobPriority getPriority() { return priority; } - /** - * Gets any available info on the reason of failure of the job. - * @return diagnostic information on why a job might have failed. - */ - public synchronized String getFailureInfo() { - return this.failureInfo; - } - - /** * Returns true if the status is for a completed job. 
*/ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index accfdddc3db..33884bb82e9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -210,8 +210,6 @@ public interface MRJobConfig { public static final String REDUCE_LOG_LEVEL = "mapreduce.reduce.log.level"; - public static final String DEFAULT_LOG_LEVEL = "INFO"; - public static final String REDUCE_MERGE_INMEM_THRESHOLD = "mapreduce.reduce.merge.inmem.threshold"; public static final String REDUCE_INPUT_BUFFER_PERCENT = "mapreduce.reduce.input.buffer.percent"; @@ -332,15 +330,9 @@ public interface MRJobConfig { MR_AM_PREFIX+"num-progress-splits"; public static final int DEFAULT_MR_AM_NUM_PROGRESS_SPLITS = 12; - /** - * Upper limit on the number of threads user to launch containers in the app - * master. Expect level config, you shouldn't be needing it in most cases. - */ - public static final String MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT = - MR_AM_PREFIX+"containerlauncher.thread-count-limit"; - - public static final int DEFAULT_MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT = - 500; + /** Number of threads user to launch containers in the app master.*/ + public static final String MR_AM_CONTAINERLAUNCHER_THREAD_COUNT = + MR_AM_PREFIX+"containerlauncher.thread-count"; /** Number of threads to handle job client RPC requests.*/ public static final String MR_AM_JOB_CLIENT_THREAD_COUNT = @@ -408,69 +400,4 @@ public interface MRJobConfig { */ public static final String MR_AM_CREATE_JH_INTERMEDIATE_BASE_DIR = MR_AM_PREFIX + "create-intermediate-jh-base-dir"; - - public static final String MAPRED_MAP_ADMIN_JAVA_OPTS = - "mapreduce.admin.map.child.java.opts"; - - public static final String MAPRED_REDUCE_ADMIN_JAVA_OPTS = - "mapreduce.admin.reduce.child.java.opts"; - - public static final String DEFAULT_MAPRED_ADMIN_JAVA_OPTS = - "-Djava.net.preferIPv4Stack=true " + - "-Dhadoop.metrics.log.level=WARN "; - - public static final String MAPRED_ADMIN_USER_SHELL = - "mapreduce.admin.user.shell"; - - public static final String DEFAULT_SHELL = "/bin/bash"; - - public static final String MAPRED_ADMIN_USER_ENV = - "mapreduce.admin.user.env"; - - public static final String DEFAULT_MAPRED_ADMIN_USER_ENV = - "LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib"; - - public static final String WORKDIR = "work"; - - public static final String OUTPUT = "output"; - - public static final String HADOOP_WORK_DIR = "HADOOP_WORK_DIR"; - - public static final String STDOUT_LOGFILE_ENV = "STDOUT_LOGFILE_ENV"; - - public static final String STDERR_LOGFILE_ENV = "STDERR_LOGFILE_ENV"; - - // This should be the directory where splits file gets localized on the node - // running ApplicationMaster. - public static final String JOB_SUBMIT_DIR = "jobSubmitDir"; - - // This should be the name of the localized job-configuration file on the node - // running ApplicationMaster and Task - public static final String JOB_CONF_FILE = "job.xml"; - - // This should be the name of the localized job-jar file on the node running - // individual containers/tasks. 
- public static final String JOB_JAR = "job.jar"; - - public static final String JOB_SPLIT = "job.split"; - - public static final String JOB_SPLIT_METAINFO = "job.splitmetainfo"; - - public static final String APPLICATION_MASTER_CLASS = - "org.apache.hadoop.mapreduce.v2.app.MRAppMaster"; - - // The token file for the application. Should contain tokens for access to - // remote file system and may optionally contain application specific tokens. - // For now, generated by the AppManagers and used by NodeManagers and the - // Containers. - public static final String APPLICATION_TOKENS_FILE = "appTokens"; - - /** The log directory for the containers */ - public static final String TASK_LOG_DIR = MR_PREFIX + "container.log.dir"; - - public static final String TASK_LOG_SIZE = MR_PREFIX + "container.log.filesize"; - - public static final String MAPREDUCE_V2_CHILD_CLASS = - "org.apache.hadoop.mapred.YarnChild"; - } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java index e86eb279e9a..56f114adc5c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java @@ -135,7 +135,7 @@ public class HistoryClientService extends AbstractService { webApp = new HsWebApp(history); String bindAddress = conf.get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS); - WebApps.$for("jobhistory", this).at(bindAddress).start(webApp); + WebApps.$for("yarn", this).at(bindAddress).start(webApp); } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java index 7e9e67c3c3d..c9f90b9e79e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java @@ -22,6 +22,7 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -83,6 +84,25 @@ public class JobHistory extends AbstractService implements HistoryContext { private static final Log SUMMARY_LOG = LogFactory.getLog(JobSummary.class); + /* + * TODO Get rid of this once JobId has it's own comparator + */ + private static final Comparator JOB_ID_COMPARATOR = + new Comparator() { + @Override + public int compare(JobId o1, JobId o2) { + if (o1.getAppId().getClusterTimestamp() > + o2.getAppId().getClusterTimestamp()) { + return 1; + } else if (o1.getAppId().getClusterTimestamp() < + o2.getAppId().getClusterTimestamp()) { + return -1; + } else { + return o1.getId() - o2.getId(); + } + } + }; + private static String DONE_BEFORE_SERIAL_TAIL = 
JobHistoryUtils.doneSubdirsBeforeSerialTail(); @@ -98,19 +118,19 @@ public class JobHistory extends AbstractService implements HistoryContext { //Maintains minimal details for recent jobs (parsed from history file name). //Sorted on Job Completion Time. private final SortedMap jobListCache = - new ConcurrentSkipListMap(); + new ConcurrentSkipListMap(JOB_ID_COMPARATOR); // Re-use exisiting MetaInfo objects if they exist for the specific JobId. (synchronization on MetaInfo) // Check for existance of the object when using iterators. private final SortedMap intermediateListCache = - new ConcurrentSkipListMap(); + new ConcurrentSkipListMap(JOB_ID_COMPARATOR); //Maintains a list of known done subdirectories. Not currently used. private final Set existingDoneSubdirs = new HashSet(); private final SortedMap loadedJobCache = - new ConcurrentSkipListMap(); + new ConcurrentSkipListMap(JOB_ID_COMPARATOR); /** * Maintains a mapping between intermediate user directories and the last @@ -653,7 +673,7 @@ public class JobHistory extends AbstractService implements HistoryContext { private Map getAllJobsInternal() { //TODO This should ideally be using getAllJobsMetaInfo // or get rid of that method once Job has APIs for user, finishTime etc. - SortedMap result = new TreeMap(); + SortedMap result = new TreeMap(JOB_ID_COMPARATOR); try { scanIntermediateDirectory(); } catch (IOException e) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml index ef388fcd86a..4b605cb2ae5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml @@ -64,12 +64,6 @@ hadoop-yarn-server-resourcemanager test - - org.apache.hadoop - hadoop-yarn-server-resourcemanager - test-jar - test - org.apache.hadoop hadoop-yarn-server-common diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java index 20c6ce7c000..80c8d91a1b3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java @@ -1,20 +1,20 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ package org.apache.hadoop.mapred; @@ -42,29 +42,29 @@ public class ClientCache { private final Configuration conf; private final ResourceMgrDelegate rm; - + private static final Log LOG = LogFactory.getLog(ClientCache.class); private Map cache = - new HashMap(); - + new HashMap(); + private MRClientProtocol hsProxy; - public ClientCache(Configuration conf, ResourceMgrDelegate rm) { + ClientCache(Configuration conf, ResourceMgrDelegate rm) { this.conf = conf; this.rm = rm; } //TODO: evict from the cache on some threshold - public synchronized ClientServiceDelegate getClient(JobID jobId) { - if (hsProxy == null) { + synchronized ClientServiceDelegate getClient(JobID jobId) { + if (hsProxy == null) { try { - hsProxy = instantiateHistoryProxy(); - } catch (IOException e) { - LOG.warn("Could not connect to History server.", e); - throw new YarnException("Could not connect to History server.", e); - } - } + hsProxy = instantiateHistoryProxy(); + } catch (IOException e) { + LOG.warn("Could not connect to History server.", e); + throw new YarnException("Could not connect to History server.", e); + } + } ClientServiceDelegate client = cache.get(jobId); if (client == null) { client = new ClientServiceDelegate(conf, rm, jobId, hsProxy); @@ -74,7 +74,7 @@ public class ClientCache { } private MRClientProtocol instantiateHistoryProxy() - throws IOException { + throws IOException { final String serviceAddr = conf.get(JHAdminConfig.MR_HISTORY_ADDRESS); if (StringUtils.isEmpty(serviceAddr)) { return null; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java index 341e17e9513..605c44e5ed9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java @@ -70,7 +70,7 @@ import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.security.ApplicationTokenIdentifier; import org.apache.hadoop.yarn.security.SchedulerSecurityInfo; -public class ClientServiceDelegate { +class ClientServiceDelegate { private static final Log LOG = LogFactory.getLog(ClientServiceDelegate.class); // Caches for per-user NotRunningJobs @@ -87,7 +87,7 @@ public class ClientServiceDelegate { private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); private static String UNKNOWN_USER = "Unknown User"; - public 
ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm, + ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm, JobID jobId, MRClientProtocol historyServerProxy) { this.conf = new Configuration(conf); // Cloning for modifying. // For faster redirects from AM to HS. @@ -101,20 +101,16 @@ public class ClientServiceDelegate { // Get the instance of the NotRunningJob corresponding to the specified // user and state - private NotRunningJob getNotRunningJob(ApplicationReport applicationReport, - JobState state) { + private NotRunningJob getNotRunningJob(String user, JobState state) { synchronized (notRunningJobs) { HashMap map = notRunningJobs.get(state); if (map == null) { map = new HashMap(); notRunningJobs.put(state, map); } - String user = - (applicationReport == null) ? - UNKNOWN_USER : applicationReport.getUser(); NotRunningJob notRunningJob = map.get(user); if (notRunningJob == null) { - notRunningJob = new NotRunningJob(applicationReport, state); + notRunningJob = new NotRunningJob(user, state); map.put(user, notRunningJob); } return notRunningJob; @@ -134,7 +130,7 @@ public class ClientServiceDelegate { if (application == null) { LOG.info("Could not get Job info from RM for job " + jobId + ". Redirecting to job history server."); - return checkAndGetHSProxy(null, JobState.NEW); + return checkAndGetHSProxy(UNKNOWN_USER, JobState.NEW); } try { if (application.getHost() == null || "".equals(application.getHost())) { @@ -175,7 +171,7 @@ public class ClientServiceDelegate { if (application == null) { LOG.info("Could not get Job info from RM for job " + jobId + ". Redirecting to job history server."); - return checkAndGetHSProxy(null, JobState.RUNNING); + return checkAndGetHSProxy(UNKNOWN_USER, JobState.RUNNING); } } catch (InterruptedException e) { LOG.warn("getProxy() call interruped", e); @@ -195,17 +191,17 @@ public class ClientServiceDelegate { if (application.getState() == ApplicationState.NEW || application.getState() == ApplicationState.SUBMITTED) { realProxy = null; - return getNotRunningJob(application, JobState.NEW); + return getNotRunningJob(user, JobState.NEW); } if (application.getState() == ApplicationState.FAILED) { realProxy = null; - return getNotRunningJob(application, JobState.FAILED); + return getNotRunningJob(user, JobState.FAILED); } if (application.getState() == ApplicationState.KILLED) { realProxy = null; - return getNotRunningJob(application, JobState.KILLED); + return getNotRunningJob(user, JobState.KILLED); } //History server can serve a job only if application @@ -213,16 +209,15 @@ public class ClientServiceDelegate { if (application.getState() == ApplicationState.SUCCEEDED) { LOG.info("Application state is completed. 
" + "Redirecting to job history server"); - realProxy = checkAndGetHSProxy(application, JobState.SUCCEEDED); + realProxy = checkAndGetHSProxy(user, JobState.SUCCEEDED); } return realProxy; } - private MRClientProtocol checkAndGetHSProxy( - ApplicationReport applicationReport, JobState state) { + private MRClientProtocol checkAndGetHSProxy(String user, JobState state) { if (null == historyServerProxy) { LOG.warn("Job History Server is not configured."); - return getNotRunningJob(applicationReport, state); + return getNotRunningJob(user, state); } return historyServerProxy; } @@ -279,7 +274,7 @@ public class ClientServiceDelegate { } } - public org.apache.hadoop.mapreduce.Counters getJobCounters(JobID arg0) throws IOException, + org.apache.hadoop.mapreduce.Counters getJobCounters(JobID arg0) throws IOException, InterruptedException { org.apache.hadoop.mapreduce.v2.api.records.JobId jobID = TypeConverter.toYarn(arg0); GetCountersRequest request = recordFactory.newRecordInstance(GetCountersRequest.class); @@ -290,7 +285,7 @@ public class ClientServiceDelegate { } - public TaskCompletionEvent[] getTaskCompletionEvents(JobID arg0, int arg1, int arg2) + TaskCompletionEvent[] getTaskCompletionEvents(JobID arg0, int arg1, int arg2) throws IOException, InterruptedException { org.apache.hadoop.mapreduce.v2.api.records.JobId jobID = TypeConverter .toYarn(arg0); @@ -308,7 +303,7 @@ public class ClientServiceDelegate { .toArray(new org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent[0])); } - public String[] getTaskDiagnostics(org.apache.hadoop.mapreduce.TaskAttemptID arg0) + String[] getTaskDiagnostics(org.apache.hadoop.mapreduce.TaskAttemptID arg0) throws IOException, InterruptedException { org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID = TypeConverter @@ -326,25 +321,24 @@ public class ClientServiceDelegate { return result; } - public JobStatus getJobStatus(JobID oldJobID) throws YarnRemoteException { + JobStatus getJobStatus(JobID oldJobID) throws YarnRemoteException { org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter.toYarn(oldJobID); - GetJobReportRequest request = - recordFactory.newRecordInstance(GetJobReportRequest.class); + GetJobReportRequest request = recordFactory.newRecordInstance(GetJobReportRequest.class); request.setJobId(jobId); JobReport report = ((GetJobReportResponse) invoke("getJobReport", GetJobReportRequest.class, request)).getJobReport(); String jobFile = MRApps.getJobFile(conf, report.getUser(), oldJobID); - return TypeConverter.fromYarn(report, jobFile); + //TODO: add tracking url in JobReport + return TypeConverter.fromYarn(report, jobFile, ""); } - public org.apache.hadoop.mapreduce.TaskReport[] getTaskReports(JobID oldJobID, TaskType taskType) + org.apache.hadoop.mapreduce.TaskReport[] getTaskReports(JobID oldJobID, TaskType taskType) throws YarnRemoteException, YarnRemoteException { org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter.toYarn(oldJobID); - GetTaskReportsRequest request = - recordFactory.newRecordInstance(GetTaskReportsRequest.class); + GetTaskReportsRequest request = recordFactory.newRecordInstance(GetTaskReportsRequest.class); request.setJobId(jobId); request.setTaskType(TypeConverter.toYarn(taskType)); @@ -356,7 +350,7 @@ public class ClientServiceDelegate { (taskReports).toArray(new org.apache.hadoop.mapreduce.TaskReport[0]); } - public boolean killTask(TaskAttemptID taskAttemptID, boolean fail) + boolean killTask(TaskAttemptID taskAttemptID, boolean fail) throws 
YarnRemoteException { org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID = TypeConverter.toYarn(taskAttemptID); @@ -372,7 +366,7 @@ public class ClientServiceDelegate { return true; } - public boolean killJob(JobID oldJobID) + boolean killJob(JobID oldJobID) throws YarnRemoteException { org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter.toYarn(oldJobID); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java index 17ad9f62aae..a40fcedda39 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java @@ -22,8 +22,6 @@ import java.util.ArrayList; import java.util.HashMap; import org.apache.commons.lang.NotImplementedException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.mapreduce.v2.api.MRClientProtocol; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.FailTaskAttemptRequest; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.FailTaskAttemptResponse; @@ -55,41 +53,20 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskReport; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; -import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; public class NotRunningJob implements MRClientProtocol { - private static final Log LOG = LogFactory.getLog(NotRunningJob.class); - private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); private final JobState jobState; - private final ApplicationReport applicationReport; - - - private ApplicationReport getUnknownApplicationReport() { - ApplicationReport unknown = - recordFactory.newRecordInstance(ApplicationReport.class); - unknown.setUser("N/A"); - unknown.setHost("N/A"); - unknown.setName("N/A"); - unknown.setQueue("N/A"); - unknown.setStartTime(0); - unknown.setFinishTime(0); - unknown.setTrackingUrl("N/A"); - unknown.setDiagnostics("N/A"); - LOG.info("getUnknownApplicationReport"); - return unknown; - } - - NotRunningJob(ApplicationReport applicationReport, JobState jobState) { - this.applicationReport = - (applicationReport == null) ? 
- getUnknownApplicationReport() : applicationReport; + private final String user; + + NotRunningJob(String username, JobState jobState) { + this.user = username; this.jobState = jobState; } @@ -124,19 +101,15 @@ public class NotRunningJob implements MRClientProtocol { @Override public GetJobReportResponse getJobReport(GetJobReportRequest request) throws YarnRemoteException { + GetJobReportResponse resp = + recordFactory.newRecordInstance(GetJobReportResponse.class); JobReport jobReport = recordFactory.newRecordInstance(JobReport.class); jobReport.setJobId(request.getJobId()); - jobReport.setJobState(jobState); - jobReport.setUser(applicationReport.getUser()); - jobReport.setStartTime(applicationReport.getStartTime()); - jobReport.setDiagnostics(applicationReport.getDiagnostics()); - jobReport.setJobName(applicationReport.getName()); - jobReport.setTrackingUrl(applicationReport.getTrackingUrl()); - jobReport.setFinishTime(applicationReport.getFinishTime()); + jobReport.setJobState(this.jobState); - GetJobReportResponse resp = - recordFactory.newRecordInstance(GetJobReportResponse.class); + jobReport.setUser(this.user); + // TODO: Add jobName & other job information that is available resp.setJobReport(jobReport); return resp; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java index 8b7c818b1e3..8e8081abe4d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java @@ -32,19 +32,19 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.ClusterMetrics; import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.JobStatus; -import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.QueueAclsInfo; import org.apache.hadoop.mapreduce.QueueInfo; import org.apache.hadoop.mapreduce.TaskTrackerInfo; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.SecurityInfo; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -53,7 +53,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; 
+import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; @@ -79,10 +79,6 @@ public class ResourceMgrDelegate { private ApplicationId applicationId; private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); - /** - * Delegate responsible for communicating with the Resource Manager's {@link ClientRMProtocol}. - * @param conf the configuration object. - */ public ResourceMgrDelegate(YarnConfiguration conf) { this.conf = conf; YarnRPC rpc = YarnRPC.create(this.conf); @@ -101,16 +97,6 @@ public class ResourceMgrDelegate { LOG.info("Connected to ResourceManager at " + rmAddress); } - /** - * Used for injecting applicationsManager, mostly for testing. - * @param conf the configuration object - * @param applicationsManager the handle to talk the resource managers {@link ClientRMProtocol}. - */ - public ResourceMgrDelegate(YarnConfiguration conf, ClientRMProtocol applicationsManager) { - this.conf = conf; - this.applicationsManager = applicationsManager; - } - public void cancelDelegationToken(Token arg0) throws IOException, InterruptedException { return; @@ -169,8 +155,8 @@ public class ResourceMgrDelegate { } public JobID getNewJobID() throws IOException, InterruptedException { - GetNewApplicationRequest request = recordFactory.newRecordInstance(GetNewApplicationRequest.class); - applicationId = applicationsManager.getNewApplication(request).getApplicationId(); + GetNewApplicationIdRequest request = recordFactory.newRecordInstance(GetNewApplicationIdRequest.class); + applicationId = applicationsManager.getNewApplicationId(request).getApplicationId(); return TypeConverter.fromYarn(applicationId); } @@ -268,7 +254,7 @@ public class ResourceMgrDelegate { public String getSystemDir() throws IOException, InterruptedException { - Path sysDir = new Path(MRJobConfig.JOB_SUBMIT_DIR); + Path sysDir = new Path(MRConstants.JOB_SUBMIT_DIR); //FileContext.getFileContext(conf).delete(sysDir, true); return sysDir.toString(); } @@ -308,9 +294,9 @@ public class ResourceMgrDelegate { } public void killApplication(ApplicationId applicationId) throws IOException { - KillApplicationRequest request = recordFactory.newRecordInstance(KillApplicationRequest.class); + FinishApplicationRequest request = recordFactory.newRecordInstance(FinishApplicationRequest.class); request.setApplicationId(applicationId); - applicationsManager.forceKillApplication(request); + applicationsManager.finishApplication(request); LOG.info("Killing application " + applicationId); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java index a11968a16f9..82134c7520f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java @@ -51,6 +51,7 @@ import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.protocol.ClientProtocol; import 
org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.security.Credentials; @@ -59,7 +60,6 @@ import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.ApplicationConstants; -import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationState; @@ -105,22 +105,10 @@ public class YARNRunner implements ClientProtocol { * @param resMgrDelegate the resourcemanager client handle. */ public YARNRunner(Configuration conf, ResourceMgrDelegate resMgrDelegate) { - this(conf, resMgrDelegate, new ClientCache(conf, resMgrDelegate)); - } - - /** - * Similar to {@link YARNRunner#YARNRunner(Configuration, ResourceMgrDelegate)} - * but allowing injecting {@link ClientCache}. Enable mocking and testing. - * @param conf the configuration object - * @param resMgrDelegate the resource manager delegate - * @param clientCache the client cache object. - */ - public YARNRunner(Configuration conf, ResourceMgrDelegate resMgrDelegate, - ClientCache clientCache) { this.conf = conf; try { this.resMgrDelegate = resMgrDelegate; - this.clientCache = clientCache; + this.clientCache = new ClientCache(this.conf, resMgrDelegate); this.defaultFileContext = FileContext.getFileContext(this.conf); } catch (UnsupportedFileSystemException ufe) { throw new RuntimeException("Error in instantiating YarnClient", ufe); @@ -222,7 +210,7 @@ public class YARNRunner implements ClientProtocol { // Upload only in security mode: TODO Path applicationTokensFile = - new Path(jobSubmitDir, MRJobConfig.APPLICATION_TOKENS_FILE); + new Path(jobSubmitDir, MRConstants.APPLICATION_TOKENS_FILE); try { ts.writeTokenStorageFile(applicationTokensFile, conf); } catch (IOException e) { @@ -238,9 +226,7 @@ public class YARNRunner implements ClientProtocol { ApplicationReport appMaster = resMgrDelegate .getApplicationReport(applicationId); - String diagnostics = - (appMaster == null ? - "application report is null" : appMaster.getDiagnostics()); + String diagnostics = (appMaster == null ? 
"application report is null" : appMaster.getDiagnostics()); if (appMaster == null || appMaster.getState() == ApplicationState.FAILED || appMaster.getState() == ApplicationState.KILLED) { throw new IOException("Failed to run job : " + @@ -277,7 +263,7 @@ public class YARNRunner implements ClientProtocol { Map localResources = new HashMap(); - Path jobConfPath = new Path(jobSubmitDir, MRJobConfig.JOB_CONF_FILE); + Path jobConfPath = new Path(jobSubmitDir, MRConstants.JOB_CONF_FILE); URL yarnUrlForJobSubmitDir = ConverterUtils .getYarnUrlFromPath(defaultFileContext.getDefaultFileSystem() @@ -286,13 +272,13 @@ public class YARNRunner implements ClientProtocol { LOG.debug("Creating setup context, jobSubmitDir url is " + yarnUrlForJobSubmitDir); - localResources.put(MRJobConfig.JOB_CONF_FILE, + localResources.put(MRConstants.JOB_CONF_FILE, createApplicationResource(defaultFileContext, jobConfPath)); if (jobConf.get(MRJobConfig.JAR) != null) { - localResources.put(MRJobConfig.JOB_JAR, + localResources.put(MRConstants.JOB_JAR, createApplicationResource(defaultFileContext, - new Path(jobSubmitDir, MRJobConfig.JOB_JAR))); + new Path(jobSubmitDir, MRConstants.JOB_JAR))); } else { // Job jar may be null. For e.g, for pipes, the job jar is the hadoop // mapreduce jar itself which is already on the classpath. @@ -301,12 +287,10 @@ public class YARNRunner implements ClientProtocol { } // TODO gross hack - for (String s : new String[] { - MRJobConfig.JOB_SPLIT, - MRJobConfig.JOB_SPLIT_METAINFO, - MRJobConfig.APPLICATION_TOKENS_FILE }) { + for (String s : new String[] { "job.split", "job.splitmetainfo", + MRConstants.APPLICATION_TOKENS_FILE }) { localResources.put( - MRJobConfig.JOB_SUBMIT_DIR + "/" + s, + MRConstants.JOB_SUBMIT_DIR + "/" + s, createApplicationResource(defaultFileContext, new Path(jobSubmitDir, s))); } @@ -320,24 +304,22 @@ public class YARNRunner implements ClientProtocol { } // Setup the command to run the AM + String javaHome = "$JAVA_HOME"; Vector vargs = new Vector(8); - vargs.add(Environment.JAVA_HOME.$() + "/bin/java"); - - long logSize = TaskLog.getTaskLogLength(new JobConf(conf)); - vargs.add("-Dlog4j.configuration=container-log4j.properties"); - vargs.add("-D" + MRJobConfig.TASK_LOG_DIR + "=" - + ApplicationConstants.LOG_DIR_EXPANSION_VAR); - vargs.add("-D" + MRJobConfig.TASK_LOG_SIZE + "=" + logSize); + vargs.add(javaHome + "/bin/java"); + vargs.add("-Dhadoop.root.logger=" + + conf.get(MRJobConfig.MR_AM_LOG_OPTS, + MRJobConfig.DEFAULT_MR_AM_LOG_OPTS) + ",console"); vargs.add(conf.get(MRJobConfig.MR_AM_COMMAND_OPTS, MRJobConfig.DEFAULT_MR_AM_COMMAND_OPTS)); - vargs.add(MRJobConfig.APPLICATION_MASTER_CLASS); - vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + - Path.SEPARATOR + ApplicationConstants.STDOUT); - vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + - Path.SEPARATOR + ApplicationConstants.STDERR); - + vargs.add("org.apache.hadoop.mapreduce.v2.app.MRAppMaster"); + vargs.add(String.valueOf(applicationId.getClusterTimestamp())); + vargs.add(String.valueOf(applicationId.getId())); + vargs.add(ApplicationConstants.AM_FAIL_COUNT_STRING); + vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"); + vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"); Vector vargsFinal = new Vector(8); // Final commmand @@ -350,13 +332,15 @@ public class YARNRunner implements ClientProtocol { LOG.info("Command to launch container for ApplicationMaster is : " + mergedCommand); - // Setup the CLASSPATH in environment - // i.e. 
add { job jar, CWD, Hadoop jars} to classpath. + // Setup the environment - Add { job jar, MR app jar } to classpath. Map environment = new HashMap(); - MRApps.setClasspath(environment); - + MRApps.setInitialClasspath(environment); + MRApps.addToClassPath(environment, MRConstants.JOB_JAR); + MRApps.addToClassPath(environment, + MRConstants.YARN_MAPREDUCE_APP_JAR_PATH); + // Parse distributed cache - MRApps.setupDistributedCache(jobConf, localResources); + MRApps.setupDistributedCache(jobConf, localResources, environment); // Setup ContainerLaunchContext for AM container ContainerLaunchContext amContainer = @@ -441,35 +425,9 @@ public class YARNRunner implements ClientProtocol { @Override public void killJob(JobID arg0) throws IOException, InterruptedException { - /* check if the status is not running, if not send kill to RM */ - JobStatus status = clientCache.getClient(arg0).getJobStatus(arg0); - if (status.getState() != JobStatus.State.RUNNING) { - resMgrDelegate.killApplication(TypeConverter.toYarn(arg0).getAppId()); - return; - } - - try { - /* send a kill to the AM */ - clientCache.getClient(arg0).killJob(arg0); - long currentTimeMillis = System.currentTimeMillis(); - long timeKillIssued = currentTimeMillis; - while ((currentTimeMillis < timeKillIssued + 10000L) && (status.getState() - != JobStatus.State.KILLED)) { - try { - Thread.sleep(1000L); - } catch(InterruptedException ie) { - /** interrupted, just break */ - break; - } - currentTimeMillis = System.currentTimeMillis(); - status = clientCache.getClient(arg0).getJobStatus(arg0); - } - } catch(IOException io) { - LOG.debug("Error when checking for application status", io); - } - if (status.getState() != JobStatus.State.KILLED) { - resMgrDelegate.killApplication(TypeConverter.toYarn(arg0).getAppId()); - } + if (!clientCache.getClient(arg0).killJob(arg0)) { + resMgrDelegate.killApplication(TypeConverter.toYarn(arg0).getAppId()); + } } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java index d90e7216941..e2cb1e05ea7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java @@ -68,8 +68,8 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -78,8 +78,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import 
org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; @@ -245,7 +245,7 @@ public class TestClientRedirect { } @Override - public GetNewApplicationResponse getNewApplication(GetNewApplicationRequest request) throws YarnRemoteException { + public GetNewApplicationIdResponse getNewApplicationId(GetNewApplicationIdRequest request) throws YarnRemoteException { return null; } @@ -267,13 +267,6 @@ public class TestClientRedirect { application.setHost(split[0]); application.setRpcPort(Integer.parseInt(split[1])); application.setUser("TestClientRedirect-user"); - application.setName("N/A"); - application.setQueue("N/A"); - application.setStartTime(0); - application.setFinishTime(0); - application.setTrackingUrl("N/A"); - application.setDiagnostics("N/A"); - GetApplicationReportResponse response = recordFactory .newRecordInstance(GetApplicationReportResponse.class); response.setApplicationReport(application); @@ -288,9 +281,9 @@ public class TestClientRedirect { } @Override - public KillApplicationResponse forceKillApplication( - KillApplicationRequest request) throws YarnRemoteException { - return recordFactory.newRecordInstance(KillApplicationResponse.class); + public FinishApplicationResponse finishApplication( + FinishApplicationRequest request) throws YarnRemoteException { + return null; } @Override @@ -451,7 +444,7 @@ public class TestClientRedirect { @Override public KillJobResponse killJob(KillJobRequest request) throws YarnRemoteException { - return recordFactory.newRecordInstance(KillJobResponse.class); + return null; } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java index 5b07d4997d7..b7fd6c9475a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java @@ -109,7 +109,7 @@ public class TestClientServiceDelegate { ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate( null, getRMDelegate()); JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId); - Assert.assertEquals("N/A", jobStatus.getUsername()); + Assert.assertEquals("Unknown User", jobStatus.getUsername()); Assert.assertEquals(JobStatus.State.PREP, jobStatus.getState()); //RM has app report and job History Server is not configured @@ -145,13 +145,6 @@ public class TestClientServiceDelegate { .newRecord(ApplicationReport.class); applicationReport.setState(ApplicationState.SUCCEEDED); applicationReport.setUser("root"); - applicationReport.setHost("N/A"); - 
applicationReport.setName("N/A"); - applicationReport.setQueue("N/A"); - applicationReport.setStartTime(0); - applicationReport.setFinishTime(0); - applicationReport.setTrackingUrl("N/A"); - applicationReport.setDiagnostics("N/A"); return applicationReport; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java deleted file mode 100644 index 2bc9030bf85..00000000000 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.mapreduce; - -import java.io.IOException; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapred.YARNRunner; -import org.apache.hadoop.mapreduce.protocol.ClientProtocol; -import org.junit.Test; - -public class TestYarnClientProtocolProvider extends TestCase { - - @Test - public void testClusterWithYarnClientProtocolProvider() throws Exception { - - Configuration conf = new Configuration(false); - Cluster cluster = null; - - try { - cluster = new Cluster(conf); - fail("Cluster should not be initialized with out any framework name"); - } catch (IOException e) { - - } - - try { - conf = new Configuration(); - conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME); - cluster = new Cluster(conf); - ClientProtocol client = cluster.getClient(); - assertTrue(client instanceof YARNRunner); - } catch (IOException e) { - - } finally { - if (cluster != null) { - cluster.close(); - } - } - } -} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java index 49a63db44ba..fcb2a79fafb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java @@ -43,15 +43,9 @@ import org.apache.hadoop.yarn.service.Service; */ public class MiniMRYarnCluster extends MiniYARNCluster { - public static final String HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME = - 
"hadoop-mapreduce-client-app-0.24.0-SNAPSHOT.jar"; - - public static final String YARN_MAPREDUCE_APP_JAR_PATH = - "$YARN_HOME/modules/" + HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME; - public static final String APPJAR = "../hadoop-mapreduce-client-app/target/" - + HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME; + + MRConstants.HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME; private static final Log LOG = LogFactory.getLog(MiniMRYarnCluster.class); private JobHistoryServer historyServer; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java index aa832aa1cc2..0a1943c013b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java @@ -402,7 +402,7 @@ public class TestMRJobs { // both should be reachable via the class loader. Assert.assertNotNull(cl.getResource("distributed.jar.inside2")); Assert.assertNotNull(cl.getResource("distributed.jar.inside3")); - Assert.assertNotNull(cl.getResource("distributed.jar.inside4")); + Assert.assertNull(cl.getResource("distributed.jar.inside4")); // Check that the symlink for the renaming was created in the cwd; File symlinkFile = new File("distributed.first.symlink"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java index 346ccd2f0da..bc0dfe5fa4a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java @@ -22,7 +22,6 @@ import static org.mockito.Matchers.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.io.File; @@ -37,37 +36,15 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapred.ClientCache; -import org.apache.hadoop.mapred.ClientServiceDelegate; import org.apache.hadoop.mapred.ResourceMgrDelegate; import org.apache.hadoop.mapred.YARNRunner; import org.apache.hadoop.mapreduce.JobID; -import org.apache.hadoop.mapreduce.JobPriority; -import org.apache.hadoop.mapreduce.JobStatus.State; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.security.Credentials; -import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; -import 
org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; -import org.apache.hadoop.yarn.api.records.QueueInfo; -import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -77,8 +54,9 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; /** - * Test YarnRunner and make sure the client side plugin works - * fine + * Test if the jobclient shows enough diagnostics + * on a job failure. + * */ public class TestYARNRunner extends TestCase { private static final Log LOG = LogFactory.getLog(TestYARNRunner.class); @@ -87,22 +65,18 @@ public class TestYARNRunner extends TestCase { private YARNRunner yarnRunner; private ResourceMgrDelegate resourceMgrDelegate; private YarnConfiguration conf; - private ClientCache clientCache; private ApplicationId appId; private JobID jobId; private File testWorkDir = new File("target", TestYARNRunner.class.getName()); private ApplicationSubmissionContext submissionContext; - private ClientServiceDelegate clientDelegate; private static final String failString = "Rejected job"; @Before public void setUp() throws Exception { resourceMgrDelegate = mock(ResourceMgrDelegate.class); conf = new YarnConfiguration(); - clientCache = new ClientCache(conf, resourceMgrDelegate); - clientCache = spy(clientCache); - yarnRunner = new YARNRunner(conf, resourceMgrDelegate, clientCache); + yarnRunner = new YARNRunner(conf, resourceMgrDelegate); yarnRunner = spy(yarnRunner); submissionContext = mock(ApplicationSubmissionContext.class); doAnswer( @@ -127,31 +101,6 @@ public class TestYARNRunner extends TestCase { } - @Test - public void testJobKill() throws Exception { - clientDelegate = mock(ClientServiceDelegate.class); - when(clientDelegate.getJobStatus(any(JobID.class))).thenReturn(new - org.apache.hadoop.mapreduce.JobStatus(jobId, 0f, 0f, 0f, 0f, - State.PREP, JobPriority.HIGH, "tmp", "tmp", "tmp", "tmp")); - when(clientDelegate.killJob(any(JobID.class))).thenReturn(true); - doAnswer( - new Answer() { - @Override - public ClientServiceDelegate answer(InvocationOnMock invocation) - throws Throwable { - return clientDelegate; - } - } - ).when(clientCache).getClient(any(JobID.class)); - yarnRunner.killJob(jobId); - verify(resourceMgrDelegate).killApplication(appId); - when(clientDelegate.getJobStatus(any(JobID.class))).thenReturn(new - 
org.apache.hadoop.mapreduce.JobStatus(jobId, 0f, 0f, 0f, 0f, - State.RUNNING, JobPriority.HIGH, "tmp", "tmp", "tmp", "tmp")); - yarnRunner.killJob(jobId); - verify(clientDelegate).killJob(jobId); - } - @Test public void testJobSubmissionFailure() throws Exception { when(resourceMgrDelegate.submitApplication(any(ApplicationSubmissionContext.class))). @@ -173,66 +122,4 @@ public class TestYARNRunner extends TestCase { assertTrue(io.getLocalizedMessage().contains(failString)); } } - - @Test - public void testResourceMgrDelegate() throws Exception { - /* we not want a mock of resourcemgr deleagte */ - ClientRMProtocol clientRMProtocol = mock(ClientRMProtocol.class); - ResourceMgrDelegate delegate = new ResourceMgrDelegate(conf, clientRMProtocol); - /* make sure kill calls finish application master */ - when(clientRMProtocol.forceKillApplication(any(KillApplicationRequest.class))) - .thenReturn(null); - delegate.killApplication(appId); - verify(clientRMProtocol).forceKillApplication(any(KillApplicationRequest.class)); - - /* make sure getalljobs calls get all applications */ - when(clientRMProtocol.getAllApplications(any(GetAllApplicationsRequest.class))). - thenReturn(recordFactory.newRecordInstance(GetAllApplicationsResponse.class)); - delegate.getAllJobs(); - verify(clientRMProtocol).getAllApplications(any(GetAllApplicationsRequest.class)); - - /* make sure getapplication report is called */ - when(clientRMProtocol.getApplicationReport(any(GetApplicationReportRequest.class))) - .thenReturn(recordFactory.newRecordInstance(GetApplicationReportResponse.class)); - delegate.getApplicationReport(appId); - verify(clientRMProtocol).getApplicationReport(any(GetApplicationReportRequest.class)); - - /* make sure metrics is called */ - GetClusterMetricsResponse clusterMetricsResponse = recordFactory.newRecordInstance - (GetClusterMetricsResponse.class); - clusterMetricsResponse.setClusterMetrics(recordFactory.newRecordInstance( - YarnClusterMetrics.class)); - when(clientRMProtocol.getClusterMetrics(any(GetClusterMetricsRequest.class))) - .thenReturn(clusterMetricsResponse); - delegate.getClusterMetrics(); - verify(clientRMProtocol).getClusterMetrics(any(GetClusterMetricsRequest.class)); - - when(clientRMProtocol.getClusterNodes(any(GetClusterNodesRequest.class))). - thenReturn(recordFactory.newRecordInstance(GetClusterNodesResponse.class)); - delegate.getActiveTrackers(); - verify(clientRMProtocol).getClusterNodes(any(GetClusterNodesRequest.class)); - - GetNewApplicationResponse newAppResponse = recordFactory.newRecordInstance( - GetNewApplicationResponse.class); - newAppResponse.setApplicationId(appId); - when(clientRMProtocol.getNewApplication(any(GetNewApplicationRequest.class))). - thenReturn(newAppResponse); - delegate.getNewJobID(); - verify(clientRMProtocol).getNewApplication(any(GetNewApplicationRequest.class)); - - GetQueueInfoResponse queueInfoResponse = recordFactory.newRecordInstance( - GetQueueInfoResponse.class); - queueInfoResponse.setQueueInfo(recordFactory.newRecordInstance(QueueInfo.class)); - when(clientRMProtocol.getQueueInfo(any(GetQueueInfoRequest.class))). 
- thenReturn(queueInfoResponse); - delegate.getQueues(); - verify(clientRMProtocol).getQueueInfo(any(GetQueueInfoRequest.class)); - - GetQueueUserAclsInfoResponse aclResponse = recordFactory.newRecordInstance( - GetQueueUserAclsInfoResponse.class); - when(clientRMProtocol.getQueueUserAcls(any(GetQueueUserAclsInfoRequest.class))) - .thenReturn(aclResponse); - delegate.getQueueAclsForCurrentUser(); - verify(clientRMProtocol).getQueueUserAcls(any(GetQueueUserAclsInfoRequest.class)); - } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml index 2a5cef3cbc9..ab1ffcca988 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml @@ -88,12 +88,6 @@ hadoop-yarn-server-resourcemanager ${yarn.version} - - org.apache.hadoop - hadoop-yarn-server-resourcemanager - ${yarn.version} - test-jar - org.apache.hadoop hadoop-mapreduce-client-core diff --git a/hadoop-mapreduce-project/hadoop-yarn/README b/hadoop-mapreduce-project/hadoop-yarn/README index 713871ab768..8c4f43454ea 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/README +++ b/hadoop-mapreduce-project/hadoop-yarn/README @@ -30,6 +30,7 @@ clean and test: mvn clean install run selected test after compile: mvn test -Dtest=TestClassName (combined: mvn clean install -Dtest=TestClassName) create runnable binaries after install: mvn assembly:assembly (combined: mvn clean install assembly:assembly) + Eclipse Projects ---------------- http://maven.apache.org/guides/mini/guide-ide-eclipse.html @@ -70,16 +71,3 @@ hadoop-yarn-server - Implementation of the hadoop-yarn-api hadoop-yarn-server-common - APIs shared between resourcemanager and nodemanager hadoop-yarn-server-nodemanager (TaskTracker replacement) hadoop-yarn-server-resourcemanager (JobTracker replacement) - -Utilities for understanding the code ------------------------------------- -Almost all of the yarn components as well as the mapreduce framework use -state-machines for all the data objects. To understand those central pieces of -the code, a visual representation of the state-machines helps much. You can first -convert the state-machines into graphviz(.gv) format by -running: - mvn compile -Pvisualize -Then you can use the dot program for generating directed graphs and convert the above -.gv files to images. 
The graphviz package has the needed dot program and related -utilites.For e.g., to generate png files you can run: - dot -Tpng NodeManager.gv > NodeManager.png diff --git a/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 7e34ff5487d..219fd1eb579 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -49,10 +49,6 @@ - - - - diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java index 99f145fbdc3..212ca671c89 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java @@ -37,11 +37,8 @@ public interface ApplicationConstants { public static final String APPLICATION_CLIENT_SECRET_ENV_NAME = "AppClientTokenEnv"; - /** - * The environmental variable for APPLICATION_ATTEMPT_ID. Set in - * ApplicationMaster's environment only. - */ - public static final String APPLICATION_ATTEMPT_ID_ENV = "APPLICATION_ATTEMPT_ID"; + // TODO: Weird. This is part of AM command line. Instead it should be a env. + public static final String AM_FAIL_COUNT_STRING = ""; public static final String CONTAINER_TOKEN_FILE_ENV_NAME = UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION; @@ -49,117 +46,4 @@ public interface ApplicationConstants { public static final String LOCAL_DIR_ENV = "YARN_LOCAL_DIRS"; public static final String LOG_DIR_EXPANSION_VAR = ""; - - public static final String STDERR = "stderr"; - - public static final String STDOUT = "stdout"; - - /** - * Classpath for typical applications. - */ - public static final String[] APPLICATION_CLASSPATH = - new String[] { - "$HADOOP_CONF_DIR", - "$HADOOP_COMMON_HOME/share/hadoop/common/*", - "$HADOOP_COMMON_HOME/share/hadoop/common/lib/*", - "$HADOOP_HDFS_HOME/share/hadoop/hdfs/*", - "$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*", - "$YARN_HOME/modules/*", - "$YARN_HOME/lib/*" - }; - - /** - * Environment for Applications. - * - * Some of the environment variables for applications are final - * i.e. they cannot be modified by the applications. - */ - public enum Environment { - /** - * $USER - * Final, non-modifiable. - */ - USER("USER"), - - /** - * $LOGNAME - * Final, non-modifiable. - */ - LOGNAME("LOGNAME"), - - /** - * $HOME - * Final, non-modifiable. - */ - HOME("HOME"), - - /** - * $PWD - * Final, non-modifiable. - */ - PWD("PWD"), - - /** - * $PATH - */ - PATH("PATH"), - - /** - * $SHELL - */ - SHELL("SHELL"), - - /** - * $JAVA_HOME - */ - JAVA_HOME("JAVA_HOME"), - - /** - * $CLASSPATH - */ - CLASSPATH("CLASSPATH"), - - /** - * $LD_LIBRARY_PATH - */ - LD_LIBRARY_PATH("LD_LIBRARY_PATH"), - - /** - * $HADOOP_CONF_DIR - * Final, non-modifiable. 
- */ - HADOOP_CONF_DIR("HADOOP_CONF_DIR"), - - /** - * $HADOOP_COMMON_HOME - */ - HADOOP_COMMON_HOME("HADOOP_COMMON_HOME"), - - /** - * $HADOOP_HDFS_HOME - */ - HADOOP_HDFS_HOME("HADOOP_HDFS_HOME"), - - /** - * $YARN_HOME - */ - YARN_HOME("YARN_HOME"); - - private final String variable; - private Environment(String variable) { - this.variable = variable; - } - - public String key() { - return variable; - } - - public String toString() { - return variable; - } - - public String $() { - return "$" + variable; - } - } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ClientRMProtocol.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ClientRMProtocol.java index fb934591354..db4c4790cf0 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ClientRMProtocol.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ClientRMProtocol.java @@ -21,8 +21,8 @@ package org.apache.hadoop.yarn.api; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Stable; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -31,8 +31,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; @@ -62,18 +62,14 @@ public interface ClientRMProtocol { *
    The ResourceManager responds with a new, monotonically * increasing, {@link ApplicationId} which is used by the client to submit * a new application.
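(Illustration only, not part of the patch: a minimal client-side sketch of the renamed call, assuming a ClientRMProtocol proxy named rmClient has already been obtained via YarnRPC and that the default record factory is used; rmClient and this setup are hypothetical.)

    // Hypothetical setup: rmClient is an existing ClientRMProtocol proxy.
    RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
    GetNewApplicationIdRequest request =
        recordFactory.newRecordInstance(GetNewApplicationIdRequest.class);
    GetNewApplicationIdResponse response = rmClient.getNewApplicationId(request);
    // The ResourceManager returns a new, monotonically increasing ApplicationId.
    ApplicationId appId = response.getApplicationId();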
    - * - *
    The ResourceManager also responds with details such - * as minimum and maximum resource capabilities in the cluster as specified in - * {@link GetNewApplicationResponse}.
    - * + * * @param request request to get a new ApplicationId * @return new ApplicationId to be used to submit an application * @throws YarnRemoteException * @see #submitApplication(SubmitApplicationRequest) */ - public GetNewApplicationResponse getNewApplication( - GetNewApplicationRequest request) + public GetNewApplicationIdResponse getNewApplicationId( + GetNewApplicationIdRequest request) throws YarnRemoteException; /** @@ -96,7 +92,7 @@ public interface ClientRMProtocol { * @param request request to submit a new application * @return (empty) response on accepting the submission * @throws YarnRemoteException - * @see #getNewApplication(GetNewApplicationRequest) + * @see #getNewApplicationId(GetNewApplicationIdRequest) */ public SubmitApplicationResponse submitApplication( SubmitApplicationRequest request) @@ -106,7 +102,7 @@ public interface ClientRMProtocol { *
    The interface used by clients to request the * ResourceManager to abort submitted application.
    * - *
    The client, via {@link KillApplicationRequest} provides the + *
    The client, via {@link FinishApplicationRequest} provides the * {@link ApplicationId} of the application to be aborted.
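(Illustration only, not part of the patch: the matching abort sketch under the same assumptions as above; the setApplicationId setter is assumed to exist on the renamed request record, mirroring its PB implementation.)

    // Hypothetical continuation: reuse recordFactory, rmClient and appId from the sketch above.
    FinishApplicationRequest killRequest =
        recordFactory.newRecordInstance(FinishApplicationRequest.class);
    killRequest.setApplicationId(appId);
    // The response record is currently empty; errors surface as YarnRemoteException.
    rmClient.finishApplication(killRequest);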
    * *
    In secure mode,the ResourceManager verifies access to the @@ -121,8 +117,8 @@ public interface ClientRMProtocol { * @throws YarnRemoteException * @see #getQueueUserAcls(GetQueueUserAclsInfoRequest) */ - public KillApplicationResponse forceKillApplication( - KillApplicationRequest request) + public FinishApplicationResponse finishApplication( + FinishApplicationRequest request) throws YarnRemoteException; /** diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationRequest.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationRequest.java similarity index 94% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationRequest.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationRequest.java index c033e64bb20..023ee3c4ac7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationRequest.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationRequest.java @@ -32,11 +32,11 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; *
    The request includes the {@link ApplicationId} of the application to be * aborted.
    * - * @see ClientRMProtocol#forceKillApplication(KillApplicationRequest) + * @see ClientRMProtocol#finishApplication(FinishApplicationRequest) */ @Public @Stable -public interface KillApplicationRequest { +public interface FinishApplicationRequest { /** * Get the ApplicationId of the application to be aborted. * @return ApplicationId of the application to be aborted diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationResponse.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationResponse.java similarity index 91% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationResponse.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationResponse.java index 2a8d0f06d29..cd0c728e536 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationResponse.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationResponse.java @@ -28,10 +28,10 @@ import org.apache.hadoop.yarn.api.ClientRMProtocol; * *
    Currently it's empty.
    * - * @see ClientRMProtocol#forceKillApplication(KillApplicationRequest) + * @see ClientRMProtocol#finishApplication(FinishApplicationRequest) */ @Public @Stable -public interface KillApplicationResponse { +public interface FinishApplicationResponse { } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationRequest.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdRequest.java similarity index 91% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationRequest.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdRequest.java index a70989f1aab..c841070080d 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationRequest.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdRequest.java @@ -27,10 +27,10 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; *
    The request sent by clients to get a new {@link ApplicationId} for * submitting an application.
    * - * @see ClientRMProtocol#getNewApplication(GetNewApplicationRequest) + * @see ClientRMProtocol#getNewApplicationId(GetNewApplicationIdRequest) */ @Public @Stable -public interface GetNewApplicationRequest { +public interface GetNewApplicationIdRequest { } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationResponse.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdResponse.java similarity index 66% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationResponse.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdResponse.java index 4e7a7e565be..93a1ab680b3 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationResponse.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdResponse.java @@ -24,17 +24,16 @@ import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.ClientRMProtocol; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.Resource; /** *
    The response sent by the ResourceManager to the client for * a request to a new {@link ApplicationId} for submitting applications.
    * - * @see ClientRMProtocol#getNewApplication(GetNewApplicationRequest) + * @see ClientRMProtocol#getNewApplicationId(GetNewApplicationIdRequest) */ @Public @Stable -public interface GetNewApplicationResponse { +public interface GetNewApplicationIdResponse { /** * Get the new ApplicationId allocated by the * ResourceManager. @@ -48,30 +47,4 @@ public interface GetNewApplicationResponse { @Private @Unstable public abstract void setApplicationId(ApplicationId applicationId); - - /** - * Get the minimum capability for any {@link Resource} allocated by the - * ResourceManager in the cluster. - * @return minimum capability of allocated resources in the cluster - */ - @Public - @Stable - public Resource getMinimumResourceCapability(); - - @Private - @Unstable - public void setMinimumResourceCapability(Resource capability); - - /** - * Get the maximum capability for any {@link Resource} allocated by the - * ResourceManager in the cluster. - * @return maximum capability of allocated resources in the cluster - */ - @Public - @Stable - public Resource getMaximumResourceCapability(); - - @Private - @Unstable - public void setMaximumResourceCapability(Resource capability); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationRequestPBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationRequestPBImpl.java similarity index 74% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationRequestPBImpl.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationRequestPBImpl.java index e2761a090be..044382bddeb 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationRequestPBImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationRequestPBImpl.java @@ -19,34 +19,34 @@ package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ProtoBase; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationRequestProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationRequestProtoOrBuilder; -public class KillApplicationRequestPBImpl extends ProtoBase implements KillApplicationRequest { - KillApplicationRequestProto proto = KillApplicationRequestProto.getDefaultInstance(); - KillApplicationRequestProto.Builder builder = null; +public class FinishApplicationRequestPBImpl extends ProtoBase implements FinishApplicationRequest { + FinishApplicationRequestProto proto = FinishApplicationRequestProto.getDefaultInstance(); + FinishApplicationRequestProto.Builder builder = null; boolean viaProto 
= false; private ApplicationId applicationId = null; - public KillApplicationRequestPBImpl() { - builder = KillApplicationRequestProto.newBuilder(); + public FinishApplicationRequestPBImpl() { + builder = FinishApplicationRequestProto.newBuilder(); } - public KillApplicationRequestPBImpl(KillApplicationRequestProto proto) { + public FinishApplicationRequestPBImpl(FinishApplicationRequestProto proto) { this.proto = proto; viaProto = true; } - public KillApplicationRequestProto getProto() { + public FinishApplicationRequestProto getProto() { mergeLocalToProto(); proto = viaProto ? proto : builder.build(); viaProto = true; @@ -69,7 +69,7 @@ public class KillApplicationRequestPBImpl extends ProtoBase implements KillApplicationResponse { - KillApplicationResponseProto proto = KillApplicationResponseProto.getDefaultInstance(); - KillApplicationResponseProto.Builder builder = null; +public class FinishApplicationResponsePBImpl extends ProtoBase implements FinishApplicationResponse { + FinishApplicationResponseProto proto = FinishApplicationResponseProto.getDefaultInstance(); + FinishApplicationResponseProto.Builder builder = null; boolean viaProto = false; - public KillApplicationResponsePBImpl() { - builder = KillApplicationResponseProto.newBuilder(); + public FinishApplicationResponsePBImpl() { + builder = FinishApplicationResponseProto.newBuilder(); } - public KillApplicationResponsePBImpl(KillApplicationResponseProto proto) { + public FinishApplicationResponsePBImpl(FinishApplicationResponseProto proto) { this.proto = proto; viaProto = true; } - public KillApplicationResponseProto getProto() { + public FinishApplicationResponseProto getProto() { proto = viaProto ? proto : builder.build(); viaProto = true; return proto; @@ -47,7 +47,7 @@ public class KillApplicationResponsePBImpl extends ProtoBase implements GetNewApplicationRequest { - GetNewApplicationRequestProto proto = GetNewApplicationRequestProto.getDefaultInstance(); - GetNewApplicationRequestProto.Builder builder = null; +public class GetNewApplicationIdRequestPBImpl extends ProtoBase implements GetNewApplicationIdRequest { + GetNewApplicationIdRequestProto proto = GetNewApplicationIdRequestProto.getDefaultInstance(); + GetNewApplicationIdRequestProto.Builder builder = null; boolean viaProto = false; - public GetNewApplicationRequestPBImpl() { - builder = GetNewApplicationRequestProto.newBuilder(); + public GetNewApplicationIdRequestPBImpl() { + builder = GetNewApplicationIdRequestProto.newBuilder(); } - public GetNewApplicationRequestPBImpl(GetNewApplicationRequestProto proto) { + public GetNewApplicationIdRequestPBImpl(GetNewApplicationIdRequestProto proto) { this.proto = proto; viaProto = true; } - public GetNewApplicationRequestProto getProto() { + public GetNewApplicationIdRequestProto getProto() { proto = viaProto ? 
proto : builder.build(); viaProto = true; return proto; @@ -46,7 +47,7 @@ public class GetNewApplicationRequestPBImpl extends ProtoBase implements GetNewApplicationIdResponse { + GetNewApplicationIdResponseProto proto = GetNewApplicationIdResponseProto.getDefaultInstance(); + GetNewApplicationIdResponseProto.Builder builder = null; + boolean viaProto = false; + + private ApplicationId applicationId = null; + + + public GetNewApplicationIdResponsePBImpl() { + builder = GetNewApplicationIdResponseProto.newBuilder(); + } + + public GetNewApplicationIdResponsePBImpl(GetNewApplicationIdResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public GetNewApplicationIdResponseProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private void mergeLocalToBuilder() { + if (applicationId != null) { + builder.setApplicationId(convertToProtoFormat(this.applicationId)); + } + } + + private void mergeLocalToProto() { + if (viaProto) + maybeInitBuilder(); + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = GetNewApplicationIdResponseProto.newBuilder(proto); + } + viaProto = false; + } + + + @Override + public ApplicationId getApplicationId() { + GetNewApplicationIdResponseProtoOrBuilder p = viaProto ? proto : builder; + if (this.applicationId != null) { + return this.applicationId; + } + if (!p.hasApplicationId()) { + return null; + } + this.applicationId = convertFromProtoFormat(p.getApplicationId()); + return this.applicationId; + } + + @Override + public void setApplicationId(ApplicationId applicationId) { + maybeInitBuilder(); + if (applicationId == null) + builder.clearApplicationId(); + this.applicationId = applicationId; + } + + private ApplicationIdPBImpl convertFromProtoFormat(ApplicationIdProto p) { + return new ApplicationIdPBImpl(p); + } + + private ApplicationIdProto convertToProtoFormat(ApplicationId t) { + return ((ApplicationIdPBImpl)t).getProto(); + } + + + +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationResponsePBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationResponsePBImpl.java deleted file mode 100644 index d15f1b75274..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationResponsePBImpl.java +++ /dev/null @@ -1,173 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; - - -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ProtoBase; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; -import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; -import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationResponseProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationResponseProtoOrBuilder; - -public class GetNewApplicationResponsePBImpl extends ProtoBase implements GetNewApplicationResponse { - GetNewApplicationResponseProto proto = GetNewApplicationResponseProto.getDefaultInstance(); - GetNewApplicationResponseProto.Builder builder = null; - boolean viaProto = false; - - private ApplicationId applicationId = null; - private Resource minimumResourceCapability = null; - private Resource maximumResourceCapability = null; - - public GetNewApplicationResponsePBImpl() { - builder = GetNewApplicationResponseProto.newBuilder(); - } - - public GetNewApplicationResponsePBImpl(GetNewApplicationResponseProto proto) { - this.proto = proto; - viaProto = true; - } - - public GetNewApplicationResponseProto getProto() { - mergeLocalToProto(); - proto = viaProto ? proto : builder.build(); - viaProto = true; - return proto; - } - - private void mergeLocalToBuilder() { - if (applicationId != null) { - builder.setApplicationId(convertToProtoFormat(this.applicationId)); - } - if (minimumResourceCapability != null) { - builder.setMinimumCapability(convertToProtoFormat(this.minimumResourceCapability)); - } - if (maximumResourceCapability != null) { - builder.setMaximumCapability(convertToProtoFormat(this.maximumResourceCapability)); - } - } - - private void mergeLocalToProto() { - if (viaProto) - maybeInitBuilder(); - mergeLocalToBuilder(); - proto = builder.build(); - viaProto = true; - } - - private void maybeInitBuilder() { - if (viaProto || builder == null) { - builder = GetNewApplicationResponseProto.newBuilder(proto); - } - viaProto = false; - } - - - @Override - public ApplicationId getApplicationId() { - if (this.applicationId != null) { - return this.applicationId; - } - - GetNewApplicationResponseProtoOrBuilder p = viaProto ? proto : builder; - if (!p.hasApplicationId()) { - return null; - } - - this.applicationId = convertFromProtoFormat(p.getApplicationId()); - return this.applicationId; - } - - @Override - public void setApplicationId(ApplicationId applicationId) { - maybeInitBuilder(); - if (applicationId == null) - builder.clearApplicationId(); - this.applicationId = applicationId; - } - - @Override - public Resource getMaximumResourceCapability() { - if (this.maximumResourceCapability != null) { - return this.maximumResourceCapability; - } - - GetNewApplicationResponseProtoOrBuilder p = viaProto ? proto : builder; - if (!p.hasMaximumCapability()) { - return null; - } - - this.maximumResourceCapability = convertFromProtoFormat(p.getMaximumCapability()); - return this.maximumResourceCapability; - } - - @Override - public Resource getMinimumResourceCapability() { - if (this.minimumResourceCapability != null) { - return this.minimumResourceCapability; - } - - GetNewApplicationResponseProtoOrBuilder p = viaProto ? 
proto : builder; - if (!p.hasMinimumCapability()) { - return null; - } - - this.minimumResourceCapability = convertFromProtoFormat(p.getMinimumCapability()); - return this.minimumResourceCapability; - } - - @Override - public void setMaximumResourceCapability(Resource capability) { - maybeInitBuilder(); - if(maximumResourceCapability == null) { - builder.clearMaximumCapability(); - } - this.maximumResourceCapability = capability; - } - - @Override - public void setMinimumResourceCapability(Resource capability) { - maybeInitBuilder(); - if(minimumResourceCapability == null) { - builder.clearMinimumCapability(); - } - this.minimumResourceCapability = capability; - } - - private ApplicationIdPBImpl convertFromProtoFormat(ApplicationIdProto p) { - return new ApplicationIdPBImpl(p); - } - - private ApplicationIdProto convertToProtoFormat(ApplicationId t) { - return ((ApplicationIdPBImpl)t).getProto(); - } - - private Resource convertFromProtoFormat(ResourceProto resource) { - return new ResourcePBImpl(resource); - } - - private ResourceProto convertToProtoFormat(Resource resource) { - return ((ResourcePBImpl)resource).getProto(); - } - -} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationReport.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationReport.java index ffb920d5b90..ca7a6f415a1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationReport.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ApplicationReport.java @@ -186,16 +186,4 @@ public interface ApplicationReport { @Private @Unstable void setStartTime(long startTime); - - /** - * Get the finish time of the application. - * @return finish time of the application - */ - @Public - @Stable - long getFinishTime(); - - @Private - @Unstable - void setFinishTime(long finishTime); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java index ff054b22ac5..97c84e4d10a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java @@ -43,7 +43,6 @@ import org.apache.hadoop.yarn.api.ContainerManager; * *
 *     <li>HTTP uri of the node.</li>
 *     <li>{@link Resource} allocated to the container.</li>
- *     <li>{@link Priority} at which the container was allocated.</li>
 *     <li>{@link ContainerState} of the container.</li>
 *     <li>
  9. * {@link ContainerToken} of the container, used to securely verify @@ -112,18 +111,6 @@ public interface Container extends Comparable { @Private @Unstable void setResource(Resource resource); - - /** - * Get the Priority at which the Container was - * allocated. - * @return Priority at which the Container was - * allocated - */ - Priority getPriority(); - - @Private - @Unstable - void setPriority(Priority priority); /** * Get the current ContainerState of the container. diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java index 2ea2ddbcdb2..b1e80fc7598 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java @@ -240,30 +240,6 @@ implements ApplicationReport { return proto; } - @Override - public long getStartTime() { - ApplicationReportProtoOrBuilder p = viaProto ? proto : builder; - return p.getStartTime(); - } - - @Override - public void setStartTime(long startTime) { - maybeInitBuilder(); - builder.setStartTime(startTime); - } - - @Override - public long getFinishTime() { - ApplicationReportProtoOrBuilder p = viaProto ? proto : builder; - return p.getFinishTime(); - } - - @Override - public void setFinishTime(long finishTime) { - maybeInitBuilder(); - builder.setFinishTime(finishTime); - } - private void mergeLocalToBuilder() { if (this.applicationId != null && !((ApplicationIdPBImpl) this.applicationId).getProto().equals( @@ -303,4 +279,16 @@ implements ApplicationReport { ApplicationIdProto applicationId) { return new ApplicationIdPBImpl(applicationId); } + + @Override + public long getStartTime() { + ApplicationReportProtoOrBuilder p = viaProto ? 
proto : builder; + return p.getStartTime(); + } + + @Override + public void setStartTime(long startTime) { + maybeInitBuilder(); + builder.setStartTime(startTime); + } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java index 39b15e0cefd..388cad0f4d8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java @@ -25,7 +25,6 @@ import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ContainerToken; import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.ProtoBase; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; @@ -35,7 +34,6 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStateProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerTokenProto; import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto; -import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.util.ProtoUtils; @@ -50,7 +48,6 @@ public class ContainerPBImpl extends ProtoBase implements Contai private ContainerId containerId = null; private NodeId nodeId = null; private Resource resource = null; - private Priority priority = null; private ContainerToken containerToken = null; private ContainerStatus containerStatus = null; @@ -87,11 +84,6 @@ public class ContainerPBImpl extends ProtoBase implements Contai builder.getResource())) { builder.setResource(convertToProtoFormat(this.resource)); } - if (this.priority != null && - !((PriorityPBImpl) this.priority).getProto().equals( - builder.getPriority())) { - builder.setPriority(convertToProtoFormat(this.priority)); - } if (this.containerToken != null && !((ContainerTokenPBImpl) this.containerToken).getProto().equals( builder.getContainerToken())) { @@ -219,29 +211,6 @@ public class ContainerPBImpl extends ProtoBase implements Contai builder.clearResource(); this.resource = resource; } - - @Override - public Priority getPriority() { - ContainerProtoOrBuilder p = viaProto ? proto : builder; - if (this.priority != null) { - return this.priority; - } - if (!p.hasPriority()) { - return null; - } - this.priority = convertFromProtoFormat(p.getPriority()); - return this.priority; - } - - @Override - public void setPriority(Priority priority) { - maybeInitBuilder(); - if (priority == null) { - builder.clearPriority(); - } - this.priority = priority; - } - @Override public ContainerToken getContainerToken() { ContainerProtoOrBuilder p = viaProto ? 
proto : builder; @@ -316,14 +285,6 @@ public class ContainerPBImpl extends ProtoBase implements Contai return ((ResourcePBImpl)t).getProto(); } - private PriorityPBImpl convertFromProtoFormat(PriorityProto p) { - return new PriorityPBImpl(p); - } - - private PriorityProto convertToProtoFormat(Priority p) { - return ((PriorityPBImpl)p).getProto(); - } - private ContainerTokenPBImpl convertFromProtoFormat(ContainerTokenProto p) { return new ContainerTokenPBImpl(p); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/client_RM_protocol.proto b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/client_RM_protocol.proto index fb5f5f6e741..cfb14ff3518 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/client_RM_protocol.proto +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/client_RM_protocol.proto @@ -24,10 +24,10 @@ option java_generate_equals_and_hash = true; import "yarn_service_protos.proto"; service ClientRMProtocolService { - rpc getNewApplication (GetNewApplicationRequestProto) returns (GetNewApplicationResponseProto); + rpc getNewApplicationId (GetNewApplicationIdRequestProto) returns (GetNewApplicationIdResponseProto); rpc getApplicationReport (GetApplicationReportRequestProto) returns (GetApplicationReportResponseProto); rpc submitApplication (SubmitApplicationRequestProto) returns (SubmitApplicationResponseProto); - rpc forceKillApplication (KillApplicationRequestProto) returns (KillApplicationResponseProto); + rpc finishApplication (FinishApplicationRequestProto) returns (FinishApplicationResponseProto); rpc getClusterMetrics (GetClusterMetricsRequestProto) returns (GetClusterMetricsResponseProto); rpc getAllApplications (GetAllApplicationsRequestProto) returns (GetAllApplicationsResponseProto); rpc getClusterNodes (GetClusterNodesRequestProto) returns (GetClusterNodesResponseProto); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 704c7109964..cdcd1a747b8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -48,10 +48,6 @@ message ResourceProto { optional int32 memory = 1; } -message PriorityProto { - optional int32 priority = 1; -} - enum ContainerStateProto { C_NEW = 1; C_RUNNING = 2; @@ -70,10 +66,9 @@ message ContainerProto { optional NodeIdProto nodeId = 2; optional string node_http_address = 3; optional ResourceProto resource = 4; - optional PriorityProto priority = 5; - optional ContainerStateProto state = 6; - optional ContainerTokenProto container_token = 7; - optional ContainerStatusProto container_status = 8; + optional ContainerStateProto state = 5; + optional ContainerTokenProto container_token = 6; + optional ContainerStatusProto container_status = 7; } enum ApplicationStateProto { @@ -145,7 +140,6 @@ message ApplicationReportProto { optional string trackingUrl = 11; optional string diagnostics = 12 [default = "N/A"]; optional int64 startTime = 13; - optional int64 finishTime = 14; } message NodeIdProto { @@ -258,6 +252,10 @@ message ContainerStatusProto { //////////////////////////////////////////////////////////////////////// ////// From common////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// +message 
PriorityProto { + optional int32 priority = 1; +} + message StringURLMapProto { optional string key = 1; optional URLProto value = 2; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 1a992ad578e..753c6b8c9a8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -66,13 +66,11 @@ message AllocateResponseProto { /////// client_RM_Protocol /////////////////////////// ////////////////////////////////////////////////////// -message GetNewApplicationRequestProto { +message GetNewApplicationIdRequestProto { } -message GetNewApplicationResponseProto { +message GetNewApplicationIdResponseProto { optional ApplicationIdProto application_id = 1; - optional ResourceProto minimumCapability = 2; - optional ResourceProto maximumCapability = 3; } message GetApplicationReportRequestProto { @@ -90,11 +88,11 @@ message SubmitApplicationRequestProto { message SubmitApplicationResponseProto { } -message KillApplicationRequestProto { +message FinishApplicationRequestProto { optional ApplicationIdProto application_id = 1; } -message KillApplicationResponseProto { +message FinishApplicationResponseProto { } message GetClusterMetricsRequestProto { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ClientRMProtocolPBClientImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ClientRMProtocolPBClientImpl.java index b4f2dc46e0f..8972c656d91 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ClientRMProtocolPBClientImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ClientRMProtocolPBClientImpl.java @@ -25,6 +25,8 @@ import java.net.InetSocketAddress; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.yarn.api.ClientRMProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -33,16 +35,16 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import 
org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllApplicationsRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllApplicationsResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationReportRequestPBImpl; @@ -51,28 +53,27 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterMetricsReque import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterMetricsResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterNodesRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterNodesResponsePBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationRequestPBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationIdRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationIdResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueInfoRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueInfoResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoResponsePBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationRequestPBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.ipc.ProtoOverHadoopRpcEngine; import org.apache.hadoop.yarn.proto.ClientRMProtocol.ClientRMProtocolService; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllApplicationsRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetApplicationReportRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterMetricsRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterNodesRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueInfoRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationIdRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueUserAclsInfoRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueInfoRequestProto; +import 
org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueInfoResponseProto; import com.google.protobuf.ServiceException; @@ -87,11 +88,11 @@ public class ClientRMProtocolPBClientImpl implements ClientRMProtocol { } @Override - public KillApplicationResponse forceKillApplication( - KillApplicationRequest request) throws YarnRemoteException { - KillApplicationRequestProto requestProto = ((KillApplicationRequestPBImpl)request).getProto(); + public FinishApplicationResponse finishApplication( + FinishApplicationRequest request) throws YarnRemoteException { + FinishApplicationRequestProto requestProto = ((FinishApplicationRequestPBImpl)request).getProto(); try { - return new KillApplicationResponsePBImpl(proxy.forceKillApplication(null, requestProto)); + return new FinishApplicationResponsePBImpl(proxy.finishApplication(null, requestProto)); } catch (ServiceException e) { if (e.getCause() instanceof YarnRemoteException) { throw (YarnRemoteException)e.getCause(); @@ -138,11 +139,11 @@ public class ClientRMProtocolPBClientImpl implements ClientRMProtocol { } @Override - public GetNewApplicationResponse getNewApplication( - GetNewApplicationRequest request) throws YarnRemoteException { - GetNewApplicationRequestProto requestProto = ((GetNewApplicationRequestPBImpl)request).getProto(); + public GetNewApplicationIdResponse getNewApplicationId( + GetNewApplicationIdRequest request) throws YarnRemoteException { + GetNewApplicationIdRequestProto requestProto = ((GetNewApplicationIdRequestPBImpl)request).getProto(); try { - return new GetNewApplicationResponsePBImpl(proxy.getNewApplication(null, requestProto)); + return new GetNewApplicationIdResponsePBImpl(proxy.getNewApplicationId(null, requestProto)); } catch (ServiceException e) { if (e.getCause() instanceof YarnRemoteException) { throw (YarnRemoteException)e.getCause(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ClientRMProtocolPBServiceImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ClientRMProtocolPBServiceImpl.java index 342d864ca5d..35e4be53984 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ClientRMProtocolPBServiceImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ClientRMProtocolPBServiceImpl.java @@ -19,15 +19,17 @@ package org.apache.hadoop.yarn.api.impl.pb.service; import org.apache.hadoop.yarn.api.ClientRMProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; +import 
org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllApplicationsRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllApplicationsResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationReportRequestPBImpl; @@ -36,18 +38,18 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterMetricsReque import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterMetricsResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterNodesRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterNodesResponsePBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationRequestPBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationIdRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationIdResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueInfoRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueInfoResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoResponsePBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationRequestPBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.proto.ClientRMProtocol.ClientRMProtocolService.BlockingInterface; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllApplicationsRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllApplicationsResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetApplicationReportRequestProto; @@ -56,14 +58,12 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterMetricsRequestPr import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterMetricsResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterNodesRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterNodesResponseProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationIdRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationIdResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueInfoRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueInfoResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueUserAclsInfoRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueUserAclsInfoResponseProto; -import 
org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationResponseProto; @@ -79,12 +79,12 @@ public class ClientRMProtocolPBServiceImpl implements BlockingInterface { } @Override - public KillApplicationResponseProto forceKillApplication(RpcController arg0, - KillApplicationRequestProto proto) throws ServiceException { - KillApplicationRequestPBImpl request = new KillApplicationRequestPBImpl(proto); + public FinishApplicationResponseProto finishApplication(RpcController arg0, + FinishApplicationRequestProto proto) throws ServiceException { + FinishApplicationRequestPBImpl request = new FinishApplicationRequestPBImpl(proto); try { - KillApplicationResponse response = real.forceKillApplication(request); - return ((KillApplicationResponsePBImpl)response).getProto(); + FinishApplicationResponse response = real.finishApplication(request); + return ((FinishApplicationResponsePBImpl)response).getProto(); } catch (YarnRemoteException e) { throw new ServiceException(e); } @@ -116,13 +116,13 @@ public class ClientRMProtocolPBServiceImpl implements BlockingInterface { } @Override - public GetNewApplicationResponseProto getNewApplication( - RpcController arg0, GetNewApplicationRequestProto proto) + public GetNewApplicationIdResponseProto getNewApplicationId( + RpcController arg0, GetNewApplicationIdRequestProto proto) throws ServiceException { - GetNewApplicationRequestPBImpl request = new GetNewApplicationRequestPBImpl(proto); + GetNewApplicationIdRequestPBImpl request = new GetNewApplicationIdRequestPBImpl(proto); try { - GetNewApplicationResponse response = real.getNewApplication(request); - return ((GetNewApplicationResponsePBImpl)response).getProto(); + GetNewApplicationIdResponse response = real.getNewApplicationId(request); + return ((GetNewApplicationIdResponsePBImpl)response).getProto(); } catch (YarnRemoteException e) { throw new ServiceException(e); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index cb955af8c43..ba23134170f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -357,12 +357,6 @@ public class YarnConfiguration extends Configuration { public static final String NM_AUX_SERVICE_FMT = NM_PREFIX + "aux-services.%s.class"; - public static final String NM_USER_HOME_DIR = - NM_PREFIX + "user-home-dir"; - - public static final String DEFAULT_NM_USER_HOME_DIR= "/home/"; - - public static final int INVALID_CONTAINER_EXIT_STATUS = -1000; public static final int ABORTED_CONTAINER_EXIT_STATUS = -100; @@ -386,6 +380,6 @@ public class YarnConfiguration extends Configuration { // Use apps manager address to figure out the host for webapp addr = conf.get(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS); String host = ADDR_SPLITTER.split(addr).iterator().next(); - return JOINER.join("http://", host, ":", port); + return JOINER.join("http://", host, ":", port, "/"); } } diff --git 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/ProtoOverHadoopRpcEngine.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/ProtoOverHadoopRpcEngine.java index 9d8b846a3b6..9a623a1a8a7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/ProtoOverHadoopRpcEngine.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/ProtoOverHadoopRpcEngine.java @@ -320,12 +320,6 @@ public class ProtoOverHadoopRpcEngine implements RpcEngine { + methodName); MethodDescriptor methodDescriptor = service.getDescriptorForType() .findMethodByName(methodName); - if (methodDescriptor == null) { - String msg = "Unknown method " + methodName + " called on " - + protocol + " protocol."; - LOG.warn(msg); - return handleException(new IOException(msg)); - } Message prototype = service.getRequestPrototype(methodDescriptor); Message param = prototype.newBuilderForType() .mergeFrom(rpcRequest.getRequestProto()).build(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java index 71c829ac582..2a5244d6512 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java @@ -20,14 +20,10 @@ package org.apache.hadoop.yarn.state; import java.util.EnumMap; import java.util.HashMap; -import java.util.Iterator; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; import java.util.Stack; -import org.apache.hadoop.yarn.util.Graph; - /** * State machine topology. * This object is semantically immutable. 
If you have a @@ -445,39 +441,4 @@ final public class StateMachineFactory return currentState; } } - - /** - * Generate a graph represents the state graph of this StateMachine - * @param name graph name - * @return Graph object generated - */ - public Graph generateStateGraph(String name) { - maybeMakeStateMachineTable(); - Graph g = new Graph(name); - for (STATE startState : stateMachineTable.keySet()) { - Map> transitions - = stateMachineTable.get(startState); - for (Entry> entry : - transitions.entrySet()) { - Transition transition = entry.getValue(); - if (transition instanceof StateMachineFactory.SingleInternalArc) { - StateMachineFactory.SingleInternalArc sa - = (StateMachineFactory.SingleInternalArc) transition; - Graph.Node fromNode = g.getNode(startState.toString()); - Graph.Node toNode = g.getNode(sa.postState.toString()); - fromNode.addEdge(toNode, entry.getKey().toString()); - } else if (transition instanceof StateMachineFactory.MultipleInternalArc) { - StateMachineFactory.MultipleInternalArc ma - = (StateMachineFactory.MultipleInternalArc) transition; - Iterator iter = ma.validPostStates.iterator(); - while (iter.hasNext()) { - Graph.Node fromNode = g.getNode(startState.toString()); - Graph.Node toNode = g.getNode(iter.next().toString()); - fromNode.addEdge(toNode, entry.getKey().toString()); - } - } - } - } - return g; - } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java index 7ec367292e1..4eb63c04470 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java @@ -20,9 +20,7 @@ package org.apache.hadoop.yarn.util; import java.net.URI; import java.util.Comparator; -import java.util.List; -import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; @@ -186,31 +184,32 @@ public class BuilderUtils { return id; } - public static NodeId newNodeId(String host, int port) { - NodeId nodeId = recordFactory.newRecordInstance(NodeId.class); - nodeId.setHost(host); - nodeId.setPort(port); - return nodeId; + public static Container clone(Container c) { + Container container = recordFactory.newRecordInstance(Container.class); + container.setId(c.getId()); + container.setContainerToken(c.getContainerToken()); + container.setNodeId(c.getNodeId()); + container.setNodeHttpAddress(c.getNodeHttpAddress()); + container.setResource(c.getResource()); + container.setState(c.getState()); + return container; } public static Container newContainer(RecordFactory recordFactory, ApplicationAttemptId appAttemptId, int containerId, NodeId nodeId, - String nodeHttpAddress, Resource resource, Priority priority) { + String nodeHttpAddress, Resource resource) { ContainerId containerID = newContainerId(recordFactory, appAttemptId, containerId); - return newContainer(containerID, nodeId, nodeHttpAddress, - resource, priority); + return newContainer(containerID, nodeId, nodeHttpAddress, resource); } public static Container newContainer(ContainerId containerId, - NodeId nodeId, String nodeHttpAddress, - Resource resource, Priority priority) { + 
NodeId nodeId, String nodeHttpAddress, Resource resource) { Container container = recordFactory.newRecordInstance(Container.class); container.setId(containerId); container.setNodeId(nodeId); container.setNodeHttpAddress(nodeHttpAddress); container.setResource(resource); - container.setPriority(priority); container.setState(ContainerState.NEW); ContainerStatus containerStatus = Records.newRecord(ContainerStatus.class); containerStatus.setContainerId(containerId); @@ -243,7 +242,7 @@ public class BuilderUtils { public static ApplicationReport newApplicationReport( ApplicationId applicationId, String user, String queue, String name, String host, int rpcPort, String clientToken, ApplicationState state, - String diagnostics, String url, long startTime, long finishTime) { + String diagnostics, String url, long startTime) { ApplicationReport report = recordFactory .newRecordInstance(ApplicationReport.class); report.setApplicationId(applicationId); @@ -257,7 +256,6 @@ public class BuilderUtils { report.setDiagnostics(diagnostics); report.setTrackingUrl(url); report.setStartTime(startTime); - report.setFinishTime(finishTime); return report; } @@ -275,18 +273,5 @@ public class BuilderUtils { url.setFile(file); return url; } - - public static AllocateRequest newAllocateRequest( - ApplicationAttemptId applicationAttemptId, int responseID, - float appProgress, List resourceAsk, - List containersToBeReleased) { - AllocateRequest allocateRequest = recordFactory - .newRecordInstance(AllocateRequest.class); - allocateRequest.setApplicationAttemptId(applicationAttemptId); - allocateRequest.setResponseId(responseID); - allocateRequest.setProgress(appProgress); - allocateRequest.addAllAsks(resourceAsk); - allocateRequest.addAllReleases(containersToBeReleased); - return allocateRequest; - } + } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java index 6f5e9043192..ab6bd7395dc 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java @@ -20,7 +20,6 @@ package org.apache.hadoop.yarn.util; import static org.apache.hadoop.yarn.util.StringHelper._split; -import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.text.NumberFormat; @@ -46,8 +45,6 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; public class ConverterUtils { public static final String APPLICATION_PREFIX = "application"; - public static final String CONTAINER_PREFIX = "container"; - public static final String APPLICATION_ATTEMPT_PREFIX = "appattempt"; /** * return a hadoop path from a given url @@ -135,12 +132,14 @@ public class ConverterUtils { } private static ApplicationAttemptId toApplicationAttemptId( - Iterator it) throws NumberFormatException { - ApplicationId appId = Records.newRecord(ApplicationId.class); + RecordFactory recordFactory, + Iterator it) { + ApplicationId appId = + recordFactory.newRecordInstance(ApplicationId.class); appId.setClusterTimestamp(Long.parseLong(it.next())); appId.setId(Integer.parseInt(it.next())); - ApplicationAttemptId appAttemptId = Records - .newRecord(ApplicationAttemptId.class); + ApplicationAttemptId appAttemptId = + 
recordFactory.newRecordInstance(ApplicationAttemptId.class); appAttemptId.setApplicationId(appId); appAttemptId.setAttemptId(Integer.parseInt(it.next())); return appAttemptId; @@ -150,35 +149,16 @@ public class ConverterUtils { return cId.toString(); } - public static ContainerId toContainerId(String containerIdStr) - throws IOException { + public static ContainerId toContainerId(RecordFactory recordFactory, + String containerIdStr) { Iterator it = _split(containerIdStr).iterator(); - if (!it.next().equals(CONTAINER_PREFIX)) { - throw new IOException("Invalid ContainerId prefix: " + containerIdStr); - } - try { - ApplicationAttemptId appAttemptID = toApplicationAttemptId(it); - ContainerId containerId = Records.newRecord(ContainerId.class); - containerId.setApplicationAttemptId(appAttemptID); - containerId.setId(Integer.parseInt(it.next())); - return containerId; - } catch (NumberFormatException n) { - throw new IOException("Invalid ContainerId: " + containerIdStr, n); - } - } - - public static ApplicationAttemptId toApplicationAttemptId( - String applicationAttmeptIdStr) throws IOException { - Iterator it = _split(applicationAttmeptIdStr).iterator(); - if (!it.next().equals(APPLICATION_ATTEMPT_PREFIX)) { - throw new IOException("Invalid AppAttemptId prefix: " - + applicationAttmeptIdStr); - } - try { - return toApplicationAttemptId(it); - } catch (NumberFormatException n) { - throw new IOException("Invalid AppAttemptId: " - + applicationAttmeptIdStr, n); - } + it.next(); // prefix. TODO: Validate container prefix + ApplicationAttemptId appAttemptID = + toApplicationAttemptId(recordFactory, it); + ContainerId containerId = + recordFactory.newRecordInstance(ContainerId.class); + containerId.setApplicationAttemptId(appAttemptID); + containerId.setId(Integer.parseInt(it.next())); + return containerId; } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Graph.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Graph.java deleted file mode 100644 index aa3604fa87a..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Graph.java +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.yarn.util; - -import java.io.FileWriter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -import org.apache.commons.lang.StringEscapeUtils; - -public class Graph { - public class Edge { - Node from; - Node to; - String label; - - public Edge(Node from, Node to, String info) { - this.from = from; - this.to = to; - this.label = info; - } - - public boolean sameAs(Edge rhs) { - if (this.from == rhs.from && - this.to == rhs.to) { - return true; - } - return false; - } - - public Edge combine(Edge rhs) { - String newlabel = this.label + "," + rhs.label; - return new Edge(this.from, this.to, newlabel); - } - } - - public class Node { - Graph parent; - String id; - List ins; - List outs; - - public Node(String id) { - this.id = id; - this.parent = Graph.this; - this.ins = new ArrayList(); - this.outs = new ArrayList(); - } - - public Graph getParent() { - return parent; - } - - public Node addEdge(Node to, String info) { - Edge e = new Edge(this, to, info); - outs.add(e); - to.ins.add(e); - return this; - } - - public String getUniqueId() { - return Graph.this.name + "." + id; - } - } - - private String name; - private Graph parent; - private Set nodes = new HashSet(); - private Set subgraphs = new HashSet(); - - public Graph(String name, Graph parent) { - this.name = name; - this.parent = parent; - } - - public Graph(String name) { - this(name, null); - } - - public Graph() { - this("graph", null); - } - - public String getName() { - return name; - } - - public Graph getParent() { - return parent; - } - - private Node newNode(String id) { - Node ret = new Node(id); - nodes.add(ret); - return ret; - } - - public Node getNode(String id) { - for (Node node : nodes) { - if (node.id.equals(id)) { - return node; - } - } - return newNode(id); - } - - public Graph newSubGraph(String name) { - Graph ret = new Graph(name, this); - subgraphs.add(ret); - return ret; - } - - public void addSubGraph(Graph graph) { - subgraphs.add(graph); - graph.parent = this; - } - - private static String wrapSafeString(String label) { - if (label.indexOf(',') >= 0) { - if (label.length()>14) { - label = label.replaceAll(",", ",\n"); - } - } - label = "\"" + StringEscapeUtils.escapeJava(label) + "\""; - return label; - } - - public String generateGraphViz(String indent) { - StringBuilder sb = new StringBuilder(); - if (this.parent == null) { - sb.append("digraph " + name + " {\n"); - sb.append(String.format("graph [ label=%s, fontsize=24, fontname=Helvetica];\n", - wrapSafeString(name))); - sb.append("node [fontsize=12, fontname=Helvetica];\n"); - sb.append("edge [fontsize=9, fontcolor=blue, fontname=Arial];\n"); - } else { - sb.append("subgraph cluster_" + name + " {\nlabel=\"" + name + "\"\n"); - } - for (Graph g : subgraphs) { - String ginfo = g.generateGraphViz(indent+" "); - sb.append(ginfo); - sb.append("\n"); - } - for (Node n : nodes) { - sb.append(String.format( - "%s%s [ label = %s ];\n", - indent, - wrapSafeString(n.getUniqueId()), - n.id)); - List combinedOuts = combineEdges(n.outs); - for (Edge e : combinedOuts) { - sb.append(String.format( - "%s%s -> %s [ label = %s ];\n", - indent, - wrapSafeString(e.from.getUniqueId()), - wrapSafeString(e.to.getUniqueId()), - wrapSafeString(e.label))); - } - } - sb.append("}\n"); - return sb.toString(); - } - - public String generateGraphViz() { - return generateGraphViz(""); - } - - public void save(String filepath) throws IOException { - FileWriter fout = 
new FileWriter(filepath); - fout.write(generateGraphViz()); - fout.close(); - } - - public static List combineEdges(List edges) { - List ret = new ArrayList(); - for (Edge edge : edges) { - boolean found = false; - for (int i = 0; i < ret.size(); i++) { - Edge current = ret.get(i); - if (edge.sameAs(current)) { - ret.set(i, current.combine(edge)); - found = true; - break; - } - } - if (!found) { - ret.add(edge); - } - } - return ret; - } -} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/VisualizeStateMachine.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/VisualizeStateMachine.java deleted file mode 100644 index 0fb9a48b098..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/VisualizeStateMachine.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.yarn.util; - -import java.lang.reflect.Field; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.yarn.state.StateMachineFactory; - -public class VisualizeStateMachine { - - /** - * @param classes list of classes which have static field - * stateMachineFactory of type StateMachineFactory - * @return graph represent this StateMachine - */ - public static Graph getGraphFromClasses(String graphName, List classes) - throws Exception { - Graph ret = null; - if (classes.size() != 1) { - ret = new Graph(graphName); - } - for (String className : classes) { - Class clz = Class.forName(className); - Field factoryField = clz.getDeclaredField("stateMachineFactory"); - factoryField.setAccessible(true); - StateMachineFactory factory = (StateMachineFactory) factoryField.get(null); - if (classes.size() == 1) { - return factory.generateStateGraph(graphName); - } - String gname = clz.getSimpleName(); - if (gname.endsWith("Impl")) { - gname = gname.substring(0, gname.length()-4); - } - ret.addSubGraph(factory.generateStateGraph(gname)); - } - return ret; - } - - public static void main(String [] args) throws Exception { - if (args.length < 3) { - System.err.printf("Usage: %s \n", - VisualizeStateMachine.class.getName()); - System.exit(1); - } - String [] classes = args[1].split(","); - ArrayList validClasses = new ArrayList(); - for (String c : classes) { - String vc = c.trim(); - if (vc.length()>0) { - validClasses.add(vc); - } - } - Graph g = getGraphFromClasses(args[0], validClasses); - g.save(args[2]); - } -} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java 
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java index e404fe5a723..ef8ab976ef8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java @@ -84,15 +84,6 @@ public class Dispatcher extends HttpServlet { prepareToExit(); return; } - // if they provide a redirectPath go there instead of going to - // "/" so that filters can differentiate the webapps. - if (uri.equals("/")) { - String redirectPath = webApp.getRedirectPath(); - if (redirectPath != null && !redirectPath.isEmpty()) { - res.sendRedirect(redirectPath); - return; - } - } String method = req.getMethod(); if (method.equals("OPTIONS")) { doOptions(req, res); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java index f83843e97e6..b9afe81ca85 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java @@ -26,7 +26,6 @@ import com.google.inject.Provides; import com.google.inject.servlet.GuiceFilter; import com.google.inject.servlet.ServletModule; -import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -45,9 +44,6 @@ public abstract class WebApp extends ServletModule { public enum HTTP { GET, POST, HEAD, PUT, DELETE }; private volatile String name; - private volatile List servePathSpecs = new ArrayList(); - // path to redirect to if user goes to "/" - private volatile String redirectPath; private volatile Configuration conf; private volatile HttpServer httpServer; private volatile GuiceFilter guiceFilter; @@ -102,22 +98,6 @@ public abstract class WebApp extends ServletModule { public String name() { return this.name; } - void addServePathSpec(String path) { this.servePathSpecs.add(path); } - - public String[] getServePathSpecs() { - return this.servePathSpecs.toArray(new String[this.servePathSpecs.size()]); - } - - /** - * Set a path to redirect the user to if they just go to "/". For - * instance "/" goes to "/yarn/apps". This allows the filters to - * more easily differentiate the different webapps. 
- * @param path the path to redirect to - */ - void setRedirectPath(String path) { this.redirectPath = path; } - - public String getRedirectPath() { return this.redirectPath; } - void setHostClass(Class cls) { router.setHostClass(cls); } @@ -129,10 +109,7 @@ public abstract class WebApp extends ServletModule { @Override public void configureServlets() { setup(); - serve("/", "/__stop").with(Dispatcher.class); - for (String path : this.servePathSpecs) { - serve(path).with(Dispatcher.class); - } + serve("/", "/__stop", StringHelper.join('/', name, '*')).with(Dispatcher.class); } /** diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java index b5217999687..85b88d16cc4 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java @@ -113,14 +113,6 @@ public class WebApps { }; } webapp.setName(name); - String basePath = "/" + name; - webapp.setRedirectPath(basePath); - if (basePath.equals("/")) { - webapp.addServePathSpec("/*"); - } else { - webapp.addServePathSpec(basePath); - webapp.addServePathSpec(basePath + "/*"); - } if (conf == null) { conf = new Configuration(); } @@ -150,8 +142,7 @@ public class WebApps { } } HttpServer server = - new HttpServer(name, bindAddress, port, findPort, conf, - webapp.getServePathSpecs()); + new HttpServer(name, bindAddress, port, findPort, conf); server.addGlobalFilter("guice", GuiceFilter.class.getName(), null); webapp.setConf(conf); webapp.setHttpServer(server); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/cluster/.keep b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/cluster/.keep deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/jobhistory/.keep b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/jobhistory/.keep deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/mapreduce/.keep b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/mapreduce/.keep deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/node/.keep b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/node/.keep deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java index 7d233e2d9fc..65f6c548fbc 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java @@ -167,16 +167,6 @@ public class MockApps { // TODO Auto-generated method stub } - @Override - public long getFinishTime() { - // TODO Auto-generated method stub - return 0; - } - @Override - public void 
setFinishTime(long finishTime) { - // TODO Auto-generated method stub - - } }; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java index a855cc6f218..58efcc42307 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java @@ -25,11 +25,9 @@ import junit.framework.Assert; import org.apache.avro.ipc.Server; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.yarn.api.ClientRMProtocol; import org.apache.hadoop.yarn.api.ContainerManager; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerResponse; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; @@ -49,7 +47,6 @@ import org.apache.hadoop.yarn.factory.providers.YarnRemoteExceptionFactoryProvid import org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.ipc.YarnRPC; -import org.apache.hadoop.yarn.util.Records; import org.junit.Test; public class TestRPC { @@ -68,35 +65,6 @@ public class TestRPC { // test(HadoopYarnRPC.class.getName()); // } - @Test - public void testUnknownCall() { - Configuration conf = new Configuration(); - conf.set(YarnConfiguration.IPC_RPC_IMPL, HadoopYarnProtoRPC.class - .getName()); - YarnRPC rpc = YarnRPC.create(conf); - String bindAddr = "localhost:0"; - InetSocketAddress addr = NetUtils.createSocketAddr(bindAddr); - Server server = rpc.getServer(ContainerManager.class, - new DummyContainerManager(), addr, conf, null, 1); - server.start(); - - // Any unrelated protocol would do - ClientRMProtocol proxy = (ClientRMProtocol) rpc.getProxy( - ClientRMProtocol.class, NetUtils.createSocketAddr("localhost:" - + server.getPort()), conf); - - try { - proxy.getNewApplication(Records - .newRecord(GetNewApplicationRequest.class)); - Assert.fail("Excepted RPC call to fail with unknown method."); - } catch (YarnRemoteException e) { - Assert.assertTrue(e.getMessage().matches( - "Unknown method getNewApplication called on.*" - + "org.apache.hadoop.yarn.proto.ClientRMProtocol" - + "\\$ClientRMProtocolService\\$BlockingInterface protocol.")); - } - } - @Test public void testHadoopProtoRPC() throws Exception { test(HadoopYarnProtoRPC.class.getName()); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java deleted file mode 100644 index 3d2a5769097..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java +++ /dev/null @@ -1,54 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. 
See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package org.apache.hadoop.yarn.conf; - -import java.net.InetSocketAddress; - -import junit.framework.Assert; - -import org.apache.avro.ipc.Server; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.junit.Test; - -public class TestYarnConfiguration { - - @Test - public void testDefaultRMWebUrl() throws Exception { - YarnConfiguration conf = new YarnConfiguration(); - String rmWebUrl = YarnConfiguration.getRMWebAppURL(conf); - // shouldn't have a "/" on the end of the url as all the other uri routinnes - // specifically add slashes and Jetty doesn't handle double slashes. - Assert.assertEquals("RM Web Url is not correct", "http://0.0.0.0:8088", - rmWebUrl); - } - - @Test - public void testRMWebUrlSpecified() throws Exception { - YarnConfiguration conf = new YarnConfiguration(); - // seems a bit odd but right now we are forcing webapp for RM to be RM_ADDRESS - // for host and use the port from the RM_WEBAPP_ADDRESS - conf.set(YarnConfiguration.RM_WEBAPP_ADDRESS, "footesting:99110"); - conf.set(YarnConfiguration.RM_ADDRESS, "rmtesting:9999"); - String rmWebUrl = YarnConfiguration.getRMWebAppURL(conf); - Assert.assertEquals("RM Web Url is not correct", "http://rmtesting:99110", - rmWebUrl); - } - -} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java index 31b2aaa2ed6..db84f32cf64 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java @@ -18,7 +18,6 @@ package org.apache.hadoop.yarn.webapp; -import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.yarn.MockApps; import org.apache.hadoop.yarn.webapp.Controller; import org.apache.hadoop.yarn.webapp.WebApp; @@ -149,32 +148,6 @@ public class TestWebApp { app.stop(); } - @Test public void testServePaths() { - WebApp app = WebApps.$for("test", this).start(); - assertEquals("/test", app.getRedirectPath()); - String[] expectedPaths = { "/test", "/test/*" }; - String[] pathSpecs = app.getServePathSpecs(); - - assertEquals(2, pathSpecs.length); - for(int i = 0; i < expectedPaths.length; i++) { - assertTrue(ArrayUtils.contains(pathSpecs, expectedPaths[i])); - } - app.stop(); - } - - @Test public void testServePathsNoName() { - WebApp app = WebApps.$for("", this).start(); - assertEquals("/", app.getRedirectPath()); - String[] expectedPaths = { "/*" }; - String[] pathSpecs = app.getServePathSpecs(); - - assertEquals(1, pathSpecs.length); - for(int i = 0; i < expectedPaths.length; i++) { - 
assertTrue(ArrayUtils.contains(pathSpecs, expectedPaths[i])); - } - app.stop(); - } - @Test public void testDefaultRoutes() throws Exception { WebApp app = WebApps.$for("test", this).start(); String baseUrl = baseUrl(app); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml index 1d7b9cb2d1f..98959644cf6 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml @@ -103,39 +103,6 @@ true - - visualize - - false - - - - - org.codehaus.mojo - exec-maven-plugin - 1.2 - - - compile - - java - - - org.apache.hadoop.yarn.util.VisualizeStateMachine - - NodeManager - org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl, - org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl, - org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalizedResource - NodeManager.gv - - - - - - - - diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java index 83872876797..a7e82a2d41a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java @@ -133,10 +133,8 @@ public class DefaultContainerExecutor extends ContainerExecutor { String[] command = new String[] { "bash", "-c", launchDst.toUri().getPath().toString() }; LOG.info("launchContainer: " + Arrays.toString(command)); - shExec = new ShellCommandExecutor( - command, - new File(containerWorkDir.toUri().getPath()), - container.getLaunchContext().getEnvironment()); // sanitized env + shExec = new ShellCommandExecutor(command, + new File(containerWorkDir.toUri().getPath())); launchCommandObjs.put(containerId, shExec); shExec.execute(); } catch (IOException e) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index 0779d3b1581..97721f72a36 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -161,11 +161,7 @@ public class LinuxContainerExecutor extends ContainerExecutor { nmPrivateCotainerScriptPath.toUri().getPath().toString(), nmPrivateTokensPath.toUri().getPath().toString())); String[] commandArray = command.toArray(new String[command.size()]); - ShellCommandExecutor shExec = - new 
ShellCommandExecutor( - commandArray, - null, // NM's cwd - container.getLaunchContext().getEnvironment()); // sanitized env + ShellCommandExecutor shExec = new ShellCommandExecutor(commandArray); launchCommandObjs.put(containerId, shExec); // DEBUG LOG.info("launchContainer: " + Arrays.toString(commandArray)); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 1b1fd46b9e7..641e74b8018 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -101,7 +101,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements public synchronized void init(Configuration conf) { this.rmAddress = conf.get(YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, - YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_ADDRESS); + YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS); this.heartBeatInterval = conf.getLong(YarnConfiguration.NM_TO_RM_HEARTBEAT_INTERVAL_MS, YarnConfiguration.DEFAULT_NM_TO_RM_HEARTBEAT_INTERVAL_MS); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 8d3f3fe0842..4e02c3adede 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -158,12 +158,10 @@ public class ContainerImpl implements Container { ContainerEventType.CONTAINER_LAUNCHED, new LaunchTransition()) .addTransition(ContainerState.LOCALIZED, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, - new ExitedWithFailureTransition(true)) + new ExitedWithFailureTransition()) .addTransition(ContainerState.LOCALIZED, ContainerState.LOCALIZED, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) - // TODO race: Can lead to a CONTAINER_LAUNCHED event at state KILLING, - // and a container which will never be killed by the NM. 
.addTransition(ContainerState.LOCALIZED, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, new KillTransition()) @@ -171,19 +169,16 @@ public class ContainerImpl implements Container { .addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_SUCCESS, ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS, - new ExitedWithSuccessTransition(true)) + new ExitedWithSuccessTransition()) .addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, - new ExitedWithFailureTransition(true)) + new ExitedWithFailureTransition()) .addTransition(ContainerState.RUNNING, ContainerState.RUNNING, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.RUNNING, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, new KillTransition()) - .addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_FAILURE, - ContainerEventType.CONTAINER_KILLED_ON_REQUEST, - new KilledExternallyTransition()) // From CONTAINER_EXITED_WITH_SUCCESS State .addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.DONE, @@ -225,10 +220,10 @@ public class ContainerImpl implements Container { ContainerEventType.KILL_CONTAINER) .addTransition(ContainerState.KILLING, ContainerState.EXITED_WITH_SUCCESS, ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS, - new ExitedWithSuccessTransition(false)) + new ExitedWithSuccessTransition()) .addTransition(ContainerState.KILLING, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, - new ExitedWithFailureTransition(false)) + new ExitedWithFailureTransition()) .addTransition(ContainerState.KILLING, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, @@ -556,41 +551,18 @@ public class ContainerImpl implements Container { } } - @SuppressWarnings("unchecked") // dispatcher not typed static class ExitedWithSuccessTransition extends ContainerTransition { - - boolean clCleanupRequired; - - public ExitedWithSuccessTransition(boolean clCleanupRequired) { - this.clCleanupRequired = clCleanupRequired; - } - @Override public void transition(ContainerImpl container, ContainerEvent event) { - // Set exit code to 0 on success - container.exitCode = 0; - // TODO: Add containerWorkDir to the deletion service. - if (clCleanupRequired) { - container.dispatcher.getEventHandler().handle( - new ContainersLauncherEvent(container, - ContainersLauncherEventType.CLEANUP_CONTAINER)); - } - + // Inform the localizer to decrement reference counts and cleanup + // resources. container.cleanup(); } } - @SuppressWarnings("unchecked") // dispatcher not typed static class ExitedWithFailureTransition extends ContainerTransition { - - boolean clCleanupRequired; - - public ExitedWithFailureTransition(boolean clCleanupRequired) { - this.clCleanupRequired = clCleanupRequired; - } - @Override public void transition(ContainerImpl container, ContainerEvent event) { ContainerExitEvent exitEvent = (ContainerExitEvent) event; @@ -599,28 +571,12 @@ public class ContainerImpl implements Container { // TODO: Add containerWorkDir to the deletion service. // TODO: Add containerOuputDir to the deletion service. - if (clCleanupRequired) { - container.dispatcher.getEventHandler().handle( - new ContainersLauncherEvent(container, - ContainersLauncherEventType.CLEANUP_CONTAINER)); - } - + // Inform the localizer to decrement reference counts and cleanup + // resources. 
container.cleanup(); } } - static class KilledExternallyTransition extends ExitedWithFailureTransition { - KilledExternallyTransition() { - super(true); - } - - @Override - public void transition(ContainerImpl container, ContainerEvent event) { - super.transition(container, event); - container.diagnostics.append("Killed by external signal\n"); - } - } - static class ResourceFailedTransition implements SingleArcTransition { @Override diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 43afa4cb85e..497460d3e7d 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -44,7 +44,6 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ApplicationConstants; -import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; @@ -90,6 +89,7 @@ public class ContainerLaunch implements Callable { final Map localResources = container.getLocalizedResources(); String containerIdStr = ConverterUtils.toString(container.getContainerID()); final String user = launchContext.getUser(); + final Map env = launchContext.getEnvironment(); final List command = launchContext.getCommands(); int ret = -1; @@ -109,16 +109,16 @@ public class ContainerLaunch implements Callable { } launchContext.setCommands(newCmds); - Map environment = launchContext.getEnvironment(); - // Make a copy of env to iterate & do variable expansion - for (Entry entry : environment.entrySet()) { - String value = entry.getValue(); - entry.setValue( - value.replace( + Map envs = launchContext.getEnvironment(); + Map newEnvs = new HashMap(envs.size()); + for (Entry entry : envs.entrySet()) { + newEnvs.put( + entry.getKey(), + entry.getValue().replace( ApplicationConstants.LOG_DIR_EXPANSION_VAR, - containerLogDir.toUri().getPath()) - ); + containerLogDir.toUri().getPath())); } + launchContext.setEnvironment(newEnvs); // /////////////////////////// End of variable expansion FileContext lfs = FileContext.getLocalFSFileContext(); @@ -164,18 +164,11 @@ public class ContainerLaunch implements Callable { EnumSet.of(CREATE, OVERWRITE)); // Set the token location too. 
- environment.put( - ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME, - new Path(containerWorkDir, - FINAL_CONTAINER_TOKENS_FILE).toUri().getPath()); + env.put(ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME, new Path( + containerWorkDir, FINAL_CONTAINER_TOKENS_FILE).toUri().getPath()); - // Sanitize the container's environment - sanitizeEnv(environment, containerWorkDir, appDirs); - - // Write out the environment - writeLaunchEnv(containerScriptOutStream, environment, localResources, - launchContext.getCommands()); - + writeLaunchEnv(containerScriptOutStream, env, localResources, + launchContext.getCommands(), appDirs); // /////////// End of writing out container-script // /////////// Write out the container-tokens in the nmPrivate space. @@ -282,71 +275,19 @@ public class ContainerLaunch implements Callable { } - private static void putEnvIfNotNull( - Map environment, String variable, String value) { - if (value != null) { - environment.put(variable, value); - } - } - - private static void putEnvIfAbsent( - Map environment, String variable) { - if (environment.get(variable) == null) { - putEnvIfNotNull(environment, variable, System.getenv(variable)); - } - } - - public void sanitizeEnv(Map environment, - Path pwd, List appDirs) { - /** - * Non-modifiable environment variables - */ - - putEnvIfNotNull(environment, Environment.USER.name(), container.getUser()); - - putEnvIfNotNull(environment, - Environment.LOGNAME.name(),container.getUser()); - - putEnvIfNotNull(environment, - Environment.HOME.name(), - conf.get( - YarnConfiguration.NM_USER_HOME_DIR, - YarnConfiguration.DEFAULT_NM_USER_HOME_DIR - ) - ); - - putEnvIfNotNull(environment, Environment.PWD.name(), pwd.toString()); - - putEnvIfNotNull(environment, - Environment.HADOOP_CONF_DIR.name(), - System.getenv(Environment.HADOOP_CONF_DIR.name()) - ); - - putEnvIfNotNull(environment, - ApplicationConstants.LOCAL_DIR_ENV, - StringUtils.join(",", appDirs) - ); - - if (!Shell.WINDOWS) { - environment.put("JVM_PID", "$$"); - } - - /** - * Modifiable environment variables - */ - - putEnvIfAbsent(environment, Environment.JAVA_HOME.name()); - putEnvIfAbsent(environment, Environment.HADOOP_COMMON_HOME.name()); - putEnvIfAbsent(environment, Environment.HADOOP_HDFS_HOME.name()); - putEnvIfAbsent(environment, Environment.YARN_HOME.name()); - - } - private static void writeLaunchEnv(OutputStream out, Map environment, Map resources, - List command) + List command, List appDirs) throws IOException { ShellScriptBuilder sb = new ShellScriptBuilder(); + if (System.getenv("YARN_HOME") != null) { + // TODO: Get from whitelist. 
+ sb.env("YARN_HOME", System.getenv("YARN_HOME")); + } + sb.env(ApplicationConstants.LOCAL_DIR_ENV, StringUtils.join(",", appDirs)); + if (!Shell.WINDOWS) { + sb.env("JVM_PID", "$$"); + } if (environment != null) { for (Map.Entry env : environment.entrySet()) { sb.env(env.getKey().toString(), env.getValue().toString()); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsPage.java index e0795613b65..68b0686a254 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsPage.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsPage.java @@ -31,6 +31,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; @@ -54,26 +56,22 @@ public class ContainerLogsPage extends NMView { private final Configuration conf; private final LocalDirAllocator logsSelector; private final Context nmContext; + private final RecordFactory recordFactory; @Inject public ContainersLogsBlock(Configuration conf, Context context) { this.conf = conf; this.logsSelector = new LocalDirAllocator(YarnConfiguration.NM_LOG_DIRS); this.nmContext = context; + this.recordFactory = RecordFactoryProvider.getRecordFactory(conf); } @Override protected void render(Block html) { DIV div = html.div("#content"); - ContainerId containerId; - try { - containerId = ConverterUtils.toContainerId($(CONTAINER_ID)); - } catch (IOException e) { - div.h1("Invalid containerId " + $(CONTAINER_ID))._(); - return; - } - + ContainerId containerId = + ConverterUtils.toContainerId(this.recordFactory, $(CONTAINER_ID)); Container container = this.nmContext.getContainers().get(containerId); if (container == null) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java index de76b84e277..27be38a0299 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java @@ -18,21 +18,20 @@ package org.apache.hadoop.yarn.server.nodemanager.webapp; -import static org.apache.hadoop.yarn.util.StringHelper.ujoin; import static 
org.apache.hadoop.yarn.webapp.view.JQueryUI.ACCORDION; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.initID; +import static org.apache.hadoop.yarn.util.StringHelper.ujoin; -import java.io.IOException; - +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.webapp.SubView; -import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; -import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; import org.apache.hadoop.yarn.webapp.view.InfoBlock; @@ -54,30 +53,22 @@ public class ContainerPage extends NMView implements NMWebParams { public static class ContainerBlock extends HtmlBlock implements NMWebParams { + private final Configuration conf; private final Context nmContext; + private final RecordFactory recordFactory; @Inject - public ContainerBlock(Context nmContext) { + public ContainerBlock(Configuration conf, Context nmContext) { + this.conf = conf; this.nmContext = nmContext; + this.recordFactory = RecordFactoryProvider.getRecordFactory(this.conf); } @Override protected void render(Block html) { - ContainerId containerID; - try { - containerID = ConverterUtils.toContainerId($(CONTAINER_ID)); - } catch (IOException e) { - html.p()._("Invalid containerId " + $(CONTAINER_ID))._(); - return; - } - - DIV div = html.div("#content"); + ContainerId containerID = + ConverterUtils.toContainerId(this.recordFactory, $(CONTAINER_ID)); Container container = this.nmContext.getContainers().get(containerID); - if (container == null) { - div.h1("Unknown Container. 
Container might have completed, " - + "please go back to the previous page and retry.")._(); - return; - } ContainerStatus containerData = container.cloneAndGetContainerStatus(); int exitCode = containerData.getExitStatus(); String exiStatus = diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java index a043a37f594..307e87eccd6 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java @@ -57,7 +57,7 @@ public class WebServer extends AbstractService { LOG.info("Instantiating NMWebApp at " + bindAddress); try { this.webApp = - WebApps.$for("node", Context.class, this.nmContext) + WebApps.$for("yarn", Context.class, this.nmContext) .at(bindAddress).with(getConfig()) .start(new NMWebApp(this.resourceView)); } catch (Exception e) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties index 96108ab9656..897bca3f414 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties @@ -12,12 +12,12 @@ log4j.threshold=ALL # #Default values -yarn.app.mapreduce.container.log.dir=null -yarn.app.mapreduce.container.log.filesize=100 +hadoop.yarn.mr.containerLogDir=null +hadoop.yarn.mr.totalLogFileSize=100 log4j.appender.CLA=org.apache.hadoop.yarn.ContainerLogAppender -log4j.appender.CLA.containerLogDir=${yarn.app.mapreduce.container.log.dir} -log4j.appender.CLA.totalLogFileSize=${yarn.app.mapreduce.container.log.filesize} +log4j.appender.CLA.containerLogDir=${hadoop.yarn.mr.containerLogDir} +log4j.appender.CLA.totalLogFileSize=${hadoop.yarn.mr.totalLogFileSize} log4j.appender.CLA.layout=org.apache.log4j.PatternLayout log4j.appender.CLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java index 44328dbe0aa..6ee220b674a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java @@ -105,31 +105,7 @@ public class TestContainerManagerWithLCE extends TestContainerManager { LOG.info("Running testContainerLaunchAndStop"); 
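On the container-log4j.properties hunk above: the renamed keys are resolved through log4j 1.x ${...} substitution, which, as far as standard log4j behavior goes, is satisfied from Java system properties before the properties file itself, so the NodeManager can point the appender at a per-container log directory by passing -D flags on the container's java command line. A hedged, self-contained sketch of that mechanism using stock log4j 1.x classes (the file path and property values here are examples only):

import org.apache.log4j.Logger;
import org.apache.log4j.PropertyConfigurator;

public class ContainerLog4jSketch {
  public static void main(String[] args) {
    // Normally these arrive as -Dhadoop.yarn.mr.containerLogDir=... options on
    // the container command line; set them here so the example runs standalone.
    System.setProperty("hadoop.yarn.mr.containerLogDir", "/tmp/container-logs");
    System.setProperty("hadoop.yarn.mr.totalLogFileSize", "100");
    // container-log4j.properties references the two system properties via ${...}.
    PropertyConfigurator.configure("container-log4j.properties");
    Logger.getLogger(ContainerLog4jSketch.class).info("hello from the container");
  }
}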
super.testContainerLaunchAndStop(); } - - @Override - public void testContainerLaunchAndExitSuccess() throws IOException, - InterruptedException { - // Don't run the test if the binary is not available. - if (!shouldRunTest()) { - LOG.info("LCE binary path is not passed. Not running the test"); - return; - } - LOG.info("Running testContainerLaunchAndExitSuccess"); - super.testContainerLaunchAndExitSuccess(); - } - @Override - public void testContainerLaunchAndExitFailure() throws IOException, - InterruptedException { - // Don't run the test if the binary is not available. - if (!shouldRunTest()) { - LOG.info("LCE binary path is not passed. Not running the test"); - return; - } - LOG.info("Running testContainerLaunchAndExitFailure"); - super.testContainerLaunchAndExitFailure(); - } - @Override public void testLocalFilesCleanup() throws InterruptedException, IOException { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index 2de0428cb6b..87460d045ce 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -287,95 +287,7 @@ public class TestContainerManager extends BaseContainerManagerTest { exec.signalContainer(user, pid, Signal.NULL)); } - - private void testContainerLaunchAndExit(int exitCode) throws IOException, InterruptedException { - File scriptFile = new File(tmpDir, "scriptFile.sh"); - PrintWriter fileWriter = new PrintWriter(scriptFile); - File processStartFile = - new File(tmpDir, "start_file.txt").getAbsoluteFile(); - fileWriter.write("\numask 0"); // So that start file is readable by the test - fileWriter.write("\necho Hello World! 
> " + processStartFile); - fileWriter.write("\necho $$ >> " + processStartFile); - - // Have script throw an exit code at the end - if (exitCode != 0) { - fileWriter.write("\nexit "+exitCode); - } - - fileWriter.close(); - - ContainerLaunchContext containerLaunchContext = - recordFactory.newRecordInstance(ContainerLaunchContext.class); - - // ////// Construct the Container-id - ContainerId cId = createContainerId(); - containerLaunchContext.setContainerId(cId); - - containerLaunchContext.setUser(user); - - URL resource_alpha = - ConverterUtils.getYarnUrlFromPath(localFS - .makeQualified(new Path(scriptFile.getAbsolutePath()))); - LocalResource rsrc_alpha = - recordFactory.newRecordInstance(LocalResource.class); - rsrc_alpha.setResource(resource_alpha); - rsrc_alpha.setSize(-1); - rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION); - rsrc_alpha.setType(LocalResourceType.FILE); - rsrc_alpha.setTimestamp(scriptFile.lastModified()); - String destinationFile = "dest_file"; - Map localResources = - new HashMap(); - localResources.put(destinationFile, rsrc_alpha); - containerLaunchContext.setLocalResources(localResources); - containerLaunchContext.setUser(containerLaunchContext.getUser()); - List commands = new ArrayList(); - commands.add("/bin/bash"); - commands.add(scriptFile.getAbsolutePath()); - containerLaunchContext.setCommands(commands); - containerLaunchContext.setResource(recordFactory - .newRecordInstance(Resource.class)); - containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); - - StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); - startRequest.setContainerLaunchContext(containerLaunchContext); - containerManager.startContainer(startRequest); - - BaseContainerManagerTest.waitForContainerState(containerManager, cId, - ContainerState.COMPLETE); - - GetContainerStatusRequest gcsRequest = - recordFactory.newRecordInstance(GetContainerStatusRequest.class); - gcsRequest.setContainerId(cId); - ContainerStatus containerStatus = - containerManager.getContainerStatus(gcsRequest).getStatus(); - - // Verify exit status matches exit state of script - Assert.assertEquals(exitCode, - containerStatus.getExitStatus()); - } - - @Test - public void testContainerLaunchAndExitSuccess() throws IOException, InterruptedException { - containerManager.start(); - int exitCode = 0; - - // launch context for a command that will return exit code 0 - // and verify exit code returned - testContainerLaunchAndExit(exitCode); - } - - @Test - public void testContainerLaunchAndExitFailure() throws IOException, InterruptedException { - containerManager.start(); - int exitCode = 50; - - // launch context for a command that will return exit code 0 - // and verify exit code returned - testContainerLaunchAndExit(exitCode); - } - @Test public void testLocalFilesCleanup() throws InterruptedException, IOException { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 48c745457a7..04d400ad18d 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ 
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -38,6 +38,8 @@ import java.util.Map.Entry; import java.util.Random; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.LocalResource; @@ -133,28 +135,6 @@ public class TestContainer { } } - @Test - @SuppressWarnings("unchecked") // mocked generic - public void testExternalKill() throws Exception { - WrappedContainer wc = null; - try { - wc = new WrappedContainer(13, 314159265358979L, 4344, "yak"); - wc.initContainer(); - wc.localizeResources(); - wc.launchContainer(); - reset(wc.localizerBus); - wc.containerKilledOnRequest(); - assertEquals(ContainerState.EXITED_WITH_FAILURE, - wc.c.getContainerState()); - verifyCleanupCall(wc); - } - finally { - if (wc != null) { - wc.finished(); - } - } - } - @Test @SuppressWarnings("unchecked") // mocked generic public void testCleanupOnFailure() throws Exception { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml index d94f5973144..b4c398f70f0 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml @@ -37,20 +37,6 @@ - - - - maven-jar-plugin - - - - test-jar - - test-compile - - - - maven-antrun-plugin @@ -112,41 +98,4 @@ - - - - visualize - - false - - - - - org.codehaus.mojo - exec-maven-plugin - 1.2 - - - compile - - java - - - org.apache.hadoop.yarn.util.VisualizeStateMachine - - ResourceManager - org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl, - org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl, - org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl, - org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl - ResourceManager.gv - - - - - - - - - diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index 6237f8961f0..1fc34f0dfd0 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -84,7 +84,7 @@ public class AdminService extends AbstractService implements RMAdminProtocol { super.init(conf); String bindAddress = conf.get(YarnConfiguration.RM_ADMIN_ADDRESS, - YarnConfiguration.DEFAULT_RM_ADMIN_ADDRESS); + YarnConfiguration.RM_ADMIN_ADDRESS); masterServiceAddress = NetUtils.createSocketAddr(bindAddress); adminAcl = new AccessControlList( diff --git 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 2cf19000985..a31bef8af9d 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -36,8 +36,8 @@ import org.apache.hadoop.security.SecurityInfo; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -46,8 +46,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; @@ -165,17 +165,11 @@ public class ClientRMService extends AbstractService implements } @Override - public GetNewApplicationResponse getNewApplication( - GetNewApplicationRequest request) throws YarnRemoteException { - GetNewApplicationResponse response = recordFactory - .newRecordInstance(GetNewApplicationResponse.class); + public GetNewApplicationIdResponse getNewApplicationId( + GetNewApplicationIdRequest request) throws YarnRemoteException { + GetNewApplicationIdResponse response = recordFactory + .newRecordInstance(GetNewApplicationIdResponse.class); response.setApplicationId(getNewApplicationId()); - // Pick up min/max resource from scheduler... 
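The replacement body of getNewApplicationId() above uses YARN's record-factory idiom, which recurs throughout this patch: ask the factory for an empty protocol record, then populate it through setters. Pulled out on its own, and assuming the YARN API and factory classes imported elsewhere in this patch are on the classpath, the idiom looks roughly like this (illustrative fragment, not the actual ClientRMService code):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;

public class RecordFactorySketch {
  // Build an empty protocol record and fill it via setters, as the hunk above does.
  static GetNewApplicationIdResponse newAppIdResponse(Configuration conf,
                                                      ApplicationId appId) {
    RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(conf);
    GetNewApplicationIdResponse response =
        recordFactory.newRecordInstance(GetNewApplicationIdResponse.class);
    response.setApplicationId(appId);
    return response;
  }
}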
- response.setMinimumResourceCapability(scheduler - .getMinimumResourceCapability()); - response.setMaximumResourceCapability(scheduler - .getMaximumResourceCapability()); - return response; } @@ -234,8 +228,8 @@ public class ClientRMService extends AbstractService implements @SuppressWarnings("unchecked") @Override - public KillApplicationResponse forceKillApplication( - KillApplicationRequest request) throws YarnRemoteException { + public FinishApplicationResponse finishApplication( + FinishApplicationRequest request) throws YarnRemoteException { ApplicationId applicationId = request.getApplicationId(); @@ -268,8 +262,8 @@ public class ClientRMService extends AbstractService implements RMAuditLogger.logSuccess(callerUGI.getShortUserName(), AuditConstants.KILL_APP_REQUEST, "ClientRMService" , applicationId); - KillApplicationResponse response = recordFactory - .newRecordInstance(KillApplicationResponse.class); + FinishApplicationResponse response = recordFactory + .newRecordInstance(FinishApplicationResponse.class); return response; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index 3f175a34a0a..d0cd0a7ff86 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -30,7 +30,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.security.ApplicationTokenIdentifier; import org.apache.hadoop.yarn.security.client.ClientToAMSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; @@ -251,10 +250,13 @@ public class RMAppManager implements EventHandler { if (rmContext.getRMApps().putIfAbsent(applicationId, application) != null) { - String message = "Application with id " + applicationId - + " is already present! Cannot add a duplicate!"; - LOG.info(message); - throw RPCUtil.getRemoteException(message); + LOG.info("Application with id " + applicationId + + " is already present! Cannot add a duplicate!"); + // don't send event through dispatcher as it will be handled by app + // already present with this id. + application.handle(new RMAppRejectedEvent(applicationId, + "Application with this id is already present! 
" + + "Cannot add a duplicate!")); } else { this.rmContext.getDispatcher().getEventHandler().handle( new RMAppEvent(applicationId, RMAppEventType.START)); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java index 997906a62e4..85cd8825daa 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java @@ -22,6 +22,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore; @@ -30,6 +31,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; public class RMContextImpl implements RMContext { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 8a56d504d69..179b56a4af4 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -41,7 +41,6 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.ApplicationTokenSecretManager; import org.apache.hadoop.yarn.security.client.ClientToAMSecretManager; -import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store; @@ -98,7 +97,7 @@ public class ResourceManager extends CompositeService implements Recoverable { private ContainerAllocationExpirer containerAllocationExpirer; protected NMLivelinessMonitor nmLivelinessMonitor; protected NodesListManager nodesListManager; - private EventHandler schedulerDispatcher; + private SchedulerEventDispatcher schedulerDispatcher; protected RMAppManager rmAppManager; 
private WebApp webApp; @@ -119,7 +118,7 @@ public class ResourceManager extends CompositeService implements Recoverable { @Override public synchronized void init(Configuration conf) { - this.rmDispatcher = createDispatcher(); + this.rmDispatcher = new AsyncDispatcher(); addIfService(this.rmDispatcher); this.containerAllocationExpirer = new ContainerAllocationExpirer( @@ -138,8 +137,8 @@ public class ResourceManager extends CompositeService implements Recoverable { this.conf = new YarnConfiguration(conf); // Initialize the scheduler this.scheduler = createScheduler(); - this.schedulerDispatcher = createSchedulerEventDispatcher(); - addIfService(this.schedulerDispatcher); + this.schedulerDispatcher = new SchedulerEventDispatcher(this.scheduler); + addService(this.schedulerDispatcher); this.rmDispatcher.register(SchedulerEventType.class, this.schedulerDispatcher); @@ -187,22 +186,11 @@ public class ResourceManager extends CompositeService implements Recoverable { addService(adminService); this.applicationMasterLauncher = createAMLauncher(); - this.rmDispatcher.register(AMLauncherEventType.class, - this.applicationMasterLauncher); - addService(applicationMasterLauncher); super.init(conf); } - protected EventHandler createSchedulerEventDispatcher() { - return new SchedulerEventDispatcher(this.scheduler); - } - - protected Dispatcher createDispatcher() { - return new AsyncDispatcher(); - } - protected void addIfService(Object object) { if (object instanceof Service) { addService((Service) object); @@ -393,7 +381,7 @@ public class ResourceManager extends CompositeService implements Recoverable { } protected void startWepApp() { - webApp = WebApps.$for("cluster", masterService).at( + webApp = WebApps.$for("yarn", masterService).at( conf.get(YarnConfiguration.RM_WEBAPP_ADDRESS, YarnConfiguration.DEFAULT_RM_WEBAPP_ADDRESS)). 
start(new RMWebApp(this)); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java index 337f4816890..b394faa85d2 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java @@ -136,7 +136,7 @@ public class AMLauncher implements Runnable { containerMgrProxy.stopContainer(stopRequest); } - protected ContainerManager getContainerMgrProxy( + private ContainerManager getContainerMgrProxy( final ApplicationId applicationID) throws IOException { Container container = application.getMasterContainer(); @@ -173,11 +173,23 @@ public class AMLauncher implements Runnable { // Construct the actual Container ContainerLaunchContext container = applicationMasterContext.getAMContainerSpec(); - LOG.info("Command to launch container " - + containerID - + " : " - + StringUtils.arrayToString(container.getCommands().toArray( - new String[0]))); + StringBuilder mergedCommand = new StringBuilder(); + String failCount = Integer.toString(application.getAppAttemptId() + .getAttemptId()); + List commandList = new ArrayList(); + for (String str : container.getCommands()) { + // This is out-right wrong. AM FAIL count should be passed via env. + String result = + str.replaceFirst(ApplicationConstants.AM_FAIL_COUNT_STRING, + failCount); + mergedCommand.append(result).append(" "); + commandList.add(result); + } + container.setCommands(commandList); + /** add the failed count to the app master command line */ + + LOG.info("Command to launch container " + + containerID + " : " + mergedCommand); // Finalize the container container.setContainerId(containerID); @@ -191,11 +203,6 @@ public class AMLauncher implements Runnable { ContainerLaunchContext container) throws IOException { Map environment = container.getEnvironment(); - - // Set the AppAttemptId to be consumable by the AM. - environment.put(ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV, - application.getAppAttemptId().toString()); - if (UserGroupInformation.isSecurityEnabled()) { // TODO: Security enabled/disabled info should come from RM. 
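The AMLauncher hunk above splices the attempt's fail count into the AM command line by string substitution; the in-line comment itself flags this as "out-right wrong" and notes it should travel via the environment instead. A standalone sketch of just the substitution-and-join step, with a made-up placeholder token standing in for ApplicationConstants.AM_FAIL_COUNT_STRING:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class AmCommandSketch {
  // Hypothetical token; the real one is ApplicationConstants.AM_FAIL_COUNT_STRING.
  static final String AM_FAIL_COUNT_STRING = "<FAIL_COUNT>";

  // Replace the placeholder in each command token and log the merged command,
  // mirroring the loop in the AMLauncher hunk above.
  static List<String> substituteFailCount(List<String> commands, int attemptId) {
    String failCount = Integer.toString(attemptId);
    StringBuilder mergedCommand = new StringBuilder();
    List<String> commandList = new ArrayList<String>();
    for (String str : commands) {
      String result = str.replaceFirst(AM_FAIL_COUNT_STRING, failCount);
      mergedCommand.append(result).append(" ");
      commandList.add(result);
    }
    System.out.println("Command to launch container : " + mergedCommand);
    return commandList;
  }

  public static void main(String[] args) {
    List<String> cmds =
        Arrays.asList("java", "AppMaster", "--fail-count", "<FAIL_COUNT>");
    System.out.println(substituteFailCount(cmds, 2));
  }
}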
diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java index a25a4312b17..d1ef1d14004 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java @@ -42,16 +42,17 @@ public class ApplicationMasterLauncher extends AbstractService implements private final BlockingQueue masterEvents = new LinkedBlockingQueue(); - protected ApplicationTokenSecretManager applicationTokenSecretManager; + private ApplicationTokenSecretManager applicationTokenSecretManager; private ClientToAMSecretManager clientToAMSecretManager; - protected final RMContext context; + private final RMContext context; - public ApplicationMasterLauncher( - ApplicationTokenSecretManager applicationTokenSecretManager, - ClientToAMSecretManager clientToAMSecretManager, + public ApplicationMasterLauncher(ApplicationTokenSecretManager + applicationTokenSecretManager, ClientToAMSecretManager clientToAMSecretManager, RMContext context) { super(ApplicationMasterLauncher.class.getName()); this.context = context; + /* register to dispatcher */ + this.context.getDispatcher().register(AMLauncherEventType.class, this); this.launcherPool = new ThreadPoolExecutor(1, 10, 1, TimeUnit.HOURS, new LinkedBlockingQueue()); this.launcherHandlingThread = new LauncherThread(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java index 6e63e2248d6..2e739a98b99 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java @@ -24,6 +24,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; @@ -32,7 +33,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; * look at {@link RMAppImpl} for its implementation. This interface * exposes methods to access various updates in application status/report. 
*/ -public interface RMApp extends EventHandler { +public interface RMApp extends EventHandler{ /** * The application id for this {@link RMApp}. diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 94d04a8d127..9246d1838c7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -39,7 +39,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; -import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService; import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent; import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType; @@ -87,8 +86,7 @@ public class RMAppImpl implements RMApp { private long startTime; private long finishTime; private RMAppAttempt currentAttempt; - @SuppressWarnings("rawtypes") - private EventHandler handler; + private static final FinalTransition FINAL_TRANSITION = new FinalTransition(); private static final StateMachineFactory(RMAppState.NEW) + // TODO - ATTEMPT_KILLED not sent right now but should handle if + // attempt starts sending + // Transitions from NEW state .addTransition(RMAppState.NEW, RMAppState.SUBMITTED, RMAppEventType.START, new StartAppAttemptTransition()) @@ -115,7 +116,7 @@ public class RMAppImpl implements RMApp { .addTransition(RMAppState.SUBMITTED, RMAppState.ACCEPTED, RMAppEventType.APP_ACCEPTED) .addTransition(RMAppState.SUBMITTED, RMAppState.KILLED, - RMAppEventType.KILL, new KillAppAndAttemptTransition()) + RMAppEventType.KILL, new AppKilledTransition()) // Transitions from ACCEPTED state .addTransition(RMAppState.ACCEPTED, RMAppState.RUNNING, @@ -125,7 +126,7 @@ public class RMAppImpl implements RMApp { RMAppEventType.ATTEMPT_FAILED, new AttemptFailedTransition(RMAppState.SUBMITTED)) .addTransition(RMAppState.ACCEPTED, RMAppState.KILLED, - RMAppEventType.KILL, new KillAppAndAttemptTransition()) + RMAppEventType.KILL, new AppKilledTransition()) // Transitions from RUNNING state .addTransition(RMAppState.RUNNING, RMAppState.FINISHED, @@ -135,7 +136,7 @@ public class RMAppImpl implements RMApp { RMAppEventType.ATTEMPT_FAILED, new AttemptFailedTransition(RMAppState.SUBMITTED)) .addTransition(RMAppState.RUNNING, RMAppState.KILLED, - RMAppEventType.KILL, new KillAppAndAttemptTransition()) + RMAppEventType.KILL, new AppKilledTransition()) // Transitions from FINISHED state .addTransition(RMAppState.FINISHED, RMAppState.FINISHED, @@ -167,7 +168,6 @@ public class RMAppImpl implements RMApp { this.name = name; this.rmContext = rmContext; this.dispatcher = rmContext.getDispatcher(); - this.handler = dispatcher.getEventHandler(); this.conf = config; this.user = user; this.queue = queue; @@ -310,8 +310,7 @@ public class RMAppImpl implements RMApp { return 
BuilderUtils.newApplicationReport(this.applicationId, this.user, this.queue, this.name, host, rpcPort, clientToken, createApplicationState(this.stateMachine.getCurrentState()), - this.diagnostics.toString(), trackingUrl, - this.startTime, this.finishTime); + this.diagnostics.toString(), trackingUrl, this.startTime); } finally { this.readLock.unlock(); } @@ -403,7 +402,7 @@ public class RMAppImpl implements RMApp { submissionContext); attempts.put(appAttemptId, attempt); currentAttempt = attempt; - handler.handle( + dispatcher.getEventHandler().handle( new RMAppAttemptEvent(appAttemptId, RMAppAttemptEventType.START)); } @@ -420,23 +419,13 @@ public class RMAppImpl implements RMApp { }; } - private static class AppKilledTransition extends FinalTransition { - @Override + private static final class AppKilledTransition extends FinalTransition { public void transition(RMAppImpl app, RMAppEvent event) { app.diagnostics.append("Application killed by user."); super.transition(app, event); }; } - private static class KillAppAndAttemptTransition extends AppKilledTransition { - @SuppressWarnings("unchecked") - @Override - public void transition(RMAppImpl app, RMAppEvent event) { - app.handler.handle(new RMAppAttemptEvent(app.currentAttempt.getAppAttemptId(), - RMAppAttemptEventType.KILL)); - super.transition(app, event); - } - } private static final class AppRejectedTransition extends FinalTransition{ public void transition(RMAppImpl app, RMAppEvent event) { @@ -460,11 +449,11 @@ public class RMAppImpl implements RMApp { public void transition(RMAppImpl app, RMAppEvent event) { Set nodes = getNodesOnWhichAttemptRan(app); for (NodeId nodeId : nodes) { - app.handler.handle( + app.dispatcher.getEventHandler().handle( new RMNodeCleanAppEvent(nodeId, app.applicationId)); } app.finishTime = System.currentTimeMillis(); - app.handler.handle( + app.dispatcher.getEventHandler().handle( new RMAppManagerEvent(app.applicationId, RMAppManagerEventType.APP_COMPLETED)); }; @@ -481,13 +470,11 @@ public class RMAppImpl implements RMApp { @Override public RMAppState transition(RMAppImpl app, RMAppEvent event) { - - RMAppFailedAttemptEvent failedEvent = ((RMAppFailedAttemptEvent)event); + if (app.attempts.size() == app.maxRetries) { String msg = "Application " + app.getApplicationId() + " failed " + app.maxRetries - + " times due to " + failedEvent.getDiagnostics() - + ". Failing the application."; + + " times. Failing the application."; LOG.info(msg); app.diagnostics.append(msg); // Inform the node for app-finish diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java index aeb3d2af045..70747deacba 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java @@ -36,7 +36,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; * {@link YarnConfiguration#RM_AM_MAX_RETRIES}. For specific * implementation take a look at {@link RMAppAttemptImpl}. 
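The RMAppImpl hunks above adjust entries in a declarative transition table built with StateMachineFactory.addTransition(fromState, toState, eventType, hook). As a rough mental model only (plain JDK, state and event names invented for illustration, hooks omitted, nothing like the real YARN StateMachineFactory API beyond the shape), such a table can be thought of as:

import java.util.EnumMap;
import java.util.Map;

public class AppStateMachineSketch {
  enum State { NEW, SUBMITTED, ACCEPTED, RUNNING, FINISHED, FAILED, KILLED }
  enum EventType { START, APP_ACCEPTED, ATTEMPT_REGISTERED, ATTEMPT_FINISHED, ATTEMPT_FAILED, KILL }

  // transitions.get(from).get(event) -> to
  private final Map<State, Map<EventType, State>> transitions =
      new EnumMap<State, Map<EventType, State>>(State.class);
  private State current = State.NEW;

  void addTransition(State from, State to, EventType event) {
    Map<EventType, State> row = transitions.get(from);
    if (row == null) {
      row = new EnumMap<EventType, State>(EventType.class);
      transitions.put(from, row);
    }
    row.put(event, to);
  }

  void handle(EventType event) {
    Map<EventType, State> row = transitions.get(current);
    if (row == null || !row.containsKey(event)) {
      throw new IllegalStateException("Invalid event " + event + " in state " + current);
    }
    current = row.get(event);
  }

  public static void main(String[] args) {
    AppStateMachineSketch sm = new AppStateMachineSketch();
    // A few transitions in the spirit of the RMAppImpl hunks above.
    sm.addTransition(State.NEW, State.SUBMITTED, EventType.START);
    sm.addTransition(State.SUBMITTED, State.ACCEPTED, EventType.APP_ACCEPTED);
    sm.addTransition(State.ACCEPTED, State.RUNNING, EventType.ATTEMPT_REGISTERED);
    sm.addTransition(State.RUNNING, State.KILLED, EventType.KILL);
    sm.handle(EventType.START);
    sm.handle(EventType.APP_ACCEPTED);
    System.out.println("state = " + sm.current);  // prints state = ACCEPTED
  }
}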
*/ -public interface RMAppAttempt extends EventHandler { +public interface RMAppAttempt extends EventHandler{ /** * Get the application attempt id for this {@link RMAppAttempt}. @@ -79,7 +79,7 @@ public interface RMAppAttempt extends EventHandler { * Diagnostics information for the application attempt. * @return diagnostics information for the application attempt. */ - String getDiagnostics(); + StringBuilder getDiagnostics(); /** * Progress for the application attempt. diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 7f8ff82d6a1..7c6357defab 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -31,7 +31,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -48,7 +47,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent; @@ -106,10 +104,10 @@ public class RMAppAttemptImpl implements RMAppAttempt { private Container masterContainer; private float progress = 0; - private String host = "N/A"; + private String host; private int rpcPort; - private String trackingUrl = "N/A"; - private String finalState = "N/A"; + private String trackingUrl; + private String finalState; private final StringBuilder diagnostics = new StringBuilder(); private static final StateMachineFactory { this.httpPort = httpPort; this.totalCapability = capability; this.nodeAddress = hostName + ":" + cmPort; - this.httpAddress = hostName + ":" + httpPort; + this.httpAddress = hostName + ":" + httpPort;; this.node = node; this.nodeHealthStatus.setIsNodeHealthy(true); - this.nodeHealthStatus.setHealthReport("Healthy"); this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis()); this.latestHeartBeatResponse.setResponseId(0); @@ -223,18 +222,6 @@ public class RMNodeImpl implements RMNode, EventHandler { } } - private void setNodeHealthStatus(NodeHealthStatus status) - { - this.writeLock.lock(); - try { - 
this.nodeHealthStatus.setHealthReport(status.getHealthReport()); - this.nodeHealthStatus.setIsNodeHealthy(status.getIsNodeHealthy()); - this.nodeHealthStatus.setLastHealthReportTime(status.getLastHealthReportTime()); - } finally { - this.writeLock.unlock(); - } - } - @Override public RMNodeState getState() { this.readLock.lock(); @@ -358,10 +345,7 @@ public class RMNodeImpl implements RMNode, EventHandler { // Switch the last heartbeatresponse. rmNode.latestHeartBeatResponse = statusEvent.getLatestResponse(); - NodeHealthStatus remoteNodeHealthStatus = - statusEvent.getNodeHealthStatus(); - rmNode.setNodeHealthStatus(remoteNodeHealthStatus); - if (!remoteNodeHealthStatus.getIsNodeHealthy()) { + if (!statusEvent.getNodeHealthStatus().getIsNodeHealthy()) { // Inform the scheduler rmNode.context.getDispatcher().getEventHandler().handle( new NodeRemovedSchedulerEvent(rmNode)); @@ -408,9 +392,8 @@ public class RMNodeImpl implements RMNode, EventHandler { // Switch the last heartbeatresponse. rmNode.latestHeartBeatResponse = statusEvent.getLatestResponse(); - NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus(); - rmNode.setNodeHealthStatus(remoteNodeHealthStatus); - if (remoteNodeHealthStatus.getIsNodeHealthy()) { + + if (statusEvent.getNodeHealthStatus().getIsNodeHealthy()) { rmNode.context.getDispatcher().getEventHandler().handle( new NodeAddedSchedulerEvent(rmNode)); return RMNodeState.RUNNING; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index 6928cdb19d0..61c829507e2 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -32,8 +32,10 @@ import static org.apache.hadoop.metrics2.lib.Interns.info; import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MutableCounterInt; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.util.Self; import static org.apache.hadoop.yarn.server.resourcemanager.resource.Resources.*; import org.slf4j.LoggerFactory; @@ -280,56 +282,4 @@ public class QueueMetrics { parent.unreserveResource(user, res); } } - - public int getAppsSubmitted() { - return appsSubmitted.value(); - } - - public int getAppsRunning() { - return appsRunning.value(); - } - - public int getAppsPending() { - return appsPending.value(); - } - - public int getAppsCompleted() { - return appsCompleted.value(); - } - - public int getAppsKilled() { - return appsKilled.value(); - } - - public int getAppsFailed() { - return appsFailed.value(); - } - - public int getAllocatedGB() { - return allocatedGB.value(); - } - - public int getAllocatedContainers() { - return allocatedContainers.value(); - } - - public int getAvailableGB() { - return availableGB.value(); - } - - public 
int getPendingGB() { - return pendingGB.value(); - } - - public int getPendingContainers() { - return pendingContainers.value(); - } - - public int getReservedGB() { - return reservedGB.value(); - } - - public int getReservedContainers() { - return reservedContainers.value(); - } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java index 10913e09999..b4037aaeaf7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java @@ -207,18 +207,13 @@ public class SchedulerApp { .getDispatcher().getEventHandler(), this.rmContext .getContainerAllocationExpirer()); - // Add it to allContainers list. - newlyAllocatedContainers.add(rmContainer); - liveContainers.put(container.getId(), rmContainer); - // Update consumption and track allocations - appSchedulingInfo.allocate(type, node, priority, request, container); - Resources.addTo(currentConsumption, container.getResource()); - + // Inform the container rmContainer.handle( new RMContainerEvent(container.getId(), RMContainerEventType.START)); + Resources.addTo(currentConsumption, container.getResource()); if (LOG.isDebugEnabled()) { LOG.debug("allocate: applicationAttemptId=" + container.getId().getApplicationAttemptId() @@ -228,6 +223,12 @@ public class SchedulerApp { RMAuditLogger.logSuccess(getUser(), AuditConstants.ALLOC_CONTAINER, "SchedulerApp", getApplicationId(), container.getId()); + + // Add it to allContainers list. + newlyAllocatedContainers.add(rmContainer); + liveContainers.put(container.getId(), rmContainer); + + appSchedulingInfo.allocate(type, node, priority, request, container); return rmContainer; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index e67d371ee61..9a3b1c4da35 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -1046,20 +1046,19 @@ public class LeafQueue implements CSQueue { } private Container getContainer(RMContainer rmContainer, - SchedulerApp application, SchedulerNode node, - Resource capability, Priority priority) { + SchedulerApp application, SchedulerNode node, Resource capability) { return (rmContainer != null) ? 
rmContainer.getContainer() : - createContainer(application, node, capability, priority); + createContainer(application, node, capability); } public Container createContainer(SchedulerApp application, SchedulerNode node, - Resource capability, Priority priority) { + Resource capability) { Container container = BuilderUtils.newContainer(this.recordFactory, application.getApplicationAttemptId(), application.getNewContainerId(), - node.getNodeID(), node.getHttpAddress(), - capability, priority); + node.getNodeID(), + node.getHttpAddress(), capability); // If security is enabled, send the container-tokens too. if (UserGroupInformation.isSecurityEnabled()) { @@ -1100,7 +1099,7 @@ public class LeafQueue implements CSQueue { // Create the container if necessary Container container = - getContainer(rmContainer, application, node, capability, priority); + getContainer(rmContainer, application, node, capability); // Can we allocate a container on this node? int availableContainers = @@ -1153,17 +1152,14 @@ public class LeafQueue implements CSQueue { private void reserve(SchedulerApp application, Priority priority, SchedulerNode node, RMContainer rmContainer, Container container) { + rmContainer = application.reserve(node, priority, rmContainer, container); + node.reserveResource(application, priority, rmContainer); + // Update reserved metrics if this is the first reservation if (rmContainer == null) { getMetrics().reserveResource( application.getUser(), container.getResource()); } - - // Inform the application - rmContainer = application.reserve(node, priority, rmContainer, container); - - // Update the node - node.reserveResource(application, priority, rmContainer); } private void unreserve(SchedulerApp application, Priority priority, diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/NodeUpdateSchedulerEvent.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/NodeUpdateSchedulerEvent.java index ff51d62d910..9f3bc1cce7a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/NodeUpdateSchedulerEvent.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/NodeUpdateSchedulerEvent.java @@ -19,7 +19,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.event; import java.util.List; +import java.util.Map; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 7a90c5b6fac..752b81ce5de 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -291,7 +291,7 @@ public class FifoScheduler implements ResourceScheduler { @SuppressWarnings("unchecked") private synchronized void addApplication(ApplicationAttemptId appAttemptId, - String user) { + String queueName, String user) { // TODO: Fix store SchedulerApp schedulerApp = new SchedulerApp(appAttemptId, user, DEFAULT_QUEUE, @@ -528,8 +528,7 @@ public class FifoScheduler implements ResourceScheduler { application.getApplicationAttemptId(), application.getNewContainerId(), node.getRMNode().getNodeID(), - node.getRMNode().getHttpAddress(), - capability, priority); + node.getRMNode().getHttpAddress(), capability); // If security is enabled, send the container-tokens too. if (UserGroupInformation.isSecurityEnabled()) { @@ -628,7 +627,7 @@ public class FifoScheduler implements ResourceScheduler { { AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event; addApplication(appAddedEvent.getApplicationAttemptId(), appAddedEvent - .getUser()); + .getQueue(), appAddedEvent.getUser()); } break; case APP_REMOVED: diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java index 1d074e3160b..a621cc10472 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java @@ -76,7 +76,7 @@ class NodesPage extends RmView { // TODO: FIXME Vinodkv // td(String.valueOf(ni.getUsedResource().getMemory())). // td(String.valueOf(ni.getAvailableResource().getMemory())). - td("n/a")._(); + _(); } tbody._()._(); } @@ -100,7 +100,7 @@ class NodesPage extends RmView { // rack, nodeid, host, healthStatus, health update ts, health report, // containers, memused, memavail append(", aoColumns:[null, null, null, null, null, null, "). - append("{sType:'title-numeric', bSearchable:false}]}"). + append("{bSearchable:false},{bSearchable:false},{bSearchable:false}]}"). 
toString(); } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java index 698bc3c933a..234f93e2f0c 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java @@ -102,7 +102,7 @@ public class RmController extends Controller { .getMasterContainer(); if (masterContainer != null) { String url = join("http://", masterContainer.getNodeHttpAddress(), - "/node", "/containerlogs/", + "/yarn", "/containerlogs/", ConverterUtils.toString(masterContainer.getId())); info._("AM container logs:", url, url); } else { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java index 727cd1a2323..9a9ae2f51cd 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java @@ -38,7 +38,6 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; -import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.Records; public class MockAM { @@ -129,7 +128,7 @@ public class MockAM { req.setHostName(resource); req.setNumContainers(containers); Priority pri = Records.newRecord(Priority.class); - pri.setPriority(priority); + pri.setPriority(1); req.setPriority(pri); Resource capability = Records.newRecord(Resource.class); capability.setMemory(memory); @@ -140,8 +139,11 @@ public class MockAM { public AMResponse allocate( List resourceRequest, List releases) throws Exception { - AllocateRequest req = BuilderUtils.newAllocateRequest(attemptId, - ++responseId, 0F, resourceRequest, releases); + AllocateRequest req = Records.newRecord(AllocateRequest.class); + req.setResponseId(++responseId); + req.setApplicationAttemptId(attemptId); + req.addAllAsks(resourceRequest); + req.addAllReleases(releases); AllocateResponse resp = amRMProtocol.allocate(req); return resp.getAMResponse(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index d2a9a11182f..4be27399672 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -22,9 +22,9 @@ import junit.framework.Assert; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -74,17 +74,11 @@ public class MockRM extends ResourceManager { Assert.assertEquals("App state is not correct (timedout)", finalState, app.getState()); } - - // get new application id - public GetNewApplicationResponse getNewAppId() throws Exception { - ClientRMProtocol client = getClientRMService(); - return client.getNewApplication(Records.newRecord(GetNewApplicationRequest.class)); - } //client public RMApp submitApp(int masterMemory) throws Exception { ClientRMProtocol client = getClientRMService(); - GetNewApplicationResponse resp = client.getNewApplication(Records.newRecord(GetNewApplicationRequest.class)); + GetNewApplicationIdResponse resp = client.getNewApplicationId(Records.newRecord(GetNewApplicationIdRequest.class)); ApplicationId appId = resp.getApplicationId(); SubmitApplicationRequest req = Records.newRecord(SubmitApplicationRequest.class); @@ -95,7 +89,7 @@ public class MockRM extends ResourceManager { sub.setUser(""); ContainerLaunchContext clc = Records.newRecord(ContainerLaunchContext.class); - Resource capability = Records.newRecord(Resource.class); + Resource capability = Records.newRecord(Resource.class); capability.setMemory(masterMemory); clc.setResource(capability); sub.setAMContainerSpec(clc); @@ -115,9 +109,9 @@ public class MockRM extends ResourceManager { public void killApp(ApplicationId appId) throws Exception { ClientRMProtocol client = getClientRMService(); - KillApplicationRequest req = Records.newRecord(KillApplicationRequest.class); + FinishApplicationRequest req = Records.newRecord(FinishApplicationRequest.class); req.setApplicationId(appId); - client.forceKillApplication(req); + client.finishApplication(req); } //from AMLauncher @@ -201,7 +195,6 @@ public class MockRM extends ResourceManager { }; } - @Override protected AdminService createAdminService() { return new AdminService(getConfig(), scheduler, getRMContext(), this.nodesListManager){ diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java index 72ade5c1da8..a7b5d02c914 100644 --- 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java @@ -45,7 +45,6 @@ import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -185,9 +184,7 @@ public class NodeManager implements ContainerManager { Container container = BuilderUtils.newContainer(containerLaunchContext.getContainerId(), this.nodeId, nodeHttpAddress, - containerLaunchContext.getResource(), - null // DKDC - Doesn't matter - ); + containerLaunchContext.getResource()); applicationContainers.add(container); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java deleted file mode 100644 index a12049f9e82..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java +++ /dev/null @@ -1,159 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ - -package org.apache.hadoop.yarn.server.resourcemanager; - -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.yarn.api.ApplicationConstants; -import org.apache.hadoop.yarn.api.ContainerManager; -import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusResponse; -import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; -import org.apache.hadoop.yarn.api.protocolrecords.StartContainerResponse; -import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; -import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.exceptions.YarnRemoteException; -import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncher; -import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; -import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; -import org.apache.log4j.Level; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; -import org.junit.Assert; -import org.junit.Test; - -public class TestApplicationMasterLauncher { - - private static final Log LOG = LogFactory - .getLog(TestApplicationMasterLauncher.class); - - private static final class MyContainerManagerImpl implements - ContainerManager { - - boolean launched = false; - boolean cleanedup = false; - String attemptIdAtContainerManager = null; - - @Override - public StartContainerResponse - startContainer(StartContainerRequest request) - throws YarnRemoteException { - LOG.info("Container started by MyContainerManager: " + request); - launched = true; - attemptIdAtContainerManager = request.getContainerLaunchContext() - .getEnvironment().get( - ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV); - return null; - } - - @Override - public StopContainerResponse stopContainer(StopContainerRequest request) - throws YarnRemoteException { - LOG.info("Container cleaned up by MyContainerManager"); - cleanedup = true; - return null; - } - - @Override - public GetContainerStatusResponse getContainerStatus( - GetContainerStatusRequest request) throws YarnRemoteException { - return null; - } - - } - - private static final class MockRMWithCustomAMLauncher extends MockRM { - - private final ContainerManager containerManager; - - public MockRMWithCustomAMLauncher(ContainerManager containerManager) { - this.containerManager = containerManager; - } - - @Override - protected ApplicationMasterLauncher createAMLauncher() { - return new ApplicationMasterLauncher(super.appTokenSecretManager, - super.clientToAMSecretManager, getRMContext()) { - @Override - protected Runnable createRunnableLauncher(RMAppAttempt application, - AMLauncherEventType event) { - return new AMLauncher(context, application, event, - applicationTokenSecretManager, clientToAMSecretManager, - getConfig()) { - @Override - protected ContainerManager getContainerMgrProxy( - ApplicationId applicationID) throws IOException { - return containerManager; - } - }; - } - }; - } - } - - @Test - public void 
testAMLaunchAndCleanup() throws Exception { - Logger rootLogger = LogManager.getRootLogger(); - rootLogger.setLevel(Level.DEBUG); - MyContainerManagerImpl containerManager = new MyContainerManagerImpl(); - MockRMWithCustomAMLauncher rm = new MockRMWithCustomAMLauncher( - containerManager); - rm.start(); - MockNM nm1 = rm.registerNode("h1:1234", 5120); - - RMApp app = rm.submitApp(2000); - - // kick the scheduling - nm1.nodeHeartbeat(true); - - int waitCount = 0; - while (containerManager.launched == false && waitCount++ < 20) { - LOG.info("Waiting for AM Launch to happen.."); - Thread.sleep(1000); - } - Assert.assertTrue(containerManager.launched); - - RMAppAttempt attempt = app.getCurrentAppAttempt(); - ApplicationAttemptId appAttemptId = attempt.getAppAttemptId(); - Assert.assertEquals(appAttemptId.toString(), - containerManager.attemptIdAtContainerManager); - - MockAM am = new MockAM(rm.getRMContext(), rm - .getApplicationMasterService(), appAttemptId); - am.registerAppAttempt(); - am.unregisterAppAttempt(); - - waitCount = 0; - while (containerManager.cleanedup == false && waitCount++ < 20) { - LOG.info("Waiting for AM Cleanup to happen.."); - Thread.sleep(1000); - } - Assert.assertTrue(containerManager.cleanedup); - - am.waitForState(RMAppAttemptState.FINISHED); - rm.stop(); - } -} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java index 3bba11e1fb1..03941e3625d 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java @@ -25,7 +25,6 @@ import junit.framework.Assert; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -41,20 +40,6 @@ public class TestRM { private static final Log LOG = LogFactory.getLog(TestRM.class); - @Test - public void testGetNewAppId() throws Exception { - Logger rootLogger = LogManager.getRootLogger(); - rootLogger.setLevel(Level.DEBUG); - MockRM rm = new MockRM(); - rm.start(); - - GetNewApplicationResponse resp = rm.getNewAppId(); - assert (resp.getApplicationId().getId() != 0); - assert (resp.getMinimumResourceCapability().getMemory() > 0); - assert (resp.getMaximumResourceCapability().getMemory() > 0); - rm.stop(); - } - @Test public void testAppWithNoContainers() throws Exception { Logger rootLogger = LogManager.getRootLogger(); @@ -134,7 +119,6 @@ public class TestRM { public static void main(String[] args) throws Exception { TestRM t = new TestRM(); - t.testGetNewAppId(); t.testAppWithNoContainers(); t.testAppOnMultiNode(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java 
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java index 536aa672d79..60a227bc6d4 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java @@ -18,16 +18,12 @@ package org.apache.hadoop.yarn.server.resourcemanager; -import static org.junit.Assert.assertNotNull; - import java.io.IOException; -import java.util.Collection; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetworkTopology; -import org.apache.hadoop.yarn.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store; @@ -157,23 +153,6 @@ public class TestResourceManager { LOG.info("--- END: testResourceAllocation ---"); } - - @Test - public void testNodeHealthReportIsNotNull() throws Exception{ - String host1 = "host1"; - final int memory = 4 * 1024; - org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm1 = - registerNode(host1, 1234, 2345, NetworkTopology.DEFAULT_RACK, memory); - nm1.heartbeat(); - nm1.heartbeat(); - Collection values = resourceManager.getRMContext().getRMNodes().values(); - for (RMNode ni : values) - { - NodeHealthStatus nodeHealthStatus = ni.getNodeHealthStatus(); - String healthReport = nodeHealthStatus.getHealthReport(); - assertNotNull(healthReport); - } - } private void checkResourceUsage( org.apache.hadoop.yarn.server.resourcemanager.NodeManager... 
nodes ) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java index 3bc55473423..61d678ea01c 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java @@ -32,7 +32,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; -import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -78,14 +77,13 @@ public class TestAMRMRPCResponseId { am.registerAppAttempt(); - AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest(attempt - .getAppAttemptId(), 0, 0F, null, null); + AllocateRequest allocateRequest = recordFactory.newRecordInstance(AllocateRequest.class); + allocateRequest.setApplicationAttemptId(attempt.getAppAttemptId()); AMResponse response = amService.allocate(allocateRequest).getAMResponse(); Assert.assertEquals(1, response.getResponseId()); Assert.assertFalse(response.getReboot()); - allocateRequest = BuilderUtils.newAllocateRequest(attempt - .getAppAttemptId(), response.getResponseId(), 0F, null, null); + allocateRequest.setResponseId(response.getResponseId()); response = amService.allocate(allocateRequest).getAMResponse(); Assert.assertEquals(2, response.getResponseId()); @@ -93,9 +91,8 @@ public class TestAMRMRPCResponseId { response = amService.allocate(allocateRequest).getAMResponse(); Assert.assertEquals(2, response.getResponseId()); - /** try sending old request again **/ - allocateRequest = BuilderUtils.newAllocateRequest(attempt - .getAppAttemptId(), 0, 0F, null, null); + /** try sending old **/ + allocateRequest.setResponseId(0); response = amService.allocate(allocateRequest).getAMResponse(); Assert.assertTrue(response.getReboot()); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterLauncher.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterLauncher.java new file mode 100644 index 00000000000..8cc948400e1 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterLauncher.java @@ -0,0 +1,193 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager; + +import java.util.concurrent.atomic.AtomicInteger; + +import junit.framework.Assert; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationState; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.security.ApplicationTokenSecretManager; +import org.apache.hadoop.yarn.security.client.ClientToAMSecretManager; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl; +import org.apache.hadoop.yarn.util.Records; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Testing the applications manager launcher. 
+ * + */ +public class TestApplicationMasterLauncher { +// private static final Log LOG = LogFactory.getLog(TestApplicationMasterLauncher.class); +// private static RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); +// private ApplicationMasterLauncher amLauncher; +// private DummyEventHandler asmHandle; +// private final ApplicationTokenSecretManager applicationTokenSecretManager = +// new ApplicationTokenSecretManager(); +// private final ClientToAMSecretManager clientToAMSecretManager = +// new ClientToAMSecretManager(); +// +// Object doneLaunching = new Object(); +// AtomicInteger launched = new AtomicInteger(); +// AtomicInteger cleanedUp = new AtomicInteger(); +// private RMContext context = new RMContextImpl(new MemStore(), null, null, +// null); +// +// private Configuration conf = new Configuration(); +// +// private class DummyEventHandler implements EventHandler { +// @Override +// public void handle(ApplicationEvent appEvent) { +// ApplicationEventType event = appEvent.getType(); +// switch (event) { +// case FINISH: +// synchronized(doneLaunching) { +// doneLaunching.notify(); +// } +// break; +// +// default: +// break; +// } +// } +// } +// +// private class DummyLaunch implements Runnable { +// public void run() { +// launched.incrementAndGet(); +// } +// } +// +// private class DummyCleanUp implements Runnable { +// private EventHandler eventHandler; +// +// public DummyCleanUp(EventHandler eventHandler) { +// this.eventHandler = eventHandler; +// } +// public void run() { +// cleanedUp.incrementAndGet(); +// eventHandler.handle(new AMFinishEvent(null, +// ApplicationState.COMPLETED, "", "")); +// } +// } +// +// private class DummyApplicationMasterLauncher extends +// ApplicationMasterLauncher { +// private EventHandler eventHandler; +// +// public DummyApplicationMasterLauncher( +// ApplicationTokenSecretManager applicationTokenSecretManager, +// ClientToAMSecretManager clientToAMSecretManager, +// EventHandler eventHandler) { +// super(applicationTokenSecretManager, clientToAMSecretManager, context); +// this.eventHandler = eventHandler; +// } +// +// @Override +// protected Runnable createRunnableLauncher(RMAppAttempt application, +// AMLauncherEventType event) { +// Runnable r = null; +// switch (event) { +// case LAUNCH: +// r = new DummyLaunch(); +// break; +// case CLEANUP: +// r = new DummyCleanUp(eventHandler); +// default: +// break; +// } +// return r; +// } +// } +// +// @Before +// public void setUp() { +// asmHandle = new DummyEventHandler(); +// amLauncher = new DummyApplicationMasterLauncher(applicationTokenSecretManager, +// clientToAMSecretManager, asmHandle); +// context.getDispatcher().init(conf); +// amLauncher.init(conf); +// context.getDispatcher().start(); +// amLauncher.start(); +// +// } +// +// @After +// public void tearDown() { +// amLauncher.stop(); +// } +// +// @Test +// public void testAMLauncher() throws Exception { +// +// // Creat AppId +// ApplicationId appId = recordFactory +// .newRecordInstance(ApplicationId.class); +// appId.setClusterTimestamp(System.currentTimeMillis()); +// appId.setId(1); +// +// ApplicationAttemptId appAttemptId = Records +// .newRecord(ApplicationAttemptId.class); +// appAttemptId.setApplicationId(appId); +// appAttemptId.setAttemptId(1); +// +// // Create submissionContext +// ApplicationSubmissionContext submissionContext = recordFactory +// .newRecordInstance(ApplicationSubmissionContext.class); +// submissionContext.setApplicationId(appId); +// 
submissionContext.setUser("dummyuser"); +// +// RMAppAttempt appAttempt = new RMAppAttemptImpl(appAttemptId, +// "dummyclienttoken", context, null, submissionContext); +// +// // Tell AMLauncher to launch the appAttempt +// amLauncher.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, +// appAttempt)); +// +// // Tell AMLauncher to cleanup the appAttempt +// amLauncher.handle(new AMLauncherEvent(AMLauncherEventType.CLEANUP, +// appAttempt)); +// +// synchronized (doneLaunching) { +// doneLaunching.wait(10000); +// } +// Assert.assertEquals(1, launched.get()); +// Assert.assertEquals(1, cleanedUp.get()); +// } +} \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/InlineDispatcher.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/InlineDispatcher.java index d771a61d864..51eb8cf2ec7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/InlineDispatcher.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/InlineDispatcher.java @@ -1,57 +1,50 @@ /** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.hadoop.yarn.server.resourcemanager.resourcetracker; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; -public class InlineDispatcher extends AsyncDispatcher { - private static final Log LOG = LogFactory.getLog(InlineDispatcher.class); - - private class TestEventHandler implements EventHandler { +class InlineDispatcher extends AsyncDispatcher { + private class InlineEventHandler implements EventHandler { + private final InlineDispatcher dispatcher; + public InlineEventHandler(InlineDispatcher dispatcher) { + this.dispatcher = dispatcher; + } @Override public void handle(Event event) { - dispatch(event); + this.dispatcher.dispatch(event); } } - @Override - protected void dispatch(Event event) { - LOG.info("Dispatching the event " + event.getClass().getName() + "." - + event.toString()); - - Class type = event.getType().getDeclaringClass(); - if (eventDispatchers.get(type) != null) { - eventDispatchers.get(type).handle(event); - } + public void dispatch(Event event) { + super.dispatch(event); } @Override public EventHandler getEventHandler() { - return new TestEventHandler(); + return new InlineEventHandler(this); } - + static class EmptyEventHandler implements EventHandler { @Override public void handle(Event event) { - //do nothing - } + ; // ignore + } } } \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java index 03229c34b48..4fb6486c2c7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java @@ -162,7 +162,6 @@ public class MockRMApp implements RMApp { this.diagnostics = new StringBuilder(diag); } - @Override public void handle(RMAppEvent event) { } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index 24408821e2d..56b3f4b18af 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -1,27 +1,26 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
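Aside, not part of the patch above: both the removed and the added InlineDispatcher keep the same idea, a test-only dispatcher whose getEventHandler() delivers events on the calling thread instead of queueing them for AsyncDispatcher's own thread, which keeps unit tests deterministic. A condensed sketch of that pattern follows, using only the AsyncDispatcher, Event, and EventHandler types imported in the file above; the class name is illustrative.

import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Event;
import org.apache.hadoop.yarn.event.EventHandler;

// Illustrative only: same-thread event delivery for tests.
class SynchronousDispatcherSketch extends AsyncDispatcher {
  // Widen dispatch() so the handler below can call it directly,
  // as the InlineDispatcher above does.
  @Override
  public void dispatch(Event event) {
    super.dispatch(event);
  }

  @Override
  public EventHandler getEventHandler() {
    return new EventHandler() {
      @Override
      public void handle(Event event) {
        dispatch(event); // deliver inline; no dispatcher thread involved
      }
    };
  }
}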
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ package org.apache.hadoop.yarn.server.resourcemanager.rmapp; import static org.mockito.Mockito.mock; import java.io.IOException; -import java.util.List; import junit.framework.Assert; @@ -33,62 +32,46 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; +import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.ApplicationTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore; -import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; - import org.junit.Before; -import org.junit.After; import org.junit.Test; public class TestRMAppTransitions { - static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class); - + private static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class); + private RMContext rmContext; private static int maxRetries = 4; private static int appId = 1; - private 
AsyncDispatcher rmDispatcher; // ignore all the RM application attempt events private static final class TestApplicationAttemptEventDispatcher implements - EventHandler { + EventHandler { - private final RMContext rmContext; - public TestApplicationAttemptEventDispatcher(RMContext rmContext) { - this.rmContext = rmContext; + public TestApplicationAttemptEventDispatcher() { } @Override public void handle(RMAppAttemptEvent event) { - ApplicationId appId = event.getApplicationAttemptId().getApplicationId(); - RMApp rmApp = this.rmContext.getRMApps().get(appId); - if (rmApp != null) { - try { - rmApp.getRMAppAttempt(event.getApplicationAttemptId()).handle(event); - } catch (Throwable t) { - LOG.error("Error in handling event type " + event.getType() - + " for application " + appId, t); - } - } } } // handle all the RM application events - same as in ResourceManager.java private static final class TestApplicationEventDispatcher implements - EventHandler { + EventHandler { private final RMContext rmContext; public TestApplicationEventDispatcher(RMContext rmContext) { @@ -112,23 +95,19 @@ public class TestRMAppTransitions { @Before public void setUp() throws Exception { - AsyncDispatcher rmDispatcher = new AsyncDispatcher(); Configuration conf = new Configuration(); - rmDispatcher = new InlineDispatcher(); + Dispatcher rmDispatcher = new AsyncDispatcher(); - ContainerAllocationExpirer containerAllocationExpirer = - mock(ContainerAllocationExpirer.class); + ContainerAllocationExpirer containerAllocationExpirer = mock(ContainerAllocationExpirer.class); AMLivelinessMonitor amLivelinessMonitor = mock(AMLivelinessMonitor.class); this.rmContext = new RMContextImpl(new MemStore(), rmDispatcher, - containerAllocationExpirer, amLivelinessMonitor); + containerAllocationExpirer, amLivelinessMonitor); rmDispatcher.register(RMAppAttemptEventType.class, - new TestApplicationAttemptEventDispatcher(this.rmContext)); + new TestApplicationAttemptEventDispatcher()); rmDispatcher.register(RMAppEventType.class, new TestApplicationEventDispatcher(rmContext)); - rmDispatcher.init(conf); - rmDispatcher.start(); } protected RMApp createNewTestApp() { @@ -143,23 +122,22 @@ public class TestRMAppTransitions { String clientTokenStr = "bogusstring"; ApplicationStore appStore = mock(ApplicationStore.class); YarnScheduler scheduler = mock(YarnScheduler.class); - ApplicationMasterService masterService = - new ApplicationMasterService(rmContext, - new ApplicationTokenSecretManager(), scheduler); + ApplicationMasterService masterService = new ApplicationMasterService(rmContext, + new ApplicationTokenSecretManager(), scheduler); RMApp application = new RMAppImpl(applicationId, rmContext, - conf, name, user, - queue, submissionContext, clientTokenStr, - appStore, scheduler, - masterService); + conf, name, user, + queue, submissionContext, clientTokenStr, + appStore, scheduler, + masterService); testAppStartState(applicationId, user, name, queue, application); return application; } // Test expected newly created app state - private static void testAppStartState(ApplicationId applicationId, - String user, String name, String queue, RMApp application) { + private static void testAppStartState(ApplicationId applicationId, String user, + String name, String queue, RMApp application) { Assert.assertTrue("application start time is not greater then 0", application.getStartTime() > 0); Assert.assertTrue("application start time is before currentTime", @@ -213,14 +191,6 @@ public class TestRMAppTransitions { "Application killed by user.", 
diag.toString()); } - private static void assertAppAndAttemptKilled(RMApp application) { - assertKilled(application); - /* also check if the attempt is killed */ - Assert.assertEquals( RMAppAttemptState.KILLED, - application.getCurrentAppAttempt().getAppAttemptState() - ); - } - private static void assertFailed(RMApp application, String regex) { assertTimesAtFinish(application); assertAppState(RMAppState.FAILED, application); @@ -232,8 +202,7 @@ public class TestRMAppTransitions { protected RMApp testCreateAppSubmitted() throws IOException { RMApp application = createNewTestApp(); // NEW => SUBMITTED event RMAppEventType.START - RMAppEvent event = - new RMAppEvent(application.getApplicationId(), RMAppEventType.START); + RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.START); application.handle(event); assertStartTimeSet(application); assertAppState(RMAppState.SUBMITTED, application); @@ -243,9 +212,7 @@ public class TestRMAppTransitions { protected RMApp testCreateAppAccepted() throws IOException { RMApp application = testCreateAppSubmitted(); // SUBMITTED => ACCEPTED event RMAppEventType.APP_ACCEPTED - RMAppEvent event = - new RMAppEvent(application.getApplicationId(), - RMAppEventType.APP_ACCEPTED); + RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_ACCEPTED); application.handle(event); assertStartTimeSet(application); assertAppState(RMAppState.ACCEPTED, application); @@ -255,9 +222,7 @@ public class TestRMAppTransitions { protected RMApp testCreateAppRunning() throws IOException { RMApp application = testCreateAppAccepted(); // ACCEPTED => RUNNING event RMAppEventType.ATTEMPT_REGISTERED - RMAppEvent event = - new RMAppEvent(application.getApplicationId(), - RMAppEventType.ATTEMPT_REGISTERED); + RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_REGISTERED); application.handle(event); assertStartTimeSet(application); assertAppState(RMAppState.RUNNING, application); @@ -267,9 +232,7 @@ public class TestRMAppTransitions { protected RMApp testCreateAppFinished() throws IOException { RMApp application = testCreateAppRunning(); // RUNNING => FINISHED event RMAppEventType.ATTEMPT_FINISHED - RMAppEvent event = - new RMAppEvent(application.getApplicationId(), - RMAppEventType.ATTEMPT_FINISHED); + RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_FINISHED); application.handle(event); assertAppState(RMAppState.FINISHED, application); assertTimesAtFinish(application); @@ -288,8 +251,7 @@ public class TestRMAppTransitions { RMApp application = createNewTestApp(); // NEW => KILLED event RMAppEventType.KILL - RMAppEvent event = - new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); application.handle(event); assertKilled(application); } @@ -301,8 +263,7 @@ public class TestRMAppTransitions { RMApp application = createNewTestApp(); // NEW => FAILED event RMAppEventType.APP_REJECTED String rejectedText = "Test Application Rejected"; - RMAppEvent event = - new RMAppRejectedEvent(application.getApplicationId(), rejectedText); + RMAppEvent event = new RMAppRejectedEvent(application.getApplicationId(), rejectedText); application.handle(event); assertFailed(application, rejectedText); } @@ -314,8 +275,7 @@ public class TestRMAppTransitions { RMApp application = testCreateAppSubmitted(); // SUBMITTED => FAILED event RMAppEventType.APP_REJECTED String 
rejectedText = "app rejected"; - RMAppEvent event = - new RMAppRejectedEvent(application.getApplicationId(), rejectedText); + RMAppEvent event = new RMAppRejectedEvent(application.getApplicationId(), rejectedText); application.handle(event); assertFailed(application, rejectedText); } @@ -327,9 +287,8 @@ public class TestRMAppTransitions { RMApp application = testCreateAppAccepted(); // SUBMITTED => KILLED event RMAppEventType.KILL RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); - this.rmContext.getRMApps().putIfAbsent(application.getApplicationId(), application); application.handle(event); - assertAppAndAttemptKilled(application); + assertKilled(application); } @Test @@ -339,26 +298,18 @@ public class TestRMAppTransitions { RMApp application = testCreateAppAccepted(); // ACCEPTED => ACCEPTED event RMAppEventType.RMAppEventType.ATTEMPT_FAILED for (int i=1; i FAILED event RMAppEventType.RMAppEventType.ATTEMPT_FAILED - // after max retries - String message = "Test fail"; - RMAppEvent event = - new RMAppFailedAttemptEvent(application.getApplicationId(), - RMAppEventType.ATTEMPT_FAILED, message); + // ACCEPTED => FAILED event RMAppEventType.RMAppEventType.ATTEMPT_FAILED after max retries + RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED); application.handle(event); - assertFailed(application, ".*" + message + ".*Failing the application.*"); + assertFailed(application, ".*Failing the application.*"); } @Test @@ -367,8 +318,7 @@ public class TestRMAppTransitions { RMApp application = testCreateAppAccepted(); // ACCEPTED => KILLED event RMAppEventType.KILL - RMAppEvent event = - new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); application.handle(event); assertKilled(application); } @@ -379,8 +329,7 @@ public class TestRMAppTransitions { RMApp application = testCreateAppRunning(); // RUNNING => KILLED event RMAppEventType.KILL - RMAppEvent event = - new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); application.handle(event); assertKilled(application); } @@ -392,35 +341,25 @@ public class TestRMAppTransitions { RMApp application = testCreateAppRunning(); RMAppAttempt appAttempt = application.getCurrentAppAttempt(); int expectedAttemptId = 1; - Assert.assertEquals(expectedAttemptId, - appAttempt.getAppAttemptId().getAttemptId()); + Assert.assertEquals(expectedAttemptId, appAttempt.getAppAttemptId().getAttemptId()); // RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED for (int i=1; i FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED - // after max retries - RMAppEvent event = - new RMAppFailedAttemptEvent(application.getApplicationId(), - RMAppEventType.ATTEMPT_FAILED, ""); + // RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED after max retries + RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED); application.handle(event); assertFailed(application, ".*Failing the application.*"); @@ -437,8 +376,7 @@ public class TestRMAppTransitions { RMApp application = testCreateAppFinished(); // FINISHED => FINISHED event RMAppEventType.KILL - RMAppEvent event = - new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); 
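Aside, not part of the patch above: the TestRMAppTransitions hunks in this region all follow one shape, build an RMAppEvent for the application id, hand it to RMApp.handle(), then assert the resulting RMAppState. A minimal sketch of that shape, assuming the rmapp types used in the file above; the helper name expectTransition is illustrative and not part of the patch.

import junit.framework.Assert;

import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;

// Illustrative only: one event in, one expected state out.
final class RMAppTransitionSketch {
  static void expectTransition(RMApp application, RMAppEventType type,
      RMAppState expected) {
    application.handle(new RMAppEvent(application.getApplicationId(), type));
    Assert.assertEquals(expected, application.getState());
  }
}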
application.handle(event); assertTimesAtFinish(application); assertAppState(RMAppState.FINISHED, application); @@ -454,32 +392,25 @@ public class TestRMAppTransitions { RMApp application = testCreateAppRunning(); // RUNNING => KILLED event RMAppEventType.KILL - RMAppEvent event = - new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); application.handle(event); assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); // KILLED => KILLED event RMAppEventType.ATTEMPT_FINISHED - event = - new RMAppEvent(application.getApplicationId(), - RMAppEventType.ATTEMPT_FINISHED); + event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_FINISHED); application.handle(event); assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); // KILLED => KILLED event RMAppEventType.ATTEMPT_FAILED - event = - new RMAppFailedAttemptEvent(application.getApplicationId(), - RMAppEventType.ATTEMPT_FAILED, ""); + event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED); application.handle(event); assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); // KILLED => KILLED event RMAppEventType.ATTEMPT_KILLED - event = - new RMAppEvent(application.getApplicationId(), - RMAppEventType.ATTEMPT_KILLED); + event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_KILLED); application.handle(event); assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java deleted file mode 100644 index 03a4ba07441..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ /dev/null @@ -1,403 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ -package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; -import static org.mockito.Matchers.*; -import static org.mockito.Mockito.*; - -import java.util.Collections; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.yarn.MockApps; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; -import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.event.AsyncDispatcher; -import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService; -import org.apache.hadoop.yarn.server.resourcemanager.RMContext; -import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; -import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; -import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; -import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore; -import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRejectedEvent; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -public class TestRMAppAttemptTransitions { - - private static final Log LOG = - LogFactory.getLog(TestRMAppAttemptTransitions.class); - - private static final String EMPTY_DIAGNOSTICS = ""; - - private RMContext rmContext; - private YarnScheduler scheduler; - private ApplicationMasterService masterService; - private ApplicationMasterLauncher applicationMasterLauncher; - - private RMApp application; - private RMAppAttempt applicationAttempt; - - private final class TestApplicationAttemptEventDispatcher implements - EventHandler { - - @Override - public void handle(RMAppAttemptEvent event) { - ApplicationAttemptId appID = event.getApplicationAttemptId(); - 
assertEquals(applicationAttempt.getAppAttemptId(), appID); - try { - applicationAttempt.handle(event); - } catch (Throwable t) { - LOG.error("Error in handling event type " + event.getType() - + " for application " + appID, t); - } - } - } - - // handle all the RM application events - same as in ResourceManager.java - private final class TestApplicationEventDispatcher implements - EventHandler { - @Override - public void handle(RMAppEvent event) { - assertEquals(application.getApplicationId(), event.getApplicationId()); - try { - application.handle(event); - } catch (Throwable t) { - LOG.error("Error in handling event type " + event.getType() - + " for application " + application.getApplicationId(), t); - } - } - } - - private final class TestSchedulerEventDispatcher implements - EventHandler { - @Override - public void handle(SchedulerEvent event) { - scheduler.handle(event); - } - } - - private final class TestAMLauncherEventDispatcher implements - EventHandler { - @Override - public void handle(AMLauncherEvent event) { - applicationMasterLauncher.handle(event); - } - } - - private static int appId = 1; - - @Before - public void setUp() throws Exception { - InlineDispatcher rmDispatcher = new InlineDispatcher(); - - ContainerAllocationExpirer containerAllocationExpirer = - mock(ContainerAllocationExpirer.class); - AMLivelinessMonitor amLivelinessMonitor = mock(AMLivelinessMonitor.class); - rmContext = new RMContextImpl(new MemStore(), rmDispatcher, - containerAllocationExpirer, amLivelinessMonitor); - - scheduler = mock(YarnScheduler.class); - masterService = mock(ApplicationMasterService.class); - applicationMasterLauncher = mock(ApplicationMasterLauncher.class); - - rmDispatcher.register(RMAppAttemptEventType.class, - new TestApplicationAttemptEventDispatcher()); - - rmDispatcher.register(RMAppEventType.class, - new TestApplicationEventDispatcher()); - - rmDispatcher.register(SchedulerEventType.class, - new TestSchedulerEventDispatcher()); - - rmDispatcher.register(AMLauncherEventType.class, - new TestAMLauncherEventDispatcher()); - - rmDispatcher.init(new Configuration()); - rmDispatcher.start(); - - - ApplicationId applicationId = MockApps.newAppID(appId++); - ApplicationAttemptId applicationAttemptId = - MockApps.newAppAttemptID(applicationId, 0); - - final String user = MockApps.newUserName(); - final String queue = MockApps.newQueue(); - ApplicationSubmissionContext submissionContext = - mock(ApplicationSubmissionContext.class); - when(submissionContext.getUser()).thenReturn(user); - when(submissionContext.getQueue()).thenReturn(queue); - ContainerLaunchContext amContainerSpec = mock(ContainerLaunchContext.class); - Resource resource = mock(Resource.class); - when(amContainerSpec.getResource()).thenReturn(resource); - when(submissionContext.getAMContainerSpec()).thenReturn(amContainerSpec); - - application = mock(RMApp.class); - applicationAttempt = - new RMAppAttemptImpl(applicationAttemptId, null, rmContext, scheduler, - masterService, submissionContext); - when(application.getCurrentAppAttempt()).thenReturn(applicationAttempt); - when(application.getApplicationId()).thenReturn(applicationId); - - testAppAttemptNewState(); - } - - @After - public void tearDown() throws Exception { - ((AsyncDispatcher)this.rmContext.getDispatcher()).stop(); - } - - - /** - * {@link RMAppAttemptState#NEW} - */ - private void testAppAttemptNewState() { - assertEquals(RMAppAttemptState.NEW, - applicationAttempt.getAppAttemptState()); - assertEquals(0, applicationAttempt.getDiagnostics().length()); - 
assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); - assertNull(applicationAttempt.getMasterContainer()); - assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); - assertEquals(0, applicationAttempt.getRanNodes().size()); - } - - /** - * {@link RMAppAttemptState#SUBMITTED} - */ - private void testAppAttemptSubmittedState() { - assertEquals(RMAppAttemptState.SUBMITTED, - applicationAttempt.getAppAttemptState()); - assertEquals(0, applicationAttempt.getDiagnostics().length()); - assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); - assertNull(applicationAttempt.getMasterContainer()); - assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); - assertEquals(0, applicationAttempt.getRanNodes().size()); - - // Check events - verify(masterService). - registerAppAttempt(applicationAttempt.getAppAttemptId()); - verify(scheduler).handle(any(AppAddedSchedulerEvent.class)); - } - - /** - * {@link RMAppAttemptState#SUBMITTED} -> {@link RMAppAttemptState#FAILED} - */ - private void testAppAttemptSubmittedToFailedState(String diagnostics) { - assertEquals(RMAppAttemptState.FAILED, - applicationAttempt.getAppAttemptState()); - assertEquals(diagnostics, applicationAttempt.getDiagnostics()); - assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); - assertNull(applicationAttempt.getMasterContainer()); - assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); - assertEquals(0, applicationAttempt.getRanNodes().size()); - - // Check events - verify(application).handle(any(RMAppRejectedEvent.class)); - } - - /** - * {@link RMAppAttemptState#KILLED} - */ - private void testAppAttemptKilledState(Container amContainer, - String diagnostics) { - assertEquals(RMAppAttemptState.KILLED, - applicationAttempt.getAppAttemptState()); - assertEquals(diagnostics, applicationAttempt.getDiagnostics()); - assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); - assertEquals(amContainer, applicationAttempt.getMasterContainer()); - assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); - assertEquals(0, applicationAttempt.getRanNodes().size()); - } - - /** - * {@link RMAppAttemptState#SCHEDULED} - */ - private void testAppAttemptScheduledState() { - assertEquals(RMAppAttemptState.SCHEDULED, - applicationAttempt.getAppAttemptState()); - assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); - assertNull(applicationAttempt.getMasterContainer()); - assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); - assertEquals(0, applicationAttempt.getRanNodes().size()); - - // Check events - verify(application).handle(any(RMAppEvent.class)); - verify(scheduler). - allocate(any(ApplicationAttemptId.class), - any(List.class), any(List.class)); - } - - /** - * {@link RMAppAttemptState#ALLOCATED} - */ - private void testAppAttemptAllocatedState(Container amContainer) { - assertEquals(RMAppAttemptState.ALLOCATED, - applicationAttempt.getAppAttemptState()); - assertEquals(amContainer, applicationAttempt.getMasterContainer()); - - // Check events - verify(applicationMasterLauncher).handle(any(AMLauncherEvent.class)); - verify(scheduler, times(2)). 
- allocate( - any(ApplicationAttemptId.class), any(List.class), any(List.class)); - } - - /** - * {@link RMAppAttemptState#FAILED} - */ - private void testAppAttemptFailedState(Container container, - String diagnostics) { - assertEquals(RMAppAttemptState.FAILED, - applicationAttempt.getAppAttemptState()); - assertEquals(diagnostics, applicationAttempt.getDiagnostics()); - assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); - assertEquals(container, applicationAttempt.getMasterContainer()); - assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); - assertEquals(0, applicationAttempt.getRanNodes().size()); - - // Check events - verify(application, times(2)).handle(any(RMAppFailedAttemptEvent.class)); - } - - private void submitApplicationAttempt() { - ApplicationAttemptId appAttemptId = applicationAttempt.getAppAttemptId(); - applicationAttempt.handle( - new RMAppAttemptEvent(appAttemptId, RMAppAttemptEventType.START)); - testAppAttemptSubmittedState(); - } - - private void scheduleApplicationAttempt() { - submitApplicationAttempt(); - applicationAttempt.handle( - new RMAppAttemptEvent( - applicationAttempt.getAppAttemptId(), - RMAppAttemptEventType.APP_ACCEPTED)); - testAppAttemptScheduledState(); - } - - private Container allocateApplicationAttempt() { - scheduleApplicationAttempt(); - - // Mock the allocation of AM container - Container container = mock(Container.class); - Allocation allocation = mock(Allocation.class); - when(allocation.getContainers()). - thenReturn(Collections.singletonList(container)); - when( - scheduler.allocate( - any(ApplicationAttemptId.class), - any(List.class), - any(List.class))). - thenReturn(allocation); - - applicationAttempt.handle( - new RMAppAttemptContainerAllocatedEvent( - applicationAttempt.getAppAttemptId(), - container)); - - testAppAttemptAllocatedState(container); - - return container; - } - - @Test - public void testNewToKilled() { - applicationAttempt.handle( - new RMAppAttemptEvent( - applicationAttempt.getAppAttemptId(), - RMAppAttemptEventType.KILL)); - testAppAttemptKilledState(null, EMPTY_DIAGNOSTICS); - } - - @Test - public void testSubmittedToFailed() { - submitApplicationAttempt(); - String message = "Rejected"; - applicationAttempt.handle( - new RMAppAttemptRejectedEvent( - applicationAttempt.getAppAttemptId(), message)); - testAppAttemptSubmittedToFailedState(message); - } - - @Test - public void testSubmittedToKilled() { - submitApplicationAttempt(); - applicationAttempt.handle( - new RMAppAttemptEvent( - applicationAttempt.getAppAttemptId(), - RMAppAttemptEventType.KILL)); - testAppAttemptKilledState(null, EMPTY_DIAGNOSTICS); - } - - @Test - public void testScheduledToKilled() { - scheduleApplicationAttempt(); - applicationAttempt.handle( - new RMAppAttemptEvent( - applicationAttempt.getAppAttemptId(), - RMAppAttemptEventType.KILL)); - testAppAttemptKilledState(null, EMPTY_DIAGNOSTICS); - } - - @Test - public void testAllocatedToKilled() { - Container amContainer = allocateApplicationAttempt(); - applicationAttempt.handle( - new RMAppAttemptEvent( - applicationAttempt.getAppAttemptId(), - RMAppAttemptEventType.KILL)); - testAppAttemptKilledState(amContainer, EMPTY_DIAGNOSTICS); - } - - @Test - public void testAllocatedToFailed() { - Container amContainer = allocateApplicationAttempt(); - String diagnostics = "Launch Failed"; - applicationAttempt.handle( - new RMAppAttemptLaunchFailedEvent( - applicationAttempt.getAppAttemptId(), - diagnostics)); - testAppAttemptFailedState(amContainer, diagnostics); - 
} - -} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 639daf9e5ac..3ea01003320 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -135,8 +135,7 @@ public class TestLeafQueue { Container container = TestUtils.getMockContainer( containerId, ((SchedulerNode)(invocation.getArguments()[1])).getNodeID(), - (Resource)(invocation.getArguments()[2]), - ((Priority)invocation.getArguments()[3])); + (Resource)(invocation.getArguments()[2])); return container; } } @@ -144,9 +143,7 @@ public class TestLeafQueue { when(queue).createContainer( any(SchedulerApp.class), any(SchedulerNode.class), - any(Resource.class), - any(Priority.class) - ); + any(Resource.class)); // 2. Stub out LeafQueue.parent.completedContainer CSQueue parent = queue.getParent(); @@ -205,8 +202,6 @@ public class TestLeafQueue { assertEquals(1*GB, a.getUsedResources().getMemory()); assertEquals(1*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); - assertEquals(0, a.getMetrics().getReservedGB()); - assertEquals(1, a.getMetrics().getAllocatedGB()); // Also 2nd -> minCapacity = 1024 since (.1 * 8G) < minAlloc, also // you can get one container more than user-limit @@ -214,16 +209,12 @@ public class TestLeafQueue { assertEquals(2*GB, a.getUsedResources().getMemory()); assertEquals(2*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); - assertEquals(0, a.getMetrics().getReservedGB()); - assertEquals(2, a.getMetrics().getAllocatedGB()); // Can't allocate 3rd due to user-limit a.assignContainers(clusterResource, node_0); assertEquals(2*GB, a.getUsedResources().getMemory()); assertEquals(2*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); - assertEquals(0, a.getMetrics().getReservedGB()); - assertEquals(2, a.getMetrics().getAllocatedGB()); // Bump up user-limit-factor, now allocate should work a.setUserLimitFactor(10); @@ -231,16 +222,12 @@ public class TestLeafQueue { assertEquals(3*GB, a.getUsedResources().getMemory()); assertEquals(3*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); - assertEquals(0, a.getMetrics().getReservedGB()); - assertEquals(3, a.getMetrics().getAllocatedGB()); // One more should work, for app_1, due to user-limit-factor a.assignContainers(clusterResource, node_0); assertEquals(4*GB, a.getUsedResources().getMemory()); assertEquals(3*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemory()); - assertEquals(0, a.getMetrics().getReservedGB()); - assertEquals(4, a.getMetrics().getAllocatedGB()); // Test max-capacity // Now - no more allocs since we are at max-cap @@ -249,8 +236,6 @@ public class TestLeafQueue { assertEquals(4*GB, 
a.getUsedResources().getMemory()); assertEquals(3*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemory()); - assertEquals(0, a.getMetrics().getReservedGB()); - assertEquals(4, a.getMetrics().getAllocatedGB()); // Release each container from app_0 for (RMContainer rmContainer : app_0.getLiveContainers()) { @@ -260,8 +245,6 @@ public class TestLeafQueue { assertEquals(1*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemory()); - assertEquals(0, a.getMetrics().getReservedGB()); - assertEquals(1, a.getMetrics().getAllocatedGB()); // Release each container from app_1 for (RMContainer rmContainer : app_1.getLiveContainers()) { @@ -271,8 +254,6 @@ public class TestLeafQueue { assertEquals(0*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); - assertEquals(0, a.getMetrics().getReservedGB()); - assertEquals(0, a.getMetrics().getAllocatedGB()); } @Test @@ -492,8 +473,6 @@ public class TestLeafQueue { assertEquals(1*GB, a.getUsedResources().getMemory()); assertEquals(1*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); - assertEquals(0, a.getMetrics().getReservedGB()); - assertEquals(1, a.getMetrics().getAllocatedGB()); // Also 2nd -> minCapacity = 1024 since (.1 * 8G) < minAlloc, also // you can get one container more than user-limit @@ -501,8 +480,6 @@ public class TestLeafQueue { assertEquals(2*GB, a.getUsedResources().getMemory()); assertEquals(2*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); - assertEquals(0, a.getMetrics().getReservedGB()); - assertEquals(2, a.getMetrics().getAllocatedGB()); // Now, reservation should kick in for app_1 a.assignContainers(clusterResource, node_0); @@ -511,8 +488,6 @@ public class TestLeafQueue { assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); assertEquals(4*GB, app_1.getCurrentReservation().getMemory()); assertEquals(2*GB, node_0.getUsedResource().getMemory()); - assertEquals(4, a.getMetrics().getReservedGB()); - assertEquals(2, a.getMetrics().getAllocatedGB()); // Now free 1 container from app_0 i.e. 
1G a.completedContainer(clusterResource, app_0, node_0, @@ -523,8 +498,6 @@ public class TestLeafQueue { assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); assertEquals(4*GB, app_1.getCurrentReservation().getMemory()); assertEquals(1*GB, node_0.getUsedResource().getMemory()); - assertEquals(4, a.getMetrics().getReservedGB()); - assertEquals(1, a.getMetrics().getAllocatedGB()); // Now finish another container from app_0 and fulfill the reservation a.completedContainer(clusterResource, app_0, node_0, @@ -535,8 +508,6 @@ public class TestLeafQueue { assertEquals(4*GB, app_1.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentReservation().getMemory()); assertEquals(4*GB, node_0.getUsedResource().getMemory()); - assertEquals(0, a.getMetrics().getReservedGB()); - assertEquals(4, a.getMetrics().getAllocatedGB()); } @Test diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java index 8459e51d5c2..84dbbac8676 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java @@ -161,13 +161,11 @@ public class TestUtils { } public static Container getMockContainer( - ContainerId containerId, NodeId nodeId, - Resource resource, Priority priority) { + ContainerId containerId, NodeId nodeId, Resource resource) { Container container = mock(Container.class); when(container.getId()).thenReturn(containerId); when(container.getNodeId()).thenReturn(nodeId); when(container.getResource()).thenReturn(resource); - when(container.getPriority()).thenReturn(priority); return container; } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java deleted file mode 100644 index e0583a20075..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.yarn.server.resourcemanager.webapp; - -import java.io.PrintWriter; - -import org.apache.hadoop.yarn.server.resourcemanager.RMContext; -import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodesPage.NodesBlock; -import org.apache.hadoop.yarn.webapp.test.WebAppTests; -import org.junit.Test; -import org.mockito.Mockito; - -/** - * This tests the NodesPage block table that it should contain the table body - * data for all the columns in the table as specified in the header. - */ -public class TestNodesPage { - - @Test - public void testNodesBlockRender() throws Exception { - int numberOfRacks = 2; - int numberOfNodesPerRack = 2; - // Number of Actual Table Headers for NodesPage.NodesBlock might change in - // future. In that case this value should be adjusted to the new value. - int numberOfActualTableHeaders = 7; - - PrintWriter writer = WebAppTests.testBlock( - NodesBlock.class, - RMContext.class, - TestRMWebApp.mockRMContext(3, numberOfRacks, numberOfNodesPerRack, - 8 * TestRMWebApp.GiB)).getInstance(PrintWriter.class); - - Mockito.verify(writer, Mockito.times(numberOfActualTableHeaders)).print( - " release = new ArrayList(); - AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest( - appAttempt.getAppAttemptId(), 0, 0F, ask, release); + AllocateRequest allocateRequest = + recordFactory.newRecordInstance(AllocateRequest.class); + allocateRequest.setApplicationAttemptId(appAttempt.getAppAttemptId()); + allocateRequest.setResponseId(0); + allocateRequest.addAllAsks(ask); + allocateRequest.addAllReleases(release); List allocatedContainers = scheduler.allocate(allocateRequest) .getAMResponse().getAllocatedContainers(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/SingleCluster.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/SingleCluster.apt.vm deleted file mode 100644 index affb277b7ff..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/SingleCluster.apt.vm +++ /dev/null @@ -1,180 +0,0 @@ -~~ Licensed under the Apache License, Version 2.0 (the "License"); -~~ you may not use this file except in compliance with the License. -~~ You may obtain a copy of the License at -~~ -~~ http://www.apache.org/licenses/LICENSE-2.0 -~~ -~~ Unless required by applicable law or agreed to in writing, software -~~ distributed under the License is distributed on an "AS IS" BASIS, -~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -~~ See the License for the specific language governing permissions and -~~ limitations under the License. See accompanying LICENSE file. - - --- - Hadoop MapReduce Next Generation ${project.version} - Setting up a Single Node Cluster. - --- - --- - ${maven.build.timestamp} - -Hadoop MapReduce Next Generation - Setting up a Single Node Cluster. - - \[ {{{./index.html}Go Back}} \] - -* Mapreduce Tarball - - You should be able to obtain the MapReduce tarball from the release. - If not, you should be able to create a tarball from the source. - -+---+ -$ mvn clean install -DskipTests -$ cd hadoop-mapreduce-project -$ mvn clean install assembly:assembly -+---+ - <> You will need protoc installed of version 2.4.1 or greater. - - To ignore the native builds in mapreduce you can use <<<-P-cbuild>>> argument - for maven. The tarball should be available in <<>> directory. - - -* Setting up the environment. 
- - Assuming you have installed hadoop-common/hadoop-hdfs and exported - <<$HADOOP_COMMON_HOME>>/<<$HADOOP_COMMON_HOME>>, untar hadoop mapreduce - tarball and set environment variable <<$HADOOP_MAPRED_HOME>> to the - untarred directory. Set <<$YARN_HOME>> the same as <<$HADOOP_MAPRED_HOME>>. - - <> The following instructions assume you have hdfs running. - -* Setting up Configuration. - - To start the ResourceManager and NodeManager, you will have to update the configs. - Assuming your $HADOOP_CONF_DIR is the configuration directory and has the installed - configs for HDFS and <<>>. There are 2 config files you will have to setup - <<>> and <<>>. - -** Setting up <<>> - - Add the following configs to your <<>>. - -+---+ - - mapreduce.cluster.temp.dir - - No description - true - - - - mapreduce.cluster.local.dir - - No description - true - -+---+ - -** Setting up <<>> - -Add the following configs to your <<>> - -+---+ - - yarn.resourcemanager.resource-tracker.address - host:port - host is the hostname of the resource manager and - port is the port on which the NodeManagers contact the Resource Manager. - - - - - yarn.resourcemanager.scheduler.address - host:port - host is the hostname of the resourcemanager and port is the port - on which the Applications in the cluster talk to the Resource Manager. - - - - - yarn.resourcemanager.scheduler.class - org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler - In case you do not want to use the default scheduler - - - - yarn.resourcemanager.address - host:port - the host is the hostname of the ResourceManager and the port is the port on - which the clients can talk to the Resource Manager. - - - - yarn.nodemanager.local-dirs - - the local directories used by the nodemanager - - - - yarn.nodemanager.address - 0.0.0.0:port - the nodemanagers bind to this port - - - - yarn.nodemanager.resource.memory-gb - 10 - the amount of memory on the NodeManager in GB - - - - yarn.nodemanager.remote-app-log-dir - /app-logs - directory on hdfs where the application logs are moved to - - - - yarn.nodemanager.log-dirs - - the directories used by Nodemanagers as log directories - - - - yarn.nodemanager.aux-services - mapreduce.shuffle - shuffle service that needs to be set for Map Reduce to run - -+---+ - -* Create Symlinks. - - You will have to create the following symlinks: - -+---+ -$ cd $HADOOP_COMMON_HOME/share/hadoop/common/lib/ -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-app-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-jobclient-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-common-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-shuffle-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-core-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-common-*-SNAPSHOT.jar . -$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-api-*-SNAPSHOT.jar . -+---+ -* Running daemons. - - Assuming that the environment variables <<$HADOOP_COMMON_HOME>>, <<$HADOOP_HDFS_HOME>>, <<$HADOO_MAPRED_HOME>>, - <<$YARN_HOME>>, <<$JAVA_HOME>> and <<$HADOOP_CONF_DIR>> have been set appropriately. - Set $<<$YARN_CONF_DIR>> the same as $<> - - Run ResourceManager and NodeManager as: - -+---+ -$ cd $HADOOP_MAPRED_HOME -$ bin/yarn-daemon.sh start resourcemanager -$ bin/yarn-daemon.sh start nodemanager -+---+ - - You should be up and running. 
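  (Aside, not part of the original patch: a minimal Java sketch for sanity-checking the
  yarn-site.xml keys used in the walkthrough above. The class name, the chosen keys and
  the fallback values are illustrative assumptions only, not code from this change.)

+---+
import org.apache.hadoop.conf.Configuration;

// Hypothetical helper, not shipped with Hadoop: loads yarn-site.xml and checks
// two of the keys configured in the walkthrough above.
public class YarnConfSanityCheck {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.addResource("yarn-site.xml");   // resolved from $HADOOP_CONF_DIR on the classpath

    // Keys taken from the walkthrough; the defaults here are placeholders.
    String rmAddress = conf.get("yarn.resourcemanager.address", "<unset>");
    String auxServices = conf.get("yarn.nodemanager.aux-services", "");

    System.out.println("yarn.resourcemanager.address = " + rmAddress);
    if (!auxServices.contains("mapreduce.shuffle")) {
      System.err.println("Warning: mapreduce.shuffle is not listed in "
          + "yarn.nodemanager.aux-services; MapReduce jobs will not be able to shuffle.");
    }
  }
}
+---+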
You can run randomwriter as: - -+---+ -$ $HADOOP_COMMON_HOME/bin/hadoop jar hadoop-examples.jar randomwriter out -+---+ - -Good luck. diff --git a/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm deleted file mode 100644 index db9fe870349..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm +++ /dev/null @@ -1,39 +0,0 @@ -~~ Licensed under the Apache License, Version 2.0 (the "License"); -~~ you may not use this file except in compliance with the License. -~~ You may obtain a copy of the License at -~~ -~~ http://www.apache.org/licenses/LICENSE-2.0 -~~ -~~ Unless required by applicable law or agreed to in writing, software -~~ distributed under the License is distributed on an "AS IS" BASIS, -~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -~~ See the License for the specific language governing permissions and -~~ limitations under the License. See accompanying LICENSE file. - - --- - Hadoop MapReduce Next Generation ${project.version} - --- - --- - ${maven.build.timestamp} - -Hadoop MapReduce Next Generation - -* Architecture - - The new architecture introduced in 0.23, divides the two major functions - of the JobTracker, resource management and job scheduling/monitoring, into separate - components. - The new ResourceManager manages the global assignment of compute resources to applications - and the per-application ApplicationMaster manages the application’s scheduling and coordination. - An application is either a single job in the classic MapReduce jobs or a DAG of such jobs. - The ResourceManager and per-machine NodeManager server, which manages the user processes on that - machine, form the computation fabric. The per-application ApplicationMaster is, in effect, a - framework specific library and is tasked with negotiating resources from the ResourceManager - and working with the NodeManager(s) to execute and monitor the tasks. - -* User Documentation - - * {{{./SingleCluster.html}SingleCluster}} - - * {{{./apidocs/index.html}JavaDocs}} - diff --git a/hadoop-mapreduce-project/hadoop-yarn/src/site/site.xml b/hadoop-mapreduce-project/hadoop-yarn/src/site/site.xml deleted file mode 100644 index 35a75cb2e55..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/src/site/site.xml +++ /dev/null @@ -1,34 +0,0 @@ - - - - - - -   - - - - org.apache.maven.skins - maven-stylus-skin - 1.1 - - - - - - - - - diff --git a/hadoop-mapreduce-project/src/contrib/fairscheduler/ivy.xml b/hadoop-mapreduce-project/src/contrib/fairscheduler/ivy.xml index 0b910158df5..e927032d7db 100644 --- a/hadoop-mapreduce-project/src/contrib/fairscheduler/ivy.xml +++ b/hadoop-mapreduce-project/src/contrib/fairscheduler/ivy.xml @@ -48,9 +48,9 @@ + rev="${hadoop-common.version}" conf="common->default"/> + rev="${hadoop-common.version}" conf="test->default"> 0) { LOG.warn("-file option is deprecated, please use generic option" + " -files instead."); - - String fileList = null; + StringBuilder unpackRegex = new StringBuilder( + config_.getPattern(MRJobConfig.JAR_UNPACK_PATTERN, + JobConf.UNPACK_JAR_PATTERN_DEFAULT).pattern()); for (String file : values) { packageFiles_.add(file); - try { - URI pathURI = new URI(file); - Path path = new Path(pathURI); - FileSystem localFs = FileSystem.getLocal(config_); - String finalPath = path.makeQualified(localFs).toString(); - fileList = fileList == null ? 
finalPath : fileList + "," + finalPath; - } catch (Exception e) { - throw new IllegalArgumentException(e); - } + String fname = new File(file).getName(); + unpackRegex.append("|(?:").append(Pattern.quote(fname)).append(")"); } - config_.set("tmpfiles", config_.get("tmpfiles", "") + - (fileList == null ? "" : fileList)); + config_.setPattern(MRJobConfig.JAR_UNPACK_PATTERN, + Pattern.compile(unpackRegex.toString())); validate(packageFiles_); } - + String fsName = cmdLine.getOptionValue("dfs"); if (null != fsName){ LOG.warn("-dfs option is deprecated, please use -fs instead."); diff --git a/hadoop-mapreduce-project/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh b/hadoop-mapreduce-project/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh index 2a32cbd1c9d..8ac5b61a5d3 100644 --- a/hadoop-mapreduce-project/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh +++ b/hadoop-mapreduce-project/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh @@ -31,78 +31,17 @@ script=`basename "$this"` bin=`cd "$bin"; pwd` this="$bin/$script" -# Check if HADOOP_HOME AND JAVA_HOME is set. -if [ -z "$HADOOP_HOME" ] && [ -z "$HADOOP_PREFIX" ] ; then - echo "HADOOP_HOME or HADOOP_PREFIX environment variable should be defined" +# Check if HADOOP_PREFIX AND JAVA_HOME is set. +if [ -z $HADOOP_PREFIX ] ; then + echo "HADOOP_PREFIX environment variable not defined" exit -1; fi -if [ -z "$JAVA_HOME" ] ; then +if [ -z $JAVA_HOME ] ; then echo "JAVA_HOME environment variable not defined" exit -1; fi -if [ -z "$HADOOP_PREFIX" ]; then - hadoopVersion=`$HADOOP_HOME/bin/hadoop version | awk 'BEGIN { RS = "" ; FS = "\n" } ; { print $1 }' | awk '{print $2}'` -else - hadoopVersion=`$HADOOP_PREFIX/bin/hadoop version | awk 'BEGIN { RS = "" ; FS = "\n" } ; { print $1 }' | awk '{print $2}'` -fi +hadoopVersion=`$HADOOP_PREFIX/bin/hadoop version | grep Hadoop | awk '{print $2}'` -# so that filenames w/ spaces are handled correctly in loops below -IFS= - -# for releases, add core hadoop jar to CLASSPATH -if [ -e $HADOOP_PREFIX/share/hadoop/hadoop-core-* ]; then - for f in $HADOOP_PREFIX/share/hadoop/hadoop-core-*.jar; do - CLASSPATH=${CLASSPATH}:$f; - done - - # add libs to CLASSPATH - for f in $HADOOP_PREFIX/share/hadoop/lib/*.jar; do - CLASSPATH=${CLASSPATH}:$f; - done -else - # tarball layout - if [ -e $HADOOP_HOME/hadoop-core-* ]; then - for f in $HADOOP_HOME/hadoop-core-*.jar; do - CLASSPATH=${CLASSPATH}:$f; - done - fi - if [ -e $HADOOP_HOME/build/hadoop-core-* ]; then - for f in $HADOOP_HOME/build/hadoop-core-*.jar; do - CLASSPATH=${CLASSPATH}:$f; - done - fi - for f in $HADOOP_HOME/lib/*.jar; do - CLASSPATH=${CLASSPATH}:$f; - done - - if [ -d "$HADOOP_HOME/build/ivy/lib/Hadoop/common" ]; then - for f in $HADOOP_HOME/build/ivy/lib/Hadoop/common/*.jar; do - CLASSPATH=${CLASSPATH}:$f; - done - fi -fi - -# Set the Vaidya home -if [ -d "$HADOOP_PREFIX/share/hadoop/contrib/vaidya/" ]; then - VAIDYA_HOME=$HADOOP_PREFIX/share/hadoop/contrib/vaidya/ -fi -if [ -d "$HADOOP_HOME/contrib/vaidya" ]; then - VAIDYA_HOME=$HADOOP_HOME/contrib/vaidya/ -fi -if [ -d "$HADOOP_HOME/build/contrib/vaidya" ]; then - VAIDYA_HOME=$HADOOP_HOME/build/contrib/vaidya/ -fi - -# add user-specified CLASSPATH last -if [ "$HADOOP_USER_CLASSPATH_FIRST" = "" ] && [ "$HADOOP_CLASSPATH" != "" ]; then - CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH} -fi - -# restore ordinary behaviour -unset IFS - -echo "$CLASSPATH" - -$JAVA_HOME/bin/java -Xmx1024m -classpath $VAIDYA_HOME/hadoop-vaidya-${hadoopVersion}.jar:${CLASSPATH} 
org.apache.hadoop.vaidya.postexdiagnosis.PostExPerformanceDiagnoser $@ +$JAVA_HOME/bin/java -Xmx1024m -classpath $HADOOP_PREFIX/hadoop-${hadoopVersion}-core.jar:$HADOOP_PREFIX/contrib/vaidya/hadoop-${hadoopVersion}-vaidya.jar:$HADOOP_PREFIX/lib/commons-logging-1.0.4.jar:${CLASSPATH} org.apache.hadoop.vaidya.postexdiagnosis.PostExPerformanceDiagnoser $@ diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTrackerClientProtocolProvider.java b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTrackerClientProtocolProvider.java index c695816e414..42c958d77c1 100644 --- a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTrackerClientProtocolProvider.java +++ b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTrackerClientProtocolProvider.java @@ -37,30 +37,26 @@ public class JobTrackerClientProtocolProvider extends ClientProtocolProvider { @Override public ClientProtocol create(Configuration conf) throws IOException { String framework = conf.get(MRConfig.FRAMEWORK_NAME); - if (!MRConfig.CLASSIC_FRAMEWORK_NAME.equals(framework)) { + if (framework != null && !framework.equals("classic")) { return null; } String tracker = conf.get(JTConfig.JT_IPC_ADDRESS, "local"); if (!"local".equals(tracker)) { return createRPCProxy(JobTracker.getAddress(conf), conf); - } else { - throw new IOException("Invalid \"" + JTConfig.JT_IPC_ADDRESS - + "\" configuration value for JobTracker: \"" - + tracker + "\""); } + return null; } @Override - public ClientProtocol create(InetSocketAddress addr, Configuration conf) - throws IOException { + public ClientProtocol create(InetSocketAddress addr, Configuration conf) throws IOException { return createRPCProxy(addr, conf); } - + private ClientProtocol createRPCProxy(InetSocketAddress addr, Configuration conf) throws IOException { return (ClientProtocol) RPC.getProxy(ClientProtocol.class, - ClientProtocol.versionID, addr, UserGroupInformation.getCurrentUser(), - conf, NetUtils.getSocketFactory(conf, ClientProtocol.class)); + ClientProtocol.versionID, addr, UserGroupInformation.getCurrentUser(), + conf, NetUtils.getSocketFactory(conf, ClientProtocol.class)); } @Override diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/LocalClientProtocolProvider.java b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/LocalClientProtocolProvider.java index d09b222ee9b..68d10bc4d00 100644 --- a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/LocalClientProtocolProvider.java +++ b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/LocalClientProtocolProvider.java @@ -37,16 +37,11 @@ public class LocalClientProtocolProvider extends ClientProtocolProvider { if (framework != null && !framework.equals("local")) { return null; } - String tracker = conf.get(JTConfig.JT_IPC_ADDRESS, "local"); - if ("local".equals(tracker)) { + if ("local".equals(conf.get(JTConfig.JT_IPC_ADDRESS, "local"))) { conf.setInt("mapreduce.job.maps", 1); return new LocalJobRunner(conf); - } else { - - throw new IOException("Invalid \"" + JTConfig.JT_IPC_ADDRESS - + "\" configuration value for LocalJobRunner : \"" - + tracker + "\""); } + return null; } @Override diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java index 86980bb73d6..7581f8bc7be 100644 --- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java +++ 
b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java @@ -382,7 +382,6 @@ public class MiniMRCluster { UserGroupInformation ugi) { JobConf result = new JobConf(conf); FileSystem.setDefaultUri(result, namenode); - result.set(MRConfig.FRAMEWORK_NAME, MRConfig.CLASSIC_FRAMEWORK_NAME); result.set(JTConfig.JT_IPC_ADDRESS, "localhost:"+jobTrackerPort); result.set(JTConfig.JT_HTTP_ADDRESS, "127.0.0.1:" + jobTrackerInfoPort); diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/QueueManagerTestUtils.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/QueueManagerTestUtils.java index 4cb0fee616c..dee6f57b72f 100644 --- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/QueueManagerTestUtils.java +++ b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/QueueManagerTestUtils.java @@ -24,7 +24,6 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.Cluster; import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.QueueState; import org.apache.hadoop.mapreduce.SleepJob; import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig; @@ -315,7 +314,6 @@ public class QueueManagerTestUtils { final long reduceSleepTime, boolean shouldComplete, String userInfo, String queueName, Configuration clientConf) throws IOException, InterruptedException, ClassNotFoundException { - clientConf.set(MRConfig.FRAMEWORK_NAME, MRConfig.CLASSIC_FRAMEWORK_NAME); clientConf.set(JTConfig.JT_IPC_ADDRESS, "localhost:" + miniMRCluster.getJobTrackerPort()); UserGroupInformation ugi; diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestMiniMRClasspath.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestMiniMRClasspath.java index 2563902d4bc..911aa2cf7c2 100644 --- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestMiniMRClasspath.java +++ b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestMiniMRClasspath.java @@ -55,7 +55,6 @@ public class TestMiniMRClasspath extends TestCase { file.close(); } FileSystem.setDefaultUri(conf, fs.getUri()); - conf.set(JTConfig.FRAMEWORK_NAME, JTConfig.CLASSIC_FRAMEWORK_NAME); conf.set(JTConfig.JT_IPC_ADDRESS, jobTracker); conf.setJobName("wordcount"); conf.setInputFormat(TextInputFormat.class); @@ -122,7 +121,6 @@ public class TestMiniMRClasspath extends TestCase { file.close(); } FileSystem.setDefaultUri(conf, uri); - conf.set(JTConfig.FRAMEWORK_NAME, JTConfig.CLASSIC_FRAMEWORK_NAME); conf.set(JTConfig.JT_IPC_ADDRESS, jobTracker); conf.setJobName("wordcount"); conf.setInputFormat(TextInputFormat.class); diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestSpecialCharactersInOutputPath.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestSpecialCharactersInOutputPath.java index 5e510094ced..dc3355bb4b8 100644 --- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestSpecialCharactersInOutputPath.java +++ b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestSpecialCharactersInOutputPath.java @@ -27,14 +27,13 @@ import junit.framework.TestCase; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.fs.FileSystem; import 
org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.lib.IdentityMapper; import org.apache.hadoop.mapred.lib.IdentityReducer; -import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig; import org.apache.hadoop.util.Progressable; @@ -68,7 +67,6 @@ public class TestSpecialCharactersInOutputPath extends TestCase { // use WordCount example FileSystem.setDefaultUri(conf, fileSys); - conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.CLASSIC_FRAMEWORK_NAME); conf.set(JTConfig.JT_IPC_ADDRESS, jobTracker); conf.setJobName("foo"); diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java deleted file mode 100644 index a9044e24308..00000000000 --- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java +++ /dev/null @@ -1,99 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.mapreduce; - -import java.io.IOException; - -import junit.framework.TestCase; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.mapred.LocalJobRunner; -import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig; -import org.junit.Test; - -public class TestClientProtocolProviderImpls extends TestCase { - - @Test - public void testClusterWithLocalClientProvider() throws Exception { - - Configuration conf = new Configuration(); - - try { - conf.set(MRConfig.FRAMEWORK_NAME, "incorrect"); - new Cluster(conf); - fail("Cluster should not be initialized with incorrect framework name"); - } catch (IOException e) { - - } - - try { - conf.set(MRConfig.FRAMEWORK_NAME, "local"); - conf.set(JTConfig.JT_IPC_ADDRESS, "127.0.0.1:0"); - - new Cluster(conf); - fail("Cluster with Local Framework name should use local JT address"); - } catch (IOException e) { - - } - - try { - conf.set(JTConfig.JT_IPC_ADDRESS, "local"); - Cluster cluster = new Cluster(conf); - assertTrue(cluster.getClient() instanceof LocalJobRunner); - cluster.close(); - } catch (IOException e) { - - } - } - - @Test - public void testClusterWithJTClientProvider() throws Exception { - - Configuration conf = new Configuration(); - try { - conf.set(MRConfig.FRAMEWORK_NAME, "incorrect"); - new Cluster(conf); - fail("Cluster should not be initialized with incorrect framework name"); - - } catch (IOException e) { - - } - - try { - conf.set(MRConfig.FRAMEWORK_NAME, "classic"); - conf.set(JTConfig.JT_IPC_ADDRESS, "local"); - new Cluster(conf); - fail("Cluster with classic Framework name shouldnot use local JT address"); - - } catch (IOException e) { - - } - - try { - conf = new Configuration(); - conf.set(MRConfig.FRAMEWORK_NAME, "classic"); - conf.set(JTConfig.JT_IPC_ADDRESS, "127.0.0.1:0"); - Cluster cluster = new Cluster(conf); - cluster.close(); - } catch (IOException e) { - - } - } - -} diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index ec342060fe2..a1114a13664 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -76,9 +76,6 @@ https://repository.apache.org/content/repositories/snapshots 1.0.3 - - ${project.build.directory}/test-dir - ${test.build.dir} @@ -562,25 +559,6 @@ - - org.apache.maven.plugins - maven-antrun-plugin - - - create-testdirs - validate - - run - - - - - - - - - - org.apache.maven.plugins maven-compiler-plugin From ab0402bc1def44e3d52eea517f4132c460bd5f87 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 29 Sep 2011 00:42:47 +0000 Subject: [PATCH 011/177] Merging trunk to HDFS-1623 branch git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1177130 13f79535-47bb-0310-9956-ffa450edef68 --- dev-support/test-patch.sh | 4 +- .../hadoop-common/CHANGES.txt | 42 +- .../content/xdocs/HttpAuthentication.xml | 8 +- .../org/apache/hadoop/conf/Configuration.java | 4 + .../apache/hadoop/fs/LocalDirAllocator.java | 12 +- .../org/apache/hadoop/http/HttpServer.java | 29 +- .../apache/hadoop/ipc/ProtocolSignature.java | 2 +- .../java/org/apache/hadoop/net/NetUtils.java | 21 + .../AuthenticationFilterInitializer.java | 26 +- .../src/main/packages/hadoop-setup-conf.sh | 5 +- .../templates/conf/hadoop-metrics2.properties | 20 + .../packages/templates/conf/hdfs-site.xml | 20 + .../packages/templates/conf/log4j.properties | 213 ++++ .../src/main/resources/core-default.xml | 4 +- .../apache/hadoop/conf/TestConfiguration.java | 16 +- .../hadoop/fs/TestLocalDirAllocator.java | 221 ++-- 
.../java/org/apache/hadoop/fs/TestTrash.java | 5 +- .../hadoop/http/HttpServerFunctionalTest.java | 27 + .../apache/hadoop/http/TestPathFilter.java | 145 +++ .../org/apache/hadoop/net/TestNetUtils.java | 32 + .../security/TestAuthenticationFilter.java | 16 +- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 36 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 85 +- .../hadoop/hdfs/protocol/DatanodeInfo.java | 20 + .../server/blockmanagement/BlockManager.java | 9 +- .../hadoop/hdfs/server/common/JspHelper.java | 23 +- .../hadoop/hdfs/server/datanode/DataNode.java | 8 +- .../web/resources/DatanodeWebHdfsMethods.java | 4 + .../hdfs/server/namenode/BackupNode.java | 6 + .../hdfs/server/namenode/FSDirectory.java | 2 +- .../hdfs/server/namenode/FSNamesystem.java | 5 - .../hadoop/hdfs/server/namenode/NameNode.java | 20 +- .../server/namenode/NameNodeRpcServer.java | 17 +- .../server/namenode/SecondaryNameNode.java | 9 +- .../web/resources/NamenodeWebHdfsMethods.java | 63 +- .../hdfs/tools/DelegationTokenFetcher.java | 45 +- .../org/apache/hadoop/hdfs/web/JsonUtil.java | 286 ++++- .../apache/hadoop/hdfs/web/ParamFilter.java | 85 ++ .../hadoop/hdfs/web/WebHdfsFileSystem.java | 61 +- .../hdfs/web/resources/AccessTimeParam.java | 2 +- .../hdfs/web/resources/BlockSizeParam.java | 2 +- .../hdfs/web/resources/BufferSizeParam.java | 2 +- .../hdfs/web/resources/DelegationParam.java | 3 +- .../hdfs/web/resources/DeleteOpParam.java | 3 - .../hdfs/web/resources/DstPathParam.java | 2 +- .../hadoop/hdfs/web/resources/GetOpParam.java | 6 +- .../hdfs/web/resources/HttpOpParam.java | 3 + .../web/resources/ModificationTimeParam.java | 2 +- .../hdfs/web/resources/OverwriteParam.java | 2 +- .../hdfs/web/resources/PostOpParam.java | 3 - .../hadoop/hdfs/web/resources/PutOpParam.java | 3 - .../web/resources/RenameOptionSetParam.java | 2 +- .../hdfs/web/resources/RenewerParam.java | 41 + .../src/main/resources/hdfs-default.xml | 20 - .../apache/hadoop/hdfs/TestDFSPermission.java | 50 +- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 200 +-- .../org/apache/hadoop/hdfs/TestQuota.java | 20 +- .../hdfs/security/TestDelegationToken.java | 36 +- .../blockmanagement/TestHost2NodesMap.java | 26 +- .../TestMulitipleNNDataBlockScanner.java | 5 +- .../hdfs/server/datanode/TestReplicasMap.java | 17 +- .../hdfs/server/namenode/FSImageTestUtil.java | 7 + .../namenode/TestProcessCorruptBlocks.java | 290 +++++ .../web/TestWebHdfsFileSystemContract.java | 44 + hadoop-mapreduce-project/CHANGES.txt | 102 ++ .../hadoop-mapreduce-client-app/pom.xml | 43 + .../hadoop/mapred/MapReduceChildJVM.java | 218 ++-- .../org/apache/hadoop/mapred/YarnChild.java | 7 +- .../hadoop/mapreduce/v2/app/MRAppMaster.java | 62 +- .../v2/app/client/MRClientService.java | 2 +- .../mapreduce/v2/app/job/impl/JobImpl.java | 58 +- .../v2/app/job/impl/TaskAttemptImpl.java | 85 +- .../app/launcher/ContainerLauncherImpl.java | 35 +- .../v2/app/local/LocalContainerAllocator.java | 21 +- .../v2/app/recover/RecoveryService.java | 17 +- .../mapreduce/v2/app/rm/RMCommunicator.java | 54 +- .../v2/app/rm/RMContainerAllocator.java | 40 +- .../v2/app/rm/RMContainerRequestor.java | 13 +- .../v2/app/speculate/DefaultSpeculator.java | 3 +- .../mapreduce/v2/app/webapp/JobConfPage.java | 1 + .../mapreduce/v2/app/webapp/NavBlock.java | 6 +- .../mapreduce/v2/app/webapp/TaskPage.java | 2 +- .../apache/hadoop/mapreduce/v2/app/MRApp.java | 26 +- .../v2/app/TestRMContainerAllocator.java | 1069 ++++++++++------- .../hadoop/mapreduce/TypeConverter.java | 34 +- 
.../hadoop/mapreduce/v2/MRConstants.java | 50 - .../mapreduce/v2/api/records/JobReport.java | 4 + .../api/records/impl/pb/JobReportPBImpl.java | 24 + .../v2/jobhistory/JobHistoryUtils.java | 2 +- .../hadoop/mapreduce/v2/util/MRApps.java | 115 +- .../mapreduce/v2/util/MRBuilderUtils.java | 32 +- .../src/main/proto/mr_protos.proto | 2 + .../hadoop/mapreduce/TestTypeConverter.java | 13 + .../hadoop/mapreduce/v2/util/TestMRApps.java | 4 +- .../org/apache/hadoop/mapred/BackupStore.java | 3 +- .../org/apache/hadoop/mapred/JobConf.java | 1 + .../org/apache/hadoop/mapred/JobStatus.java | 4 + .../org/apache/hadoop/mapred/MRConstants.java | 8 +- .../apache/hadoop/mapred/MROutputFiles.java | 23 +- .../org/apache/hadoop/mapred/TaskLog.java | 13 +- .../hadoop/mapred/pipes/Application.java | 5 +- .../org/apache/hadoop/mapreduce/Cluster.java | 37 +- .../java/org/apache/hadoop/mapreduce/Job.java | 3 +- .../apache/hadoop/mapreduce/JobStatus.java | 18 + .../apache/hadoop/mapreduce/MRJobConfig.java | 79 +- .../mapreduce/v2/hs/HistoryClientService.java | 2 +- .../hadoop/mapreduce/v2/hs/JobHistory.java | 28 +- .../hadoop-mapreduce-client-jobclient/pom.xml | 6 + .../org/apache/hadoop/mapred/ClientCache.java | 58 +- .../hadoop/mapred/ClientServiceDelegate.java | 52 +- .../apache/hadoop/mapred/NotRunningJob.java | 45 +- .../hadoop/mapred/ResourceMgrDelegate.java | 30 +- .../org/apache/hadoop/mapred/YARNRunner.java | 106 +- .../hadoop/mapred/TestClientRedirect.java | 25 +- .../mapred/TestClientServiceDelegate.java | 9 +- .../TestYarnClientProtocolProvider.java | 59 + .../mapreduce/v2/MiniMRYarnCluster.java | 8 +- .../hadoop/mapreduce/v2/TestMRJobs.java | 2 +- .../hadoop/mapreduce/v2/TestYARNRunner.java | 121 +- .../hadoop-mapreduce-client/pom.xml | 6 + hadoop-mapreduce-project/hadoop-yarn/README | 14 +- .../dev-support/findbugs-exclude.xml | 4 + .../hadoop/yarn/api/ApplicationConstants.java | 120 +- .../hadoop/yarn/api/ClientRMProtocol.java | 26 +- ...est.java => GetNewApplicationRequest.java} | 4 +- ...se.java => GetNewApplicationResponse.java} | 31 +- ...quest.java => KillApplicationRequest.java} | 4 +- ...onse.java => KillApplicationResponse.java} | 4 +- .../pb/GetNewApplicationIdResponsePBImpl.java | 109 -- ...va => GetNewApplicationRequestPBImpl.java} | 21 +- .../pb/GetNewApplicationResponsePBImpl.java | 173 +++ ...java => KillApplicationRequestPBImpl.java} | 24 +- ...ava => KillApplicationResponsePBImpl.java} | 20 +- .../yarn/api/records/ApplicationReport.java | 12 + .../hadoop/yarn/api/records/Container.java | 13 + .../impl/pb/ApplicationReportPBImpl.java | 36 +- .../api/records/impl/pb/ContainerPBImpl.java | 39 + .../src/main/proto/client_RM_protocol.proto | 4 +- .../src/main/proto/yarn_protos.proto | 16 +- .../src/main/proto/yarn_service_protos.proto | 10 +- .../client/ClientRMProtocolPBClientImpl.java | 41 +- .../ClientRMProtocolPBServiceImpl.java | 40 +- .../hadoop/yarn/conf/YarnConfiguration.java | 8 +- .../yarn/ipc/ProtoOverHadoopRpcEngine.java | 6 + .../yarn/state/StateMachineFactory.java | 39 + .../apache/hadoop/yarn/util/BuilderUtils.java | 43 +- .../hadoop/yarn/util/ConverterUtils.java | 52 +- .../org/apache/hadoop/yarn/util/Graph.java | 210 ++++ .../yarn/util/VisualizeStateMachine.java | 73 ++ .../apache/hadoop/yarn/webapp/Dispatcher.java | 9 + .../org/apache/hadoop/yarn/webapp/WebApp.java | 25 +- .../apache/hadoop/yarn/webapp/WebApps.java | 11 +- .../src/main/resources/webapps/cluster/.keep | 0 .../main/resources/webapps/jobhistory/.keep | 0 .../main/resources/webapps/mapreduce/.keep | 0 
.../src/main/resources/webapps/node/.keep | 0 .../java/org/apache/hadoop/yarn/MockApps.java | 10 + .../java/org/apache/hadoop/yarn/TestRPC.java | 32 + .../yarn/conf/TestYarnConfiguration.java | 54 + .../apache/hadoop/yarn/webapp/TestWebApp.java | 27 + .../hadoop-yarn-server-nodemanager/pom.xml | 33 + .../nodemanager/DefaultContainerExecutor.java | 6 +- .../nodemanager/LinuxContainerExecutor.java | 6 +- .../nodemanager/NodeStatusUpdaterImpl.java | 2 +- .../container/ContainerImpl.java | 62 +- .../launcher/ContainerLaunch.java | 103 +- .../nodemanager/webapp/ContainerLogsPage.java | 14 +- .../nodemanager/webapp/ContainerPage.java | 31 +- .../server/nodemanager/webapp/WebServer.java | 2 +- .../main/resources/container-log4j.properties | 8 +- .../TestContainerManagerWithLCE.java | 24 + .../TestContainerManager.java | 88 ++ .../container/TestContainer.java | 24 +- .../pom.xml | 51 + .../server/resourcemanager/AdminService.java | 2 +- .../resourcemanager/ClientRMService.java | 30 +- .../server/resourcemanager/RMAppManager.java | 12 +- .../server/resourcemanager/RMContextImpl.java | 2 - .../resourcemanager/ResourceManager.java | 22 +- .../amlauncher/AMLauncher.java | 29 +- .../amlauncher/ApplicationMasterLauncher.java | 11 +- .../server/resourcemanager/rmapp/RMApp.java | 3 +- .../rmapp/RMAppFailedAttemptEvent.java} | 23 +- .../resourcemanager/rmapp/RMAppImpl.java | 41 +- .../rmapp/attempt/RMAppAttempt.java | 4 +- .../rmapp/attempt/RMAppAttemptImpl.java | 104 +- .../resourcemanager/rmnode/RMNodeImpl.java | 25 +- .../scheduler/QueueMetrics.java | 54 +- .../scheduler/SchedulerApp.java | 15 +- .../scheduler/capacity/LeafQueue.java | 22 +- .../event/NodeUpdateSchedulerEvent.java | 3 - .../scheduler/fifo/FifoScheduler.java | 7 +- .../resourcemanager/webapp/NodesPage.java | 4 +- .../resourcemanager/webapp/RmController.java | 2 +- .../yarn/server/resourcemanager/MockAM.java | 10 +- .../yarn/server/resourcemanager/MockRM.java | 21 +- .../server/resourcemanager/NodeManager.java | 5 +- .../TestApplicationMasterLauncher.java | 159 +++ .../yarn/server/resourcemanager/TestRM.java | 16 + .../resourcemanager/TestResourceManager.java | 21 + .../TestAMRMRPCResponseId.java | 13 +- .../TestApplicationMasterLauncher.java | 193 --- .../resourcetracker/InlineDispatcher.java | 65 +- .../resourcemanager/rmapp/MockRMApp.java | 1 + .../rmapp/TestRMAppTransitions.java | 191 ++- .../attempt/TestRMAppAttemptTransitions.java | 403 +++++++ .../scheduler/capacity/TestLeafQueue.java | 33 +- .../scheduler/capacity/TestUtils.java | 4 +- .../resourcemanager/webapp/TestNodesPage.java | 55 + .../TestContainerTokenSecretManager.java | 9 +- .../src/site/apt/SingleCluster.apt.vm | 180 +++ .../hadoop-yarn/src/site/apt/index.apt.vm | 39 + .../hadoop-yarn/src/site/site.xml | 34 + .../src/contrib/fairscheduler/ivy.xml | 4 +- .../apache/hadoop/streaming/StreamJob.java | 25 +- .../java/org/apache/hadoop/vaidya/vaidya.sh | 73 +- .../JobTrackerClientProtocolProvider.java | 16 +- .../mapred/LocalClientProtocolProvider.java | 9 +- .../apache/hadoop/mapred/MiniMRCluster.java | 1 + .../hadoop/mapred/QueueManagerTestUtils.java | 2 + .../hadoop/mapred/TestMiniMRClasspath.java | 2 + .../TestSpecialCharactersInOutputPath.java | 4 +- .../TestClientProtocolProviderImpls.java | 99 ++ hadoop-project/pom.xml | 22 + 224 files changed, 7124 insertions(+), 2160 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-metrics2.properties create mode 100644 
hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestPathFilter.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/ParamFilter.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenewerParam.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestProcessCorruptBlocks.java delete mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/MRConstants.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/{GetNewApplicationIdRequest.java => GetNewApplicationRequest.java} (91%) rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/{GetNewApplicationIdResponse.java => GetNewApplicationResponse.java} (66%) rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/{FinishApplicationRequest.java => KillApplicationRequest.java} (94%) rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/{FinishApplicationResponse.java => KillApplicationResponse.java} (91%) delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationIdResponsePBImpl.java rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/{GetNewApplicationIdRequestPBImpl.java => GetNewApplicationRequestPBImpl.java} (68%) create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationResponsePBImpl.java rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/{FinishApplicationRequestPBImpl.java => KillApplicationRequestPBImpl.java} (74%) rename hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/{FinishApplicationResponsePBImpl.java => KillApplicationResponsePBImpl.java} (62%) create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Graph.java create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/VisualizeStateMachine.java create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/cluster/.keep create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/jobhistory/.keep create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/mapreduce/.keep create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/node/.keep create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java rename 
hadoop-mapreduce-project/{hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Constants.java => hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFailedAttemptEvent.java} (64%) create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java delete mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterLauncher.java create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java create mode 100644 hadoop-mapreduce-project/hadoop-yarn/src/site/apt/SingleCluster.apt.vm create mode 100644 hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm create mode 100644 hadoop-mapreduce-project/hadoop-yarn/src/site/site.xml create mode 100644 hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java diff --git a/dev-support/test-patch.sh b/dev-support/test-patch.sh index 76b020a01e3..6325e6a193b 100755 --- a/dev-support/test-patch.sh +++ b/dev-support/test-patch.sh @@ -598,8 +598,8 @@ runTests () { echo "" echo "" - echo "$MVN clean test -Pnative -D${PROJECT_NAME}PatchProcess" - $MVN clean test -Pnative -D${PROJECT_NAME}PatchProcess + echo "$MVN clean install test -Pnative -D${PROJECT_NAME}PatchProcess" + $MVN clean install test -Pnative -D${PROJECT_NAME}PatchProcess if [[ $? != 0 ]] ; then ### Find and format names of failed tests failed_tests=`find . -name 'TEST*.xml' | xargs $GREP -l -E "36000.


    hadoop.http.authentication.signature.secret: The signature secret for - signing the authentication tokens. If not set a random secret is generated at +

    hadoop.http.authentication.signature.secret.file: The signature secret + file for signing the authentication tokens. If not set a random secret is generated at startup time. The same secret should be used for all nodes in the cluster, JobTracker, - NameNode, DataNode and TastTracker. The default value is a hadoop value. + NameNode, DataNode and TaskTracker. The default value is + ${user.home}/hadoop-http-auth-signature-secret. + IMPORTANT: This file should be readable only by the Unix user running the daemons.
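    As a rough illustration of what this property implies for operators (this sketch is
    not part of the patch, and the class and method names below are invented for the
    example), a daemon honouring hadoop.http.authentication.signature.secret.file simply
    reads the whole file at startup and uses its contents as the shared signing secret;
    in this patch the actual wiring is done by AuthenticationFilterInitializer, which
    copies the file contents into the filter's SIGNATURE_SECRET setting.

    import java.io.BufferedReader;
    import java.io.FileReader;
    import java.io.IOException;

    /** Illustrative helper (hypothetical name); not part of the Hadoop code base. */
    public class SignatureSecretLoader {

      /** Reads the entire secret file and returns its contents, trimmed for convenience. */
      public static String loadSecret(String secretFilePath) throws IOException {
        StringBuilder secret = new StringBuilder();
        BufferedReader reader = new BufferedReader(new FileReader(secretFilePath));
        try {
          int c;
          while ((c = reader.read()) != -1) {
            secret.append((char) c);
          }
        } finally {
          reader.close();
        }
        // The patch itself keeps the raw file contents; trimming here only drops a
        // trailing newline that editors commonly add.
        return secret.toString().trim();
      }

      public static void main(String[] args) throws IOException {
        // Default location documented above; the file should be readable only by the
        // daemon user (e.g. chmod 600).
        String path = System.getProperty("user.home") + "/hadoop-http-auth-signature-secret";
        System.out.println("loaded secret, length=" + loadSecret(path).length());
      }
    }

    The same secret file has to be distributed to every node listed above, since tokens
    signed with one secret cannot be verified by a daemon holding a different one.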

    hadoop.http.authentication.cookie.domain: The domain to use for the HTTP diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 4fb1d190663..c310aa65e6c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -1632,6 +1632,10 @@ public class Configuration implements Iterable>, try { doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); + + // Allow a broader set of control characters to appear in job confs. + // cf https://issues.apache.org/jira/browse/MAPREDUCE-109 + doc.setXmlVersion( "1.1" ); } catch (ParserConfigurationException pe) { throw new IOException(pe); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java index 71c82357577..d1eae086f90 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java @@ -264,9 +264,15 @@ public class LocalDirAllocator { Path tmpDir = new Path(localDirs[i]); if(localFS.mkdirs(tmpDir)|| localFS.exists(tmpDir)) { try { - DiskChecker.checkDir(new File(localDirs[i])); - dirs.add(localDirs[i]); - dfList.add(new DF(new File(localDirs[i]), 30000)); + + File tmpFile = tmpDir.isAbsolute() + ? new File(localFS.makeQualified(tmpDir).toUri()) + : new File(localDirs[i]); + + DiskChecker.checkDir(tmpFile); + dirs.add(tmpFile.getPath()); + dfList.add(new DF(tmpFile, 30000)); + } catch (DiskErrorException de) { LOG.warn( localDirs[i] + " is not writable\n", de); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java index 00cdf32746f..c526e102865 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer.java @@ -20,6 +20,7 @@ package org.apache.hadoop.http; import java.io.FileNotFoundException; import java.io.IOException; import java.io.PrintWriter; +import java.io.InterruptedIOException; import java.net.BindException; import java.net.InetSocketAddress; import java.net.URL; @@ -124,6 +125,29 @@ public class HttpServer implements FilterContainer { boolean findPort, Configuration conf, Connector connector) throws IOException { this(name, bindAddress, port, findPort, conf, null, connector); } + + /** + * Create a status server on the given port. Allows you to specify the + * path specifications that this server will be serving so that they will be + * added to the filters properly. + * + * @param name The name of the server + * @param bindAddress The address for this server + * @param port The port to use on the server + * @param findPort whether the server should start at the given port and + * increment by 1 until it finds a free port. + * @param conf Configuration + * @param pathSpecs Path specifications that this httpserver will be serving. + * These will be added to any filters. 
+ */ + public HttpServer(String name, String bindAddress, int port, + boolean findPort, Configuration conf, String[] pathSpecs) throws IOException { + this(name, bindAddress, port, findPort, conf, null, null); + for (String path : pathSpecs) { + LOG.info("adding path spec: " + path); + addFilterPathMapping(path, webAppContext); + } + } /** * Create a status server on the given port. @@ -259,7 +283,7 @@ public class HttpServer implements FilterContainer { if (logDir != null) { Context logContext = new Context(parent, "/logs"); logContext.setResourceBase(logDir); - logContext.addServlet(AdminAuthorizedServlet.class, "/"); + logContext.addServlet(AdminAuthorizedServlet.class, "/*"); logContext.setDisplayName("logs"); setContextAttributes(logContext, conf); defaultContexts.put(logContext, true); @@ -660,6 +684,9 @@ public class HttpServer implements FilterContainer { } } catch (IOException e) { throw e; + } catch (InterruptedException e) { + throw (IOException) new InterruptedIOException( + "Interrupted while starting HTTP server").initCause(e); } catch (Exception e) { throw new IOException("Problem starting http server", e); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java index a055a7fd46a..04d08c5142d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolSignature.java @@ -199,7 +199,7 @@ public class ProtocolSignature implements Writable { * @param protocol protocol * @return the server's protocol signature */ - static ProtocolSignature getProtocolSignature( + public static ProtocolSignature getProtocolSignature( int clientMethodsHashCode, long serverVersion, Class protocol) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java index b22aaa009c1..d94b69f1836 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java @@ -516,4 +516,25 @@ public class NetUtils { } catch (UnknownHostException ignore) { } return addr; } + + /** + * Given an InetAddress, checks to see if the address is a local address, by + * comparing the address with all the interfaces on the node. 
+ * @param addr address to check if it is local node's address + * @return true if the address corresponds to the local node + */ + public static boolean isLocalAddress(InetAddress addr) { + // Check if the address is any local or loop back + boolean local = addr.isAnyLocalAddress() || addr.isLoopbackAddress(); + + // Check if the address is defined on any interface + if (!local) { + try { + local = NetworkInterface.getByInetAddress(addr) != null; + } catch (SocketException e) { + local = false; + } + } + return local; + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java index cd6ab7b3260..666632d5bfa 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AuthenticationFilterInitializer.java @@ -22,6 +22,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.http.FilterContainer; import org.apache.hadoop.http.FilterInitializer; +import java.io.FileReader; +import java.io.IOException; +import java.io.Reader; import java.util.HashMap; import java.util.Map; @@ -40,8 +43,10 @@ import java.util.Map; */ public class AuthenticationFilterInitializer extends FilterInitializer { - private static final String PREFIX = "hadoop.http.authentication."; + static final String PREFIX = "hadoop.http.authentication."; + static final String SIGNATURE_SECRET_FILE = AuthenticationFilter.SIGNATURE_SECRET + ".file"; + /** * Initializes Alfredo AuthenticationFilter. *

    @@ -67,6 +72,25 @@ public class AuthenticationFilterInitializer extends FilterInitializer { } } + String signatureSecretFile = filterConfig.get(SIGNATURE_SECRET_FILE); + if (signatureSecretFile == null) { + throw new RuntimeException("Undefined property: " + SIGNATURE_SECRET_FILE); + } + + try { + StringBuilder secret = new StringBuilder(); + Reader reader = new FileReader(signatureSecretFile); + int c = reader.read(); + while (c > -1) { + secret.append((char)c); + c = reader.read(); + } + reader.close(); + filterConfig.put(AuthenticationFilter.SIGNATURE_SECRET, secret.toString()); + } catch (IOException ex) { + throw new RuntimeException("Could not read HTTP signature secret file: " + signatureSecretFile); + } + container.addFilter("authentication", AuthenticationFilter.class.getName(), filterConfig); diff --git a/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh b/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh index 8e903cf308d..96a989fc39f 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh +++ b/hadoop-common-project/hadoop-common/src/main/packages/hadoop-setup-conf.sh @@ -475,7 +475,10 @@ else template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/taskcontroller.cfg ${HADOOP_CONF_DIR}/taskcontroller.cfg template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties if [ ! -e ${HADOOP_CONF_DIR}/capacity-scheduler.xml ]; then - template_generator ${HADOOP_PREFIX}/share/hadoop/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml + template_generator ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/capacity-scheduler.xml ${HADOOP_CONF_DIR}/capacity-scheduler.xml + fi + if [ ! -e ${HADOOP_CONF_DIR}/hadoop-metrics2.properties ]; then + cp ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/hadoop-metrics2.properties ${HADOOP_CONF_DIR}/hadoop-metrics2.properties fi if [ ! -e ${HADOOP_CONF_DIR}/log4j.properties ]; then cp ${HADOOP_PREFIX}/share/hadoop/common/templates/conf/log4j.properties ${HADOOP_CONF_DIR}/log4j.properties diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-metrics2.properties b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-metrics2.properties new file mode 100644 index 00000000000..4a1019385c0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-metrics2.properties @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# syntax: [prefix].[source|sink|jmx].[instance].[options] +# See package.html for org.apache.hadoop.metrics2 for details + +*.period=60 + diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml index 69e078380c1..ffec60355ae 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml +++ b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hdfs-site.xml @@ -144,6 +144,26 @@ + + dfs.web.authentication.kerberos.principal + HTTP/_HOST@${local.realm} + + The HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint. + + The HTTP Kerberos principal MUST start with 'HTTP/' per Kerberos + HTTP SPENGO specification. + + + + + dfs.web.authentication.kerberos.keytab + /etc/security/keytabs/nn.service.keytab + + The Kerberos keytab file with the credentials for the + HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint. + + + dfs.namenode.keytab.file /etc/security/keytabs/nn.service.keytab diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties new file mode 100644 index 00000000000..16c6aa6890e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/log4j.properties @@ -0,0 +1,213 @@ +# Copyright 2011 The Apache Software Foundation +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Define some default values that can be overridden by system properties +hadoop.root.logger=INFO,console +hadoop.log.dir=. +hadoop.log.file=hadoop.log + +# +# Job Summary Appender +# +# Use following logger to send summary to separate file defined by +# hadoop.mapreduce.jobsummary.log.file rolled daily: +# hadoop.mapreduce.jobsummary.logger=INFO,JSA +# +hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger} +hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log + +# Define the root logger to the system property "hadoop.root.logger". 
+log4j.rootLogger=${hadoop.root.logger}, EventCounter + +# Logging Threshold +log4j.threshold=ALL + +# +# Daily Rolling File Appender +# + +log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} + +# Rollver at midnight +log4j.appender.DRFA.DatePattern=.yyyy-MM-dd + +# 30-day backup +#log4j.appender.DRFA.MaxBackupIndex=30 +log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout + +# Pattern format: Date LogLevel LoggerName LogMessage +log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +# Debugging Pattern format +#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + + +# +# console +# Add "console" to rootlogger above if you want to use this +# + +log4j.appender.console=org.apache.log4j.ConsoleAppender +log4j.appender.console.target=System.err +log4j.appender.console.layout=org.apache.log4j.PatternLayout +log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n + +# +# TaskLog Appender +# + +#Default values +hadoop.tasklog.taskid=null +hadoop.tasklog.iscleanup=false +hadoop.tasklog.noKeepSplits=4 +hadoop.tasklog.totalLogFileSize=100 +hadoop.tasklog.purgeLogSplits=true +hadoop.tasklog.logsRetainHours=12 + +log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender +log4j.appender.TLA.taskId=${hadoop.tasklog.taskid} +log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup} +log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize} + +log4j.appender.TLA.layout=org.apache.log4j.PatternLayout +log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n + +# +#Security appender +# +hadoop.security.log.file=SecurityAuth.audit +log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file} + +log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout +log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n +#new logger +# Define some default values that can be overridden by system properties +hadoop.security.logger=INFO,console +log4j.category.SecurityLogger=${hadoop.security.logger} + +# hdfs audit logging + +hdfs.audit.logger=INFO,console +log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} +log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false +log4j.appender.DRFAAUDIT=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log +log4j.appender.DRFAAUDIT.layout=org.apache.log4j.PatternLayout +log4j.appender.DRFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n +log4j.appender.DRFAAUDIT.DatePattern=.yyyy-MM-dd + +# mapred audit logging + +mapred.audit.logger=INFO,console +log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger} +log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false +log4j.appender.MRAUDIT=org.apache.log4j.DailyRollingFileAppender +log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log +log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout +log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n +log4j.appender.MRAUDIT.DatePattern=.yyyy-MM-dd + +# +# Rolling File Appender +# + +#log4j.appender.RFA=org.apache.log4j.RollingFileAppender +#log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file} + +# Logfile size and and 30-day backups +#log4j.appender.RFA.MaxFileSize=1MB +#log4j.appender.RFA.MaxBackupIndex=30 + 
+#log4j.appender.RFA.layout=org.apache.log4j.PatternLayout +#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} - %m%n +#log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n + +# +# FSNamesystem Audit logging +# All audit events are logged at INFO level +# +log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=WARN + +# Custom Logging levels + +#log4j.logger.org.apache.hadoop.mapred.JobTracker=DEBUG +#log4j.logger.org.apache.hadoop.mapred.TaskTracker=DEBUG +#log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=DEBUG + +# Jets3t library +log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR + +# +# Event Counter Appender +# Sends counts of logging messages at different severity levels to Hadoop Metrics. +# +log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter + +# +# Job Summary Appender +# +log4j.appender.JSA=org.apache.log4j.DailyRollingFileAppender +log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file} +log4j.appender.JSA.layout=org.apache.log4j.PatternLayout +log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n +log4j.appender.JSA.DatePattern=.yyyy-MM-dd +log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger} +log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false + +# +# MapReduce Audit Log Appender +# + +# Set the MapReduce audit log filename +#hadoop.mapreduce.audit.log.file=hadoop-mapreduce.audit.log + +# Appender for AuditLogger. +# Requires the following system properties to be set +# - hadoop.log.dir (Hadoop Log directory) +# - hadoop.mapreduce.audit.log.file (MapReduce audit log filename) + +#log4j.logger.org.apache.hadoop.mapred.AuditLogger=INFO,MRAUDIT +#log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false +#log4j.appender.MRAUDIT=org.apache.log4j.DailyRollingFileAppender +#log4j.appender.MRAUDIT.File=${hadoop.log.dir}/${hadoop.mapreduce.audit.log.file} +#log4j.appender.MRAUDIT.DatePattern=.yyyy-MM-dd +#log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout +#log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n + +# +# Yarn ResourceManager Application Summary Log +# +# Set the ResourceManager summary log filename +#yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log +# Set the ResourceManager summary log level and appender +#yarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY + +# Appender for ResourceManager Application Summary Log - rolled daily +# Requires the following properties to be set +# - hadoop.log.dir (Hadoop Log directory) +# - yarn.server.resourcemanager.appsummary.log.file (resource manager app summary log filename) +# - yarn.server.resourcemanager.appsummary.logger (resource manager app summary log level and appender) + +#log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger} +#log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false +#log4j.appender.RMSUMMARY=org.apache.log4j.DailyRollingFileAppender +#log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file} +#log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout +#log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n +#log4j.appender.RMSUMMARY.DatePattern=.yyyy-MM-dd diff --git 
a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index d4b40305592..e34c2023738 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -808,8 +808,8 @@ - hadoop.http.authentication.signature.secret - hadoop + hadoop.http.authentication.signature.secret.file + ${user.home}/hadoop-http-auth-signature-secret The signature secret for signing the authentication tokens. If not set a random secret is generated at startup time. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index f9f14fb8480..5842db199de 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -58,7 +58,7 @@ public class TestConfiguration extends TestCase { } private void startConfig() throws IOException{ - out.write("\n"); + out.write("\n"); out.write("\n"); } @@ -221,6 +221,18 @@ public class TestConfiguration extends TestCase { assertEquals("this contains a comment", conf.get("my.comment")); } + public void testControlAInValue() throws IOException { + out = new BufferedWriter(new FileWriter(CONFIG)); + startConfig(); + appendProperty("my.char", ""); + appendProperty("my.string", "somestring"); + endConfig(); + Path fileResource = new Path(CONFIG); + conf.addResource(fileResource); + assertEquals("\u0001", conf.get("my.char")); + assertEquals("some\u0001string", conf.get("my.string")); + } + public void testTrim() throws IOException { out=new BufferedWriter(new FileWriter(CONFIG)); startConfig(); @@ -298,7 +310,7 @@ public class TestConfiguration extends TestCase { conf.writeXml(baos); String result = baos.toString(); assertTrue("Result has proper header", result.startsWith( - "")); + "")); assertTrue("Result has proper footer", result.endsWith("")); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java index 1e22a73bbac..e87f2d122bf 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalDirAllocator.java @@ -20,40 +20,48 @@ package org.apache.hadoop.fs; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.Shell; -import junit.framework.TestCase; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; +import org.junit.Test; + +import static org.junit.Assert.*; /** This test LocalDirAllocator works correctly; - * Every test case uses different buffer dirs to + * Every test case uses different buffer dirs to * enforce the AllocatorPerContext initialization. * This test does not run on Cygwin because under Cygwin * a directory can be created in a read-only directory * which breaks this test. 
- */ -public class TestLocalDirAllocator extends TestCase { + */ +@RunWith(Parameterized.class) +public class TestLocalDirAllocator { final static private Configuration conf = new Configuration(); final static private String BUFFER_DIR_ROOT = "build/test/temp"; + final static private String ABSOLUTE_DIR_ROOT; + final static private String QUALIFIED_DIR_ROOT; final static private Path BUFFER_PATH_ROOT = new Path(BUFFER_DIR_ROOT); final static private File BUFFER_ROOT = new File(BUFFER_DIR_ROOT); - final static private String BUFFER_DIR[] = new String[] { - BUFFER_DIR_ROOT+"/tmp0", BUFFER_DIR_ROOT+"/tmp1", BUFFER_DIR_ROOT+"/tmp2", - BUFFER_DIR_ROOT+"/tmp3", BUFFER_DIR_ROOT+"/tmp4", BUFFER_DIR_ROOT+"/tmp5", - BUFFER_DIR_ROOT+"/tmp6"}; - final static private Path BUFFER_PATH[] = new Path[] { - new Path(BUFFER_DIR[0]), new Path(BUFFER_DIR[1]), new Path(BUFFER_DIR[2]), - new Path(BUFFER_DIR[3]), new Path(BUFFER_DIR[4]), new Path(BUFFER_DIR[5]), - new Path(BUFFER_DIR[6])}; - final static private String CONTEXT = "dfs.client.buffer.dir"; + final static private String CONTEXT = "fs.client.buffer.dir"; final static private String FILENAME = "block"; - final static private LocalDirAllocator dirAllocator = + final static private LocalDirAllocator dirAllocator = new LocalDirAllocator(CONTEXT); static LocalFileSystem localFs; final static private boolean isWindows = System.getProperty("os.name").startsWith("Windows"); final static int SMALL_FILE_SIZE = 100; + final static private String RELATIVE = "/RELATIVE"; + final static private String ABSOLUTE = "/ABSOLUTE"; + final static private String QUALIFIED = "/QUALIFIED"; + final private String ROOT; + final private String PREFIX; + static { try { localFs = FileSystem.getLocal(conf); @@ -63,170 +71,214 @@ public class TestLocalDirAllocator extends TestCase { e.printStackTrace(); System.exit(-1); } + + ABSOLUTE_DIR_ROOT = new Path(localFs.getWorkingDirectory(), + BUFFER_DIR_ROOT).toUri().getPath(); + QUALIFIED_DIR_ROOT = new Path(localFs.getWorkingDirectory(), + BUFFER_DIR_ROOT).toUri().toString(); + } + + public TestLocalDirAllocator(String root, String prefix) { + ROOT = root; + PREFIX = prefix; + } + + @Parameters + public static Collection params() { + Object [][] data = new Object[][] { + { BUFFER_DIR_ROOT, RELATIVE }, + { ABSOLUTE_DIR_ROOT, ABSOLUTE }, + { QUALIFIED_DIR_ROOT, QUALIFIED } + }; + + return Arrays.asList(data); } private static void rmBufferDirs() throws IOException { assertTrue(!localFs.exists(BUFFER_PATH_ROOT) || localFs.delete(BUFFER_PATH_ROOT, true)); } - - private void validateTempDirCreation(int i) throws IOException { + + private static void validateTempDirCreation(String dir) throws IOException { File result = createTempFile(SMALL_FILE_SIZE); - assertTrue("Checking for " + BUFFER_DIR[i] + " in " + result + " - FAILED!", - result.getPath().startsWith(new File(BUFFER_DIR[i], FILENAME).getPath())); + assertTrue("Checking for " + dir + " in " + result + " - FAILED!", + result.getPath().startsWith(new Path(dir, FILENAME).toUri().getPath())); } - - private File createTempFile() throws IOException { - File result = dirAllocator.createTmpFileForWrite(FILENAME, -1, conf); - result.delete(); - return result; + + private static File createTempFile() throws IOException { + return createTempFile(-1); } - - private File createTempFile(long size) throws IOException { + + private static File createTempFile(long size) throws IOException { File result = dirAllocator.createTmpFileForWrite(FILENAME, size, conf); result.delete(); return result; } - - 
/** Two buffer dirs. The first dir does not exist & is on a read-only disk; + + private String buildBufferDir(String dir, int i) { + return dir + PREFIX + i; + } + + /** Two buffer dirs. The first dir does not exist & is on a read-only disk; * The second dir exists & is RW * @throws Exception */ + @Test public void test0() throws Exception { if (isWindows) return; + String dir0 = buildBufferDir(ROOT, 0); + String dir1 = buildBufferDir(ROOT, 1); try { - conf.set(CONTEXT, BUFFER_DIR[0]+","+BUFFER_DIR[1]); - assertTrue(localFs.mkdirs(BUFFER_PATH[1])); + conf.set(CONTEXT, dir0 + "," + dir1); + assertTrue(localFs.mkdirs(new Path(dir1))); BUFFER_ROOT.setReadOnly(); - validateTempDirCreation(1); - validateTempDirCreation(1); + validateTempDirCreation(dir1); + validateTempDirCreation(dir1); } finally { Shell.execCommand(new String[]{"chmod", "u+w", BUFFER_DIR_ROOT}); rmBufferDirs(); } } - - /** Two buffer dirs. The first dir exists & is on a read-only disk; + + /** Two buffer dirs. The first dir exists & is on a read-only disk; * The second dir exists & is RW * @throws Exception */ + @Test public void test1() throws Exception { if (isWindows) return; + String dir1 = buildBufferDir(ROOT, 1); + String dir2 = buildBufferDir(ROOT, 2); try { - conf.set(CONTEXT, BUFFER_DIR[1]+","+BUFFER_DIR[2]); - assertTrue(localFs.mkdirs(BUFFER_PATH[2])); + conf.set(CONTEXT, dir1 + "," + dir2); + assertTrue(localFs.mkdirs(new Path(dir2))); BUFFER_ROOT.setReadOnly(); - validateTempDirCreation(2); - validateTempDirCreation(2); + validateTempDirCreation(dir2); + validateTempDirCreation(dir2); } finally { Shell.execCommand(new String[]{"chmod", "u+w", BUFFER_DIR_ROOT}); rmBufferDirs(); } } /** Two buffer dirs. Both do not exist but on a RW disk. - * Check if tmp dirs are allocated in a round-robin + * Check if tmp dirs are allocated in a round-robin */ + @Test public void test2() throws Exception { if (isWindows) return; + String dir2 = buildBufferDir(ROOT, 2); + String dir3 = buildBufferDir(ROOT, 3); try { - conf.set(CONTEXT, BUFFER_DIR[2]+","+BUFFER_DIR[3]); + conf.set(CONTEXT, dir2 + "," + dir3); // create the first file, and then figure the round-robin sequence createTempFile(SMALL_FILE_SIZE); int firstDirIdx = (dirAllocator.getCurrentDirectoryIndex() == 0) ? 2 : 3; int secondDirIdx = (firstDirIdx == 2) ? 3 : 2; - + // check if tmp dirs are allocated in a round-robin manner - validateTempDirCreation(firstDirIdx); - validateTempDirCreation(secondDirIdx); - validateTempDirCreation(firstDirIdx); + validateTempDirCreation(buildBufferDir(ROOT, firstDirIdx)); + validateTempDirCreation(buildBufferDir(ROOT, secondDirIdx)); + validateTempDirCreation(buildBufferDir(ROOT, firstDirIdx)); } finally { rmBufferDirs(); } } - /** Two buffer dirs. Both exists and on a R/W disk. + /** Two buffer dirs. Both exists and on a R/W disk. * Later disk1 becomes read-only. 
* @throws Exception */ + @Test public void test3() throws Exception { if (isWindows) return; + String dir3 = buildBufferDir(ROOT, 3); + String dir4 = buildBufferDir(ROOT, 4); try { - conf.set(CONTEXT, BUFFER_DIR[3]+","+BUFFER_DIR[4]); - assertTrue(localFs.mkdirs(BUFFER_PATH[3])); - assertTrue(localFs.mkdirs(BUFFER_PATH[4])); - - // create the first file with size, and then figure the round-robin sequence + conf.set(CONTEXT, dir3 + "," + dir4); + assertTrue(localFs.mkdirs(new Path(dir3))); + assertTrue(localFs.mkdirs(new Path(dir4))); + + // Create the first small file createTempFile(SMALL_FILE_SIZE); + // Determine the round-robin sequence int nextDirIdx = (dirAllocator.getCurrentDirectoryIndex() == 0) ? 3 : 4; - validateTempDirCreation(nextDirIdx); + validateTempDirCreation(buildBufferDir(ROOT, nextDirIdx)); // change buffer directory 2 to be read only - new File(BUFFER_DIR[4]).setReadOnly(); - validateTempDirCreation(3); - validateTempDirCreation(3); + new File(new Path(dir4).toUri().getPath()).setReadOnly(); + validateTempDirCreation(dir3); + validateTempDirCreation(dir3); } finally { rmBufferDirs(); } } - + /** * Two buffer dirs, on read-write disk. - * + * * Try to create a whole bunch of files. * Verify that they do indeed all get created where they should. - * + * * Would ideally check statistical properties of distribution, but * we don't have the nerve to risk false-positives here. - * + * * @throws Exception */ static final int TRIALS = 100; + @Test public void test4() throws Exception { if (isWindows) return; + String dir5 = buildBufferDir(ROOT, 5); + String dir6 = buildBufferDir(ROOT, 6); try { - conf.set(CONTEXT, BUFFER_DIR[5]+","+BUFFER_DIR[6]); - assertTrue(localFs.mkdirs(BUFFER_PATH[5])); - assertTrue(localFs.mkdirs(BUFFER_PATH[6])); - + conf.set(CONTEXT, dir5 + "," + dir6); + assertTrue(localFs.mkdirs(new Path(dir5))); + assertTrue(localFs.mkdirs(new Path(dir6))); + int inDir5=0, inDir6=0; for(int i = 0; i < TRIALS; ++i) { File result = createTempFile(); - if(result.getPath().startsWith(new File(BUFFER_DIR[5], FILENAME).getPath())) { + if(result.getPath().startsWith( + new Path(dir5, FILENAME).toUri().getPath())) { inDir5++; - } else if(result.getPath().startsWith(new File(BUFFER_DIR[6], FILENAME).getPath())) { + } else if(result.getPath().startsWith( + new Path(dir6, FILENAME).toUri().getPath())) { inDir6++; } result.delete(); } - - assertTrue( inDir5 + inDir6 == TRIALS); - + + assertTrue(inDir5 + inDir6 == TRIALS); + } finally { rmBufferDirs(); } } - - /** Two buffer dirs. The first dir does not exist & is on a read-only disk; + + /** Two buffer dirs. The first dir does not exist & is on a read-only disk; * The second dir exists & is RW * getLocalPathForWrite with checkAccess set to false should create a parent * directory. With checkAccess true, the directory should not be created. 
* @throws Exception */ + @Test public void testLocalPathForWriteDirCreation() throws IOException { + String dir0 = buildBufferDir(ROOT, 0); + String dir1 = buildBufferDir(ROOT, 1); try { - conf.set(CONTEXT, BUFFER_DIR[0] + "," + BUFFER_DIR[1]); - assertTrue(localFs.mkdirs(BUFFER_PATH[1])); + conf.set(CONTEXT, dir0 + "," + dir1); + assertTrue(localFs.mkdirs(new Path(dir1))); BUFFER_ROOT.setReadOnly(); Path p1 = - dirAllocator.getLocalPathForWrite("p1/x", SMALL_FILE_SIZE, conf); + dirAllocator.getLocalPathForWrite("p1/x", SMALL_FILE_SIZE, conf); assertTrue(localFs.getFileStatus(p1.getParent()).isDirectory()); Path p2 = - dirAllocator.getLocalPathForWrite("p2/x", SMALL_FILE_SIZE, conf, - false); + dirAllocator.getLocalPathForWrite("p2/x", SMALL_FILE_SIZE, conf, + false); try { localFs.getFileStatus(p2.getParent()); } catch (Exception e) { @@ -237,5 +289,26 @@ public class TestLocalDirAllocator extends TestCase { rmBufferDirs(); } } - + + /** Test no side effect files are left over. After creating a temp + * temp file, remove both the temp file and its parent. Verify that + * no files or directories are left over as can happen when File objects + * are mistakenly created from fully qualified path strings. + * @throws IOException + */ + @Test + public void testNoSideEffects() throws IOException { + if (isWindows) return; + String dir = buildBufferDir(ROOT, 0); + try { + conf.set(CONTEXT, dir); + File result = dirAllocator.createTmpFileForWrite(FILENAME, -1, conf); + assertTrue(result.delete()); + assertTrue(result.getParentFile().delete()); + assertFalse(new File(dir).exists()); + } finally { + Shell.execCommand(new String[]{"chmod", "u+w", BUFFER_DIR_ROOT}); + rmBufferDirs(); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java index 3d739a07d8b..782e4e41674 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestTrash.java @@ -486,6 +486,9 @@ public class TestTrash extends TestCase { conf.set(FS_TRASH_INTERVAL_KEY, "0.2"); // 12 seconds conf.setClass("fs.file.impl", TestLFS.class, FileSystem.class); conf.set(FS_TRASH_CHECKPOINT_INTERVAL_KEY, "0.1"); // 6 seconds + FileSystem fs = FileSystem.getLocal(conf); + conf.set("fs.default.name", fs.getUri().toString()); + Trash trash = new Trash(conf); // Start Emptier in background @@ -493,8 +496,6 @@ public class TestTrash extends TestCase { Thread emptierThread = new Thread(emptier); emptierThread.start(); - FileSystem fs = FileSystem.getLocal(conf); - conf.set("fs.defaultFS", fs.getUri().toString()); FsShell shell = new FsShell(); shell.setConf(conf); shell.init(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/HttpServerFunctionalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/HttpServerFunctionalTest.java index 07688137d5e..aff74b573b0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/HttpServerFunctionalTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/HttpServerFunctionalTest.java @@ -70,6 +70,21 @@ public class HttpServerFunctionalTest extends Assert { return createServer(TEST, conf); } + /** + * Create but do not start the test webapp server. The test webapp dir is + * prepared/checked in advance. 
+ * @param conf the server configuration to use + * @return the server instance + * + * @throws IOException if a problem occurs + * @throws AssertionError if a condition was not met + */ + public static HttpServer createTestServer(Configuration conf, + String[] pathSpecs) throws IOException { + prepareTestWebapp(); + return createServer(TEST, conf, pathSpecs); + } + /** * Prepare the test webapp by creating the directory from the test properties * fail if the directory cannot be created. @@ -104,6 +119,18 @@ public class HttpServerFunctionalTest extends Assert { throws IOException { return new HttpServer(webapp, "0.0.0.0", 0, true, conf); } + /** + * Create an HttpServer instance for the given webapp + * @param webapp the webapp to work with + * @param conf the configuration to use for the server + * @param pathSpecs the paths specifications the server will service + * @return the server + * @throws IOException if it could not be created + */ + public static HttpServer createServer(String webapp, Configuration conf, + String[] pathSpecs) throws IOException { + return new HttpServer(webapp, "0.0.0.0", 0, true, conf, pathSpecs); + } /** * Create and start a server with the test webapp diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestPathFilter.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestPathFilter.java new file mode 100644 index 00000000000..73aebea486f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestPathFilter.java @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.http; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.URL; +import java.net.URLConnection; +import java.util.Set; +import java.util.TreeSet; + +import javax.servlet.Filter; +import javax.servlet.FilterChain; +import javax.servlet.FilterConfig; +import javax.servlet.ServletException; +import javax.servlet.ServletRequest; +import javax.servlet.ServletResponse; +import javax.servlet.http.HttpServletRequest; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.junit.Test; + +public class TestPathFilter extends HttpServerFunctionalTest { + static final Log LOG = LogFactory.getLog(HttpServer.class); + static final Set RECORDS = new TreeSet(); + + /** A very simple filter that records accessed uri's */ + static public class RecordingFilter implements Filter { + private FilterConfig filterConfig = null; + + public void init(FilterConfig filterConfig) { + this.filterConfig = filterConfig; + } + + public void destroy() { + this.filterConfig = null; + } + + public void doFilter(ServletRequest request, ServletResponse response, + FilterChain chain) throws IOException, ServletException { + if (filterConfig == null) + return; + + String uri = ((HttpServletRequest)request).getRequestURI(); + LOG.info("filtering " + uri); + RECORDS.add(uri); + chain.doFilter(request, response); + } + + /** Configuration for RecordingFilter */ + static public class Initializer extends FilterInitializer { + public Initializer() {} + + public void initFilter(FilterContainer container, Configuration conf) { + container.addFilter("recording", RecordingFilter.class.getName(), null); + } + } + } + + + /** access a url, ignoring some IOException such as the page does not exist */ + static void access(String urlstring) throws IOException { + LOG.warn("access " + urlstring); + URL url = new URL(urlstring); + + URLConnection connection = url.openConnection(); + connection.connect(); + + try { + BufferedReader in = new BufferedReader(new InputStreamReader( + connection.getInputStream())); + try { + for(; in.readLine() != null; ); + } finally { + in.close(); + } + } catch(IOException ioe) { + LOG.warn("urlstring=" + urlstring, ioe); + } + } + + @Test + public void testPathSpecFilters() throws Exception { + Configuration conf = new Configuration(); + + //start a http server with CountingFilter + conf.set(HttpServer.FILTER_INITIALIZER_PROPERTY, + RecordingFilter.Initializer.class.getName()); + String[] pathSpecs = { "/path", "/path/*" }; + HttpServer http = createTestServer(conf, pathSpecs); + http.start(); + + final String baseURL = "/path"; + final String baseSlashURL = "/path/"; + final String addedURL = "/path/nodes"; + final String addedSlashURL = "/path/nodes/"; + final String longURL = "/path/nodes/foo/job"; + final String rootURL = "/"; + final String allURL = "/*"; + + final String[] filteredUrls = {baseURL, baseSlashURL, addedURL, + addedSlashURL, longURL}; + final String[] notFilteredUrls = {rootURL, allURL}; + + // access the urls and verify our paths specs got added to the + // filters + final String prefix = "http://localhost:" + http.getPort(); + try { + for(int i = 0; i < filteredUrls.length; i++) { + access(prefix + filteredUrls[i]); + } + for(int i = 0; i < notFilteredUrls.length; i++) { + access(prefix + notFilteredUrls[i]); + } + } finally { + http.stop(); + } + + LOG.info("RECORDS = " + RECORDS); + + //verify records + for(int 
i = 0; i < filteredUrls.length; i++) { + assertTrue(RECORDS.remove(filteredUrls[i])); + } + assertTrue(RECORDS.isEmpty()); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java index f49d4c886ec..7cc6f4d5213 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/TestNetUtils.java @@ -18,13 +18,17 @@ package org.apache.hadoop.net; import org.junit.Test; + import static org.junit.Assert.*; +import java.net.InetAddress; +import java.net.NetworkInterface; import java.net.Socket; import java.net.ConnectException; import java.net.SocketException; import java.net.InetSocketAddress; import java.net.UnknownHostException; +import java.util.Enumeration; import org.apache.hadoop.conf.Configuration; @@ -88,4 +92,32 @@ public class TestNetUtils { fail("NetUtils.verifyHostnames threw unexpected UnknownHostException"); } } + + /** + * Test for {@link NetUtils#isLocalAddress(java.net.InetAddress)} + */ + @Test + public void testIsLocalAddress() throws Exception { + // Test - local host is local address + assertTrue(NetUtils.isLocalAddress(InetAddress.getLocalHost())); + + // Test - all addresses bound network interface is local address + Enumeration interfaces = NetworkInterface + .getNetworkInterfaces(); + if (interfaces != null) { // Iterate through all network interfaces + while (interfaces.hasMoreElements()) { + NetworkInterface i = interfaces.nextElement(); + Enumeration addrs = i.getInetAddresses(); + if (addrs == null) { + continue; + } + // Iterate through all the addresses of a network interface + while (addrs.hasMoreElements()) { + InetAddress addr = addrs.nextElement(); + assertTrue(NetUtils.isLocalAddress(addr)); + } + } + } + assertFalse(NetUtils.isLocalAddress(InetAddress.getByName("8.8.8.8"))); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestAuthenticationFilter.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestAuthenticationFilter.java index 7a21e4c6b87..2d699ddcf1f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestAuthenticationFilter.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/TestAuthenticationFilter.java @@ -25,14 +25,28 @@ import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; +import java.io.File; +import java.io.FileWriter; +import java.io.Writer; import java.util.Map; public class TestAuthenticationFilter extends TestCase { @SuppressWarnings("unchecked") - public void testConfiguration() { + public void testConfiguration() throws Exception { Configuration conf = new Configuration(); conf.set("hadoop.http.authentication.foo", "bar"); + + File testDir = new File(System.getProperty("test.build.data", + "target/test-dir")); + testDir.mkdirs(); + File secretFile = new File(testDir, "http-secret.txt"); + Writer writer = new FileWriter(new File(testDir, "http-secret.txt")); + writer.write("hadoop"); + writer.close(); + conf.set(AuthenticationFilterInitializer.PREFIX + + AuthenticationFilterInitializer.SIGNATURE_SECRET_FILE, + secretFile.getAbsolutePath()); FilterContainer container = Mockito.mock(FilterContainer.class); Mockito.doAnswer( diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 459d2325d20..43c360fcb0c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -16,6 +16,9 @@ Trunk (unreleased changes) HDFS-2318. Provide authentication to webhdfs using SPNEGO and delegation tokens. (szetszwo) + HDFS-2340. Support getFileBlockLocations and getDelegationToken in webhdfs. + (szetszwo) + IMPROVEMENTS HADOOP-7524 Change RPC to allow multiple protocols including multuple versions of the same protocol (sanjay Radia) @@ -35,6 +38,18 @@ Trunk (unreleased changes) not use ArrayWritable for writing non-array items. (Uma Maheswara Rao G via szetszwo) + HDFS-2351 Change Namenode and Datanode to register each of their protocols + seperately. (Sanjay Radia) + + HDFS-2356. Support case insensitive query parameter names in webhdfs. + (szetszwo) + + HDFS-2368. Move SPNEGO conf properties from hdfs-default.xml to + hdfs-site.xml. (szetszwo) + + HDFS-2355. Federation: enable using the same configuration file across + all the nodes in the cluster. (suresh) + BUG FIXES HDFS-2287. TestParallelRead has a small off-by-one bug. (todd) @@ -57,6 +72,17 @@ Trunk (unreleased changes) IOExceptions of stream closures can mask root exceptions. (Uma Maheswara Rao G via szetszwo) + HDFS-46. Change default namespace quota of root directory from + Integer.MAX_VALUE to Long.MAX_VALUE. (Uma Maheswara Rao G via szetszwo) + + HDFS-2366. Initialize WebHdfsFileSystem.ugi in object construction. + (szetszwo) + + HDFS-2373. Commands using webhdfs and hftp print unnecessary debug + info on the console with security enabled. (Arpit Gupta via suresh) + + HDFS-2361. hftp is broken, fixed username checks in JspHelper. (jitendra) + Release 0.23.0 - Unreleased INCOMPATIBLE CHANGES @@ -739,6 +765,12 @@ Release 0.23.0 - Unreleased HDFS-1217. Change some NameNode methods from public to package private. (Laxman via szetszwo) + HDFS-2332. Add test for HADOOP-7629 (using an immutable FsPermission + object as an RPC parameter fails). (todd) + + HDFS-2363. Move datanodes size printing from FSNamesystem.metasave(..) + to BlockManager. (Uma Maheswara Rao G via szetszwo) + OPTIMIZATIONS HDFS-1458. Improve checkpoint performance by avoiding unnecessary image @@ -1607,7 +1639,11 @@ Release 0.22.0 - Unreleased HDFS-2232. Generalize regular expressions in TestHDFSCLI. (Plamen Jeliazkov via shv) + HDFS-2290. Block with corrupt replica is not getting replicated. + (Benoy Antony via shv) + Release 0.21.1 - Unreleased + HDFS-1466. TestFcHdfsSymlink relies on /tmp/test not existing. (eli) HDFS-874. 
TestHDFSFileContextMainOperations fails on weirdly diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index fea81f3d04e..6ae4a13952a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -38,6 +38,7 @@ import java.util.Random; import java.util.StringTokenizer; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; @@ -577,17 +578,6 @@ public class DFSUtil { } } - /** - * Returns the configured nameservice Id - * - * @param conf - * Configuration object to lookup the nameserviceId - * @return nameserviceId string from conf - */ - public static String getNameServiceId(Configuration conf) { - return conf.get(DFS_FEDERATION_NAMESERVICE_ID); - } - /** Return used as percentage of capacity */ public static float getPercentUsed(long used, long capacity) { return capacity <= 0 ? 100 : ((float)used * 100.0f)/(float)capacity; @@ -707,4 +697,77 @@ public class DFSUtil { // TODO:HA configuration changes pending return false; } + + /** + * Get name service Id for the {@link NameNode} based on namenode RPC address + * matching the local node address. + */ + public static String getNamenodeNameServiceId(Configuration conf) { + return getNameServiceId(conf, DFS_NAMENODE_RPC_ADDRESS_KEY); + } + + /** + * Get name service Id for the BackupNode based on backup node RPC address + * matching the local node address. + */ + public static String getBackupNameServiceId(Configuration conf) { + return getNameServiceId(conf, DFS_NAMENODE_BACKUP_ADDRESS_KEY); + } + + /** + * Get name service Id for the secondary node based on secondary http address + * matching the local node address. + */ + public static String getSecondaryNameServiceId(Configuration conf) { + return getNameServiceId(conf, DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY); + } + + /** + * Get the nameservice Id by matching the {@code addressKey} with the + * the address of the local node. + * + * If {@link DFSConfigKeys#DFS_FEDERATION_NAMESERVICE_ID} is not specifically + * configured, this method determines the nameservice Id by matching the local + * nodes address with the configured addresses. When a match is found, it + * returns the nameservice Id from the corresponding configuration key. + * + * @param conf Configuration + * @param addressKey configuration key to get the address. + * @return name service Id on success, null on failure. 
+ * @throws HadoopIllegalArgumentException on error + */ + private static String getNameServiceId(Configuration conf, String addressKey) { + String nameserviceId = conf.get(DFS_FEDERATION_NAMESERVICE_ID); + if (nameserviceId != null) { + return nameserviceId; + } + + Collection ids = getNameServiceIds(conf); + if (ids == null || ids.size() == 0) { + // Not federation configuration, hence no nameservice Id + return null; + } + + // Match the rpc address with that of local address + int found = 0; + for (String id : ids) { + String addr = conf.get(getNameServiceIdKey(addressKey, id)); + InetSocketAddress s = NetUtils.createSocketAddr(addr); + if (NetUtils.isLocalAddress(s.getAddress())) { + nameserviceId = id; + found++; + } + } + if (found > 1) { // Only one address must match the local address + throw new HadoopIllegalArgumentException( + "Configuration has multiple RPC addresses that matches " + + "the local node's address. Please configure the system with " + + "the parameter " + DFS_FEDERATION_NAMESERVICE_ID); + } + if (found == 0) { + throw new HadoopIllegalArgumentException("Configuration address " + + addressKey + " is missing in configuration with name service Id"); + } + return nameserviceId; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java index 17a09f695ec..af3283ee718 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java @@ -115,6 +115,26 @@ public class DatanodeInfo extends DatanodeID implements Node { this.location = location; this.hostName = hostName; } + + /** Constructor */ + public DatanodeInfo(final String name, final String storageID, + final int infoPort, final int ipcPort, + final long capacity, final long dfsUsed, final long remaining, + final long blockPoolUsed, final long lastUpdate, final int xceiverCount, + final String networkLocation, final String hostName, + final AdminStates adminState) { + super(name, storageID, infoPort, ipcPort); + + this.capacity = capacity; + this.dfsUsed = dfsUsed; + this.remaining = remaining; + this.blockPoolUsed = blockPoolUsed; + this.lastUpdate = lastUpdate; + this.xceiverCount = xceiverCount; + this.location = networkLocation; + this.hostName = hostName; + this.adminState = adminState; + } /** The raw capacity. */ public long getCapacity() { return capacity; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 682d272922b..994275aec06 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -308,6 +308,11 @@ public class BlockManager { /** Dump meta data to out. 
*/ public void metaSave(PrintWriter out) { assert namesystem.hasWriteLock(); + final List live = new ArrayList(); + final List dead = new ArrayList(); + datanodeManager.fetchDatanodes(live, dead, false); + out.println("Live Datanodes: " + live.size()); + out.println("Dead Datanodes: " + dead.size()); // // Dump contents of neededReplication // @@ -842,7 +847,7 @@ public class BlockManager { // Add this replica to corruptReplicas Map corruptReplicas.addToCorruptReplicasMap(storedBlock, node); - if (countNodes(storedBlock).liveReplicas() > inode.getReplication()) { + if (countNodes(storedBlock).liveReplicas() >= inode.getReplication()) { // the block is over-replicated so invalidate the replicas immediately invalidateBlock(storedBlock, node); } else if (namesystem.isPopulatingReplQueues()) { @@ -867,7 +872,7 @@ public class BlockManager { // Check how many copies we have of the block. If we have at least one // copy on a live node, then we can delete it. int count = countNodes(blk).liveReplicas(); - if (count > 1) { + if (count >= 1) { addToInvalidates(blk, dn); removeStoredBlock(blk, node); if(NameNode.stateChangeLog.isDebugEnabled()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java index e2ce26df6b9..67f67c03958 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java @@ -54,11 +54,13 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.namenode.NameNodeHttpServer; +import org.apache.hadoop.hdfs.web.resources.DelegationParam; import org.apache.hadoop.hdfs.web.resources.UserParam; import org.apache.hadoop.http.HtmlQuoting; import org.apache.hadoop.io.Text; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.authentication.util.KerberosName; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.token.Token; @@ -68,7 +70,7 @@ import org.apache.hadoop.util.VersionInfo; public class JspHelper { public static final String CURRENT_CONF = "current.conf"; final static public String WEB_UGI_PROPERTY_NAME = DFSConfigKeys.DFS_WEB_UGI_KEY; - public static final String DELEGATION_PARAMETER_NAME = "delegation"; + public static final String DELEGATION_PARAMETER_NAME = DelegationParam.NAME; public static final String NAMENODE_ADDRESS = "nnaddr"; static final String SET_DELEGATION = "&" + DELEGATION_PARAMETER_NAME + "="; @@ -551,7 +553,8 @@ public class JspHelper { DelegationTokenIdentifier id = new DelegationTokenIdentifier(); id.readFields(in); ugi = id.getUser(); - checkUsername(ugi.getUserName(), user); + checkUsername(ugi.getShortUserName(), usernameFromQuery); + checkUsername(ugi.getShortUserName(), user); ugi.addToken(token); ugi.setAuthenticationMethod(AuthenticationMethod.TOKEN); } else { @@ -560,13 +563,11 @@ public class JspHelper { "authenticated by filter"); } ugi = UserGroupInformation.createRemoteUser(user); + checkUsername(ugi.getShortUserName(), usernameFromQuery); // This 
is not necessarily true, could have been auth'ed by user-facing // filter ugi.setAuthenticationMethod(secureAuthMethod); } - - checkUsername(user, usernameFromQuery); - } else { // Security's not on, pull from url ugi = usernameFromQuery == null? getDefaultWebUser(conf) // not specified in request @@ -579,10 +580,18 @@ public class JspHelper { return ugi; } + /** + * Expected user name should be a short name. + */ private static void checkUsername(final String expected, final String name ) throws IOException { - if (name != null && !name.equals(expected)) { - throw new IOException("Usernames not matched: name=" + name + if (name == null) { + return; + } + KerberosName u = new KerberosName(name); + String shortName = u.getShortName(); + if (!shortName.equals(expected)) { + throw new IOException("Usernames not matched: name=" + shortName + " != expected=" + expected); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 73fbe50e535..b6ec0c05b4d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -425,7 +425,7 @@ public class DataNode extends Configured private List plugins; // For InterDataNodeProtocol - public Server ipcServer; + public RPC.Server ipcServer; private SecureResources secureResources = null; private AbstractList dataDirs; @@ -575,11 +575,15 @@ public class DataNode extends Configured private void initIpcServer(Configuration conf) throws IOException { InetSocketAddress ipcAddr = NetUtils.createSocketAddr( conf.get("dfs.datanode.ipc.address")); - ipcServer = RPC.getServer(DataNode.class, this, ipcAddr.getHostName(), + + // Add all the RPC protocols that the Datanode implements + ipcServer = RPC.getServer(ClientDatanodeProtocol.class, this, ipcAddr.getHostName(), ipcAddr.getPort(), conf.getInt(DFS_DATANODE_HANDLER_COUNT_KEY, DFS_DATANODE_HANDLER_COUNT_DEFAULT), false, conf, blockPoolTokenSecretManager); + ipcServer.addProtocol(InterDatanodeProtocol.class, this); + // set service-level authorization security policy if (conf.getBoolean( CommonConfigurationKeys.HADOOP_SECURITY_AUTHORIZATION, false)) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java index 4c5c61aac7c..0305024e4f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSClient.DFSDataInputStream; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.web.ParamFilter; import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.hdfs.web.resources.BlockSizeParam; import org.apache.hadoop.hdfs.web.resources.BufferSizeParam; @@ -66,8 +67,11 @@ import org.apache.hadoop.hdfs.web.resources.UriFsPathParam; import org.apache.hadoop.io.IOUtils; import 
org.apache.hadoop.security.UserGroupInformation; +import com.sun.jersey.spi.container.ResourceFilters; + /** Web-hdfs DataNode implementation. */ @Path("") +@ResourceFilters(ParamFilter.class) public class DatanodeWebHdfsMethods { public static final Log LOG = LogFactory.getLog(DatanodeWebHdfsMethods.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index d8f68a0aaab..8a736572c0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -25,6 +25,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; @@ -372,4 +373,9 @@ public class BackupNode extends NameNode { throw new UnsupportedActionException(msg); } } + + @Override + protected String getNameServiceId(Configuration conf) { + return DFSUtil.getBackupNameServiceId(conf); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 4d7f2b9ca6e..654c3a231d4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -120,7 +120,7 @@ public class FSDirectory implements Closeable { this.cond = dirLock.writeLock().newCondition(); rootDir = new INodeDirectoryWithQuota(INodeDirectory.ROOT_NAME, ns.createFsOwnerPermissions(new FsPermission((short)0755)), - Integer.MAX_VALUE, UNKNOWN_DISK_SPACE); + Long.MAX_VALUE, UNKNOWN_DISK_SPACE); this.fsImage = fsImage; int configuredLimit = conf.getInt( DFSConfigKeys.DFS_LIST_LIMIT, DFSConfigKeys.DFS_LIST_LIMIT_DEFAULT); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 116fa4826ac..4851796cea4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -564,11 +564,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, out.println(totalInodes + " files and directories, " + totalBlocks + " blocks = " + (totalInodes + totalBlocks) + " total"); - final List live = new ArrayList(); - final List dead = new ArrayList(); - blockManager.getDatanodeManager().fetchDatanodes(live, dead, false); - out.println("Live Datanodes: "+live.size()); - out.println("Dead Datanodes: "+dead.size()); blockManager.metaSave(out); out.flush(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 6db96924c24..0efa268e313 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -27,6 +27,7 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HealthCheckFailedException; @@ -380,7 +381,6 @@ public class NameNode { * @param conf the configuration */ protected void initialize(Configuration conf) throws IOException { - initializeGenericKeys(conf); UserGroupInformation.setConfiguration(conf); loginAsNameNodeUser(conf); @@ -513,10 +513,14 @@ public class NameNode { this.haEnabled = DFSUtil.isHAEnabled(conf); this.state = !haEnabled ? ACTIVE_STATE : STANDBY_STATE; try { + initializeGenericKeys(conf, getNameServiceId(conf)); initialize(conf); } catch (IOException e) { this.stop(); throw e; + } catch (HadoopIllegalArgumentException e) { + this.stop(); + throw e; } } @@ -821,16 +825,16 @@ public class NameNode { * @param conf * Configuration object to lookup specific key and to set the value * to the key passed. Note the conf object is modified + * @param nameserviceId name service Id * @see DFSUtil#setGenericConf(Configuration, String, String...) */ - public static void initializeGenericKeys(Configuration conf) { - final String nameserviceId = DFSUtil.getNameServiceId(conf); + public static void initializeGenericKeys(Configuration conf, String + nameserviceId) { if ((nameserviceId == null) || nameserviceId.isEmpty()) { return; } DFSUtil.setGenericConf(conf, nameserviceId, NAMESERVICE_SPECIFIC_KEYS); - if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) { URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY)); @@ -838,6 +842,14 @@ public class NameNode { } } + /** + * Get the name service Id for the node + * @return name service Id or null if federation is not configured + */ + protected String getNameServiceId(Configuration conf) { + return DFSUtil.getNamenodeNameServiceId(conf); + } + /** */ public static void main(String argv[]) throws Exception { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 7fdf3e60d39..3e4eaf5f9dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -66,6 +66,7 @@ import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; +import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; @@ -145,10 +146,17 @@ 
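[editor's note] initializeGenericKeys() now takes the nameservice id explicitly and copies per-nameservice keys onto their generic counterparts. A rough, self-contained illustration of that promotion with a plain Configuration follows; the nameservice id "ns1" and the address are invented, and the real code delegates to DFSUtil.setGenericConf over the whole NAMESERVICE_SPECIFIC_KEYS list:

    import org.apache.hadoop.conf.Configuration;

    public class GenericKeyPromotion {
      public static void main(String[] args) {
        final Configuration conf = new Configuration(false);
        final String nameserviceId = "ns1";                       // hypothetical
        conf.set("dfs.namenode.rpc-address." + nameserviceId, "nn1.example.com:8020");

        // Promote the suffixed key to the generic key, as setGenericConf does.
        final String suffixed = conf.get("dfs.namenode.rpc-address." + nameserviceId);
        if (suffixed != null) {
          conf.set("dfs.namenode.rpc-address", suffixed);
        }
        System.out.println(conf.get("dfs.namenode.rpc-address")); // nn1.example.com:8020
      }
    }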
class NameNodeRpcServer implements NamenodeProtocols { serviceRpcServer = null; serviceRPCAddress = null; } - this.server = RPC.getServer(NamenodeProtocols.class, this, + // Add all the RPC protocols that the namenode implements + this.server = RPC.getServer(ClientProtocol.class, this, socAddr.getHostName(), socAddr.getPort(), handlerCount, false, conf, namesystem.getDelegationTokenSecretManager()); + this.server.addProtocol(DatanodeProtocol.class, this); + this.server.addProtocol(NamenodeProtocol.class, this); + this.server.addProtocol(RefreshAuthorizationPolicyProtocol.class, this); + this.server.addProtocol(RefreshUserMappingsProtocol.class, this); + this.server.addProtocol(GetUserMappingsProtocol.class, this); + // set service-level authorization security policy if (serviceAuthEnabled = @@ -971,8 +979,11 @@ class NameNodeRpcServer implements NamenodeProtocols { } private static String getClientMachine() { - String clientMachine = Server.getRemoteAddress(); - if (clientMachine == null) { + String clientMachine = NamenodeWebHdfsMethods.getRemoteAddress(); + if (clientMachine == null) { //not a web client + clientMachine = Server.getRemoteAddress(); + } + if (clientMachine == null) { //not a RPC client clientMachine = ""; } return clientMachine; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index 9c5ef6f2c36..d403629146f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -38,10 +38,12 @@ import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; + import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DFSUtil.ErrorSimulator; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -173,12 +175,17 @@ public class SecondaryNameNode implements Runnable { public SecondaryNameNode(Configuration conf, CommandLineOpts commandLineOpts) throws IOException { try { - NameNode.initializeGenericKeys(conf); + NameNode.initializeGenericKeys(conf, + DFSUtil.getSecondaryNameServiceId(conf)); initialize(conf, commandLineOpts); } catch(IOException e) { shutdown(); LOG.fatal("Failed to start secondary namenode. ", e); throw e; + } catch(HadoopIllegalArgumentException e) { + shutdown(); + LOG.fatal("Failed to start secondary namenode. 
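[editor's note] For reference, the patched getClientMachine() amounts to the fallback chain below: prefer the address recorded by the webhdfs handlers, then the RPC caller's address, then the empty string. This sketch only restates the method outside NameNodeRpcServer; the wrapper class name is ours:

    import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods;
    import org.apache.hadoop.ipc.Server;

    public class ClientMachine {
      static String getClientMachine() {
        String clientMachine = NamenodeWebHdfsMethods.getRemoteAddress();
        if (clientMachine == null) {     // not a web client
          clientMachine = Server.getRemoteAddress();
        }
        if (clientMachine == null) {     // not an RPC client either
          clientMachine = "";
        }
        return clientMachine;
      }
    }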
", e); + throw e; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index 948466f638f..2dd1db33410 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -57,6 +57,7 @@ import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.web.JsonUtil; +import org.apache.hadoop.hdfs.web.ParamFilter; import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.hdfs.web.resources.AccessTimeParam; import org.apache.hadoop.hdfs.web.resources.BlockSizeParam; @@ -78,6 +79,7 @@ import org.apache.hadoop.hdfs.web.resources.PostOpParam; import org.apache.hadoop.hdfs.web.resources.PutOpParam; import org.apache.hadoop.hdfs.web.resources.RecursiveParam; import org.apache.hadoop.hdfs.web.resources.RenameOptionSetParam; +import org.apache.hadoop.hdfs.web.resources.RenewerParam; import org.apache.hadoop.hdfs.web.resources.ReplicationParam; import org.apache.hadoop.hdfs.web.resources.UriFsPathParam; import org.apache.hadoop.hdfs.web.resources.UserParam; @@ -89,10 +91,20 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import com.sun.jersey.spi.container.ResourceFilters; + /** Web-hdfs NameNode implementation. */ @Path("") +@ResourceFilters(ParamFilter.class) public class NamenodeWebHdfsMethods { - private static final Log LOG = LogFactory.getLog(NamenodeWebHdfsMethods.class); + public static final Log LOG = LogFactory.getLog(NamenodeWebHdfsMethods.class); + + private static final ThreadLocal REMOTE_ADDRESS = new ThreadLocal(); + + /** @return the remote client address. 
*/ + public static String getRemoteAddress() { + return REMOTE_ADDRESS.get(); + } private @Context ServletContext context; private @Context HttpServletRequest request; @@ -215,6 +227,8 @@ public class NamenodeWebHdfsMethods { return ugi.doAs(new PrivilegedExceptionAction() { @Override public Response run() throws IOException, URISyntaxException { + REMOTE_ADDRESS.set(request.getRemoteAddr()); + try { final String fullpath = path.getAbsolutePath(); final NameNode namenode = (NameNode)context.getAttribute("name.node"); @@ -272,6 +286,10 @@ public class NamenodeWebHdfsMethods { default: throw new UnsupportedOperationException(op + " is not supported"); } + + } finally { + REMOTE_ADDRESS.set(null); + } } }); } @@ -301,6 +319,8 @@ public class NamenodeWebHdfsMethods { return ugi.doAs(new PrivilegedExceptionAction() { @Override public Response run() throws IOException, URISyntaxException { + REMOTE_ADDRESS.set(request.getRemoteAddr()); + try { final String fullpath = path.getAbsolutePath(); final NameNode namenode = (NameNode)context.getAttribute("name.node"); @@ -315,6 +335,10 @@ public class NamenodeWebHdfsMethods { default: throw new UnsupportedOperationException(op + " is not supported"); } + + } finally { + REMOTE_ADDRESS.set(null); + } } }); } @@ -335,10 +359,12 @@ public class NamenodeWebHdfsMethods { final OffsetParam offset, @QueryParam(LengthParam.NAME) @DefaultValue(LengthParam.DEFAULT) final LengthParam length, + @QueryParam(RenewerParam.NAME) @DefaultValue(RenewerParam.DEFAULT) + final RenewerParam renewer, @QueryParam(BufferSizeParam.NAME) @DefaultValue(BufferSizeParam.DEFAULT) final BufferSizeParam bufferSize ) throws IOException, URISyntaxException, InterruptedException { - return get(ugi, delegation, ROOT, op, offset, length, bufferSize); + return get(ugi, delegation, ROOT, op, offset, length, renewer, bufferSize); } /** Handle HTTP GET request. */ @@ -356,19 +382,23 @@ public class NamenodeWebHdfsMethods { final OffsetParam offset, @QueryParam(LengthParam.NAME) @DefaultValue(LengthParam.DEFAULT) final LengthParam length, + @QueryParam(RenewerParam.NAME) @DefaultValue(RenewerParam.DEFAULT) + final RenewerParam renewer, @QueryParam(BufferSizeParam.NAME) @DefaultValue(BufferSizeParam.DEFAULT) final BufferSizeParam bufferSize ) throws IOException, URISyntaxException, InterruptedException { if (LOG.isTraceEnabled()) { LOG.trace(op + ": " + path + ", ugi=" + ugi - + Param.toSortedString(", ", offset, length, bufferSize)); + + Param.toSortedString(", ", offset, length, renewer, bufferSize)); } return ugi.doAs(new PrivilegedExceptionAction() { @Override public Response run() throws IOException, URISyntaxException { + REMOTE_ADDRESS.set(request.getRemoteAddr()); + try { final NameNode namenode = (NameNode)context.getAttribute("name.node"); final String fullpath = path.getAbsolutePath(); @@ -381,6 +411,15 @@ public class NamenodeWebHdfsMethods { op.getValue(), offset.getValue(), offset, length, bufferSize); return Response.temporaryRedirect(uri).build(); } + case GETFILEBLOCKLOCATIONS: + { + final long offsetValue = offset.getValue(); + final Long lengthValue = length.getValue(); + final LocatedBlocks locatedblocks = np.getBlockLocations(fullpath, + offsetValue, lengthValue != null? 
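[editor's note] Each webhdfs handler above wraps its body in REMOTE_ADDRESS.set(...) / try / finally so the caller's address is visible further down the call stack and is always cleared before the pooled servlet thread is reused. A plain-Java analogue of that pattern (class and method names are illustrative only):

    public class RemoteAddressContext {
      private static final ThreadLocal<String> REMOTE_ADDRESS = new ThreadLocal<String>();

      /** @return the address recorded for the current request, or null. */
      public static String getRemoteAddress() {
        return REMOTE_ADDRESS.get();
      }

      /** Simulates a request handler: record the caller, do work, always clear. */
      static String handle(final String remoteAddr) {
        REMOTE_ADDRESS.set(remoteAddr);
        try {
          return "handled request from " + getRemoteAddress();
        } finally {
          REMOTE_ADDRESS.set(null);      // threads are pooled; never leak the value
        }
      }

      public static void main(String[] args) {
        System.out.println(handle("203.0.113.7"));   // made-up client address
        System.out.println(getRemoteAddress());      // null once the handler returns
      }
    }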
lengthValue: offsetValue + 1); + final String js = JsonUtil.toJsonString(locatedblocks); + return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); + } case GETFILESTATUS: { final HdfsFileStatus status = np.getFileInfo(fullpath); @@ -392,9 +431,20 @@ public class NamenodeWebHdfsMethods { final StreamingOutput streaming = getListingStream(np, fullpath); return Response.ok(streaming).type(MediaType.APPLICATION_JSON).build(); } + case GETDELEGATIONTOKEN: + { + final Token token = generateDelegationToken( + namenode, ugi, renewer.getValue()); + final String js = JsonUtil.toJsonString(token); + return Response.ok(js).type(MediaType.APPLICATION_JSON).build(); + } default: throw new UnsupportedOperationException(op + " is not supported"); } + + } finally { + REMOTE_ADDRESS.set(null); + } } }); } @@ -462,6 +512,9 @@ public class NamenodeWebHdfsMethods { return ugi.doAs(new PrivilegedExceptionAction() { @Override public Response run() throws IOException { + REMOTE_ADDRESS.set(request.getRemoteAddr()); + try { + final NameNode namenode = (NameNode)context.getAttribute("name.node"); final String fullpath = path.getAbsolutePath(); @@ -475,6 +528,10 @@ public class NamenodeWebHdfsMethods { default: throw new UnsupportedOperationException(op + " is not supported"); } + + } finally { + REMOTE_ADDRESS.set(null); + } } }); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java index d085534e110..1e853933433 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DelegationTokenFetcher.java @@ -149,7 +149,9 @@ public class DelegationTokenFetcher { DataInputStream in = new DataInputStream( new ByteArrayInputStream(token.getIdentifier())); id.readFields(in); - System.out.println("Token (" + id + ") for " + token.getService()); + if(LOG.isDebugEnabled()) { + LOG.debug("Token (" + id + ") for " + token.getService()); + } } return null; } @@ -160,22 +162,28 @@ public class DelegationTokenFetcher { for (Token token : readTokens(tokenFile, conf)) { result = renewDelegationToken(webUrl, (Token) token); - System.out.println("Renewed token via " + webUrl + " for " - + token.getService() + " until: " + new Date(result)); + if(LOG.isDebugEnabled()) { + LOG.debug("Renewed token via " + webUrl + " for " + + token.getService() + " until: " + new Date(result)); + } } } else if (cancel) { for (Token token : readTokens(tokenFile, conf)) { cancelDelegationToken(webUrl, (Token) token); - System.out.println("Cancelled token via " + webUrl + " for " - + token.getService()); + if(LOG.isDebugEnabled()) { + LOG.debug("Cancelled token via " + webUrl + " for " + + token.getService()); + } } } else { Credentials creds = getDTfromRemote(webUrl, renewer); creds.writeTokenStorageFile(tokenFile, conf); for (Token token : creds.getAllTokens()) { - System.out.println("Fetched token via " + webUrl + " for " - + token.getService() + " into " + tokenFile); + if(LOG.isDebugEnabled()) { + LOG.debug("Fetched token via " + webUrl + " for " + + token.getService() + " into " + tokenFile); + } } } } else { @@ -184,24 +192,30 @@ public class DelegationTokenFetcher { for (Token token : readTokens(tokenFile, conf)) { ((DistributedFileSystem) fs) .cancelDelegationToken((Token) token); - System.out.println("Cancelled token for " - + 
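[editor's note] The DelegationTokenFetcher changes around this point replace System.out.println with LOG.debug guarded by LOG.isDebugEnabled(), so the message (and its string concatenation) is only built when debug logging is on. A minimal standalone example of the same commons-logging idiom; the logged value is invented:

    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;

    public class GuardedDebug {
      private static final Log LOG = LogFactory.getLog(GuardedDebug.class);

      public static void main(String[] args) {
        final String service = "127.0.0.1:8020";     // illustrative token service
        if (LOG.isDebugEnabled()) {
          // Concatenation happens only when this branch is taken.
          LOG.debug("Fetched token for " + service);
        }
      }
    }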
token.getService()); + if(LOG.isDebugEnabled()) { + LOG.debug("Cancelled token for " + + token.getService()); + } } } else if (renew) { long result; for (Token token : readTokens(tokenFile, conf)) { result = ((DistributedFileSystem) fs) .renewDelegationToken((Token) token); - System.out.println("Renewed token for " + token.getService() - + " until: " + new Date(result)); + if(LOG.isDebugEnabled()) { + LOG.debug("Renewed token for " + token.getService() + + " until: " + new Date(result)); + } } } else { Token token = fs.getDelegationToken(renewer); Credentials cred = new Credentials(); cred.addToken(token.getService(), token); cred.writeTokenStorageFile(tokenFile, conf); - System.out.println("Fetched token for " + token.getService() - + " into " + tokenFile); + if(LOG.isDebugEnabled()) { + LOG.debug("Fetched token for " + token.getService() + + " into " + tokenFile); + } } } return null; @@ -221,6 +235,11 @@ public class DelegationTokenFetcher { } else { url.append(nnAddr).append(GetDelegationTokenServlet.PATH_SPEC); } + + if(LOG.isDebugEnabled()) { + LOG.debug("Retrieving token from: " + url); + } + URL remoteURL = new URL(url.toString()); SecurityUtil.fetchServiceTicket(remoteURL); URLConnection connection = remoteURL.openConnection(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java index 1c18dc334e2..314d53b38f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java @@ -17,19 +17,31 @@ */ package org.apache.hadoop.hdfs.web; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.TreeMap; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; import org.mortbay.util.ajax.JSON; /** JSON Utilities */ public class JsonUtil { - private static final ThreadLocal> jsonMap - = new ThreadLocal>() { + private static class ThreadLocalMap extends ThreadLocal> { @Override protected Map initialValue() { return new TreeMap(); @@ -41,7 +53,54 @@ public class JsonUtil { m.clear(); return m; } - }; + } + + private static final ThreadLocalMap jsonMap = new ThreadLocalMap(); + private static final ThreadLocalMap tokenMap = new ThreadLocalMap(); + private static final ThreadLocalMap datanodeInfoMap = new ThreadLocalMap(); + private static final ThreadLocalMap extendedBlockMap = new ThreadLocalMap(); + private static final ThreadLocalMap locatedBlockMap = new ThreadLocalMap(); + + private static final DatanodeInfo[] EMPTY_DATANODE_INFO_ARRAY = {}; + + /** Convert a token object to a Json string. 
*/ + public static String toJsonString(final Token token + ) throws IOException { + if (token == null) { + return null; + } + + final Map m = tokenMap.get(); + m.put("urlString", token.encodeToUrlString()); + return JSON.toString(m); + } + + /** Convert a Json map to a Token. */ + public static Token toToken( + final Map m) throws IOException { + if (m == null) { + return null; + } + + final Token token + = new Token(); + token.decodeFromUrlString((String)m.get("urlString")); + return token; + } + + /** Convert a Json map to a Token of DelegationTokenIdentifier. */ + @SuppressWarnings("unchecked") + public static Token toDelegationToken( + final Map m) throws IOException { + return (Token)toToken(m); + } + + /** Convert a Json map to a Token of BlockTokenIdentifier. */ + @SuppressWarnings("unchecked") + public static Token toBlockToken( + final Map m) throws IOException { + return (Token)toToken(m); + } /** Convert an exception object to a Json string. */ public static String toJsonString(final Exception e) { @@ -77,11 +136,10 @@ public class JsonUtil { /** Convert a HdfsFileStatus object to a Json string. */ public static String toJsonString(final HdfsFileStatus status) { - final Map m = jsonMap.get(); if (status == null) { - m.put("isNull", true); + return null; } else { - m.put("isNull", false); + final Map m = jsonMap.get(); m.put("localName", status.getLocalName()); m.put("isDir", status.isDir()); m.put("isSymlink", status.isSymlink()); @@ -97,8 +155,8 @@ public class JsonUtil { m.put("modificationTime", status.getModificationTime()); m.put("blockSize", status.getBlockSize()); m.put("replication", status.getReplication()); + return JSON.toString(m); } - return JSON.toString(m); } @SuppressWarnings("unchecked") @@ -106,9 +164,9 @@ public class JsonUtil { return (Map) JSON.parse(jsonString); } - /** Convert a Json string to a HdfsFileStatus object. */ + /** Convert a Json map to a HdfsFileStatus object. */ public static HdfsFileStatus toFileStatus(final Map m) { - if ((Boolean)m.get("isNull")) { + if (m == null) { return null; } @@ -130,4 +188,214 @@ public class JsonUtil { permission, owner, group, symlink, DFSUtil.string2Bytes(localName)); } + + /** Convert a LocatedBlock to a Json string. */ + public static String toJsonString(final ExtendedBlock extendedblock) { + if (extendedblock == null) { + return null; + } + + final Map m = extendedBlockMap.get(); + m.put("blockPoolId", extendedblock.getBlockPoolId()); + m.put("blockId", extendedblock.getBlockId()); + m.put("numBytes", extendedblock.getNumBytes()); + m.put("generationStamp", extendedblock.getGenerationStamp()); + return JSON.toString(m); + } + + /** Convert a Json map to an ExtendedBlock object. */ + public static ExtendedBlock toExtendedBlock(final Map m) { + if (m == null) { + return null; + } + + final String blockPoolId = (String)m.get("blockPoolId"); + final long blockId = (Long)m.get("blockId"); + final long numBytes = (Long)m.get("numBytes"); + final long generationStamp = (Long)m.get("generationStamp"); + return new ExtendedBlock(blockPoolId, blockId, numBytes, generationStamp); + } + + /** Convert a DatanodeInfo to a Json string. 
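[editor's note] The token helpers just added serialize a Token by storing its URL-safe string form under a single "urlString" key. The sketch below restates that round trip in isolation (the class name and the empty demo token are ours); it relies on Token#encodeToUrlString/decodeFromUrlString and the same Jetty JSON utility JsonUtil already uses:

    import java.io.IOException;
    import java.util.Map;
    import java.util.TreeMap;

    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
    import org.apache.hadoop.security.token.Token;
    import org.apache.hadoop.security.token.TokenIdentifier;
    import org.mortbay.util.ajax.JSON;

    public class TokenJsonRoundTrip {
      static String toJsonString(final Token<? extends TokenIdentifier> token)
          throws IOException {
        final Map<String, Object> m = new TreeMap<String, Object>();
        m.put("urlString", token.encodeToUrlString());
        return JSON.toString(m);
      }

      static Token<DelegationTokenIdentifier> toDelegationToken(final Map<?, ?> m)
          throws IOException {
        final Token<DelegationTokenIdentifier> token
            = new Token<DelegationTokenIdentifier>();
        token.decodeFromUrlString((String) m.get("urlString"));
        return token;
      }

      public static void main(String[] args) throws IOException {
        final Token<DelegationTokenIdentifier> t = new Token<DelegationTokenIdentifier>();
        final String js = toJsonString(t);            // {"urlString":"..."}
        System.out.println(js);
        System.out.println(toDelegationToken((Map<?, ?>) JSON.parse(js)));
      }
    }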
*/ + public static String toJsonString(final DatanodeInfo datanodeinfo) { + if (datanodeinfo == null) { + return null; + } + + final Map m = datanodeInfoMap.get(); + m.put("name", datanodeinfo.getName()); + m.put("storageID", datanodeinfo.getStorageID()); + m.put("infoPort", datanodeinfo.getInfoPort()); + + m.put("ipcPort", datanodeinfo.getIpcPort()); + + m.put("capacity", datanodeinfo.getCapacity()); + m.put("dfsUsed", datanodeinfo.getDfsUsed()); + m.put("remaining", datanodeinfo.getRemaining()); + m.put("blockPoolUsed", datanodeinfo.getBlockPoolUsed()); + m.put("lastUpdate", datanodeinfo.getLastUpdate()); + m.put("xceiverCount", datanodeinfo.getXceiverCount()); + m.put("networkLocation", datanodeinfo.getNetworkLocation()); + m.put("hostName", datanodeinfo.getHostName()); + m.put("adminState", datanodeinfo.getAdminState().name()); + return JSON.toString(m); + } + + /** Convert a Json map to an DatanodeInfo object. */ + public static DatanodeInfo toDatanodeInfo(final Map m) { + if (m == null) { + return null; + } + + return new DatanodeInfo( + (String)m.get("name"), + (String)m.get("storageID"), + (int)(long)(Long)m.get("infoPort"), + (int)(long)(Long)m.get("ipcPort"), + + (Long)m.get("capacity"), + (Long)m.get("dfsUsed"), + (Long)m.get("remaining"), + (Long)m.get("blockPoolUsed"), + (Long)m.get("lastUpdate"), + (int)(long)(Long)m.get("xceiverCount"), + (String)m.get("networkLocation"), + (String)m.get("hostName"), + AdminStates.valueOf((String)m.get("adminState"))); + } + + /** Convert a DatanodeInfo[] to a Json string. */ + public static String toJsonString(final DatanodeInfo[] array + ) throws IOException { + if (array == null) { + return null; + } else if (array.length == 0) { + return "[]"; + } else { + final StringBuilder b = new StringBuilder().append('[').append( + toJsonString(array[0])); + for(int i = 1; i < array.length; i++) { + b.append(", ").append(toJsonString(array[i])); + } + return b.append(']').toString(); + } + } + + /** Convert an Object[] to a DatanodeInfo[]. */ + public static DatanodeInfo[] toDatanodeInfoArray(final Object[] objects) { + if (objects == null) { + return null; + } else if (objects.length == 0) { + return EMPTY_DATANODE_INFO_ARRAY; + } else { + final DatanodeInfo[] array = new DatanodeInfo[objects.length]; + for(int i = 0; i < array.length; i++) { + array[i] = (DatanodeInfo)toDatanodeInfo((Map) objects[i]); + } + return array; + } + } + + /** Convert a LocatedBlock to a Json string. */ + public static String toJsonString(final LocatedBlock locatedblock + ) throws IOException { + if (locatedblock == null) { + return null; + } + + final Map m = locatedBlockMap.get(); + m.put("blockToken", toJsonString(locatedblock.getBlockToken())); + m.put("isCorrupt", locatedblock.isCorrupt()); + m.put("startOffset", locatedblock.getStartOffset()); + m.put("block", toJsonString(locatedblock.getBlock())); + + m.put("locations", toJsonString(locatedblock.getLocations())); + return JSON.toString(m); + } + + /** Convert a Json map to LocatedBlock. 
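[editor's note] The toDatanodeInfo parser above casts int-valued fields through (int)(long)(Long) because the Jetty JSON parser hands integral numbers back boxed as Long. A tiny demonstration of that detail; the field name and value are made up:

    import java.util.Map;

    import org.mortbay.util.ajax.JSON;

    public class JsonNumberWidths {
      public static void main(String[] args) {
        final Map<?, ?> m = (Map<?, ?>) JSON.parse("{\"infoPort\": 50075}");
        final Object raw = m.get("infoPort");
        System.out.println(raw.getClass().getName()); // java.lang.Long
        final int infoPort = (int) (long) (Long) raw; // unbox, then narrow
        System.out.println(infoPort);                 // 50075
      }
    }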
*/ + public static LocatedBlock toLocatedBlock(final Map m) throws IOException { + if (m == null) { + return null; + } + + final ExtendedBlock b = toExtendedBlock((Map)JSON.parse((String)m.get("block"))); + final DatanodeInfo[] locations = toDatanodeInfoArray( + (Object[])JSON.parse((String)m.get("locations"))); + final long startOffset = (Long)m.get("startOffset"); + final boolean isCorrupt = (Boolean)m.get("isCorrupt"); + + final LocatedBlock locatedblock = new LocatedBlock(b, locations, startOffset, isCorrupt); + locatedblock.setBlockToken(toBlockToken((Map)JSON.parse((String)m.get("blockToken")))); + return locatedblock; + } + + /** Convert a LocatedBlock[] to a Json string. */ + public static String toJsonString(final List array + ) throws IOException { + if (array == null) { + return null; + } else if (array.size() == 0) { + return "[]"; + } else { + final StringBuilder b = new StringBuilder().append('[').append( + toJsonString(array.get(0))); + for(int i = 1; i < array.size(); i++) { + b.append(",\n ").append(toJsonString(array.get(i))); + } + return b.append(']').toString(); + } + } + + /** Convert an Object[] to a List of LocatedBlock. + * @throws IOException */ + public static List toLocatedBlockList(final Object[] objects + ) throws IOException { + if (objects == null) { + return null; + } else if (objects.length == 0) { + return Collections.emptyList(); + } else { + final List list = new ArrayList(objects.length); + for(int i = 0; i < objects.length; i++) { + list.add((LocatedBlock)toLocatedBlock((Map)objects[i])); + } + return list; + } + } + + /** Convert LocatedBlocks to a Json string. */ + public static String toJsonString(final LocatedBlocks locatedblocks + ) throws IOException { + if (locatedblocks == null) { + return null; + } + + final Map m = jsonMap.get(); + m.put("fileLength", locatedblocks.getFileLength()); + m.put("isUnderConstruction", locatedblocks.isUnderConstruction()); + + m.put("locatedBlocks", toJsonString(locatedblocks.getLocatedBlocks())); + m.put("lastLocatedBlock", toJsonString(locatedblocks.getLastLocatedBlock())); + m.put("isLastBlockComplete", locatedblocks.isLastBlockComplete()); + return JSON.toString(m); + } + + /** Convert a Json map to LocatedBlock. */ + public static LocatedBlocks toLocatedBlocks(final Map m + ) throws IOException { + if (m == null) { + return null; + } + + final long fileLength = (Long)m.get("fileLength"); + final boolean isUnderConstruction = (Boolean)m.get("isUnderConstruction"); + final List locatedBlocks = toLocatedBlockList( + (Object[])JSON.parse((String) m.get("locatedBlocks"))); + final LocatedBlock lastLocatedBlock = toLocatedBlock( + (Map)JSON.parse((String)m.get("lastLocatedBlock"))); + final boolean isLastBlockComplete = (Boolean)m.get("isLastBlockComplete"); + return new LocatedBlocks(fileLength, isUnderConstruction, locatedBlocks, + lastLocatedBlock, isLastBlockComplete); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/ParamFilter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/ParamFilter.java new file mode 100644 index 00000000000..687b8747673 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/ParamFilter.java @@ -0,0 +1,85 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.web; + +import java.net.URI; +import java.util.List; +import java.util.Map; + +import javax.ws.rs.core.MultivaluedMap; +import javax.ws.rs.core.UriBuilder; + +import com.sun.jersey.spi.container.ContainerRequest; +import com.sun.jersey.spi.container.ContainerRequestFilter; +import com.sun.jersey.spi.container.ContainerResponseFilter; +import com.sun.jersey.spi.container.ResourceFilter; + +/** + * A filter to change parameter names to lower cases + * so that parameter names are considered as case insensitive. + */ +public class ParamFilter implements ResourceFilter { + private static final ContainerRequestFilter LOWER_CASE + = new ContainerRequestFilter() { + @Override + public ContainerRequest filter(final ContainerRequest request) { + final MultivaluedMap parameters = request.getQueryParameters(); + if (containsUpperCase(parameters.keySet())) { + //rebuild URI + final URI lower = rebuildQuery(request.getRequestUri(), parameters); + request.setUris(request.getBaseUri(), lower); + } + return request; + } + }; + + @Override + public ContainerRequestFilter getRequestFilter() { + return LOWER_CASE; + } + + @Override + public ContainerResponseFilter getResponseFilter() { + return null; + } + + /** Do the strings contain upper case letters? */ + private static boolean containsUpperCase(final Iterable strings) { + for(String s : strings) { + for(int i = 0; i < s.length(); i++) { + if (Character.isUpperCase(s.charAt(i))) { + return true; + } + } + } + return false; + } + + /** Rebuild the URI query with lower case parameter names. 
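[editor's note] ParamFilter makes webhdfs query parameters case-insensitive by lower-casing any key that contains an upper-case letter and rebuilding the request URI. A plain-Java analogue of that normalization, without the Jersey types (all names here are illustrative):

    import java.util.Arrays;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class LowerCaseParams {
      /** Do any of the strings contain an upper-case letter? */
      static boolean containsUpperCase(final Iterable<String> strings) {
        for (String s : strings) {
          for (int i = 0; i < s.length(); i++) {
            if (Character.isUpperCase(s.charAt(i))) {
              return true;
            }
          }
        }
        return false;
      }

      public static void main(String[] args) {
        Map<String, List<String>> params = new LinkedHashMap<String, List<String>>();
        params.put("renameOptions", Arrays.asList("OVERWRITE"));     // mixed-case key

        if (containsUpperCase(params.keySet())) {
          final Map<String, List<String>> lower =
              new LinkedHashMap<String, List<String>>();
          for (Map.Entry<String, List<String>> e : params.entrySet()) {
            lower.put(e.getKey().toLowerCase(), e.getValue());       // values untouched
          }
          params = lower;   // the real filter rebuilds the request URI instead
        }
        System.out.println(params);   // {renameoptions=[OVERWRITE]}
      }
    }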
*/ + private static URI rebuildQuery(final URI uri, + final MultivaluedMap parameters) { + UriBuilder b = UriBuilder.fromUri(uri).replaceQuery(""); + for(Map.Entry> e : parameters.entrySet()) { + final String key = e.getKey().toLowerCase(); + for(String v : e.getValue()) { + b = b.queryParam(key, v); + } + } + return b.build(); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index df86456e899..35c325281b5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -27,9 +27,12 @@ import java.net.HttpURLConnection; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; +import java.util.Arrays; +import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileAlreadyExistsException; @@ -45,6 +48,7 @@ import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException; import org.apache.hadoop.hdfs.protocol.UnresolvedPathException; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.namenode.SafeModeException; import org.apache.hadoop.hdfs.web.resources.AccessTimeParam; import org.apache.hadoop.hdfs.web.resources.BlockSizeParam; @@ -54,7 +58,9 @@ import org.apache.hadoop.hdfs.web.resources.DstPathParam; import org.apache.hadoop.hdfs.web.resources.GetOpParam; import org.apache.hadoop.hdfs.web.resources.GroupParam; import org.apache.hadoop.hdfs.web.resources.HttpOpParam; +import org.apache.hadoop.hdfs.web.resources.LengthParam; import org.apache.hadoop.hdfs.web.resources.ModificationTimeParam; +import org.apache.hadoop.hdfs.web.resources.OffsetParam; import org.apache.hadoop.hdfs.web.resources.OverwriteParam; import org.apache.hadoop.hdfs.web.resources.OwnerParam; import org.apache.hadoop.hdfs.web.resources.Param; @@ -63,13 +69,16 @@ import org.apache.hadoop.hdfs.web.resources.PostOpParam; import org.apache.hadoop.hdfs.web.resources.PutOpParam; import org.apache.hadoop.hdfs.web.resources.RecursiveParam; import org.apache.hadoop.hdfs.web.resources.RenameOptionSetParam; +import org.apache.hadoop.hdfs.web.resources.RenewerParam; import org.apache.hadoop.hdfs.web.resources.ReplicationParam; import org.apache.hadoop.hdfs.web.resources.UserParam; +import org.apache.hadoop.io.Text; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authentication.client.AuthenticatedURL; import org.apache.hadoop.security.authentication.client.AuthenticationException; +import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Progressable; import org.mortbay.util.ajax.JSON; @@ -82,17 +91,24 @@ public class WebHdfsFileSystem extends HftpFileSystem { private static final KerberosUgiAuthenticator AUTH = new KerberosUgiAuthenticator(); - private UserGroupInformation ugi; + private final UserGroupInformation ugi; private final 
AuthenticatedURL.Token authToken = new AuthenticatedURL.Token(); protected Path workingDir; + { + try { + ugi = UserGroupInformation.getCurrentUser(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + @Override public synchronized void initialize(URI uri, Configuration conf ) throws IOException { super.initialize(uri, conf); setConf(conf); - ugi = UserGroupInformation.getCurrentUser(); this.workingDir = getHomeDirectory(); } @@ -163,11 +179,11 @@ public class WebHdfsFileSystem extends HftpFileSystem { } } - private URL toUrl(final HttpOpParam.Op op, final Path fspath, + URL toUrl(final HttpOpParam.Op op, final Path fspath, final Param... parameters) throws IOException { //initialize URI path and query final String path = "/" + PATH_PREFIX - + makeQualified(fspath).toUri().getPath(); + + (fspath == null? "/": makeQualified(fspath).toUri().getPath()); final String query = op.toQueryString() + '&' + new UserParam(ugi) + Param.toSortedString("&", parameters); @@ -396,4 +412,41 @@ public class WebHdfsFileSystem extends HftpFileSystem { } return statuses; } + + @Override + public Token getDelegationToken(final String renewer + ) throws IOException { + final HttpOpParam.Op op = GetOpParam.Op.GETDELEGATIONTOKEN; + final Map m = run(op, null, new RenewerParam(renewer)); + final Token token = JsonUtil.toDelegationToken(m); + token.setService(new Text(getCanonicalServiceName())); + return token; + } + + @Override + public List> getDelegationTokens(final String renewer + ) throws IOException { + final Token[] t = {getDelegationToken(renewer)}; + return Arrays.asList(t); + } + + @Override + public BlockLocation[] getFileBlockLocations(final FileStatus status, + final long offset, final long length) throws IOException { + if (status == null) { + return null; + } + return getFileBlockLocations(status.getPath(), offset, length); + } + + @Override + public BlockLocation[] getFileBlockLocations(final Path p, + final long offset, final long length) throws IOException { + statistics.incrementReadOps(1); + + final HttpOpParam.Op op = GetOpParam.Op.GETFILEBLOCKLOCATIONS; + final Map m = run(op, p, new OffsetParam(offset), + new LengthParam(length)); + return DFSUtil.locatedBlocks2Locations(JsonUtil.toLocatedBlocks(m)); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/AccessTimeParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/AccessTimeParam.java index 830e5cd32dd..8d82131c703 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/AccessTimeParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/AccessTimeParam.java @@ -20,7 +20,7 @@ package org.apache.hadoop.hdfs.web.resources; /** Access time parameter. */ public class AccessTimeParam extends LongParam { /** Parameter name. */ - public static final String NAME = "accessTime"; + public static final String NAME = "accesstime"; /** Default parameter value. 
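[editor's note] With getDelegationToken and getFileBlockLocations wired through GETDELEGATIONTOKEN and GETFILEBLOCKLOCATIONS, a client can exercise both through the ordinary FileSystem API. The following is a hypothetical usage sketch, not taken from the patch: the host, port, renewer and path are invented, and it assumes the webhdfs scheme resolves to WebHdfsFileSystem in the client configuration:

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.BlockLocation;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.security.token.Token;

    public class WebHdfsClientDemo {
      public static void main(String[] args) throws Exception {
        final Configuration conf = new Configuration();
        final FileSystem fs =
            FileSystem.get(URI.create("webhdfs://namenode.example.com:50070"), conf);

        // GETDELEGATIONTOKEN: returned as {"urlString": "..."} and decoded client-side.
        final Token<?> token = fs.getDelegationToken("renewer-user");
        System.out.println("token service: " + token.getService());

        // GETFILEBLOCKLOCATIONS: JSON LocatedBlocks converted to BlockLocation[].
        final BlockLocation[] locations =
            fs.getFileBlockLocations(new Path("/user/foo/data.txt"), 0L, 1024L);
        System.out.println(locations.length + " block location(s)");
      }
    }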
*/ public static final String DEFAULT = "-1"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BlockSizeParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BlockSizeParam.java index e50b282f33b..96114968074 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BlockSizeParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BlockSizeParam.java @@ -25,7 +25,7 @@ import org.apache.hadoop.conf.Configuration; /** Block size parameter. */ public class BlockSizeParam extends LongParam { /** Parameter name. */ - public static final String NAME = "blockSize"; + public static final String NAME = "blocksize"; /** Default parameter value. */ public static final String DEFAULT = NULL; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BufferSizeParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BufferSizeParam.java index 424e5ba2533..148834b1024 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BufferSizeParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/BufferSizeParam.java @@ -23,7 +23,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic; /** Buffer size parameter. */ public class BufferSizeParam extends IntegerParam { /** Parameter name. */ - public static final String NAME = "bufferSize"; + public static final String NAME = "buffersize"; /** Default parameter value. */ public static final String DEFAULT = NULL; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DelegationParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DelegationParam.java index 80f0c4b0b33..ad08773ea24 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DelegationParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DelegationParam.java @@ -17,13 +17,12 @@ */ package org.apache.hadoop.hdfs.web.resources; -import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.security.UserGroupInformation; /** Delegation token parameter. */ public class DelegationParam extends StringParam { /** Parameter name. */ - public static final String NAME = JspHelper.DELEGATION_PARAMETER_NAME; + public static final String NAME = "delegation"; /** Default parameter value. */ public static final String DEFAULT = ""; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java index e61e858ee49..12962b4a4ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DeleteOpParam.java @@ -21,9 +21,6 @@ import java.net.HttpURLConnection; /** Http DELETE operation parameter. */ public class DeleteOpParam extends HttpOpParam { - /** Parameter name. */ - public static final String NAME = "deleteOp"; - /** Delete operations. 
*/ public static enum Op implements HttpOpParam.Op { DELETE(HttpURLConnection.HTTP_OK), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DstPathParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DstPathParam.java index 7d522a38770..5fa52456f92 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DstPathParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/DstPathParam.java @@ -22,7 +22,7 @@ import org.apache.hadoop.fs.Path; /** Destination path parameter. */ public class DstPathParam extends StringParam { /** Parameter name. */ - public static final String NAME = "dstPath"; + public static final String NAME = "dstpath"; /** Default parameter value. */ public static final String DEFAULT = ""; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java index 6f11871ebb8..d547f1b1b4d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/GetOpParam.java @@ -21,16 +21,16 @@ import java.net.HttpURLConnection; /** Http GET operation parameter. */ public class GetOpParam extends HttpOpParam { - /** Parameter name. */ - public static final String NAME = "getOp"; - /** Get operations. */ public static enum Op implements HttpOpParam.Op { OPEN(HttpURLConnection.HTTP_OK), + GETFILEBLOCKLOCATIONS(HttpURLConnection.HTTP_OK), GETFILESTATUS(HttpURLConnection.HTTP_OK), LISTSTATUS(HttpURLConnection.HTTP_OK), + GETDELEGATIONTOKEN(HttpURLConnection.HTTP_OK), + NULL(HttpURLConnection.HTTP_NOT_IMPLEMENTED); final int expectedHttpResponseCode; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java index 644c4032dbc..422ec0f2f2f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/HttpOpParam.java @@ -20,6 +20,9 @@ package org.apache.hadoop.hdfs.web.resources; /** Http operation parameter. */ public abstract class HttpOpParam & HttpOpParam.Op> extends EnumParam { + /** Parameter name. */ + public static final String NAME = "op"; + /** Default parameter value. */ public static final String DEFAULT = NULL; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ModificationTimeParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ModificationTimeParam.java index d43da073280..a0e38a97e7d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ModificationTimeParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/ModificationTimeParam.java @@ -20,7 +20,7 @@ package org.apache.hadoop.hdfs.web.resources; /** Modification time parameter. */ public class ModificationTimeParam extends LongParam { /** Parameter name. */ - public static final String NAME = "modificationTime"; + public static final String NAME = "modificationtime"; /** Default parameter value. 
*/ public static final String DEFAULT = "-1"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/OverwriteParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/OverwriteParam.java index 6639ece7b25..f6945bb4351 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/OverwriteParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/OverwriteParam.java @@ -17,7 +17,7 @@ */ package org.apache.hadoop.hdfs.web.resources; -/** Recursive parameter. */ +/** Overwrite parameter. */ public class OverwriteParam extends BooleanParam { /** Parameter name. */ public static final String NAME = "overwrite"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java index 116d6af8b36..b553ecc6701 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PostOpParam.java @@ -21,9 +21,6 @@ import java.net.HttpURLConnection; /** Http POST operation parameter. */ public class PostOpParam extends HttpOpParam { - /** Parameter name. */ - public static final String NAME = "postOp"; - /** Post operations. */ public static enum Op implements HttpOpParam.Op { APPEND(HttpURLConnection.HTTP_OK), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java index 00703fefbc7..dcfaa6f06cd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/PutOpParam.java @@ -21,9 +21,6 @@ import java.net.HttpURLConnection; /** Http POST operation parameter. */ public class PutOpParam extends HttpOpParam { - /** Parameter name. */ - public static final String NAME = "putOp"; - /** Put operations. */ public static enum Op implements HttpOpParam.Op { CREATE(true, HttpURLConnection.HTTP_CREATED), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenameOptionSetParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenameOptionSetParam.java index ec66a51c788..d7c157d5086 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenameOptionSetParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenameOptionSetParam.java @@ -22,7 +22,7 @@ import org.apache.hadoop.fs.Options; /** Rename option set parameter. */ public class RenameOptionSetParam extends EnumSetParam { /** Parameter name. */ - public static final String NAME = "renameOptions"; + public static final String NAME = "renameoptions"; /** Default parameter value. 
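[editor's note] After the renames above, every operation is selected by the shared "op" key and all parameter names are lower case. Purely as an illustration (the host, port and path prefix below are placeholders, not taken from the patch), a create request now carries a query string like the one printed here:

    public class WebHdfsQueryExample {
      public static void main(String[] args) {
        final String base = "http://namenode.example.com:50070/webhdfs";   // placeholder prefix
        final String query = "op=CREATE&blocksize=134217728&buffersize=4096&overwrite=true";
        System.out.println(base + "/user/foo/data.txt?" + query);
      }
    }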
*/ public static final String DEFAULT = ""; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenewerParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenewerParam.java new file mode 100644 index 00000000000..750e8bc91b1 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/RenewerParam.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.web.resources; + +/** Renewer parameter. */ +public class RenewerParam extends StringParam { + /** Parameter name. */ + public static final String NAME = "renewer"; + /** Default parameter value. */ + public static final String DEFAULT = NULL; + + private static final Domain DOMAIN = new Domain(NAME, null); + + /** + * Constructor. + * @param str a string representation of the parameter value. + */ + public RenewerParam(final String str) { + super(DOMAIN, str == null || str.equals(DEFAULT)? null: str); + } + + @Override + public String getName() { + return NAME; + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 9fae462f04f..59a8ff645ff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -683,24 +683,4 @@ creations/deletions), or "all". - - dfs.web.authentication.kerberos.principal - HTTP/${dfs.web.hostname}@${kerberos.realm} - - The HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint. - - The HTTP Kerberos principal MUST start with 'HTTP/' per Kerberos - HTTP SPENGO specification. - - - - - dfs.web.authentication.kerberos.keytab - ${user.home}/dfs.web.keytab - - The Kerberos keytab file with the credentials for the - HTTP Kerberos principal used by Hadoop-Auth in the HTTP endpoint. 
- - - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java index 2d50ce440f9..495e8e191a3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPermission.java @@ -72,6 +72,7 @@ public class TestDFSPermission extends TestCase { final private static Path NON_EXISTENT_FILE = new Path("/NonExistentFile"); private FileSystem fs; + private MiniDFSCluster cluster; private static Random r; static { @@ -105,18 +106,25 @@ public class TestDFSPermission extends TestCase { } } + @Override + public void setUp() throws IOException { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + cluster.waitActive(); + } + + @Override + public void tearDown() throws IOException { + if (cluster != null) { + cluster.shutdown(); + } + } + /** This tests if permission setting in create, mkdir, and * setPermission works correctly */ public void testPermissionSetting() throws Exception { - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); - try { - cluster.waitActive(); - testPermissionSetting(OpType.CREATE); // test file creation - testPermissionSetting(OpType.MKDIRS); // test directory creation - } finally { - cluster.shutdown(); - } + testPermissionSetting(OpType.CREATE); // test file creation + testPermissionSetting(OpType.MKDIRS); // test directory creation } private void initFileSystem(short umask) throws Exception { @@ -245,17 +253,22 @@ public class TestDFSPermission extends TestCase { } } + /** + * check that ImmutableFsPermission can be used as the argument + * to setPermission + */ + public void testImmutableFsPermission() throws IOException { + fs = FileSystem.get(conf); + + // set the permission of the root to be world-wide rwx + fs.setPermission(new Path("/"), + FsPermission.createImmutable((short)0777)); + } + /* check if the ownership of a file/directory is set correctly */ public void testOwnership() throws Exception { - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); - try { - cluster.waitActive(); - testOwnership(OpType.CREATE); // test file creation - testOwnership(OpType.MKDIRS); // test directory creation - } finally { - fs.close(); - cluster.shutdown(); - } + testOwnership(OpType.CREATE); // test file creation + testOwnership(OpType.MKDIRS); // test directory creation } /* change a file/directory's owner and group. 
@@ -342,9 +355,7 @@ public class TestDFSPermission extends TestCase { /* Check if namenode performs permission checking correctly for * superuser, file owner, group owner, and other users */ public void testPermissionChecking() throws Exception { - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); try { - cluster.waitActive(); fs = FileSystem.get(conf); // set the permission of the root to be world-wide rwx @@ -401,7 +412,6 @@ public class TestDFSPermission extends TestCase { parentPermissions, permissions, parentPaths, filePaths, dirPaths); } finally { fs.close(); - cluster.shutdown(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index fc883118f82..f154ff7d203 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -29,8 +29,7 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; -import junit.framework.Assert; - +import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -40,8 +39,7 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; -import static org.apache.hadoop.fs.CommonConfigurationKeys.HADOOP_SECURITY_AUTHENTICATION; - +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; public class TestDFSUtil { /** @@ -76,79 +74,141 @@ public class TestDFSUtil { } } - assertTrue("expected 1 corrupt files but got " + corruptCount, - corruptCount == 1); - + assertTrue("expected 1 corrupt files but got " + corruptCount, + corruptCount == 1); + // test an empty location bs = DFSUtil.locatedBlocks2Locations(new LocatedBlocks()); assertEquals(0, bs.length); } - /** - * Test for - * {@link DFSUtil#getNameServiceIds(Configuration)} - * {@link DFSUtil#getNameServiceId(Configuration)} - * {@link DFSUtil#getNNServiceRpcAddresses(Configuration)} + + private Configuration setupAddress(String key) { + HdfsConfiguration conf = new HdfsConfiguration(); + conf.set(DFS_FEDERATION_NAMESERVICES, "nn1"); + conf.set(DFSUtil.getNameServiceIdKey(key, "nn1"), "localhost:9000"); + return conf; + } + + /** + * Test {@link DFSUtil#getNamenodeNameServiceId(Configuration)} to ensure + * nameserviceId from the configuration returned */ @Test - public void testMultipleNamenodes() throws IOException { + public void getNameServiceId() { HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, "nn1,nn2"); - - // Test - The configured nameserviceIds are returned + conf.set(DFS_FEDERATION_NAMESERVICE_ID, "nn1"); + assertEquals("nn1", DFSUtil.getNamenodeNameServiceId(conf)); + } + + /** + * Test {@link DFSUtil#getNameNodeNameServiceId(Configuration)} to ensure + * nameserviceId for namenode is determined based on matching the address with + * local node's address + */ + @Test + public void getNameNodeNameServiceId() { + Configuration conf = setupAddress(DFS_NAMENODE_RPC_ADDRESS_KEY); + assertEquals("nn1", DFSUtil.getNamenodeNameServiceId(conf)); + } + + /** + * Test {@link DFSUtil#getBackupNameServiceId(Configuration)} to ensure + * nameserviceId for 
backup node is determined based on matching the address + * with local node's address + */ + @Test + public void getBackupNameServiceId() { + Configuration conf = setupAddress(DFS_NAMENODE_BACKUP_ADDRESS_KEY); + assertEquals("nn1", DFSUtil.getBackupNameServiceId(conf)); + } + + /** + * Test {@link DFSUtil#getSecondaryNameServiceId(Configuration)} to ensure + * nameserviceId for backup node is determined based on matching the address + * with local node's address + */ + @Test + public void getSecondaryNameServiceId() { + Configuration conf = setupAddress(DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY); + assertEquals("nn1", DFSUtil.getSecondaryNameServiceId(conf)); + } + + /** + * Test {@link DFSUtil#getNameServiceId(Configuration, String))} to ensure + * exception is thrown when multiple rpc addresses match the local node's + * address + */ + @Test(expected = HadoopIllegalArgumentException.class) + public void testGetNameServiceIdException() { + HdfsConfiguration conf = new HdfsConfiguration(); + conf.set(DFS_FEDERATION_NAMESERVICES, "nn1,nn2"); + conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), + "localhost:9000"); + conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), + "localhost:9001"); + DFSUtil.getNamenodeNameServiceId(conf); + fail("Expected exception is not thrown"); + } + + /** + * Test {@link DFSUtil#getNameServiceIds(Configuration)} + */ + @Test + public void testGetNameServiceIds() { + HdfsConfiguration conf = new HdfsConfiguration(); + conf.set(DFS_FEDERATION_NAMESERVICES, "nn1,nn2"); Collection nameserviceIds = DFSUtil.getNameServiceIds(conf); Iterator it = nameserviceIds.iterator(); assertEquals(2, nameserviceIds.size()); assertEquals("nn1", it.next().toString()); assertEquals("nn2", it.next().toString()); - - // Tests default nameserviceId is returned - conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID, "nn1"); - assertEquals("nn1", DFSUtil.getNameServiceId(conf)); - + } + + /** + * Test for {@link DFSUtil#getNNServiceRpcAddresses(Configuration)} + * {@link DFSUtil#getNameServiceIdFromAddress(Configuration, InetSocketAddress, String...) 
+ * (Configuration)} + */ + @Test + public void testMultipleNamenodes() throws IOException { + HdfsConfiguration conf = new HdfsConfiguration(); + conf.set(DFS_FEDERATION_NAMESERVICES, "nn1,nn2"); // Test - configured list of namenodes are returned final String NN1_ADDRESS = "localhost:9000"; final String NN2_ADDRESS = "localhost:9001"; final String NN3_ADDRESS = "localhost:9002"; - conf.set(DFSUtil.getNameServiceIdKey( - DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), NN1_ADDRESS); - conf.set(DFSUtil.getNameServiceIdKey( - DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), NN2_ADDRESS); - - Collection nnAddresses = - DFSUtil.getNNServiceRpcAddresses(conf); + conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), + NN1_ADDRESS); + conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), + NN2_ADDRESS); + + Collection nnAddresses = DFSUtil + .getNNServiceRpcAddresses(conf); assertEquals(2, nnAddresses.size()); Iterator iterator = nnAddresses.iterator(); - assertEquals(2, nameserviceIds.size()); InetSocketAddress addr = iterator.next(); assertEquals("localhost", addr.getHostName()); assertEquals(9000, addr.getPort()); addr = iterator.next(); assertEquals("localhost", addr.getHostName()); assertEquals(9001, addr.getPort()); - + // Test - can look up nameservice ID from service address - InetSocketAddress testAddress1 = NetUtils.createSocketAddr(NN1_ADDRESS); - String nameserviceId = DFSUtil.getNameServiceIdFromAddress( - conf, testAddress1, - DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, - DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); - assertEquals("nn1", nameserviceId); - InetSocketAddress testAddress2 = NetUtils.createSocketAddr(NN2_ADDRESS); - nameserviceId = DFSUtil.getNameServiceIdFromAddress( - conf, testAddress2, - DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, - DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); - assertEquals("nn2", nameserviceId); - InetSocketAddress testAddress3 = NetUtils.createSocketAddr(NN3_ADDRESS); - nameserviceId = DFSUtil.getNameServiceIdFromAddress( - conf, testAddress3, - DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, - DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); - assertNull(nameserviceId); + checkNameServiceId(conf, NN1_ADDRESS, "nn1"); + checkNameServiceId(conf, NN2_ADDRESS, "nn2"); + checkNameServiceId(conf, NN3_ADDRESS, null); } - - /** + + public void checkNameServiceId(Configuration conf, String addr, + String expectedNameServiceId) { + InetSocketAddress s = NetUtils.createSocketAddr(addr); + String nameserviceId = DFSUtil.getNameServiceIdFromAddress(conf, s, + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); + assertEquals(expectedNameServiceId, nameserviceId); + } + + /** * Test for * {@link DFSUtil#isDefaultNamenodeAddress(Configuration, InetSocketAddress, String...)} */ @@ -157,27 +217,25 @@ public class TestDFSUtil { HdfsConfiguration conf = new HdfsConfiguration(); final String DEFAULT_ADDRESS = "localhost:9000"; final String NN2_ADDRESS = "localhost:9001"; - conf.set(DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, DEFAULT_ADDRESS); - + conf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, DEFAULT_ADDRESS); + InetSocketAddress testAddress1 = NetUtils.createSocketAddr(DEFAULT_ADDRESS); boolean isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress1, - DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, - DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); assertTrue(isDefault); InetSocketAddress testAddress2 = 
NetUtils.createSocketAddr(NN2_ADDRESS); isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress2, - DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, - DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); assertFalse(isDefault); } - + /** Tests to ensure default namenode is used as fallback */ @Test public void testDefaultNamenode() throws IOException { HdfsConfiguration conf = new HdfsConfiguration(); final String hdfs_default = "hdfs://localhost:9999/"; - conf.set(DFSConfigKeys.FS_DEFAULT_NAME_KEY, hdfs_default); - // If DFSConfigKeys.DFS_FEDERATION_NAMESERVICES is not set, verify that + conf.set(FS_DEFAULT_NAME_KEY, hdfs_default); + // If DFS_FEDERATION_NAMESERVICES is not set, verify that // default namenode address is returned. List addrList = DFSUtil.getNNServiceRpcAddresses(conf); assertEquals(1, addrList.size()); @@ -191,26 +249,26 @@ public class TestDFSUtil { @Test public void testConfModification() throws IOException { final HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, "nn1"); - conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID, "nn1"); - final String nameserviceId = DFSUtil.getNameServiceId(conf); - + conf.set(DFS_FEDERATION_NAMESERVICES, "nn1"); + conf.set(DFS_FEDERATION_NAMESERVICE_ID, "nn1"); + final String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); + // Set the nameservice specific keys with nameserviceId in the config key for (String key : NameNode.NAMESERVICE_SPECIFIC_KEYS) { // Note: value is same as the key conf.set(DFSUtil.getNameServiceIdKey(key, nameserviceId), key); } - + // Initialize generic keys from specific keys - NameNode.initializeGenericKeys(conf); - + NameNode.initializeGenericKeys(conf, nameserviceId); + // Retrieve the keys without nameserviceId and Ensure generic keys are set // to the correct value for (String key : NameNode.NAMESERVICE_SPECIFIC_KEYS) { assertEquals(key, conf.get(key)); } } - + /** * Tests for empty configuration, an exception is thrown from * {@link DFSUtil#getNNServiceRpcAddresses(Configuration)} @@ -238,16 +296,16 @@ public class TestDFSUtil { } catch (IOException expected) { } } - + @Test - public void testGetServerInfo(){ + public void testGetServerInfo() { HdfsConfiguration conf = new HdfsConfiguration(); conf.set(HADOOP_SECURITY_AUTHENTICATION, "kerberos"); UserGroupInformation.setConfiguration(conf); String httpsport = DFSUtil.getInfoServer(null, conf, true); - Assert.assertEquals("0.0.0.0:50470", httpsport); + assertEquals("0.0.0.0:50470", httpsport); String httpport = DFSUtil.getInfoServer(null, conf, false); - Assert.assertEquals("0.0.0.0:50070", httpport); + assertEquals("0.0.0.0:50070", httpport); } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java index a0727a6c90b..d7ee516b0a8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestQuota.java @@ -17,6 +17,10 @@ */ package org.apache.hadoop.hdfs; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + import java.io.OutputStream; import java.security.PrivilegedExceptionAction; @@ -24,17 +28,15 @@ import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException; import org.apache.hadoop.hdfs.protocol.QuotaExceededException; +import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; import org.apache.hadoop.hdfs.tools.DFSAdmin; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.hdfs.protocol.NSQuotaExceededException; -import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; - import org.junit.Test; -import static org.junit.Assert.*; /** A class for testing quota-related commands */ public class TestQuota { @@ -841,6 +843,14 @@ public class TestQuota { DFSAdmin admin = new DFSAdmin(conf); try { + + //Test for deafult NameSpace Quota + long nsQuota = FSImageTestUtil.getNSQuota(cluster.getNameNode() + .getNamesystem()); + assertTrue( + "Default namespace quota expected as long max. But the value is :" + + nsQuota, nsQuota == Long.MAX_VALUE); + Path dir = new Path("/test"); boolean exceededQuota = false; ContentSummary c; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java index d6397b6a2ee..9c577f740ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java @@ -23,12 +23,12 @@ package org.apache.hadoop.hdfs.security; import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; +import java.net.URI; import java.security.PrivilegedExceptionAction; -import junit.framework.Assert; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -38,12 +38,16 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; +import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; +import org.apache.log4j.Level; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -56,12 +60,13 @@ public class TestDelegationToken { @Before public void setUp() throws Exception { config = new HdfsConfiguration(); + config.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true); config.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 10000); config.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 5000); 
config.set("hadoop.security.auth_to_local", "RULE:[2:$1@$0](JobTracker@.*FOO.COM)s/@.*//" + "DEFAULT"); FileSystem.setDefaultUri(config, "hdfs://localhost:" + "0"); - cluster = new MiniDFSCluster.Builder(config).build(); + cluster = new MiniDFSCluster.Builder(config).numDataNodes(0).build(); cluster.waitActive(); dtSecretManager = NameNodeAdapter.getDtSecretManager( cluster.getNamesystem()); @@ -153,6 +158,31 @@ public class TestDelegationToken { dtSecretManager.renewToken(token, "JobTracker"); } + @Test + public void testDelegationTokenWebHdfsApi() throws Exception { + ((Log4JLogger)NamenodeWebHdfsMethods.LOG).getLogger().setLevel(Level.ALL); + final String uri = WebHdfsFileSystem.SCHEME + "://" + + config.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY); + //get file system as JobTracker + final UserGroupInformation ugi = UserGroupInformation.createUserForTesting( + "JobTracker", new String[]{"user"}); + final WebHdfsFileSystem webhdfs = ugi.doAs( + new PrivilegedExceptionAction() { + @Override + public WebHdfsFileSystem run() throws Exception { + return (WebHdfsFileSystem)FileSystem.get(new URI(uri), config); + } + }); + + final Token token = webhdfs.getDelegationToken("JobTracker"); + DelegationTokenIdentifier identifier = new DelegationTokenIdentifier(); + byte[] tokenId = token.getIdentifier(); + identifier.readFields(new DataInputStream(new ByteArrayInputStream(tokenId))); + LOG.info("A valid token should have non-null password, and should be renewed successfully"); + Assert.assertTrue(null != dtSecretManager.retrievePassword(identifier)); + dtSecretManager.renewToken(token, "JobTracker"); + } + @Test public void testDelegationTokenWithDoAs() throws Exception { final DistributedFileSystem dfs = (DistributedFileSystem) cluster.getFileSystem(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHost2NodesMap.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHost2NodesMap.java index 7b9126f7de8..d34cf1c4c6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHost2NodesMap.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHost2NodesMap.java @@ -18,31 +18,34 @@ package org.apache.hadoop.hdfs.server.blockmanagement; -import junit.framework.TestCase; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import org.apache.hadoop.hdfs.protocol.DatanodeID; -import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; -import org.apache.hadoop.hdfs.server.blockmanagement.Host2NodesMap; +import org.junit.Before; +import org.junit.Test; -public class TestHost2NodesMap extends TestCase { - static private Host2NodesMap map = new Host2NodesMap(); - private final static DatanodeDescriptor dataNodes[] = new DatanodeDescriptor[] { +public class TestHost2NodesMap { + private Host2NodesMap map = new Host2NodesMap(); + private final DatanodeDescriptor dataNodes[] = new DatanodeDescriptor[] { new DatanodeDescriptor(new DatanodeID("h1:5020"), "/d1/r1"), new DatanodeDescriptor(new DatanodeID("h2:5020"), "/d1/r1"), new DatanodeDescriptor(new DatanodeID("h3:5020"), "/d1/r2"), new DatanodeDescriptor(new DatanodeID("h3:5030"), "/d1/r2"), }; - private final static DatanodeDescriptor NULL_NODE = null; - private final static DatanodeDescriptor NODE = - new DatanodeDescriptor(new DatanodeID("h3:5040"), "/d1/r4"); + private final DatanodeDescriptor 
NULL_NODE = null; + private final DatanodeDescriptor NODE = new DatanodeDescriptor(new DatanodeID("h3:5040"), + "/d1/r4"); - static { + @Before + public void setup() { for(DatanodeDescriptor node:dataNodes) { map.add(node); } map.add(NULL_NODE); } + @Test public void testContains() throws Exception { for(int i=0; i " + line); + } + + //check if the command successes. + assertTrue(fs.getFileStatus(p).isDirectory()); + } } diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 1d637419eb5..43fed6baa1c 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -29,6 +29,8 @@ Trunk (unreleased changes) findBugs, correct links to findBugs artifacts and no links to the artifacts when there are no warnings. (Tom White via vinodkv). + MAPREDUCE-3081. Fix vaidya startup script. (gkesavan via suhas). + Release 0.23.0 - Unreleased INCOMPATIBLE CHANGES @@ -70,6 +72,9 @@ Release 0.23.0 - Unreleased MAPREDUCE-2037. Capture intermediate progress, CPU and memory usage for tasks. (Dick King via acmurthy) + MAPREDUCE-2930. Added the ability to be able to generate graphs from the + state-machine definitions. (Binglin Chang via vinodkv) + IMPROVEMENTS MAPREDUCE-2187. Reporter sends progress during sort/merge. (Anupam Seth via @@ -307,6 +312,15 @@ Release 0.23.0 - Unreleased MAPREDUCE-2726. Added job-file to the AM and JobHistoryServer web interfaces. (Jeffrey Naisbitt via vinodkv) + MAPREDUCE-3055. Simplified ApplicationAttemptId passing to + ApplicationMaster via environment variable. (vinodkv) + + MAPREDUCE-3092. Removed a special comparator for JobIDs in JobHistory as + JobIDs are already comparable. (Devaraj K via vinodkv) + + MAPREDUCE-3099. Add docs for setting up a single node MRv2 cluster. + (mahadev) + OPTIMIZATIONS MAPREDUCE-2026. Make JobTracker.getJobCounters() and @@ -318,6 +332,9 @@ Release 0.23.0 - Unreleased MAPREDUCE-901. Efficient framework counters. (llu via acmurthy) + MAPREDUCE-2880. Improve classpath-construction for mapreduce AM and + containers. (Arun C Murthy via vinodkv) + BUG FIXES MAPREDUCE-2603. Disable High-Ram emulation in system tests. @@ -1370,6 +1387,91 @@ Release 0.23.0 - Unreleased YarnClientProtocolProvider and ensured MiniMRYarnCluster sets JobHistory configuration for tests. (acmurthy) + MAPREDUCE-3018. Fixed -file option for streaming. (mahadev via acmurthy) + + MAPREDUCE-3036. Fixed metrics for reserved resources in CS. (Robert Evans + via acmurthy) + + MAPREDUCE-2998. Fixed a bug in TaskAttemptImpl which caused it to fork + bin/mapred too many times. (vinodkv via acmurthy) + + MAPREDUCE-3023. Fixed clients to display queue state correctly. (Ravi + Prakash via acmurthy) + + MAPREDUCE-2970. Fixed NPEs in corner cases with different configurations + for mapreduce.framework.name. (Venu Gopala Rao via vinodkv) + + MAPREDUCE-3062. Fixed default RMAdmin address. (Chris Riccomini + via acmurthy) + + MAPREDUCE-3066. Fixed default ResourceTracker address for the NodeManager. + (Chris Riccomini via acmurthy) + + MAPREDUCE-3044. Pipes jobs stuck without making progress. (mahadev) + + MAPREDUCE-2754. Fixed MR AM stdout, stderr and syslog to redirect to + correct log-files. (Ravi Teja Ch N V via vinodkv) + + MAPREDUCE-3073. Fixed build issues in MR1. (mahadev via acmurthy) + + MAPREDUCE-2691. Increase threadpool size for launching containers in + MapReduce ApplicationMaster. (vinodkv via acmurthy) + + + MAPREDUCE-2990. Fixed display of NodeHealthStatus. (Subroto Sanyal via + acmurthy) + + MAPREDUCE-3053. 
Better diagnostic message for unknown methods in ProtoBuf + RPCs. (vinodkv via acmurthy) + + MAPREDUCE-2952. Fixed ResourceManager/MR-client to consume diagnostics + for AM failures in a couple of corner cases. (Arun C Murthy via vinodkv) + + MAPREDUCE-3064. 27 unit test failures with Invalid + "mapreduce.jobtracker.address" configuration value for + JobTracker: "local" (Venu Gopala Rao via mahadev) + + MAPREDUCE-3090. Fix MR AM to use ApplicationAttemptId rather than + (ApplicationId, startCount) consistently. (acmurthy) + + MAPREDUCE-2646. Fixed AMRMProtocol to return containers based on + priority. (Sharad Agarwal and Arun C Murthy via vinodkv) + + MAPREDUCE-3031. Proper handling of killed containers to prevent stuck + containers/AMs on an external kill signal. (Siddharth Seth via vinodkv) + + MAPREDUCE-2984. Better error message for displaying completed containers. + (Devaraj K via acmurthy) + + MAPREDUCE-3071. app master configuration web UI link under the Job menu + opens up application menu. (thomas graves via mahadev) + + MAPREDUCE-3067. Ensure exit-code is set correctly for containers. (Hitesh + Shah via acmurthy) + + MAPREDUCE-2999. Fix YARN webapp framework to properly filter servlet + paths. (Thomas Graves via vinodkv) + + MAPREDUCE-3095. fairscheduler ivy including wrong version for hdfs. + (John George via mahadev) + + MAPREDUCE-3054. Unable to kill submitted jobs. (mahadev) + + MAPREDUCE-3021. Change base urls for RM web-ui. (Thomas Graves via + acmurthy) + + MAPREDUCE-3041. Fixed ClientRMProtocol to provide min/max resource + capabilities along-with new ApplicationId for application submission. + (Hitesh Shah via acmurthy) + + MAPREDUCE-2843. Fixed the node-table to be completely displayed and making + node entries on RM UI to be sortable. (Abhijit Suresh Shingate via vinodkv) + + MAPREDUCE-3110. Fixed TestRPC failure. (vinodkv) + + MAPREDUCE-3078. Ensure MapReduce AM reports progress correctly for + displaying on the RM Web-UI. 
(vinodkv via acmurthy) + Release 0.22.0 - Unreleased INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml index 17d4b8b0e54..0f12598fc17 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml @@ -55,6 +55,12 @@ hadoop-yarn-server-resourcemanager test + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test-jar + test + org.apache.hadoop hadoop-mapreduce-client-shuffle @@ -113,4 +119,41 @@ + + + + visualize + + false + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2 + + + compile + + java + + + test + org.apache.hadoop.yarn.util.VisualizeStateMachine + + MapReduce + org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl, + org.apache.hadoop.mapreduce.v2.app.job.impl.TaskImpl, + org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl + MapReduce.gv + + + + + + + + + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java index d9d5b1f3076..ce6557abd03 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/MapReduceChildJVM.java @@ -18,27 +18,27 @@ package org.apache.hadoop.mapred; -import java.io.File; import java.net.InetSocketAddress; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Vector; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.TaskLog.LogName; import org.apache.hadoop.mapreduce.ID; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.v2.util.MRApps; +import org.apache.hadoop.yarn.api.ApplicationConstants; +import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.conf.YarnConfiguration; public class MapReduceChildJVM { - private static final String SYSTEM_PATH_SEPARATOR = - System.getProperty("path.separator"); - private static final Log LOG = LogFactory.getLog(MapReduceChildJVM.class); - - private static File getTaskLogFile(String logDir, LogName filter) { - return new File(logDir, filter.toString()); + private static String getTaskLogFile(LogName filter) { + return ApplicationConstants.LOG_DIR_EXPANSION_VAR + Path.SEPARATOR + + filter.toString(); } private static String getChildEnv(JobConf jobConf, boolean isMap) { @@ -50,32 +50,53 @@ public class MapReduceChildJVM { jobConf.get(jobConf.MAPRED_TASK_ENV)); } - public static void setVMEnv(Map env, - List classPaths, String pwd, String containerLogDir, - String nmLdLibraryPath, Task task, CharSequence applicationTokensFile) { + private static String getChildLogLevel(JobConf conf, boolean isMap) { + if (isMap) { + return conf.get( + MRJobConfig.MAP_LOG_LEVEL, + JobConf.DEFAULT_LOG_LEVEL.toString() + ); + } else { + return conf.get( + MRJobConfig.REDUCE_LOG_LEVEL, + JobConf.DEFAULT_LOG_LEVEL.toString() + ); + } + } + + public static 
void setVMEnv(Map environment, + Task task) { JobConf conf = task.conf; - // Add classpath. - CharSequence cp = env.get("CLASSPATH"); - String classpath = StringUtils.join(SYSTEM_PATH_SEPARATOR, classPaths); - if (null == cp) { - env.put("CLASSPATH", classpath); - } else { - env.put("CLASSPATH", classpath + SYSTEM_PATH_SEPARATOR + cp); - } + // Shell + environment.put( + Environment.SHELL.name(), + conf.get( + MRJobConfig.MAPRED_ADMIN_USER_SHELL, + MRJobConfig.DEFAULT_SHELL) + ); + + // Add pwd to LD_LIBRARY_PATH, add this before adding anything else + MRApps.addToEnvironment( + environment, + Environment.LD_LIBRARY_PATH.name(), + Environment.PWD.$()); - /////// Environmental variable LD_LIBRARY_PATH - StringBuilder ldLibraryPath = new StringBuilder(); + // Add the env variables passed by the user & admin + String mapredChildEnv = getChildEnv(conf, task.isMapTask()); + MRApps.setEnvFromInputString(environment, mapredChildEnv); + MRApps.setEnvFromInputString( + environment, + conf.get( + MRJobConfig.MAPRED_ADMIN_USER_ENV, + MRJobConfig.DEFAULT_MAPRED_ADMIN_USER_ENV) + ); - ldLibraryPath.append(nmLdLibraryPath); - ldLibraryPath.append(SYSTEM_PATH_SEPARATOR); - ldLibraryPath.append(pwd); - env.put("LD_LIBRARY_PATH", ldLibraryPath.toString()); - /////// Environmental variable LD_LIBRARY_PATH - - // for the child of task jvm, set hadoop.root.logger - env.put("HADOOP_ROOT_LOGGER", "DEBUG,CLA"); // TODO: Debug + // Set logging level + environment.put( + "HADOOP_ROOT_LOGGER", + getChildLogLevel(conf, task.isMapTask()) + ",CLA"); // TODO: The following is useful for instance in streaming tasks. Should be // set in ApplicationMaster's env by the RM. @@ -89,76 +110,69 @@ public class MapReduceChildJVM { // properties. long logSize = TaskLog.getTaskLogLength(conf); Vector logProps = new Vector(4); - setupLog4jProperties(logProps, logSize, containerLogDir); + setupLog4jProperties(logProps, logSize); Iterator it = logProps.iterator(); StringBuffer buffer = new StringBuffer(); while (it.hasNext()) { buffer.append(" " + it.next()); } hadoopClientOpts = hadoopClientOpts + buffer.toString(); - - env.put("HADOOP_CLIENT_OPTS", hadoopClientOpts); + environment.put("HADOOP_CLIENT_OPTS", hadoopClientOpts); - // add the env variables passed by the user - String mapredChildEnv = getChildEnv(conf, task.isMapTask()); - if (mapredChildEnv != null && mapredChildEnv.length() > 0) { - String childEnvs[] = mapredChildEnv.split(","); - for (String cEnv : childEnvs) { - String[] parts = cEnv.split("="); // split on '=' - String value = (String) env.get(parts[0]); - if (value != null) { - // replace $env with the child's env constructed by tt's - // example LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp - value = parts[1].replace("$" + parts[0], value); - } else { - // this key is not configured by the tt for the child .. get it - // from the tt's env - // example PATH=$PATH:/tmp - value = System.getenv(parts[0]); // Get from NM? - if (value != null) { - // the env key is present in the tt's env - value = parts[1].replace("$" + parts[0], value); - } else { - // the env key is note present anywhere .. simply set it - // example X=$X:/tmp or X=/tmp - value = parts[1].replace("$" + parts[0], ""); - } - } - env.put(parts[0], value); - } - } - - //This should not be set here (If an OS check is requied. 
moved to ContainerLuanch) - // env.put("JVM_PID", "`echo $$`"); - - env.put(Constants.STDOUT_LOGFILE_ENV, - getTaskLogFile(containerLogDir, TaskLog.LogName.STDOUT).toString()); - env.put(Constants.STDERR_LOGFILE_ENV, - getTaskLogFile(containerLogDir, TaskLog.LogName.STDERR).toString()); + // Add stdout/stderr env + environment.put( + MRJobConfig.STDOUT_LOGFILE_ENV, + getTaskLogFile(TaskLog.LogName.STDOUT) + ); + environment.put( + MRJobConfig.STDERR_LOGFILE_ENV, + getTaskLogFile(TaskLog.LogName.STDERR) + ); } private static String getChildJavaOpts(JobConf jobConf, boolean isMapTask) { + String userClasspath = ""; + String adminClasspath = ""; if (isMapTask) { - return jobConf.get(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, jobConf.get( - JobConf.MAPRED_TASK_JAVA_OPTS, - JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS)); + userClasspath = + jobConf.get( + JobConf.MAPRED_MAP_TASK_JAVA_OPTS, + jobConf.get( + JobConf.MAPRED_TASK_JAVA_OPTS, + JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS) + ); + adminClasspath = + jobConf.get( + MRJobConfig.MAPRED_MAP_ADMIN_JAVA_OPTS, + MRJobConfig.DEFAULT_MAPRED_ADMIN_JAVA_OPTS); + } else { + userClasspath = + jobConf.get( + JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, + jobConf.get( + JobConf.MAPRED_TASK_JAVA_OPTS, + JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS) + ); + adminClasspath = + jobConf.get( + MRJobConfig.MAPRED_REDUCE_ADMIN_JAVA_OPTS, + MRJobConfig.DEFAULT_MAPRED_ADMIN_JAVA_OPTS); } - return jobConf - .get(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, jobConf.get( - JobConf.MAPRED_TASK_JAVA_OPTS, - JobConf.DEFAULT_MAPRED_TASK_JAVA_OPTS)); + + // Add admin classpath first so it can be overridden by user. + return adminClasspath + " " + userClasspath; } private static void setupLog4jProperties(Vector vargs, - long logSize, String containerLogDir) { + long logSize) { vargs.add("-Dlog4j.configuration=container-log4j.properties"); - vargs.add("-Dhadoop.yarn.mr.containerLogDir=" + containerLogDir); - vargs.add("-Dhadoop.yarn.mr.totalLogFileSize=" + logSize); + vargs.add("-D" + MRJobConfig.TASK_LOG_DIR + "=" + ApplicationConstants.LOG_DIR_EXPANSION_VAR); + vargs.add("-D" + MRJobConfig.TASK_LOG_SIZE + "=" + logSize); } public static List getVMCommand( - InetSocketAddress taskAttemptListenerAddr, Task task, String javaHome, - String workDir, String logDir, String childTmpDir, ID jvmID) { + InetSocketAddress taskAttemptListenerAddr, Task task, + ID jvmID) { TaskAttemptID attemptID = task.getTaskID(); JobConf conf = task.conf; @@ -166,7 +180,7 @@ public class MapReduceChildJVM { Vector vargs = new Vector(8); vargs.add("exec"); - vargs.add(javaHome + "/bin/java"); + vargs.add(Environment.JAVA_HOME.$() + "/bin/java"); // Add child (task) java-vm options. // @@ -199,44 +213,26 @@ public class MapReduceChildJVM { String javaOpts = getChildJavaOpts(conf, task.isMapTask()); javaOpts = javaOpts.replace("@taskid@", attemptID.toString()); String [] javaOptsSplit = javaOpts.split(" "); - - // Add java.library.path; necessary for loading native libraries. - // - // 1. We add the 'cwd' of the task to it's java.library.path to help - // users distribute native libraries via the DistributedCache. - // 2. The user can also specify extra paths to be added to the - // java.library.path via mapred.{map|reduce}.child.java.opts. 
- // - String libraryPath = workDir; - boolean hasUserLDPath = false; - for(int i=0; i" + getTaskLogFile(logDir, TaskLog.LogName.STDERR)); - vargs.add("2>" + getTaskLogFile(logDir, TaskLog.LogName.STDOUT)); + vargs.add("1>" + getTaskLogFile(TaskLog.LogName.STDOUT)); + vargs.add("2>" + getTaskLogFile(TaskLog.LogName.STDERR)); // Final commmand StringBuilder mergedCommand = new StringBuilder(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java index 3021004f9dd..0ab220bf383 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/YarnChild.java @@ -47,7 +47,6 @@ import org.apache.hadoop.mapreduce.filecache.DistributedCache; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; -import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.security.Credentials; @@ -71,7 +70,7 @@ class YarnChild { LOG.debug("Child starting"); final JobConf defaultConf = new JobConf(); - defaultConf.addResource(MRConstants.JOB_CONF_FILE); + defaultConf.addResource(MRJobConfig.JOB_CONF_FILE); UserGroupInformation.setConfiguration(defaultConf); String host = args[0]; @@ -238,7 +237,7 @@ class YarnChild { private static JobConf configureTask(Task task, Credentials credentials, Token jt) throws IOException { - final JobConf job = new JobConf(MRConstants.JOB_CONF_FILE); + final JobConf job = new JobConf(MRJobConfig.JOB_CONF_FILE); job.setCredentials(credentials); // set tcp nodelay job.setBoolean("ipc.client.tcpnodelay", true); @@ -260,7 +259,7 @@ class YarnChild { // Overwrite the localized task jobconf which is linked to in the current // work-dir. 
- Path localTaskFile = new Path(Constants.JOBFILE); + Path localTaskFile = new Path(MRJobConfig.JOB_CONF_FILE); writeLocalJobFile(localTaskFile, job); task.setJobFile(localTaskFile.toString()); task.setConf(job); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index 20c7e9779e8..8b7d578fc9b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -39,7 +39,6 @@ import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEventHandler; import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager; -import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; @@ -78,6 +77,7 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.SystemClock; import org.apache.hadoop.yarn.YarnException; +import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -88,6 +88,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.service.AbstractService; import org.apache.hadoop.yarn.service.CompositeService; import org.apache.hadoop.yarn.service.Service; +import org.apache.hadoop.yarn.util.ConverterUtils; /** * The Map-Reduce Application Master. 
@@ -114,8 +115,6 @@ public class MRAppMaster extends CompositeService { private Clock clock; private final long startTime = System.currentTimeMillis(); private String appName; - private final int startCount; - private final ApplicationId appID; private final ApplicationAttemptId appAttemptID; protected final MRAppMetrics metrics; private Set completedTasksFromPreviousRun; @@ -133,21 +132,16 @@ public class MRAppMaster extends CompositeService { private Job job; - public MRAppMaster(ApplicationId applicationId, int startCount) { - this(applicationId, new SystemClock(), startCount); + public MRAppMaster(ApplicationAttemptId applicationAttemptId) { + this(applicationAttemptId, new SystemClock()); } - public MRAppMaster(ApplicationId applicationId, Clock clock, int startCount) { + public MRAppMaster(ApplicationAttemptId applicationAttemptId, Clock clock) { super(MRAppMaster.class.getName()); this.clock = clock; - this.appID = applicationId; - this.appAttemptID = RecordFactoryProvider.getRecordFactory(null) - .newRecordInstance(ApplicationAttemptId.class); - this.appAttemptID.setApplicationId(appID); - this.appAttemptID.setAttemptId(startCount); - this.startCount = startCount; + this.appAttemptID = applicationAttemptId; this.metrics = MRAppMetrics.create(); - LOG.info("Created MRAppMaster for application " + applicationId); + LOG.info("Created MRAppMaster for application " + applicationAttemptId); } @Override @@ -159,9 +153,9 @@ public class MRAppMaster extends CompositeService { appName = conf.get(MRJobConfig.JOB_NAME, ""); if (conf.getBoolean(MRJobConfig.MR_AM_JOB_RECOVERY_ENABLE, false) - && startCount > 1) { + && appAttemptID.getAttemptId() > 1) { LOG.info("Recovery is enabled. Will try to recover from previous life."); - Recovery recoveryServ = new RecoveryService(appID, clock, startCount); + Recovery recoveryServ = new RecoveryService(appAttemptID, clock); addIfService(recoveryServ); dispatcher = recoveryServ.getDispatcher(); clock = recoveryServ.getClock(); @@ -243,10 +237,10 @@ public class MRAppMaster extends CompositeService { // Read the file-system tokens from the localized tokens-file. Path jobSubmitDir = FileContext.getLocalFSFileContext().makeQualified( - new Path(new File(MRConstants.JOB_SUBMIT_DIR) + new Path(new File(MRJobConfig.JOB_SUBMIT_DIR) .getAbsolutePath())); Path jobTokenFile = - new Path(jobSubmitDir, MRConstants.APPLICATION_TOKENS_FILE); + new Path(jobSubmitDir, MRJobConfig.APPLICATION_TOKENS_FILE); fsTokens.addAll(Credentials.readTokenStorageFile(jobTokenFile, conf)); LOG.info("jobSubmitDir=" + jobSubmitDir + " jobTokenFile=" + jobTokenFile); @@ -264,8 +258,8 @@ public class MRAppMaster extends CompositeService { // ////////// End of obtaining the tokens needed by the job. 
////////// // create single job - Job newJob = new JobImpl(appID, conf, dispatcher.getEventHandler(), - taskAttemptListener, jobTokenSecretManager, fsTokens, clock, startCount, + Job newJob = new JobImpl(appAttemptID, conf, dispatcher.getEventHandler(), + taskAttemptListener, jobTokenSecretManager, fsTokens, clock, completedTasksFromPreviousRun, metrics, currentUser.getUserName()); ((RunningAppContext) context).jobs.put(newJob.getID(), newJob); @@ -376,11 +370,11 @@ public class MRAppMaster extends CompositeService { } public ApplicationId getAppID() { - return appID; + return appAttemptID.getApplicationId(); } public int getStartCount() { - return startCount; + return appAttemptID.getAttemptId(); } public AppContext getContext() { @@ -505,7 +499,7 @@ public class MRAppMaster extends CompositeService { @Override public ApplicationId getApplicationID() { - return appID; + return appAttemptID.getApplicationId(); } @Override @@ -555,9 +549,9 @@ public class MRAppMaster extends CompositeService { // It's more test friendly to put it here. DefaultMetricsSystem.initialize("MRAppMaster"); - /** create a job event for job intialization */ + // create a job event for job intialization JobEvent initJobEvent = new JobEvent(job.getID(), JobEventType.JOB_INIT); - /** send init to the job (this does NOT trigger job execution) */ + // Send init to the job (this does NOT trigger job execution) // This is a synchronous call, not an event through dispatcher. We want // job-init to be done completely here. jobEventDispatcher.handle(initJobEvent); @@ -648,17 +642,21 @@ public class MRAppMaster extends CompositeService { public static void main(String[] args) { try { - //Configuration.addDefaultResource("job.xml"); - ApplicationId applicationId = RecordFactoryProvider - .getRecordFactory(null).newRecordInstance(ApplicationId.class); - applicationId.setClusterTimestamp(Long.valueOf(args[0])); - applicationId.setId(Integer.valueOf(args[1])); - int failCount = Integer.valueOf(args[2]); - MRAppMaster appMaster = new MRAppMaster(applicationId, failCount); + String applicationAttemptIdStr = System + .getenv(ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV); + if (applicationAttemptIdStr == null) { + String msg = ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV + + " is null"; + LOG.error(msg); + throw new IOException(msg); + } + ApplicationAttemptId applicationAttemptId = ConverterUtils + .toApplicationAttemptId(applicationAttemptIdStr); + MRAppMaster appMaster = new MRAppMaster(applicationAttemptId); Runtime.getRuntime().addShutdownHook( new CompositeServiceShutdownHook(appMaster)); YarnConfiguration conf = new YarnConfiguration(new JobConf()); - conf.addResource(new Path(MRConstants.JOB_CONF_FILE)); + conf.addResource(new Path(MRJobConfig.JOB_CONF_FILE)); conf.set(MRJobConfig.USER_NAME, System.getProperty("user.name")); UserGroupInformation.setConfiguration(conf); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java index 73359bb12a5..f84a4d9dbe4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/client/MRClientService.java @@ -149,7 +149,7 @@ public class MRClientService extends AbstractService + ":" + server.getPort()); LOG.info("Instantiated MRClientService at " + this.bindAddress); try { - webApp = WebApps.$for("yarn", AppContext.class, appContext).with(conf). + webApp = WebApps.$for("mapreduce", AppContext.class, appContext).with(conf). start(new AMWebApp()); } catch (Exception e) { LOG.error("Webapps failed to start. Ignoring for now:", e); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java index 69de493b16b..c26bc24695c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java @@ -64,7 +64,6 @@ import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo; import org.apache.hadoop.mapreduce.split.SplitMetaInfoReader; import org.apache.hadoop.mapreduce.task.JobContextImpl; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; -import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.api.records.Counter; import org.apache.hadoop.mapreduce.v2.api.records.CounterGroup; import org.apache.hadoop.mapreduce.v2.api.records.Counters; @@ -93,6 +92,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType; import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics; import org.apache.hadoop.mapreduce.v2.util.MRApps; +import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; @@ -101,6 +101,7 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.YarnException; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -129,11 +130,11 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, RecordFactoryProvider.getRecordFactory(null); //final fields + private final ApplicationAttemptId applicationAttemptId; private final Clock clock; private final JobACLsManager aclsManager; private final String username; private final Map jobACLs; - private final int startCount; private final Set completedTasksFromPreviousRun; private final Lock readLock; private final Lock writeLock; @@ -365,26 +366,26 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, private Token jobToken; private JobTokenSecretManager jobTokenSecretManager; - public JobImpl(ApplicationId appID, Configuration conf, + public JobImpl(ApplicationAttemptId applicationAttemptId, Configuration conf, EventHandler eventHandler, TaskAttemptListener taskAttemptListener, JobTokenSecretManager 
jobTokenSecretManager, - Credentials fsTokenCredentials, Clock clock, int startCount, + Credentials fsTokenCredentials, Clock clock, Set completedTasksFromPreviousRun, MRAppMetrics metrics, String userName) { - + this.applicationAttemptId = applicationAttemptId; this.jobId = recordFactory.newRecordInstance(JobId.class); this.jobName = conf.get(JobContext.JOB_NAME, ""); this.conf = conf; this.metrics = metrics; this.clock = clock; this.completedTasksFromPreviousRun = completedTasksFromPreviousRun; - this.startCount = startCount; this.userName = userName; - jobId.setAppId(appID); - jobId.setId(appID.getId()); + ApplicationId applicationId = applicationAttemptId.getApplicationId(); + jobId.setAppId(applicationId); + jobId.setId(applicationId.getId()); oldJobId = TypeConverter.fromYarn(jobId); LOG.info("Job created" + - " appId=" + appID + + " appId=" + applicationId + " jobId=" + jobId + " oldJobId=" + oldJobId); @@ -584,25 +585,17 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, public JobReport getReport() { readLock.lock(); try { - JobReport report = recordFactory.newRecordInstance(JobReport.class); - report.setJobId(jobId); - report.setJobState(getState()); - - // TODO - Fix to correctly setup report and to check state - if (report.getJobState() == JobState.NEW) { - return report; - } - - report.setStartTime(startTime); - report.setFinishTime(finishTime); - report.setSetupProgress(setupProgress); - report.setCleanupProgress(cleanupProgress); - report.setMapProgress(computeProgress(mapTasks)); - report.setReduceProgress(computeProgress(reduceTasks)); - report.setJobName(jobName); - report.setUser(username); + JobState state = getState(); - return report; + if (getState() == JobState.NEW) { + return MRBuilderUtils.newJobReport(jobId, jobName, username, state, + startTime, finishTime, setupProgress, 0.0f, + 0.0f, cleanupProgress); + } + + return MRBuilderUtils.newJobReport(jobId, jobName, username, state, + startTime, finishTime, setupProgress, computeProgress(mapTasks), + computeProgress(reduceTasks), cleanupProgress); } finally { readLock.unlock(); } @@ -1007,7 +1000,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, FileSystem.get(job.conf).makeQualified( new Path(path, oldJobIDString)); job.remoteJobConfFile = - new Path(job.remoteJobSubmitDir, MRConstants.JOB_CONF_FILE); + new Path(job.remoteJobSubmitDir, MRJobConfig.JOB_CONF_FILE); // Prepare the TaskAttemptListener server for authentication of Containers // TaskAttemptListener gets the information via jobTokenSecretManager. 
@@ -1033,7 +1026,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, Path remoteJobTokenFile = new Path(job.remoteJobSubmitDir, - MRConstants.APPLICATION_TOKENS_FILE); + MRJobConfig.APPLICATION_TOKENS_FILE); tokenStorage.writeTokenStorageFile(remoteJobTokenFile, job.conf); LOG.info("Writing back the job-token file on the remote file system:" + remoteJobTokenFile.toString()); @@ -1078,7 +1071,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.conf, splits[i], job.taskAttemptListener, job.committer, job.jobToken, job.fsTokens.getAllTokens(), - job.clock, job.completedTasksFromPreviousRun, job.startCount, + job.clock, job.completedTasksFromPreviousRun, + job.applicationAttemptId.getAttemptId(), job.metrics); job.addTask(task); } @@ -1095,7 +1089,9 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.conf, job.numMapTasks, job.taskAttemptListener, job.committer, job.jobToken, job.fsTokens.getAllTokens(), job.clock, - job.completedTasksFromPreviousRun, job.startCount, job.metrics); + job.completedTasksFromPreviousRun, + job.applicationAttemptId.getAttemptId(), + job.metrics); job.addTask(task); } LOG.info("Number of reduces for job " + job.jobId + " = " diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index cc9f6bddf06..495d00e22c9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -21,7 +21,6 @@ package org.apache.hadoop.mapreduce.v2.app.job.impl; import java.io.File; import java.io.IOException; import java.net.InetSocketAddress; -import java.net.URI; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; @@ -62,7 +61,6 @@ import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptStartedEvent; import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent; import org.apache.hadoop.mapreduce.security.TokenCache; import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier; -import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.api.records.Counter; import org.apache.hadoop.mapreduce.v2.api.records.Counters; import org.apache.hadoop.mapreduce.v2.api.records.Phase; @@ -103,6 +101,7 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.ApplicationConstants; +import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerToken; @@ -117,7 +116,6 @@ import org.apache.hadoop.yarn.state.InvalidStateTransitonException; import org.apache.hadoop.yarn.state.SingleArcTransition; import org.apache.hadoop.yarn.state.StateMachine; import org.apache.hadoop.yarn.state.StateMachineFactory; -import org.apache.hadoop.yarn.util.BuilderUtils; import 
org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.RackResolver; @@ -153,7 +151,7 @@ public abstract class TaskAttemptImpl implements private Token jobToken; private static AtomicBoolean initialClasspathFlag = new AtomicBoolean(); private static String initialClasspath = null; - private final Object classpathLock = new Object(); + private static final Object classpathLock = new Object(); private long launchTime; private long finishTime; private WrappedProgressSplitsBlock progressSplitBlock; @@ -518,8 +516,8 @@ public abstract class TaskAttemptImpl implements return initialClasspath; } Map env = new HashMap(); - MRApps.setInitialClasspath(env); - initialClasspath = env.get(MRApps.CLASSPATH); + MRApps.setClasspath(env); + initialClasspath = env.get(Environment.CLASSPATH.name()); initialClasspathFlag.set(true); return initialClasspath; } @@ -531,16 +529,18 @@ public abstract class TaskAttemptImpl implements */ private ContainerLaunchContext createContainerLaunchContext() { - ContainerLaunchContext container = - recordFactory.newRecordInstance(ContainerLaunchContext.class); - // Application resources Map localResources = new HashMap(); // Application environment Map environment = new HashMap(); - + + // Service data + Map serviceData = new HashMap(); + + // Tokens + ByteBuffer tokens = ByteBuffer.wrap(new byte[]{}); try { FileSystem remoteFS = FileSystem.get(conf); @@ -550,7 +550,7 @@ public abstract class TaskAttemptImpl implements MRJobConfig.JAR))).makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()); localResources.put( - MRConstants.JOB_JAR, + MRJobConfig.JOB_JAR, createLocalResource(remoteFS, recordFactory, remoteJobJar, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION)); LOG.info("The job-jar file on the remote FS is " @@ -570,9 +570,9 @@ public abstract class TaskAttemptImpl implements Path remoteJobSubmitDir = new Path(path, oldJobId.toString()); Path remoteJobConfPath = - new Path(remoteJobSubmitDir, MRConstants.JOB_CONF_FILE); + new Path(remoteJobSubmitDir, MRJobConfig.JOB_CONF_FILE); localResources.put( - MRConstants.JOB_CONF_FILE, + MRJobConfig.JOB_CONF_FILE, createLocalResource(remoteFS, recordFactory, remoteJobConfPath, LocalResourceType.FILE, LocalResourceVisibility.APPLICATION)); LOG.info("The job-conf file on the remote FS is " @@ -580,12 +580,8 @@ public abstract class TaskAttemptImpl implements // //////////// End of JobConf setup // Setup DistributedCache - MRApps.setupDistributedCache(conf, localResources, environment); + MRApps.setupDistributedCache(conf, localResources); - // Set local-resources and environment - container.setLocalResources(localResources); - container.setEnvironment(environment); - // Setup up tokens Credentials taskCredentials = new Credentials(); @@ -606,52 +602,43 @@ public abstract class TaskAttemptImpl implements LOG.info("Size of containertokens_dob is " + taskCredentials.numberOfTokens()); taskCredentials.writeTokenStorageToStream(containerTokens_dob); - container.setContainerTokens( + tokens = ByteBuffer.wrap(containerTokens_dob.getData(), 0, - containerTokens_dob.getLength())); + containerTokens_dob.getLength()); // Add shuffle token LOG.info("Putting shuffle token in serviceData"); - Map serviceData = new HashMap(); serviceData.put(ShuffleHandler.MAPREDUCE_SHUFFLE_SERVICEID, ShuffleHandler.serializeServiceData(jobToken)); - container.setServiceData(serviceData); - MRApps.addToClassPath(container.getEnvironment(), getInitialClasspath()); + MRApps.addToEnvironment( + environment, + 
Environment.CLASSPATH.name(), + getInitialClasspath()); } catch (IOException e) { throw new YarnException(e); } + + // Setup environment + MapReduceChildJVM.setVMEnv(environment, remoteTask); + + // Set up the launch command + List commands = MapReduceChildJVM.getVMCommand( + taskAttemptListener.getAddress(), remoteTask, + jvmID); - container.setContainerId(containerID); - container.setUser(conf.get(MRJobConfig.USER_NAME)); // TODO: Fix - - File workDir = new File("$PWD"); // Will be expanded by the shell. - String containerLogDir = - new File(ApplicationConstants.LOG_DIR_EXPANSION_VAR).toString(); - String childTmpDir = new File(workDir, "tmp").toString(); - String javaHome = "${JAVA_HOME}"; // Will be expanded by the shell. - String nmLdLibraryPath = "{LD_LIBRARY_PATH}"; // Expanded by the shell? - List classPaths = new ArrayList(); - - String localizedApplicationTokensFile = - new File(workDir, MRConstants.APPLICATION_TOKENS_FILE).toString(); - classPaths.add(MRConstants.JOB_JAR); - classPaths.add(MRConstants.YARN_MAPREDUCE_APP_JAR_PATH); - classPaths.add(workDir.toString()); // TODO - - // Construct the actual Container - container.setCommands(MapReduceChildJVM.getVMCommand( - taskAttemptListener.getAddress(), remoteTask, javaHome, - workDir.toString(), containerLogDir, childTmpDir, jvmID)); - - MapReduceChildJVM.setVMEnv(container.getEnvironment(), classPaths, - workDir.toString(), containerLogDir, nmLdLibraryPath, remoteTask, - localizedApplicationTokensFile); - // Construct the actual Container + ContainerLaunchContext container = + recordFactory.newRecordInstance(ContainerLaunchContext.class); container.setContainerId(containerID); container.setUser(conf.get(MRJobConfig.USER_NAME)); container.setResource(assignedCapability); + container.setLocalResources(localResources); + container.setEnvironment(environment); + container.setCommands(commands); + container.setServiceData(serviceData); + container.setContainerTokens(tokens); + return container; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java index 982f7d334ae..95e17d8f4f6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/launcher/ContainerLauncherImpl.java @@ -73,6 +73,8 @@ public class ContainerLauncherImpl extends AbstractService implements private AppContext context; private ThreadPoolExecutor launcherPool; + private static final int INITIAL_POOL_SIZE = 10; + private int limitOnPoolSize; private Thread eventHandlingThread; private BlockingQueue eventQueue = new LinkedBlockingQueue(); @@ -96,16 +98,17 @@ public class ContainerLauncherImpl extends AbstractService implements YarnConfiguration.YARN_SECURITY_INFO, ContainerManagerSecurityInfo.class, SecurityInfo.class); this.recordFactory = RecordFactoryProvider.getRecordFactory(conf); + this.limitOnPoolSize = conf.getInt( + MRJobConfig.MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT, + MRJobConfig.DEFAULT_MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT); super.init(myLocalConfig); } public void start() { - launcherPool = - new 
ThreadPoolExecutor(getConfig().getInt( - MRJobConfig.MR_AM_CONTAINERLAUNCHER_THREAD_COUNT, 10), - Integer.MAX_VALUE, 1, TimeUnit.HOURS, - new LinkedBlockingQueue()); - launcherPool.prestartAllCoreThreads(); // Wait for work. + // Start with a default core-pool size of 10 and change it dynamically. + launcherPool = new ThreadPoolExecutor(INITIAL_POOL_SIZE, + Integer.MAX_VALUE, 1, TimeUnit.HOURS, + new LinkedBlockingQueue()); eventHandlingThread = new Thread(new Runnable() { @Override public void run() { @@ -117,6 +120,26 @@ public class ContainerLauncherImpl extends AbstractService implements LOG.error("Returning, interrupted : " + e); return; } + + int poolSize = launcherPool.getCorePoolSize(); + + // See if we need up the pool size only if haven't reached the + // maximum limit yet. + if (poolSize != limitOnPoolSize) { + + // nodes where containers will run at *this* point of time. This is + // *not* the cluster size and doesn't need to be. + int numNodes = ugiMap.size(); + int idealPoolSize = Math.min(limitOnPoolSize, numNodes); + + if (poolSize <= idealPoolSize) { + // Bump up the pool size to idealPoolSize+INITIAL_POOL_SIZE, the + // later is just a buffer so we are not always increasing the + // pool-size + launcherPool.setCorePoolSize(idealPoolSize + INITIAL_POOL_SIZE); + } + } + // the events from the queue are handled in parallel // using a thread pool launcherPool.execute(new EventProcessor(event)); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java index 18a0f2d5a6a..0261e18b56f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/local/LocalContainerAllocator.java @@ -18,6 +18,7 @@ package org.apache.hadoop.mapreduce.v2.app.local; +import java.util.ArrayList; import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; @@ -30,15 +31,19 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; -import org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent; import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.records.AMResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.Records; /** @@ -65,6 +70,20 @@ public class 
LocalContainerAllocator extends RMCommunicator this.appID = context.getApplicationID(); } + @Override + protected synchronized void heartbeat() throws Exception { + AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest( + this.applicationAttemptId, this.lastResponseID, super + .getApplicationProgress(), new ArrayList(), + new ArrayList()); + AllocateResponse allocateResponse = scheduler.allocate(allocateRequest); + AMResponse response = allocateResponse.getAMResponse(); + if (response.getReboot()) { + // TODO + LOG.info("Event from RM: shutting down Application Master"); + } + } + @Override public void handle(ContainerAllocatorEvent event) { if (event.getType() == ContainerAllocator.EventType.CONTAINER_REQ) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java index 073411c9b47..ca213f17f86 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/recover/RecoveryService.java @@ -58,7 +58,7 @@ import org.apache.hadoop.mapreduce.v2.app.taskclean.TaskCleaner; import org.apache.hadoop.mapreduce.v2.app.taskclean.TaskCleanupEvent; import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.yarn.Clock; -import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; @@ -92,10 +92,9 @@ public class RecoveryService extends CompositeService implements Recovery { private static final Log LOG = LogFactory.getLog(RecoveryService.class); - private final ApplicationId appID; + private final ApplicationAttemptId applicationAttemptId; private final Dispatcher dispatcher; private final ControlledClock clock; - private final int startCount; private JobInfo jobInfo = null; private final Map completedTasks = @@ -106,10 +105,10 @@ public class RecoveryService extends CompositeService implements Recovery { private volatile boolean recoveryMode = false; - public RecoveryService(ApplicationId appID, Clock clock, int startCount) { + public RecoveryService(ApplicationAttemptId applicationAttemptId, + Clock clock) { super("RecoveringDispatcher"); - this.appID = appID; - this.startCount = startCount; + this.applicationAttemptId = applicationAttemptId; this.dispatcher = new RecoveryDispatcher(); this.clock = new ControlledClock(clock); addService((Service) dispatcher); @@ -152,7 +151,8 @@ public class RecoveryService extends CompositeService implements Recovery { private void parse() throws IOException { // TODO: parse history file based on startCount - String jobName = TypeConverter.fromYarn(appID).toString(); + String jobName = + TypeConverter.fromYarn(applicationAttemptId.getApplicationId()).toString(); String jobhistoryDir = JobHistoryUtils.getConfiguredHistoryStagingDirPrefix(getConfig()); FSDataInputStream in = null; Path historyFile = null; @@ -160,8 +160,9 @@ public class RecoveryService extends CompositeService implements Recovery { new 
Path(jobhistoryDir)); FileContext fc = FileContext.getFileContext(histDirPath.toUri(), getConfig()); + //read the previous history file historyFile = fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile( - histDirPath, jobName, startCount - 1)); //read the previous history file + histDirPath, jobName, (applicationAttemptId.getAttemptId() - 1))); in = fc.open(historyFile); JobHistoryParser parser = new JobHistoryParser(in); jobInfo = parser.parse(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java index db4a60b1dcc..15a7e3f6a5a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java @@ -20,7 +20,6 @@ package org.apache.hadoop.mapreduce.v2.app.rm; import java.io.IOException; import java.security.PrivilegedAction; -import java.util.ArrayList; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -29,6 +28,7 @@ import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobReport; import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.client.ClientService; @@ -42,17 +42,12 @@ import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.AMRMProtocol; import org.apache.hadoop.yarn.api.ApplicationConstants; -import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; -import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; -import org.apache.hadoop.yarn.api.records.AMResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -64,7 +59,7 @@ import org.apache.hadoop.yarn.service.AbstractService; /** * Registers/unregisters to RM and sends heartbeats to RM. 
*/ -public class RMCommunicator extends AbstractService { +public abstract class RMCommunicator extends AbstractService { private static final Log LOG = LogFactory.getLog(RMContainerAllocator.class); private int rmPollInterval;//millis protected ApplicationId applicationId; @@ -74,7 +69,7 @@ public class RMCommunicator extends AbstractService { protected EventHandler eventHandler; protected AMRMProtocol scheduler; private final ClientService clientService; - private int lastResponseID; + protected int lastResponseID; private Resource minContainerCapability; private Resource maxContainerCapability; @@ -121,6 +116,34 @@ public class RMCommunicator extends AbstractService { return job; } + /** + * Get the appProgress. Can be used only after this component is started. + * @return the appProgress. + */ + protected float getApplicationProgress() { + // For now just a single job. In future when we have a DAG, we need an + // aggregate progress. + JobReport report = this.job.getReport(); + float setupWeight = 0.05f; + float cleanupWeight = 0.05f; + float mapWeight = 0.0f; + float reduceWeight = 0.0f; + int numMaps = this.job.getTotalMaps(); + int numReduces = this.job.getTotalReduces(); + if (numMaps == 0 && numReduces == 0) { + } else if (numMaps == 0) { + reduceWeight = 0.9f; + } else if (numReduces == 0) { + mapWeight = 0.9f; + } else { + mapWeight = reduceWeight = 0.45f; + } + return (report.getSetupProgress() * setupWeight + + report.getCleanupProgress() * cleanupWeight + + report.getMapProgress() * mapWeight + report.getReduceProgress() + * reduceWeight); + } + protected void register() { //Register String host = @@ -262,18 +285,5 @@ public class RMCommunicator extends AbstractService { }); } - protected synchronized void heartbeat() throws Exception { - AllocateRequest allocateRequest = - recordFactory.newRecordInstance(AllocateRequest.class); - allocateRequest.setApplicationAttemptId(applicationAttemptId); - allocateRequest.setResponseId(lastResponseID); - allocateRequest.addAllAsks(new ArrayList()); - allocateRequest.addAllReleases(new ArrayList()); - AllocateResponse allocateResponse = scheduler.allocate(allocateRequest); - AMResponse response = allocateResponse.getAMResponse(); - if (response.getReboot()) { - LOG.info("Event from RM: shutting down Application Master"); - } - } - + protected abstract void heartbeat() throws Exception; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java index ff232104bd4..7b75cd1fbd7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java @@ -586,37 +586,21 @@ public class RMContainerAllocator extends RMContainerRequestor private ContainerRequest assign(Container allocated) { ContainerRequest assigned = null; - if (mapResourceReqt != reduceResourceReqt) { - //assign based on size - LOG.info("Assigning based on container size"); - if (allocated.getResource().getMemory() == mapResourceReqt) { - assigned = assignToFailedMap(allocated); - if (assigned == null) { - assigned = assignToMap(allocated); - } - } else if 
(allocated.getResource().getMemory() == reduceResourceReqt) { - assigned = assignToReduce(allocated); - } - - return assigned; - } - - //container can be given to either map or reduce - //assign based on priority - - //try to assign to earlierFailedMaps if present - assigned = assignToFailedMap(allocated); - - //Assign to reduces before assigning to maps ? - if (assigned == null) { + Priority priority = allocated.getPriority(); + if (PRIORITY_FAST_FAIL_MAP.equals(priority)) { + LOG.info("Assigning container " + allocated + " to fast fail map"); + assigned = assignToFailedMap(allocated); + } else if (PRIORITY_REDUCE.equals(priority)) { + LOG.info("Assigning container " + allocated + " to reduce"); assigned = assignToReduce(allocated); - } - - //try to assign to maps if present - if (assigned == null) { + } else if (PRIORITY_MAP.equals(priority)) { + LOG.info("Assigning container " + allocated + " to map"); assigned = assignToMap(allocated); + } else { + LOG.warn("Container allocated at unwanted priority: " + priority + + ". Returning to RM..."); } - + return assigned; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java index b9f0c6ee45e..cda2ed678af 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerRequestor.java @@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.util.BuilderUtils; /** * Keeps the data structures to send container requests to RM. 
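
The makeRemoteRequest() hunk below now builds its AllocateRequest through BuilderUtils.newAllocateRequest and reports super.getApplicationProgress() to the RM on every heartbeat. As a worked check of the weighting that getApplicationProgress() introduces above (5% setup, 5% cleanup, and the remaining 90% split across whichever of the map/reduce phases exist), the sketch below reproduces the values the new TestRMContainerAllocator progress tests assert later in this patch. The weights are restated from the hunk; the class and method names are illustrative only.

/**
 * Worked check of the progress weighting added to RMCommunicator in this patch.
 * Weights come from the hunk above; everything else here is illustrative.
 */
public final class ProgressWeightingCheck {
  static float weighted(float setup, float cleanup, float map, float reduce,
                        int numMaps, int numReduces) {
    float mapW = 0f, reduceW = 0f;
    if (numMaps == 0 && numReduces == 0) {
      // neither phase exists: only setup/cleanup contribute
    } else if (numMaps == 0) {
      reduceW = 0.9f;               // reduce-only job
    } else if (numReduces == 0) {
      mapW = 0.9f;                  // map-only job
    } else {
      mapW = reduceW = 0.45f;       // both phases present
    }
    return setup * 0.05f + cleanup * 0.05f + map * mapW + reduce * reduceW;
  }

  public static void main(String[] args) {
    System.out.println(weighted(100, 0, 10, 0, 2, 2));  // 9.5  (testReportedAppProgress)
    System.out.println(weighted(100, 0, 80, 0, 2, 2));  // 41.0
    System.out.println(weighted(100, 0, 10, 0, 2, 0));  // 14.0 (maps-only variant)
  }
}

With both phases present, a fully finished job sums to 100 (5 + 5 + 45 + 45), matching the final 100.0f assertions in those tests.
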
@@ -107,15 +108,11 @@ public abstract class RMContainerRequestor extends RMCommunicator { LOG.info("maxTaskFailuresPerNode is " + maxTaskFailuresPerNode); } - protected abstract void heartbeat() throws Exception; - protected AMResponse makeRemoteRequest() throws YarnRemoteException { - AllocateRequest allocateRequest = recordFactory - .newRecordInstance(AllocateRequest.class); - allocateRequest.setApplicationAttemptId(applicationAttemptId); - allocateRequest.setResponseId(lastResponseID); - allocateRequest.addAllAsks(new ArrayList(ask)); - allocateRequest.addAllReleases(new ArrayList(release)); + AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest( + applicationAttemptId, lastResponseID, super.getApplicationProgress(), + new ArrayList(ask), new ArrayList( + release)); AllocateResponse allocateResponse = scheduler.allocate(allocateRequest); AMResponse response = allocateResponse.getAMResponse(); lastResponseID = response.getResponseId(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java index feb019fe162..ab7d23ef9dc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/speculate/DefaultSpeculator.java @@ -35,7 +35,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.MRJobConfig; -import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; @@ -87,7 +86,7 @@ public class DefaultSpeculator extends AbstractService implements private final ConcurrentMap reduceContainerNeeds = new ConcurrentHashMap(); - private final Set mayHaveSpeculated = new HashSet(); + private final Set mayHaveSpeculated = new HashSet(); private final Configuration conf; private AppContext context; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobConfPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobConfPage.java index 8bf2ce1955f..983859e7d67 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobConfPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/JobConfPage.java @@ -44,6 +44,7 @@ public class JobConfPage extends AppView { set(TITLE, jobID.isEmpty() ? 
"Bad request: missing job ID" : join("Configuration for MapReduce Job ", $(JOB_ID))); commonPreHead(html); + set(initID(ACCORDION, "nav"), "{autoHeight:false, active:2}"); set(DATATABLES_ID, "conf"); set(initID(DATATABLES, "conf"), confTableInit()); set(postInitID(DATATABLES, "conf"), confPostTableInit()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java index bb4e2390a75..8b4524ad117 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/NavBlock.java @@ -38,9 +38,9 @@ public class NavBlock extends HtmlBlock { div("#nav"). h3("Cluster"). ul(). - li().a(url(rmweb, prefix(), "cluster"), "About")._(). - li().a(url(rmweb, prefix(), "apps"), "Applications")._(). - li().a(url(rmweb, prefix(), "scheduler"), "Scheduler")._()._(). + li().a(url(rmweb, "cluster", "cluster"), "About")._(). + li().a(url(rmweb, "cluster", "apps"), "Applications")._(). + li().a(url(rmweb, "cluster", "scheduler"), "Scheduler")._()._(). h3("Application"). ul(). li().a(url("app/info"), "About")._(). diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java index d9884d146a6..736bef639e0 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/TaskPage.java @@ -85,7 +85,7 @@ public class TaskPage extends AppView { if (containerId != null) { String containerIdStr = ConverterUtils.toString(containerId); nodeTd._(" "). - a(".logslink", url("http://", nodeHttpAddr, "yarn", "containerlogs", + a(".logslink", url("http://", nodeHttpAddr, "node", "containerlogs", containerIdStr), "logs"); } nodeTd._(). 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java index 548d754a6c6..d6e2d968173 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java @@ -66,6 +66,7 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.YarnException; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -91,7 +92,7 @@ public class MRApp extends MRAppMaster { private File testWorkDir; private Path testAbsPath; - private final RecordFactory recordFactory = + private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); //if true, tasks complete automatically as soon as they are launched @@ -100,7 +101,7 @@ public class MRApp extends MRAppMaster { static ApplicationId applicationId; static { - applicationId = RecordFactoryProvider.getRecordFactory(null).newRecordInstance(ApplicationId.class); + applicationId = recordFactory.newRecordInstance(ApplicationId.class); applicationId.setClusterTimestamp(0); applicationId.setId(0); } @@ -108,9 +109,19 @@ public class MRApp extends MRAppMaster { public MRApp(int maps, int reduces, boolean autoComplete, String testName, boolean cleanOnStart) { this(maps, reduces, autoComplete, testName, cleanOnStart, 1); } + + private static ApplicationAttemptId getApplicationAttemptId( + ApplicationId applicationId, int startCount) { + ApplicationAttemptId applicationAttemptId = + recordFactory.newRecordInstance(ApplicationAttemptId.class); + applicationAttemptId.setApplicationId(applicationId); + applicationAttemptId.setAttemptId(startCount); + return applicationAttemptId; + } - public MRApp(int maps, int reduces, boolean autoComplete, String testName, boolean cleanOnStart, int startCount) { - super(applicationId, startCount); + public MRApp(int maps, int reduces, boolean autoComplete, String testName, + boolean cleanOnStart, int startCount) { + super(getApplicationAttemptId(applicationId, startCount)); this.testWorkDir = new File("target", testName); testAbsPath = new Path(testWorkDir.getAbsolutePath()); LOG.info("PathUsed: " + testAbsPath); @@ -391,11 +402,12 @@ public class MRApp extends MRAppMaster { return localStateMachine; } - public TestJob(Configuration conf, ApplicationId appID, + public TestJob(Configuration conf, ApplicationId applicationId, EventHandler eventHandler, TaskAttemptListener taskAttemptListener, Clock clock, String user) { - super(appID, conf, eventHandler, taskAttemptListener, - new JobTokenSecretManager(), new Credentials(), clock, getStartCount(), + super(getApplicationAttemptId(applicationId, getStartCount()), + conf, eventHandler, taskAttemptListener, + new JobTokenSecretManager(), new Credentials(), clock, getCompletedTaskFromPreviousRun(), metrics, user); // This "this leak" is okay because the retained pointer is in an diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java index cbf3ab0a658..a1eb928919e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java @@ -18,12 +18,15 @@ package org.apache.hadoop.mapreduce.v2.app; +import static org.mockito.Matchers.isA; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Set; import junit.framework.Assert; @@ -32,475 +35,651 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobReport; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; +import org.apache.hadoop.mapreduce.v2.app.client.ClientService; import org.apache.hadoop.mapreduce.v2.app.job.Job; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssignedEvent; +import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerRequestEvent; import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator; +import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetworkTopology; -import org.apache.hadoop.yarn.Clock; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.AMRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; -import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse; -import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; -import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; -import org.apache.hadoop.yarn.api.records.AMResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ApplicationMaster; -import org.apache.hadoop.yarn.api.records.ApplicationStatus; -import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.event.Event; import 
org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.ipc.RPCUtil; -import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; -import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; -import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; +import org.apache.hadoop.yarn.server.resourcemanager.MockNM; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; import org.apache.hadoop.yarn.server.security.ContainerTokenSecretManager; -import org.junit.BeforeClass; +import org.apache.hadoop.yarn.util.BuilderUtils; +import org.junit.After; import org.junit.Test; public class TestRMContainerAllocator { -// private static final Log LOG = LogFactory.getLog(TestRMContainerAllocator.class); -// private static final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); -// -// @BeforeClass -// public static void preTests() { -// DefaultMetricsSystem.shutdown(); -// } -// -// @Test -// public void testSimple() throws Exception { -// FifoScheduler scheduler = createScheduler(); -// LocalRMContainerAllocator allocator = new LocalRMContainerAllocator( -// scheduler, new Configuration()); -// -// //add resources to scheduler -// RMNode nodeManager1 = addNode(scheduler, "h1", 10240); -// RMNode nodeManager2 = addNode(scheduler, "h2", 10240); -// RMNode nodeManager3 = addNode(scheduler, "h3", 10240); -// -// //create the container request -// ContainerRequestEvent event1 = -// createReq(1, 1024, new String[]{"h1"}); -// allocator.sendRequest(event1); -// -// //send 1 more request with different resource req -// ContainerRequestEvent event2 = createReq(2, 1024, new String[]{"h2"}); -// allocator.sendRequest(event2); -// -// //this tells the scheduler about the requests -// //as nodes are not added, no allocations -// List assigned = allocator.schedule(); -// Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); -// -// //send another request with different resource and priority -// ContainerRequestEvent event3 = createReq(3, 1024, new String[]{"h3"}); -// allocator.sendRequest(event3); -// -// //this tells the scheduler about the requests -// //as nodes are not added, no allocations -// assigned = allocator.schedule(); -// Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); -// -// //update resources in scheduler -// scheduler.nodeUpdate(nodeManager1); // Node heartbeat -// scheduler.nodeUpdate(nodeManager2); // Node heartbeat -// scheduler.nodeUpdate(nodeManager3); // Node heartbeat -// -// -// assigned = allocator.schedule(); -// checkAssignments( -// new ContainerRequestEvent[]{event1, event2, event3}, assigned, false); -// } -// -// //TODO: Currently Scheduler seems to have bug where it does not work -// //for Application asking for containers with different 
capabilities. -// //@Test -// public void testResource() throws Exception { -// FifoScheduler scheduler = createScheduler(); -// LocalRMContainerAllocator allocator = new LocalRMContainerAllocator( -// scheduler, new Configuration()); -// -// //add resources to scheduler -// RMNode nodeManager1 = addNode(scheduler, "h1", 10240); -// RMNode nodeManager2 = addNode(scheduler, "h2", 10240); -// RMNode nodeManager3 = addNode(scheduler, "h3", 10240); -// -// //create the container request -// ContainerRequestEvent event1 = -// createReq(1, 1024, new String[]{"h1"}); -// allocator.sendRequest(event1); -// -// //send 1 more request with different resource req -// ContainerRequestEvent event2 = createReq(2, 2048, new String[]{"h2"}); -// allocator.sendRequest(event2); -// -// //this tells the scheduler about the requests -// //as nodes are not added, no allocations -// List assigned = allocator.schedule(); -// Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); -// -// //update resources in scheduler -// scheduler.nodeUpdate(nodeManager1); // Node heartbeat -// scheduler.nodeUpdate(nodeManager2); // Node heartbeat -// scheduler.nodeUpdate(nodeManager3); // Node heartbeat -// -// assigned = allocator.schedule(); -// checkAssignments( -// new ContainerRequestEvent[]{event1, event2}, assigned, false); -// } -// -// @Test -// public void testMapReduceScheduling() throws Exception { -// FifoScheduler scheduler = createScheduler(); -// Configuration conf = new Configuration(); -// LocalRMContainerAllocator allocator = new LocalRMContainerAllocator( -// scheduler, conf); -// -// //add resources to scheduler -// RMNode nodeManager1 = addNode(scheduler, "h1", 1024); -// RMNode nodeManager2 = addNode(scheduler, "h2", 10240); -// RMNode nodeManager3 = addNode(scheduler, "h3", 10240); -// -// //create the container request -// //send MAP request -// ContainerRequestEvent event1 = -// createReq(1, 2048, new String[]{"h1", "h2"}, true, false); -// allocator.sendRequest(event1); -// -// //send REDUCE request -// ContainerRequestEvent event2 = createReq(2, 3000, new String[]{"h1"}, false, true); -// allocator.sendRequest(event2); -// -// //send MAP request -// ContainerRequestEvent event3 = createReq(3, 2048, new String[]{"h3"}, false, false); -// allocator.sendRequest(event3); -// -// //this tells the scheduler about the requests -// //as nodes are not added, no allocations -// List assigned = allocator.schedule(); -// Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); -// -// //update resources in scheduler -// scheduler.nodeUpdate(nodeManager1); // Node heartbeat -// scheduler.nodeUpdate(nodeManager2); // Node heartbeat -// scheduler.nodeUpdate(nodeManager3); // Node heartbeat -// -// assigned = allocator.schedule(); -// checkAssignments( -// new ContainerRequestEvent[]{event1, event3}, assigned, false); -// -// //validate that no container is assigned to h1 as it doesn't have 2048 -// for (TaskAttemptContainerAssignedEvent assig : assigned) { -// Assert.assertFalse("Assigned count not correct", -// "h1".equals(assig.getContainer().getNodeId().getHost())); -// } -// } -// -// -// -// private RMNode addNode(FifoScheduler scheduler, -// String nodeName, int memory) { -// NodeId nodeId = recordFactory.newRecordInstance(NodeId.class); -// nodeId.setHost(nodeName); -// nodeId.setPort(1234); -// Resource resource = recordFactory.newRecordInstance(Resource.class); -// resource.setMemory(memory); -// RMNode nodeManager = new RMNodeImpl(nodeId, null, nodeName, 0, 0, -// 
ResourceTrackerService.resolve(nodeName), resource); -// scheduler.addNode(nodeManager); // Node registration -// return nodeManager; -// } -// -// private FifoScheduler createScheduler() throws YarnRemoteException { -// FifoScheduler fsc = new FifoScheduler() { -// //override this to copy the objects -// //otherwise FifoScheduler updates the numContainers in same objects as kept by -// //RMContainerAllocator -// -// @Override -// public synchronized void allocate(ApplicationAttemptId applicationId, -// List ask) { -// List askCopy = new ArrayList(); -// for (ResourceRequest req : ask) { -// ResourceRequest reqCopy = recordFactory.newRecordInstance(ResourceRequest.class); -// reqCopy.setPriority(req.getPriority()); -// reqCopy.setHostName(req.getHostName()); -// reqCopy.setCapability(req.getCapability()); -// reqCopy.setNumContainers(req.getNumContainers()); -// askCopy.add(reqCopy); -// } -// super.allocate(applicationId, askCopy); -// } -// }; -// try { -// fsc.reinitialize(new Configuration(), new ContainerTokenSecretManager(), null); -// fsc.addApplication(recordFactory.newRecordInstance(ApplicationId.class), -// recordFactory.newRecordInstance(ApplicationMaster.class), -// "test", null, null, StoreFactory.createVoidAppStore()); -// } catch(IOException ie) { -// LOG.info("add application failed with ", ie); -// assert(false); -// } -// return fsc; -// } -// -// private ContainerRequestEvent createReq( -// int attemptid, int memory, String[] hosts) { -// return createReq(attemptid, memory, hosts, false, false); -// } -// -// private ContainerRequestEvent createReq( -// int attemptid, int memory, String[] hosts, boolean earlierFailedAttempt, boolean reduce) { -// ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); -// appId.setClusterTimestamp(0); -// appId.setId(0); -// JobId jobId = recordFactory.newRecordInstance(JobId.class); -// jobId.setAppId(appId); -// jobId.setId(0); -// TaskId taskId = recordFactory.newRecordInstance(TaskId.class); -// taskId.setId(0); -// taskId.setJobId(jobId); -// if (reduce) { -// taskId.setTaskType(TaskType.REDUCE); -// } else { -// taskId.setTaskType(TaskType.MAP); -// } -// TaskAttemptId attemptId = recordFactory.newRecordInstance(TaskAttemptId.class); -// attemptId.setId(attemptid); -// attemptId.setTaskId(taskId); -// Resource containerNeed = recordFactory.newRecordInstance(Resource.class); -// containerNeed.setMemory(memory); -// if (earlierFailedAttempt) { -// return ContainerRequestEvent. 
-// createContainerRequestEventForFailedContainer(attemptId, containerNeed); -// } -// return new ContainerRequestEvent(attemptId, -// containerNeed, -// hosts, new String[] {NetworkTopology.DEFAULT_RACK}); -// } -// -// private void checkAssignments(ContainerRequestEvent[] requests, -// List assignments, -// boolean checkHostMatch) { -// Assert.assertNotNull("Container not assigned", assignments); -// Assert.assertEquals("Assigned count not correct", -// requests.length, assignments.size()); -// -// //check for uniqueness of containerIDs -// Set containerIds = new HashSet(); -// for (TaskAttemptContainerAssignedEvent assigned : assignments) { -// containerIds.add(assigned.getContainer().getId()); -// } -// Assert.assertEquals("Assigned containers must be different", -// assignments.size(), containerIds.size()); -// -// //check for all assignment -// for (ContainerRequestEvent req : requests) { -// TaskAttemptContainerAssignedEvent assigned = null; -// for (TaskAttemptContainerAssignedEvent ass : assignments) { -// if (ass.getTaskAttemptID().equals(req.getAttemptID())){ -// assigned = ass; -// break; -// } -// } -// checkAssignment(req, assigned, checkHostMatch); -// } -// } -// -// private void checkAssignment(ContainerRequestEvent request, -// TaskAttemptContainerAssignedEvent assigned, boolean checkHostMatch) { -// Assert.assertNotNull("Nothing assigned to attempt " + request.getAttemptID(), -// assigned); -// Assert.assertEquals("assigned to wrong attempt", request.getAttemptID(), -// assigned.getTaskAttemptID()); -// if (checkHostMatch) { -// Assert.assertTrue("Not assigned to requested host", Arrays.asList( -// request.getHosts()).contains( -// assigned.getContainer().getNodeId().toString())); -// } -// -// } -// -// //Mock RMContainerAllocator -// //Instead of talking to remote Scheduler,uses the local Scheduler -// public static class LocalRMContainerAllocator extends RMContainerAllocator { -// private static final List events = -// new ArrayList(); -// -// public static class AMRMProtocolImpl implements AMRMProtocol { -// -// private ResourceScheduler resourceScheduler; -// -// public AMRMProtocolImpl(ResourceScheduler resourceScheduler) { -// this.resourceScheduler = resourceScheduler; -// } -// -// @Override -// public RegisterApplicationMasterResponse registerApplicationMaster(RegisterApplicationMasterRequest request) throws YarnRemoteException { -// RegisterApplicationMasterResponse response = recordFactory.newRecordInstance(RegisterApplicationMasterResponse.class); -// return response; -// } -// -// public AllocateResponse allocate(AllocateRequest request) throws YarnRemoteException { -// List ask = request.getAskList(); -// List release = request.getReleaseList(); -// try { -// AMResponse response = recordFactory.newRecordInstance(AMResponse.class); -// Allocation allocation = resourceScheduler.allocate(request.getApplicationAttemptId(), ask); -// response.addAllNewContainers(allocation.getContainers()); -// response.setAvailableResources(allocation.getResourceLimit()); -// AllocateResponse allocateResponse = recordFactory.newRecordInstance(AllocateResponse.class); -// allocateResponse.setAMResponse(response); -// return allocateResponse; -// } catch(IOException ie) { -// throw RPCUtil.getRemoteException(ie); -// } -// } -// -// @Override -// public FinishApplicationMasterResponse finishApplicationMaster(FinishApplicationMasterRequest request) throws YarnRemoteException { -// FinishApplicationMasterResponse response = 
recordFactory.newRecordInstance(FinishApplicationMasterResponse.class); -// return response; -// } -// -// } -// -// private ResourceScheduler scheduler; -// LocalRMContainerAllocator(ResourceScheduler scheduler, Configuration conf) { -// super(null, new TestContext(events)); -// this.scheduler = scheduler; -// super.init(conf); -// super.start(); -// } -// -// protected AMRMProtocol createSchedulerProxy() { -// return new AMRMProtocolImpl(scheduler); -// } -// -// @Override -// protected void register() {} -// @Override -// protected void unregister() {} -// -// @Override -// protected Resource getMinContainerCapability() { -// Resource res = recordFactory.newRecordInstance(Resource.class); -// res.setMemory(1024); -// return res; -// } -// -// @Override -// protected Resource getMaxContainerCapability() { -// Resource res = recordFactory.newRecordInstance(Resource.class); -// res.setMemory(10240); -// return res; -// } -// -// public void sendRequest(ContainerRequestEvent req) { -// sendRequests(Arrays.asList(new ContainerRequestEvent[]{req})); -// } -// -// public void sendRequests(List reqs) { -// for (ContainerRequestEvent req : reqs) { -// handle(req); -// } -// } -// -// //API to be used by tests -// public List schedule() { -// //run the scheduler -// try { -// heartbeat(); -// } catch (Exception e) { -// LOG.error("error in heartbeat ", e); -// throw new YarnException(e); -// } -// -// List result = new ArrayList(events); -// events.clear(); -// return result; -// } -// -// protected void startAllocatorThread() { -// //override to NOT start thread -// } -// -// static class TestContext implements AppContext { -// private List events; -// TestContext(List events) { -// this.events = events; -// } -// @Override -// public Map getAllJobs() { -// return null; -// } -// @Override -// public ApplicationAttemptId getApplicationAttemptId() { -// return recordFactory.newRecordInstance(ApplicationAttemptId.class); -// } -// @Override -// public ApplicationId getApplicationID() { -// return recordFactory.newRecordInstance(ApplicationId.class); -// } -// @Override -// public EventHandler getEventHandler() { -// return new EventHandler() { -// @Override -// public void handle(Event event) { -// events.add((TaskAttemptContainerAssignedEvent) event); -// } -// }; -// } -// @Override -// public Job getJob(JobId jobID) { -// return null; -// } -// -// @Override -// public String getUser() { -// return null; -// } -// -// @Override -// public Clock getClock() { -// return null; -// } -// -// @Override -// public String getApplicationName() { -// return null; -// } -// -// @Override -// public long getStartTime() { -// return 0; -// } -// } -// } -// -// public static void main(String[] args) throws Exception { -// TestRMContainerAllocator t = new TestRMContainerAllocator(); -// t.testSimple(); -// //t.testResource(); -// t.testMapReduceScheduling(); -// } + + static final Log LOG = LogFactory + .getLog(TestRMContainerAllocator.class); + static final RecordFactory recordFactory = RecordFactoryProvider + .getRecordFactory(null); + + @After + public void tearDown() { + DefaultMetricsSystem.shutdown(); + } + + @Test + public void testSimple() throws Exception { + + LOG.info("Running testSimple"); + + Configuration conf = new Configuration(); + MyResourceManager rm = new MyResourceManager(conf); + rm.start(); + DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext() + .getDispatcher(); + + // Submit the application + RMApp app = rm.submitApp(1024); + dispatcher.await(); + + MockNM amNodeManager 
= rm.registerNode("amNM:1234", 2048); + amNodeManager.nodeHeartbeat(true); + dispatcher.await(); + + ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() + .getAppAttemptId(); + rm.sendAMLaunched(appAttemptId); + dispatcher.await(); + + JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0); + Job mockJob = mock(Job.class); + when(mockJob.getReport()).thenReturn( + MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, + 0, 0, 0, 0, 0, 0)); + MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, + appAttemptId, mockJob); + + // add resources to scheduler + MockNM nodeManager1 = rm.registerNode("h1:1234", 10240); + MockNM nodeManager2 = rm.registerNode("h2:1234", 10240); + MockNM nodeManager3 = rm.registerNode("h3:1234", 10240); + dispatcher.await(); + + // create the container request + ContainerRequestEvent event1 = createReq(jobId, 1, 1024, + new String[] { "h1" }); + allocator.sendRequest(event1); + + // send 1 more request with different resource req + ContainerRequestEvent event2 = createReq(jobId, 2, 1024, + new String[] { "h2" }); + allocator.sendRequest(event2); + + // this tells the scheduler about the requests + // as nodes are not added, no allocations + List assigned = allocator.schedule(); + dispatcher.await(); + Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); + + // send another request with different resource and priority + ContainerRequestEvent event3 = createReq(jobId, 3, 1024, + new String[] { "h3" }); + allocator.sendRequest(event3); + + // this tells the scheduler about the requests + // as nodes are not added, no allocations + assigned = allocator.schedule(); + dispatcher.await(); + Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); + + // update resources in scheduler + nodeManager1.nodeHeartbeat(true); // Node heartbeat + nodeManager2.nodeHeartbeat(true); // Node heartbeat + nodeManager3.nodeHeartbeat(true); // Node heartbeat + dispatcher.await(); + + assigned = allocator.schedule(); + dispatcher.await(); + checkAssignments(new ContainerRequestEvent[] { event1, event2, event3 }, + assigned, false); + } + + @Test + public void testResource() throws Exception { + + LOG.info("Running testResource"); + + Configuration conf = new Configuration(); + MyResourceManager rm = new MyResourceManager(conf); + rm.start(); + DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext() + .getDispatcher(); + + // Submit the application + RMApp app = rm.submitApp(1024); + dispatcher.await(); + + MockNM amNodeManager = rm.registerNode("amNM:1234", 2048); + amNodeManager.nodeHeartbeat(true); + dispatcher.await(); + + ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() + .getAppAttemptId(); + rm.sendAMLaunched(appAttemptId); + dispatcher.await(); + + JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0); + Job mockJob = mock(Job.class); + when(mockJob.getReport()).thenReturn( + MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, + 0, 0, 0, 0, 0, 0)); + MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, + appAttemptId, mockJob); + + // add resources to scheduler + MockNM nodeManager1 = rm.registerNode("h1:1234", 10240); + MockNM nodeManager2 = rm.registerNode("h2:1234", 10240); + MockNM nodeManager3 = rm.registerNode("h3:1234", 10240); + dispatcher.await(); + + // create the container request + ContainerRequestEvent event1 = createReq(jobId, 1, 1024, + new String[] { "h1" }); + allocator.sendRequest(event1); + + // send 1 
more request with different resource req + ContainerRequestEvent event2 = createReq(jobId, 2, 2048, + new String[] { "h2" }); + allocator.sendRequest(event2); + + // this tells the scheduler about the requests + // as nodes are not added, no allocations + List assigned = allocator.schedule(); + dispatcher.await(); + Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); + + // update resources in scheduler + nodeManager1.nodeHeartbeat(true); // Node heartbeat + nodeManager2.nodeHeartbeat(true); // Node heartbeat + nodeManager3.nodeHeartbeat(true); // Node heartbeat + dispatcher.await(); + + assigned = allocator.schedule(); + dispatcher.await(); + checkAssignments(new ContainerRequestEvent[] { event1, event2 }, + assigned, false); + } + + @Test + public void testMapReduceScheduling() throws Exception { + + LOG.info("Running testMapReduceScheduling"); + + Configuration conf = new Configuration(); + MyResourceManager rm = new MyResourceManager(conf); + rm.start(); + DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext() + .getDispatcher(); + + // Submit the application + RMApp app = rm.submitApp(1024); + dispatcher.await(); + + MockNM amNodeManager = rm.registerNode("amNM:1234", 2048); + amNodeManager.nodeHeartbeat(true); + dispatcher.await(); + + ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() + .getAppAttemptId(); + rm.sendAMLaunched(appAttemptId); + dispatcher.await(); + + JobId jobId = MRBuilderUtils.newJobId(appAttemptId.getApplicationId(), 0); + Job mockJob = mock(Job.class); + when(mockJob.getReport()).thenReturn( + MRBuilderUtils.newJobReport(jobId, "job", "user", JobState.RUNNING, + 0, 0, 0, 0, 0, 0)); + MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, + appAttemptId, mockJob); + + // add resources to scheduler + MockNM nodeManager1 = rm.registerNode("h1:1234", 1024); + MockNM nodeManager2 = rm.registerNode("h2:1234", 10240); + MockNM nodeManager3 = rm.registerNode("h3:1234", 10240); + dispatcher.await(); + + // create the container request + // send MAP request + ContainerRequestEvent event1 = createReq(jobId, 1, 2048, new String[] { + "h1", "h2" }, true, false); + allocator.sendRequest(event1); + + // send REDUCE request + ContainerRequestEvent event2 = createReq(jobId, 2, 3000, + new String[] { "h1" }, false, true); + allocator.sendRequest(event2); + + // send MAP request + ContainerRequestEvent event3 = createReq(jobId, 3, 2048, + new String[] { "h3" }, false, false); + allocator.sendRequest(event3); + + // this tells the scheduler about the requests + // as nodes are not added, no allocations + List assigned = allocator.schedule(); + dispatcher.await(); + Assert.assertEquals("No of assignments must be 0", 0, assigned.size()); + + // update resources in scheduler + nodeManager1.nodeHeartbeat(true); // Node heartbeat + nodeManager2.nodeHeartbeat(true); // Node heartbeat + nodeManager3.nodeHeartbeat(true); // Node heartbeat + dispatcher.await(); + + assigned = allocator.schedule(); + dispatcher.await(); + checkAssignments(new ContainerRequestEvent[] { event1, event3 }, + assigned, false); + + // validate that no container is assigned to h1 as it doesn't have 2048 + for (TaskAttemptContainerAssignedEvent assig : assigned) { + Assert.assertFalse("Assigned count not correct", "h1".equals(assig + .getContainer().getNodeId().getHost())); + } + } + + private static class MyResourceManager extends MockRM { + + public MyResourceManager(Configuration conf) { + super(conf); + } + + @Override + protected Dispatcher 
createDispatcher() { + return new DrainDispatcher(); + } + + @Override + protected EventHandler createSchedulerEventDispatcher() { + // Dispatch inline for test sanity + return new EventHandler() { + @Override + public void handle(SchedulerEvent event) { + scheduler.handle(event); + } + }; + } + @Override + protected ResourceScheduler createScheduler() { + return new MyFifoScheduler(getRMContext()); + } + } + + private static class FakeJob extends JobImpl { + + public FakeJob(ApplicationAttemptId appAttemptID, Configuration conf, + int numMaps, int numReduces) { + super(appAttemptID, conf, null, null, null, null, null, null, null, + null); + this.jobId = MRBuilderUtils + .newJobId(appAttemptID.getApplicationId(), 0); + this.numMaps = numMaps; + this.numReduces = numReduces; + } + + private float setupProgress; + private float mapProgress; + private float reduceProgress; + private float cleanupProgress; + private final int numMaps; + private final int numReduces; + private JobId jobId; + + void setProgress(float setupProgress, float mapProgress, + float reduceProgress, float cleanupProgress) { + this.setupProgress = setupProgress; + this.mapProgress = mapProgress; + this.reduceProgress = reduceProgress; + this.cleanupProgress = cleanupProgress; + } + + @Override + public int getTotalMaps() { return this.numMaps; } + @Override + public int getTotalReduces() { return this.numReduces;} + + @Override + public JobReport getReport() { + return MRBuilderUtils.newJobReport(this.jobId, "job", "user", + JobState.RUNNING, 0, 0, this.setupProgress, this.mapProgress, + this.reduceProgress, this.cleanupProgress); + } + } + + @Test + public void testReportedAppProgress() throws Exception { + + LOG.info("Running testReportedAppProgress"); + + Configuration conf = new Configuration(); + MyResourceManager rm = new MyResourceManager(conf); + rm.start(); + DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext() + .getDispatcher(); + + // Submit the application + RMApp app = rm.submitApp(1024); + dispatcher.await(); + + MockNM amNodeManager = rm.registerNode("amNM:1234", 2048); + amNodeManager.nodeHeartbeat(true); + dispatcher.await(); + + ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() + .getAppAttemptId(); + rm.sendAMLaunched(appAttemptId); + dispatcher.await(); + + FakeJob job = new FakeJob(appAttemptId, conf, 2, 2); + MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, + appAttemptId, job); + + allocator.schedule(); // Send heartbeat + dispatcher.await(); + Assert.assertEquals(0.0, app.getProgress(), 0.0); + + job.setProgress(100, 10, 0, 0); + allocator.schedule(); + dispatcher.await(); + Assert.assertEquals(9.5f, app.getProgress(), 0.0); + + job.setProgress(100, 80, 0, 0); + allocator.schedule(); + dispatcher.await(); + Assert.assertEquals(41.0f, app.getProgress(), 0.0); + + job.setProgress(100, 100, 20, 0); + allocator.schedule(); + dispatcher.await(); + Assert.assertEquals(59.0f, app.getProgress(), 0.0); + + job.setProgress(100, 100, 100, 100); + allocator.schedule(); + dispatcher.await(); + Assert.assertEquals(100.0f, app.getProgress(), 0.0); + } + + @Test + public void testReportedAppProgressWithOnlyMaps() throws Exception { + + LOG.info("Running testReportedAppProgressWithOnlyMaps"); + + Configuration conf = new Configuration(); + MyResourceManager rm = new MyResourceManager(conf); + rm.start(); + DrainDispatcher dispatcher = (DrainDispatcher) rm.getRMContext() + .getDispatcher(); + + // Submit the application + RMApp app = rm.submitApp(1024); + 
dispatcher.await(); + + MockNM amNodeManager = rm.registerNode("amNM:1234", 2048); + amNodeManager.nodeHeartbeat(true); + dispatcher.await(); + + ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() + .getAppAttemptId(); + rm.sendAMLaunched(appAttemptId); + dispatcher.await(); + + FakeJob job = new FakeJob(appAttemptId, conf, 2, 0); + MyContainerAllocator allocator = new MyContainerAllocator(rm, conf, + appAttemptId, job); + + allocator.schedule(); // Send heartbeat + dispatcher.await(); + Assert.assertEquals(0.0, app.getProgress(), 0.0); + + job.setProgress(100, 10, 0, 0); + allocator.schedule(); + dispatcher.await(); + Assert.assertEquals(14f, app.getProgress(), 0.0); + + job.setProgress(100, 60, 0, 0); + allocator.schedule(); + dispatcher.await(); + Assert.assertEquals(59.0f, app.getProgress(), 0.0); + + job.setProgress(100, 100, 0, 100); + allocator.schedule(); + dispatcher.await(); + Assert.assertEquals(100.0f, app.getProgress(), 0.0); + } + + private static class MyFifoScheduler extends FifoScheduler { + + public MyFifoScheduler(RMContext rmContext) { + super(); + try { + reinitialize(new Configuration(), new ContainerTokenSecretManager(), + rmContext); + } catch (IOException ie) { + LOG.info("add application failed with ", ie); + assert (false); + } + } + + // override this to copy the objects otherwise FifoScheduler updates the + // numContainers in same objects as kept by RMContainerAllocator + @Override + public synchronized Allocation allocate( + ApplicationAttemptId applicationAttemptId, List ask, + List release) { + List askCopy = new ArrayList(); + for (ResourceRequest req : ask) { + ResourceRequest reqCopy = BuilderUtils.newResourceRequest(req + .getPriority(), req.getHostName(), req.getCapability(), req + .getNumContainers()); + askCopy.add(reqCopy); + } + return super.allocate(applicationAttemptId, askCopy, release); + } + } + + private ContainerRequestEvent createReq(JobId jobId, int taskAttemptId, + int memory, String[] hosts) { + return createReq(jobId, taskAttemptId, memory, hosts, false, false); + } + + private ContainerRequestEvent + createReq(JobId jobId, int taskAttemptId, int memory, String[] hosts, + boolean earlierFailedAttempt, boolean reduce) { + TaskId taskId; + if (reduce) { + taskId = MRBuilderUtils.newTaskId(jobId, 0, TaskType.REDUCE); + } else { + taskId = MRBuilderUtils.newTaskId(jobId, 0, TaskType.MAP); + } + TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, + taskAttemptId); + Resource containerNeed = BuilderUtils.newResource(memory); + if (earlierFailedAttempt) { + return ContainerRequestEvent + .createContainerRequestEventForFailedContainer(attemptId, + containerNeed); + } + return new ContainerRequestEvent(attemptId, containerNeed, hosts, + new String[] { NetworkTopology.DEFAULT_RACK }); + } + + private void checkAssignments(ContainerRequestEvent[] requests, + List assignments, + boolean checkHostMatch) { + Assert.assertNotNull("Container not assigned", assignments); + Assert.assertEquals("Assigned count not correct", requests.length, + assignments.size()); + + // check for uniqueness of containerIDs + Set containerIds = new HashSet(); + for (TaskAttemptContainerAssignedEvent assigned : assignments) { + containerIds.add(assigned.getContainer().getId()); + } + Assert.assertEquals("Assigned containers must be different", assignments + .size(), containerIds.size()); + + // check for all assignment + for (ContainerRequestEvent req : requests) { + TaskAttemptContainerAssignedEvent assigned = null; + for 
(TaskAttemptContainerAssignedEvent ass : assignments) { + if (ass.getTaskAttemptID().equals(req.getAttemptID())) { + assigned = ass; + break; + } + } + checkAssignment(req, assigned, checkHostMatch); + } + } + + private void checkAssignment(ContainerRequestEvent request, + TaskAttemptContainerAssignedEvent assigned, boolean checkHostMatch) { + Assert.assertNotNull("Nothing assigned to attempt " + + request.getAttemptID(), assigned); + Assert.assertEquals("assigned to wrong attempt", request.getAttemptID(), + assigned.getTaskAttemptID()); + if (checkHostMatch) { + Assert.assertTrue("Not assigned to requested host", Arrays.asList( + request.getHosts()).contains( + assigned.getContainer().getNodeId().toString())); + } + } + + // Mock RMContainerAllocator + // Instead of talking to remote Scheduler,uses the local Scheduler + private static class MyContainerAllocator extends RMContainerAllocator { + static final List events + = new ArrayList(); + + private MyResourceManager rm; + + @SuppressWarnings("rawtypes") + private static AppContext createAppContext( + ApplicationAttemptId appAttemptId, Job job) { + AppContext context = mock(AppContext.class); + ApplicationId appId = appAttemptId.getApplicationId(); + when(context.getApplicationID()).thenReturn(appId); + when(context.getApplicationAttemptId()).thenReturn(appAttemptId); + when(context.getJob(isA(JobId.class))).thenReturn(job); + when(context.getEventHandler()).thenReturn(new EventHandler() { + @Override + public void handle(Event event) { + // Only capture interesting events. + if (event instanceof TaskAttemptContainerAssignedEvent) { + events.add((TaskAttemptContainerAssignedEvent) event); + } + } + }); + return context; + } + + private static ClientService createMockClientService() { + ClientService service = mock(ClientService.class); + when(service.getBindAddress()).thenReturn( + NetUtils.createSocketAddr("localhost:4567")); + when(service.getHttpPort()).thenReturn(890); + return service; + } + + MyContainerAllocator(MyResourceManager rm, Configuration conf, + ApplicationAttemptId appAttemptId, Job job) { + super(createMockClientService(), createAppContext(appAttemptId, job)); + this.rm = rm; + super.init(conf); + super.start(); + } + + @Override + protected AMRMProtocol createSchedulerProxy() { + return this.rm.getApplicationMasterService(); + } + + @Override + protected void register() { + super.register(); + } + + @Override + protected void unregister() { + } + + @Override + protected Resource getMinContainerCapability() { + return BuilderUtils.newResource(1024); + } + + @Override + protected Resource getMaxContainerCapability() { + return BuilderUtils.newResource(10240); + } + + public void sendRequest(ContainerRequestEvent req) { + sendRequests(Arrays.asList(new ContainerRequestEvent[] { req })); + } + + public void sendRequests(List reqs) { + for (ContainerRequestEvent req : reqs) { + super.handle(req); + } + } + + // API to be used by tests + public List schedule() { + // run the scheduler + try { + super.heartbeat(); + } catch (Exception e) { + LOG.error("error in heartbeat ", e); + throw new YarnException(e); + } + + List result + = new ArrayList(events); + events.clear(); + return result; + } + + protected void startAllocatorThread() { + // override to NOT start thread + } + } + + public static void main(String[] args) throws Exception { + TestRMContainerAllocator t = new TestRMContainerAllocator(); + t.testSimple(); + t.testResource(); + t.testMapReduceScheduling(); + t.testReportedAppProgress(); + 
t.testReportedAppProgressWithOnlyMaps(); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java index a678e4660e7..9f221e6354a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/TypeConverter.java @@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.QueueACL; +import org.apache.hadoop.yarn.api.records.QueueState; import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -280,16 +281,28 @@ public class TypeConverter { } public static org.apache.hadoop.mapred.JobStatus fromYarn( - JobReport jobreport, String jobFile, String trackingUrl) { + JobReport jobreport, String jobFile) { JobPriority jobPriority = JobPriority.NORMAL; - return new org.apache.hadoop.mapred.JobStatus(fromYarn(jobreport.getJobId()), - jobreport.getSetupProgress(), jobreport.getMapProgress(), - jobreport.getReduceProgress(), jobreport.getCleanupProgress(), - fromYarn(jobreport.getJobState()), - jobPriority, jobreport.getUser(), jobreport.getJobName(), - jobFile, trackingUrl); + org.apache.hadoop.mapred.JobStatus jobStatus = + new org.apache.hadoop.mapred.JobStatus(fromYarn(jobreport.getJobId()), + jobreport.getSetupProgress(), jobreport.getMapProgress(), + jobreport.getReduceProgress(), jobreport.getCleanupProgress(), + fromYarn(jobreport.getJobState()), + jobPriority, jobreport.getUser(), jobreport.getJobName(), + jobFile, jobreport.getTrackingUrl()); + jobStatus.setFailureInfo(jobreport.getDiagnostics()); + return jobStatus; } + public static org.apache.hadoop.mapreduce.QueueState fromYarn( + QueueState state) { + org.apache.hadoop.mapreduce.QueueState qState = + org.apache.hadoop.mapreduce.QueueState.getState( + state.toString().toLowerCase()); + return qState; + } + + public static int fromYarn(JobState state) { switch (state) { case NEW: @@ -412,6 +425,7 @@ public class TypeConverter { ); jobStatus.setSchedulingInfo(trackingUrl); // Set AM tracking url jobStatus.setStartTime(application.getStartTime()); + jobStatus.setFailureInfo(application.getDiagnostics()); return jobStatus; } @@ -431,9 +445,9 @@ public class TypeConverter { public static QueueInfo fromYarn(org.apache.hadoop.yarn.api.records.QueueInfo queueInfo, Configuration conf) { - return new QueueInfo(queueInfo.getQueueName(), - queueInfo.toString(), QueueState.RUNNING, - TypeConverter.fromYarnApps(queueInfo.getApplications(), conf)); + return new QueueInfo(queueInfo.getQueueName(),queueInfo.toString(), + fromYarn(queueInfo.getQueueState()), TypeConverter.fromYarnApps( + queueInfo.getApplications(), conf)); } public static QueueInfo[] fromYarnQueueInfo( diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/MRConstants.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/MRConstants.java
deleted file mode 100644
index 6ac05361dce..00000000000
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/MRConstants.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.hadoop.mapreduce.v2;
-
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.classification.InterfaceStability;
-
-@InterfaceAudience.Private
-@InterfaceStability.Evolving
-public interface MRConstants {
-  // This should be the directory where splits file gets localized on the node
-  // running ApplicationMaster.
-  public static final String JOB_SUBMIT_DIR = "jobSubmitDir";
-
-  // This should be the name of the localized job-configuration file on the node
-  // running ApplicationMaster and Task
-  public static final String JOB_CONF_FILE = "job.xml";
-  // This should be the name of the localized job-jar file on the node running
-  // individual containers/tasks.
-  public static final String JOB_JAR = "job.jar";
-
-  public static final String HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME =
-      "hadoop-mapreduce-client-app-0.24.0-SNAPSHOT.jar";
-
-  public static final String YARN_MAPREDUCE_APP_JAR_PATH =
-      "$YARN_HOME/modules/" + HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME;
-
-  // The token file for the application. Should contain tokens for access to
-  // remote file system and may optionally contain application specific tokens.
-  // For now, generated by the AppManagers and used by NodeManagers and the
-  // Containers.
-  public static final String APPLICATION_TOKENS_FILE = "appTokens";
-}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java
index fb585e8dd27..0bfc9db3ed4 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/JobReport.java
@@ -29,6 +29,8 @@ public interface JobReport {
   public abstract long getFinishTime();
   public abstract String getUser();
   public abstract String getJobName();
+  public abstract String getTrackingUrl();
+  public abstract String getDiagnostics();
 
   public abstract void setJobId(JobId jobId);
   public abstract void setJobState(JobState jobState);
@@ -40,4 +42,6 @@ public interface JobReport {
   public abstract void setFinishTime(long finishTime);
   public abstract void setUser(String user);
   public abstract void setJobName(String jobName);
+  public abstract void setTrackingUrl(String trackingUrl);
+  public abstract void setDiagnostics(String diagnostics);
 }
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java
index a4033e695f2..c5d2527a9da 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/impl/pb/JobReportPBImpl.java
@@ -206,6 +206,30 @@ public class JobReportPBImpl extends ProtoBase<JobReportProto> implements JobRep
     builder.setJobName((jobName));
   }
 
+  @Override
+  public String getTrackingUrl() {
+    JobReportProtoOrBuilder p = viaProto ?
proto : builder; + return p.getDiagnostics(); + } + + @Override + public void setDiagnostics(String diagnostics) { + maybeInitBuilder(); + builder.setDiagnostics(diagnostics); + } + private JobIdPBImpl convertFromProtoFormat(JobIdProto p) { return new JobIdPBImpl(p); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java index dcddd126cc2..e57cf8d3c63 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java @@ -489,7 +489,7 @@ public class JobHistoryUtils { sb.append(address.getHostName()); } sb.append(":").append(address.getPort()); - sb.append("/yarn/job/"); // TODO This will change when the history server + sb.append("/jobhistory/job/"); // TODO This will change when the history server // understands apps. // TOOD Use JobId toString once UI stops using _id_id sb.append("job_").append(appId.getClusterTimestamp()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java index 68499497ac3..9094da39ba3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRApps.java @@ -39,14 +39,14 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.filecache.DistributedCache; -import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; -import org.apache.hadoop.util.Shell.ShellCommandExecutor; import org.apache.hadoop.yarn.YarnException; +import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; +import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResourceType; @@ -167,7 +167,7 @@ public class MRApps extends Apps { return TaskAttemptStateUI.valueOf(attemptStateStr); } - public static void setInitialClasspath( + private static void setMRFrameworkClasspath( Map environment) throws IOException { InputStream classpathFileStream = null; BufferedReader reader = null; @@ -182,30 +182,17 @@ public class MRApps extends Apps { reader = new BufferedReader(new InputStreamReader(classpathFileStream)); String cp = reader.readLine(); if (cp != null) { - addToClassPath(environment, cp.trim()); + addToEnvironment(environment, 
Environment.CLASSPATH.name(), cp.trim()); } // Put the file itself on classpath for tasks. - addToClassPath(environment, + addToEnvironment( + environment, + Environment.CLASSPATH.name(), thisClassLoader.getResource(mrAppGeneratedClasspathFile).getFile()); - // If runtime env is different. - if (System.getenv().get("YARN_HOME") != null) { - ShellCommandExecutor exec = - new ShellCommandExecutor(new String[] { - System.getenv().get("YARN_HOME") + "/bin/yarn", - "classpath" }); - exec.execute(); - addToClassPath(environment, exec.getOutput().trim()); - } - - // Get yarn mapreduce-app classpath - if (System.getenv().get("HADOOP_MAPRED_HOME")!= null) { - ShellCommandExecutor exec = - new ShellCommandExecutor(new String[] { - System.getenv().get("HADOOP_MAPRED_HOME") + "/bin/mapred", - "classpath" }); - exec.execute(); - addToClassPath(environment, exec.getOutput().trim()); + // Add standard Hadoop classes + for (String c : ApplicationConstants.APPLICATION_CLASSPATH) { + addToEnvironment(environment, Environment.CLASSPATH.name(), c); } } finally { if (classpathFileStream != null) { @@ -217,20 +204,35 @@ public class MRApps extends Apps { } // TODO: Remove duplicates. } + + private static final String SYSTEM_PATH_SEPARATOR = + System.getProperty("path.separator"); - public static void addToClassPath( - Map environment, String fileName) { - String classpath = environment.get(CLASSPATH); - if (classpath == null) { - classpath = fileName; + public static void addToEnvironment( + Map environment, + String variable, String value) { + String val = environment.get(variable); + if (val == null) { + val = value; } else { - classpath = classpath + ":" + fileName; + val = val + SYSTEM_PATH_SEPARATOR + value; } - environment.put(CLASSPATH, classpath); + environment.put(variable, val); } - public static final String CLASSPATH = "CLASSPATH"; - + public static void setClasspath(Map environment) + throws IOException { + MRApps.addToEnvironment( + environment, + Environment.CLASSPATH.name(), + MRJobConfig.JOB_JAR); + MRApps.addToEnvironment( + environment, + Environment.CLASSPATH.name(), + Environment.PWD.$() + Path.SEPARATOR + "*"); + MRApps.setMRFrameworkClasspath(environment); + } + private static final String STAGING_CONSTANT = ".staging"; public static Path getStagingAreaDir(Configuration conf, String user) { return new Path( @@ -241,7 +243,7 @@ public class MRApps extends Apps { public static String getJobFile(Configuration conf, String user, org.apache.hadoop.mapreduce.JobID jobId) { Path jobFile = new Path(MRApps.getStagingAreaDir(conf, user), - jobId.toString() + Path.SEPARATOR + MRConstants.JOB_CONF_FILE); + jobId.toString() + Path.SEPARATOR + MRJobConfig.JOB_CONF_FILE); return jobFile.toString(); } @@ -260,12 +262,11 @@ public class MRApps extends Apps { public static void setupDistributedCache( Configuration conf, - Map localResources, - Map env) + Map localResources) throws IOException { // Cache archives - parseDistributedCacheArtifacts(conf, localResources, env, + parseDistributedCacheArtifacts(conf, localResources, LocalResourceType.ARCHIVE, DistributedCache.getCacheArchives(conf), parseTimeStamps(DistributedCache.getArchiveTimestamps(conf)), @@ -275,7 +276,7 @@ public class MRApps extends Apps { // Cache files parseDistributedCacheArtifacts(conf, - localResources, env, + localResources, LocalResourceType.FILE, DistributedCache.getCacheFiles(conf), parseTimeStamps(DistributedCache.getFileTimestamps(conf)), @@ -290,7 +291,6 @@ public class MRApps extends Apps { private static void 
parseDistributedCacheArtifacts( Configuration conf, Map localResources, - Map env, LocalResourceType type, URI[] uris, long[] timestamps, long[] sizes, boolean visibilities[], Path[] pathsToPutOnClasspath) throws IOException { @@ -339,9 +339,6 @@ public class MRApps extends Apps { : LocalResourceVisibility.PRIVATE, sizes[i], timestamps[i]) ); - if (classPaths.containsKey(u.getPath())) { - MRApps.addToClassPath(env, linkName); - } } } } @@ -358,6 +355,42 @@ public class MRApps extends Apps { } return result; } + + public static void setEnvFromInputString(Map env, + String envString) { + if (envString != null && envString.length() > 0) { + String childEnvs[] = envString.split(","); + for (String cEnv : childEnvs) { + String[] parts = cEnv.split("="); // split on '=' + String value = env.get(parts[0]); + + if (value != null) { + // Replace $env with the child's env constructed by NM's + // For example: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp + value = parts[1].replace("$" + parts[0], value); + } else { + // example PATH=$PATH:/tmp + value = System.getenv(parts[0]); + if (value != null) { + // the env key is present in the tt's env + value = parts[1].replace("$" + parts[0], value); + } else { + // check for simple variable substitution + // for e.g. ROOT=$HOME + String envValue = System.getenv(parts[1].substring(1)); + if (envValue != null) { + value = envValue; + } else { + // the env key is note present anywhere .. simply set it + // example X=$X:/tmp or X=/tmp + value = parts[1].replace("$" + parts[0], ""); + } + } + } + addToEnvironment(env, parts[0], value); + } + } + } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java index c429ca55b51..d710a6f7b88 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRBuilderUtils.java @@ -19,27 +19,25 @@ package org.apache.hadoop.mapreduce.v2.util; import org.apache.hadoop.mapreduce.v2.api.records.JobId; +import org.apache.hadoop.mapreduce.v2.api.records.JobReport; +import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.factories.RecordFactory; -import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.util.Records; public class MRBuilderUtils { - private static final RecordFactory recordFactory = RecordFactoryProvider - .getRecordFactory(null); - public static JobId newJobId(ApplicationId appId, int id) { - JobId jobId = recordFactory.newRecordInstance(JobId.class); + JobId jobId = Records.newRecord(JobId.class); jobId.setAppId(appId); jobId.setId(id); return jobId; } public static TaskId newTaskId(JobId jobId, int id, TaskType taskType) { - TaskId taskId = recordFactory.newRecordInstance(TaskId.class); + TaskId taskId = Records.newRecord(TaskId.class); taskId.setJobId(jobId); taskId.setId(id); taskId.setTaskType(taskType); @@ -48,9 
+46,27 @@ public class MRBuilderUtils { public static TaskAttemptId newTaskAttemptId(TaskId taskId, int attemptId) { TaskAttemptId taskAttemptId = - recordFactory.newRecordInstance(TaskAttemptId.class); + Records.newRecord(TaskAttemptId.class); taskAttemptId.setTaskId(taskId); taskAttemptId.setId(attemptId); return taskAttemptId; } + + public static JobReport newJobReport(JobId jobId, String jobName, + String userName, JobState state, long startTime, long finishTime, + float setupProgress, float mapProgress, float reduceProgress, + float cleanupProgress) { + JobReport report = Records.newRecord(JobReport.class); + report.setJobId(jobId); + report.setJobName(jobName); + report.setUser(userName); + report.setJobState(state); + report.setStartTime(startTime); + report.setFinishTime(finishTime); + report.setSetupProgress(setupProgress); + report.setCleanupProgress(cleanupProgress); + report.setMapProgress(mapProgress); + report.setReduceProgress(reduceProgress); + return report; + } } \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto index 7d8d1b2e0b8..29184da4868 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/proto/mr_protos.proto @@ -143,6 +143,8 @@ message JobReportProto { optional int64 finish_time = 8; optional string user = 9; optional string jobName = 10; + optional string trackingUrl = 11; + optional string diagnostics = 12; } enum TaskAttemptCompletionEventStatusProto { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java index bda7fb9d658..1aeae987c80 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/TestTypeConverter.java @@ -19,11 +19,14 @@ package org.apache.hadoop.mapreduce; import junit.framework.Assert; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationReportPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.QueueInfoPBImpl; + import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import org.junit.Test; @@ -67,4 +70,14 @@ public class TestTypeConverter { Assert.assertEquals("jobId set incorrectly", 6789, status.getJobID().getId()); Assert.assertEquals("state set incorrectly", JobStatus.State.KILLED, status.getState()); } + + @Test + public void testFromYarnQueueInfo() { + org.apache.hadoop.yarn.api.records.QueueInfo queueInfo = new QueueInfoPBImpl(); + queueInfo.setQueueState(org.apache.hadoop.yarn.api.records.QueueState.STOPPED); + org.apache.hadoop.mapreduce.QueueInfo returned = + 
TypeConverter.fromYarn(queueInfo, new Configuration()); + Assert.assertEquals("queueInfo translation didn't work.", + returned.getState().toString(), queueInfo.getQueueState().toString().toLowerCase()); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java index 7a2ee00a92d..11589980625 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/util/TestMRApps.java @@ -25,7 +25,6 @@ import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; import org.apache.hadoop.mapreduce.v2.api.records.TaskType; -import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -115,7 +114,8 @@ public class TestMRApps { @Test public void testGetJobFileWithUser() { Configuration conf = new Configuration(); conf.set(MRJobConfig.MR_AM_STAGING_DIR, "/my/path/to/staging"); - String jobFile = MRApps.getJobFile(conf, "dummy-user", new JobID("dummy-job", 12345)); + String jobFile = MRApps.getJobFile(conf, "dummy-user", + new JobID("dummy-job", 12345)); assertNotNull("getJobFile results in null.", jobFile); assertEquals("jobFile with specified user is not as expected.", "/my/path/to/staging/dummy-user/.staging/job_dummy-job_12345/job.xml", jobFile); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/BackupStore.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/BackupStore.java index 026793c5374..f409d2298eb 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/BackupStore.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/BackupStore.java @@ -41,6 +41,7 @@ import org.apache.hadoop.mapred.IFile.Reader; import org.apache.hadoop.mapred.IFile.Writer; import org.apache.hadoop.mapred.Merger.Segment; import org.apache.hadoop.mapreduce.MRConfig; +import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskAttemptID; /** @@ -560,7 +561,7 @@ public class BackupStore { private Writer createSpillFile() throws IOException { Path tmp = - new Path(Constants.OUTPUT + "/backup_" + tid.getId() + "_" + new Path(MRJobConfig.OUTPUT + "/backup_" + tid.getId() + "_" + (spillNumber++) + ".out"); LOG.info("Created file: " + tmp); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java index 49d12d764d5..b489d41b17c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java 
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java @@ -348,6 +348,7 @@ public class JobConf extends Configuration { */ public static final Level DEFAULT_LOG_LEVEL = Level.INFO; + /** * Construct a map/reduce job configuration. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobStatus.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobStatus.java index 90b68872ff4..e5add2139f5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobStatus.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobStatus.java @@ -321,6 +321,10 @@ public class JobStatus extends org.apache.hadoop.mapreduce.JobStatus { super.setJobACLs(acls); } + public synchronized void setFailureInfo(String failureInfo) { + super.setFailureInfo(failureInfo); + } + /** * Set the priority of the job, defaulting to NORMAL. * @param jp new job priority diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MRConstants.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MRConstants.java index e2c16fbfac1..3d7363e5faa 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MRConstants.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MRConstants.java @@ -17,11 +17,16 @@ */ package org.apache.hadoop.mapred; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; + /******************************* * Some handy constants * *******************************/ -interface MRConstants { +@Private +@Unstable +public interface MRConstants { // // Timeouts, constants // @@ -53,5 +58,6 @@ interface MRConstants { */ public static final String FOR_REDUCE_TASK = "for-reduce-task"; + /** Used in MRv1, mostly in TaskTracker code **/ public static final String WORKDIR = "work"; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MROutputFiles.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MROutputFiles.java index e81e11d3fb6..a9e25f287d4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MROutputFiles.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MROutputFiles.java @@ -27,6 +27,7 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.MRConfig; +import org.apache.hadoop.mapreduce.MRJobConfig; /** * Manipulate the working area for the transient store for maps and reduces. 
@@ -54,7 +55,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getOutputFile() throws IOException { - return lDirAlloc.getLocalPathToRead(Constants.OUTPUT + Path.SEPARATOR + return lDirAlloc.getLocalPathToRead(MRJobConfig.OUTPUT + Path.SEPARATOR + MAP_OUTPUT_FILENAME_STRING, getConf()); } @@ -68,7 +69,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getOutputFileForWrite(long size) throws IOException { - return lDirAlloc.getLocalPathForWrite(Constants.OUTPUT + Path.SEPARATOR + return lDirAlloc.getLocalPathForWrite(MRJobConfig.OUTPUT + Path.SEPARATOR + MAP_OUTPUT_FILENAME_STRING, size, getConf()); } @@ -89,7 +90,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getOutputIndexFile() throws IOException { - return lDirAlloc.getLocalPathToRead(Constants.OUTPUT + Path.SEPARATOR + return lDirAlloc.getLocalPathToRead(MRJobConfig.OUTPUT + Path.SEPARATOR + MAP_OUTPUT_FILENAME_STRING + MAP_OUTPUT_INDEX_SUFFIX_STRING, getConf()); } @@ -104,7 +105,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getOutputIndexFileForWrite(long size) throws IOException { - return lDirAlloc.getLocalPathForWrite(Constants.OUTPUT + Path.SEPARATOR + return lDirAlloc.getLocalPathForWrite(MRJobConfig.OUTPUT + Path.SEPARATOR + MAP_OUTPUT_FILENAME_STRING + MAP_OUTPUT_INDEX_SUFFIX_STRING, size, getConf()); } @@ -128,7 +129,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getSpillFile(int spillNumber) throws IOException { - return lDirAlloc.getLocalPathToRead(Constants.OUTPUT + "/spill" + return lDirAlloc.getLocalPathToRead(MRJobConfig.OUTPUT + "/spill" + spillNumber + ".out", getConf()); } @@ -143,7 +144,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getSpillFileForWrite(int spillNumber, long size) throws IOException { - return lDirAlloc.getLocalPathForWrite(Constants.OUTPUT + "/spill" + return lDirAlloc.getLocalPathForWrite(MRJobConfig.OUTPUT + "/spill" + spillNumber + ".out", size, getConf()); } @@ -157,7 +158,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getSpillIndexFile(int spillNumber) throws IOException { - return lDirAlloc.getLocalPathToRead(Constants.OUTPUT + "/spill" + return lDirAlloc.getLocalPathToRead(MRJobConfig.OUTPUT + "/spill" + spillNumber + ".out.index", getConf()); } @@ -172,7 +173,7 @@ public class MROutputFiles extends MapOutputFile { @Override public Path getSpillIndexFileForWrite(int spillNumber, long size) throws IOException { - return lDirAlloc.getLocalPathForWrite(Constants.OUTPUT + "/spill" + return lDirAlloc.getLocalPathForWrite(MRJobConfig.OUTPUT + "/spill" + spillNumber + ".out.index", size, getConf()); } @@ -187,7 +188,7 @@ public class MROutputFiles extends MapOutputFile { public Path getInputFile(int mapId) throws IOException { return lDirAlloc.getLocalPathToRead(String.format( - REDUCE_INPUT_FILE_FORMAT_STRING, Constants.OUTPUT, Integer + REDUCE_INPUT_FILE_FORMAT_STRING, MRJobConfig.OUTPUT, Integer .valueOf(mapId)), getConf()); } @@ -204,7 +205,7 @@ public class MROutputFiles extends MapOutputFile { long size) throws IOException { return lDirAlloc.getLocalPathForWrite(String.format( - REDUCE_INPUT_FILE_FORMAT_STRING, Constants.OUTPUT, mapId.getId()), + REDUCE_INPUT_FILE_FORMAT_STRING, MRJobConfig.OUTPUT, mapId.getId()), size, getConf()); } @@ -212,7 +213,7 @@ public class MROutputFiles extends MapOutputFile { @Override public void removeAll() throws IOException { - 
((JobConf)getConf()).deleteLocalFiles(Constants.OUTPUT); + ((JobConf)getConf()).deleteLocalFiles(MRJobConfig.OUTPUT); } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java index 7e978e9cf94..597b2edaa39 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java @@ -44,6 +44,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SecureIOUtils; import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.util.ProcessTree; import org.apache.hadoop.util.Shell; import org.apache.log4j.Appender; @@ -75,10 +76,18 @@ public class TaskLog { } } } - + + public static String getMRv2LogDir() { + return System.getProperty(MRJobConfig.TASK_LOG_DIR); + } + public static File getTaskLogFile(TaskAttemptID taskid, boolean isCleanup, LogName filter) { - return new File(getAttemptDir(taskid, isCleanup), filter.toString()); + if (getMRv2LogDir() != null) { + return new File(getMRv2LogDir(), filter.toString()); + } else { + return new File(getAttemptDir(taskid, isCleanup), filter.toString()); + } } static File getRealTaskLogFileLocation(TaskAttemptID taskid, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Application.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Application.java index cb8b476ac75..0a108d73b63 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Application.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Application.java @@ -18,6 +18,7 @@ package org.apache.hadoop.mapred.pipes; +import java.io.BufferedInputStream; import java.io.File; import java.io.IOException; import java.net.ServerSocket; @@ -26,6 +27,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Random; import javax.crypto.SecretKey; @@ -111,7 +113,6 @@ class Application jobACLs = new HashMap(); @@ -278,6 +279,14 @@ public class JobStatus implements Writable, Cloneable { this.queue = queue; } + /** + * Set diagnostic information. + * @param failureInfo diagnostic information + */ + protected synchronized void setFailureInfo(String failureInfo) { + this.failureInfo = failureInfo; + } + /** * Get queue name * @return queue name @@ -359,6 +368,15 @@ public class JobStatus implements Writable, Cloneable { */ public synchronized JobPriority getPriority() { return priority; } + /** + * Gets any available info on the reason of failure of the job. + * @return diagnostic information on why a job might have failed. + */ + public synchronized String getFailureInfo() { + return this.failureInfo; + } + + /** * Returns true if the status is for a completed job. 
*/ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 33884bb82e9..accfdddc3db 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -210,6 +210,8 @@ public interface MRJobConfig { public static final String REDUCE_LOG_LEVEL = "mapreduce.reduce.log.level"; + public static final String DEFAULT_LOG_LEVEL = "INFO"; + public static final String REDUCE_MERGE_INMEM_THRESHOLD = "mapreduce.reduce.merge.inmem.threshold"; public static final String REDUCE_INPUT_BUFFER_PERCENT = "mapreduce.reduce.input.buffer.percent"; @@ -330,9 +332,15 @@ public interface MRJobConfig { MR_AM_PREFIX+"num-progress-splits"; public static final int DEFAULT_MR_AM_NUM_PROGRESS_SPLITS = 12; - /** Number of threads user to launch containers in the app master.*/ - public static final String MR_AM_CONTAINERLAUNCHER_THREAD_COUNT = - MR_AM_PREFIX+"containerlauncher.thread-count"; + /** + * Upper limit on the number of threads user to launch containers in the app + * master. Expect level config, you shouldn't be needing it in most cases. + */ + public static final String MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT = + MR_AM_PREFIX+"containerlauncher.thread-count-limit"; + + public static final int DEFAULT_MR_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT = + 500; /** Number of threads to handle job client RPC requests.*/ public static final String MR_AM_JOB_CLIENT_THREAD_COUNT = @@ -400,4 +408,69 @@ public interface MRJobConfig { */ public static final String MR_AM_CREATE_JH_INTERMEDIATE_BASE_DIR = MR_AM_PREFIX + "create-intermediate-jh-base-dir"; + + public static final String MAPRED_MAP_ADMIN_JAVA_OPTS = + "mapreduce.admin.map.child.java.opts"; + + public static final String MAPRED_REDUCE_ADMIN_JAVA_OPTS = + "mapreduce.admin.reduce.child.java.opts"; + + public static final String DEFAULT_MAPRED_ADMIN_JAVA_OPTS = + "-Djava.net.preferIPv4Stack=true " + + "-Dhadoop.metrics.log.level=WARN "; + + public static final String MAPRED_ADMIN_USER_SHELL = + "mapreduce.admin.user.shell"; + + public static final String DEFAULT_SHELL = "/bin/bash"; + + public static final String MAPRED_ADMIN_USER_ENV = + "mapreduce.admin.user.env"; + + public static final String DEFAULT_MAPRED_ADMIN_USER_ENV = + "LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib"; + + public static final String WORKDIR = "work"; + + public static final String OUTPUT = "output"; + + public static final String HADOOP_WORK_DIR = "HADOOP_WORK_DIR"; + + public static final String STDOUT_LOGFILE_ENV = "STDOUT_LOGFILE_ENV"; + + public static final String STDERR_LOGFILE_ENV = "STDERR_LOGFILE_ENV"; + + // This should be the directory where splits file gets localized on the node + // running ApplicationMaster. + public static final String JOB_SUBMIT_DIR = "jobSubmitDir"; + + // This should be the name of the localized job-configuration file on the node + // running ApplicationMaster and Task + public static final String JOB_CONF_FILE = "job.xml"; + + // This should be the name of the localized job-jar file on the node running + // individual containers/tasks. 
+ public static final String JOB_JAR = "job.jar"; + + public static final String JOB_SPLIT = "job.split"; + + public static final String JOB_SPLIT_METAINFO = "job.splitmetainfo"; + + public static final String APPLICATION_MASTER_CLASS = + "org.apache.hadoop.mapreduce.v2.app.MRAppMaster"; + + // The token file for the application. Should contain tokens for access to + // remote file system and may optionally contain application specific tokens. + // For now, generated by the AppManagers and used by NodeManagers and the + // Containers. + public static final String APPLICATION_TOKENS_FILE = "appTokens"; + + /** The log directory for the containers */ + public static final String TASK_LOG_DIR = MR_PREFIX + "container.log.dir"; + + public static final String TASK_LOG_SIZE = MR_PREFIX + "container.log.filesize"; + + public static final String MAPREDUCE_V2_CHILD_CLASS = + "org.apache.hadoop.mapred.YarnChild"; + } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java index 56f114adc5c..e86eb279e9a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java @@ -135,7 +135,7 @@ public class HistoryClientService extends AbstractService { webApp = new HsWebApp(history); String bindAddress = conf.get(JHAdminConfig.MR_HISTORY_WEBAPP_ADDRESS, JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS); - WebApps.$for("yarn", this).at(bindAddress).start(webApp); + WebApps.$for("jobhistory", this).at(bindAddress).start(webApp); } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java index c9f90b9e79e..7e9e67c3c3d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/JobHistory.java @@ -22,7 +22,6 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -84,25 +83,6 @@ public class JobHistory extends AbstractService implements HistoryContext { private static final Log SUMMARY_LOG = LogFactory.getLog(JobSummary.class); - /* - * TODO Get rid of this once JobId has it's own comparator - */ - private static final Comparator JOB_ID_COMPARATOR = - new Comparator() { - @Override - public int compare(JobId o1, JobId o2) { - if (o1.getAppId().getClusterTimestamp() > - o2.getAppId().getClusterTimestamp()) { - return 1; - } else if (o1.getAppId().getClusterTimestamp() < - o2.getAppId().getClusterTimestamp()) { - return -1; - } else { - return o1.getId() - o2.getId(); - } - } - }; - private static String DONE_BEFORE_SERIAL_TAIL = 
JobHistoryUtils.doneSubdirsBeforeSerialTail(); @@ -118,19 +98,19 @@ public class JobHistory extends AbstractService implements HistoryContext { //Maintains minimal details for recent jobs (parsed from history file name). //Sorted on Job Completion Time. private final SortedMap jobListCache = - new ConcurrentSkipListMap(JOB_ID_COMPARATOR); + new ConcurrentSkipListMap(); // Re-use exisiting MetaInfo objects if they exist for the specific JobId. (synchronization on MetaInfo) // Check for existance of the object when using iterators. private final SortedMap intermediateListCache = - new ConcurrentSkipListMap(JOB_ID_COMPARATOR); + new ConcurrentSkipListMap(); //Maintains a list of known done subdirectories. Not currently used. private final Set existingDoneSubdirs = new HashSet(); private final SortedMap loadedJobCache = - new ConcurrentSkipListMap(JOB_ID_COMPARATOR); + new ConcurrentSkipListMap(); /** * Maintains a mapping between intermediate user directories and the last @@ -673,7 +653,7 @@ public class JobHistory extends AbstractService implements HistoryContext { private Map getAllJobsInternal() { //TODO This should ideally be using getAllJobsMetaInfo // or get rid of that method once Job has APIs for user, finishTime etc. - SortedMap result = new TreeMap(JOB_ID_COMPARATOR); + SortedMap result = new TreeMap(); try { scanIntermediateDirectory(); } catch (IOException e) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml index 4b605cb2ae5..ef388fcd86a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml @@ -64,6 +64,12 @@ hadoop-yarn-server-resourcemanager test + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test-jar + test + org.apache.hadoop hadoop-yarn-server-common diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java index 80c8d91a1b3..20c6ce7c000 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientCache.java @@ -1,20 +1,20 @@ /** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.mapred; @@ -42,29 +42,29 @@ public class ClientCache { private final Configuration conf; private final ResourceMgrDelegate rm; - + private static final Log LOG = LogFactory.getLog(ClientCache.class); private Map cache = - new HashMap(); - + new HashMap(); + private MRClientProtocol hsProxy; - ClientCache(Configuration conf, ResourceMgrDelegate rm) { + public ClientCache(Configuration conf, ResourceMgrDelegate rm) { this.conf = conf; this.rm = rm; } //TODO: evict from the cache on some threshold - synchronized ClientServiceDelegate getClient(JobID jobId) { - if (hsProxy == null) { + public synchronized ClientServiceDelegate getClient(JobID jobId) { + if (hsProxy == null) { try { - hsProxy = instantiateHistoryProxy(); - } catch (IOException e) { - LOG.warn("Could not connect to History server.", e); - throw new YarnException("Could not connect to History server.", e); - } - } + hsProxy = instantiateHistoryProxy(); + } catch (IOException e) { + LOG.warn("Could not connect to History server.", e); + throw new YarnException("Could not connect to History server.", e); + } + } ClientServiceDelegate client = cache.get(jobId); if (client == null) { client = new ClientServiceDelegate(conf, rm, jobId, hsProxy); @@ -74,7 +74,7 @@ public class ClientCache { } private MRClientProtocol instantiateHistoryProxy() - throws IOException { + throws IOException { final String serviceAddr = conf.get(JHAdminConfig.MR_HISTORY_ADDRESS); if (StringUtils.isEmpty(serviceAddr)) { return null; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java index 605c44e5ed9..341e17e9513 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ClientServiceDelegate.java @@ -70,7 +70,7 @@ import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.security.ApplicationTokenIdentifier; import org.apache.hadoop.yarn.security.SchedulerSecurityInfo; -class ClientServiceDelegate { +public class ClientServiceDelegate { private static final Log LOG = LogFactory.getLog(ClientServiceDelegate.class); // Caches for per-user NotRunningJobs @@ -87,7 +87,7 @@ class ClientServiceDelegate { private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); private static String UNKNOWN_USER = "Unknown User"; - ClientServiceDelegate(Configuration conf, ResourceMgrDelegate rm, + public ClientServiceDelegate(Configuration 
conf, ResourceMgrDelegate rm, JobID jobId, MRClientProtocol historyServerProxy) { this.conf = new Configuration(conf); // Cloning for modifying. // For faster redirects from AM to HS. @@ -101,16 +101,20 @@ class ClientServiceDelegate { // Get the instance of the NotRunningJob corresponding to the specified // user and state - private NotRunningJob getNotRunningJob(String user, JobState state) { + private NotRunningJob getNotRunningJob(ApplicationReport applicationReport, + JobState state) { synchronized (notRunningJobs) { HashMap map = notRunningJobs.get(state); if (map == null) { map = new HashMap(); notRunningJobs.put(state, map); } + String user = + (applicationReport == null) ? + UNKNOWN_USER : applicationReport.getUser(); NotRunningJob notRunningJob = map.get(user); if (notRunningJob == null) { - notRunningJob = new NotRunningJob(user, state); + notRunningJob = new NotRunningJob(applicationReport, state); map.put(user, notRunningJob); } return notRunningJob; @@ -130,7 +134,7 @@ class ClientServiceDelegate { if (application == null) { LOG.info("Could not get Job info from RM for job " + jobId + ". Redirecting to job history server."); - return checkAndGetHSProxy(UNKNOWN_USER, JobState.NEW); + return checkAndGetHSProxy(null, JobState.NEW); } try { if (application.getHost() == null || "".equals(application.getHost())) { @@ -171,7 +175,7 @@ class ClientServiceDelegate { if (application == null) { LOG.info("Could not get Job info from RM for job " + jobId + ". Redirecting to job history server."); - return checkAndGetHSProxy(UNKNOWN_USER, JobState.RUNNING); + return checkAndGetHSProxy(null, JobState.RUNNING); } } catch (InterruptedException e) { LOG.warn("getProxy() call interruped", e); @@ -191,17 +195,17 @@ class ClientServiceDelegate { if (application.getState() == ApplicationState.NEW || application.getState() == ApplicationState.SUBMITTED) { realProxy = null; - return getNotRunningJob(user, JobState.NEW); + return getNotRunningJob(application, JobState.NEW); } if (application.getState() == ApplicationState.FAILED) { realProxy = null; - return getNotRunningJob(user, JobState.FAILED); + return getNotRunningJob(application, JobState.FAILED); } if (application.getState() == ApplicationState.KILLED) { realProxy = null; - return getNotRunningJob(user, JobState.KILLED); + return getNotRunningJob(application, JobState.KILLED); } //History server can serve a job only if application @@ -209,15 +213,16 @@ class ClientServiceDelegate { if (application.getState() == ApplicationState.SUCCEEDED) { LOG.info("Application state is completed. 
" + "Redirecting to job history server"); - realProxy = checkAndGetHSProxy(user, JobState.SUCCEEDED); + realProxy = checkAndGetHSProxy(application, JobState.SUCCEEDED); } return realProxy; } - private MRClientProtocol checkAndGetHSProxy(String user, JobState state) { + private MRClientProtocol checkAndGetHSProxy( + ApplicationReport applicationReport, JobState state) { if (null == historyServerProxy) { LOG.warn("Job History Server is not configured."); - return getNotRunningJob(user, state); + return getNotRunningJob(applicationReport, state); } return historyServerProxy; } @@ -274,7 +279,7 @@ class ClientServiceDelegate { } } - org.apache.hadoop.mapreduce.Counters getJobCounters(JobID arg0) throws IOException, + public org.apache.hadoop.mapreduce.Counters getJobCounters(JobID arg0) throws IOException, InterruptedException { org.apache.hadoop.mapreduce.v2.api.records.JobId jobID = TypeConverter.toYarn(arg0); GetCountersRequest request = recordFactory.newRecordInstance(GetCountersRequest.class); @@ -285,7 +290,7 @@ class ClientServiceDelegate { } - TaskCompletionEvent[] getTaskCompletionEvents(JobID arg0, int arg1, int arg2) + public TaskCompletionEvent[] getTaskCompletionEvents(JobID arg0, int arg1, int arg2) throws IOException, InterruptedException { org.apache.hadoop.mapreduce.v2.api.records.JobId jobID = TypeConverter .toYarn(arg0); @@ -303,7 +308,7 @@ class ClientServiceDelegate { .toArray(new org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent[0])); } - String[] getTaskDiagnostics(org.apache.hadoop.mapreduce.TaskAttemptID arg0) + public String[] getTaskDiagnostics(org.apache.hadoop.mapreduce.TaskAttemptID arg0) throws IOException, InterruptedException { org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID = TypeConverter @@ -321,24 +326,25 @@ class ClientServiceDelegate { return result; } - JobStatus getJobStatus(JobID oldJobID) throws YarnRemoteException { + public JobStatus getJobStatus(JobID oldJobID) throws YarnRemoteException { org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter.toYarn(oldJobID); - GetJobReportRequest request = recordFactory.newRecordInstance(GetJobReportRequest.class); + GetJobReportRequest request = + recordFactory.newRecordInstance(GetJobReportRequest.class); request.setJobId(jobId); JobReport report = ((GetJobReportResponse) invoke("getJobReport", GetJobReportRequest.class, request)).getJobReport(); String jobFile = MRApps.getJobFile(conf, report.getUser(), oldJobID); - //TODO: add tracking url in JobReport - return TypeConverter.fromYarn(report, jobFile, ""); + return TypeConverter.fromYarn(report, jobFile); } - org.apache.hadoop.mapreduce.TaskReport[] getTaskReports(JobID oldJobID, TaskType taskType) + public org.apache.hadoop.mapreduce.TaskReport[] getTaskReports(JobID oldJobID, TaskType taskType) throws YarnRemoteException, YarnRemoteException { org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter.toYarn(oldJobID); - GetTaskReportsRequest request = recordFactory.newRecordInstance(GetTaskReportsRequest.class); + GetTaskReportsRequest request = + recordFactory.newRecordInstance(GetTaskReportsRequest.class); request.setJobId(jobId); request.setTaskType(TypeConverter.toYarn(taskType)); @@ -350,7 +356,7 @@ class ClientServiceDelegate { (taskReports).toArray(new org.apache.hadoop.mapreduce.TaskReport[0]); } - boolean killTask(TaskAttemptID taskAttemptID, boolean fail) + public boolean killTask(TaskAttemptID taskAttemptID, boolean fail) throws YarnRemoteException { 
org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId attemptID = TypeConverter.toYarn(taskAttemptID); @@ -366,7 +372,7 @@ class ClientServiceDelegate { return true; } - boolean killJob(JobID oldJobID) + public boolean killJob(JobID oldJobID) throws YarnRemoteException { org.apache.hadoop.mapreduce.v2.api.records.JobId jobId = TypeConverter.toYarn(oldJobID); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java index a40fcedda39..17ad9f62aae 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/NotRunningJob.java @@ -22,6 +22,8 @@ import java.util.ArrayList; import java.util.HashMap; import org.apache.commons.lang.NotImplementedException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.mapreduce.v2.api.MRClientProtocol; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.FailTaskAttemptRequest; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.FailTaskAttemptResponse; @@ -53,20 +55,41 @@ import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; import org.apache.hadoop.mapreduce.v2.api.records.TaskReport; import org.apache.hadoop.mapreduce.v2.api.records.TaskState; +import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; public class NotRunningJob implements MRClientProtocol { + private static final Log LOG = LogFactory.getLog(NotRunningJob.class); + private RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); private final JobState jobState; - private final String user; - - NotRunningJob(String username, JobState jobState) { - this.user = username; + private final ApplicationReport applicationReport; + + + private ApplicationReport getUnknownApplicationReport() { + ApplicationReport unknown = + recordFactory.newRecordInstance(ApplicationReport.class); + unknown.setUser("N/A"); + unknown.setHost("N/A"); + unknown.setName("N/A"); + unknown.setQueue("N/A"); + unknown.setStartTime(0); + unknown.setFinishTime(0); + unknown.setTrackingUrl("N/A"); + unknown.setDiagnostics("N/A"); + LOG.info("getUnknownApplicationReport"); + return unknown; + } + + NotRunningJob(ApplicationReport applicationReport, JobState jobState) { + this.applicationReport = + (applicationReport == null) ? 
+ getUnknownApplicationReport() : applicationReport; this.jobState = jobState; } @@ -101,15 +124,19 @@ public class NotRunningJob implements MRClientProtocol { @Override public GetJobReportResponse getJobReport(GetJobReportRequest request) throws YarnRemoteException { - GetJobReportResponse resp = - recordFactory.newRecordInstance(GetJobReportResponse.class); JobReport jobReport = recordFactory.newRecordInstance(JobReport.class); jobReport.setJobId(request.getJobId()); - jobReport.setJobState(this.jobState); + jobReport.setJobState(jobState); + jobReport.setUser(applicationReport.getUser()); + jobReport.setStartTime(applicationReport.getStartTime()); + jobReport.setDiagnostics(applicationReport.getDiagnostics()); + jobReport.setJobName(applicationReport.getName()); + jobReport.setTrackingUrl(applicationReport.getTrackingUrl()); + jobReport.setFinishTime(applicationReport.getFinishTime()); - jobReport.setUser(this.user); - // TODO: Add jobName & other job information that is available + GetJobReportResponse resp = + recordFactory.newRecordInstance(GetJobReportResponse.class); resp.setJobReport(jobReport); return resp; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java index 8e8081abe4d..8b7c818b1e3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java @@ -32,19 +32,19 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.ClusterMetrics; import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.JobStatus; +import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.QueueAclsInfo; import org.apache.hadoop.mapreduce.QueueInfo; import org.apache.hadoop.mapreduce.TaskTrackerInfo; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier; -import org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.SecurityInfo; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -53,7 +53,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; import 
org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; @@ -79,6 +79,10 @@ public class ResourceMgrDelegate { private ApplicationId applicationId; private final RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); + /** + * Delegate responsible for communicating with the Resource Manager's {@link ClientRMProtocol}. + * @param conf the configuration object. + */ public ResourceMgrDelegate(YarnConfiguration conf) { this.conf = conf; YarnRPC rpc = YarnRPC.create(this.conf); @@ -97,6 +101,16 @@ public class ResourceMgrDelegate { LOG.info("Connected to ResourceManager at " + rmAddress); } + /** + * Used for injecting applicationsManager, mostly for testing. + * @param conf the configuration object + * @param applicationsManager the handle to talk the resource managers {@link ClientRMProtocol}. + */ + public ResourceMgrDelegate(YarnConfiguration conf, ClientRMProtocol applicationsManager) { + this.conf = conf; + this.applicationsManager = applicationsManager; + } + public void cancelDelegationToken(Token arg0) throws IOException, InterruptedException { return; @@ -155,8 +169,8 @@ public class ResourceMgrDelegate { } public JobID getNewJobID() throws IOException, InterruptedException { - GetNewApplicationIdRequest request = recordFactory.newRecordInstance(GetNewApplicationIdRequest.class); - applicationId = applicationsManager.getNewApplicationId(request).getApplicationId(); + GetNewApplicationRequest request = recordFactory.newRecordInstance(GetNewApplicationRequest.class); + applicationId = applicationsManager.getNewApplication(request).getApplicationId(); return TypeConverter.fromYarn(applicationId); } @@ -254,7 +268,7 @@ public class ResourceMgrDelegate { public String getSystemDir() throws IOException, InterruptedException { - Path sysDir = new Path(MRConstants.JOB_SUBMIT_DIR); + Path sysDir = new Path(MRJobConfig.JOB_SUBMIT_DIR); //FileContext.getFileContext(conf).delete(sysDir, true); return sysDir.toString(); } @@ -294,9 +308,9 @@ public class ResourceMgrDelegate { } public void killApplication(ApplicationId applicationId) throws IOException { - FinishApplicationRequest request = recordFactory.newRecordInstance(FinishApplicationRequest.class); + KillApplicationRequest request = recordFactory.newRecordInstance(KillApplicationRequest.class); request.setApplicationId(applicationId); - applicationsManager.finishApplication(request); + applicationsManager.forceKillApplication(request); LOG.info("Killing application " + applicationId); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java index 82134c7520f..a11968a16f9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/YARNRunner.java @@ -51,7 +51,6 @@ import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.mapreduce.protocol.ClientProtocol; import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier; -import 
org.apache.hadoop.mapreduce.v2.MRConstants; import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.security.Credentials; @@ -60,6 +59,7 @@ import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.ApplicationConstants; +import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationState; @@ -105,10 +105,22 @@ public class YARNRunner implements ClientProtocol { * @param resMgrDelegate the resourcemanager client handle. */ public YARNRunner(Configuration conf, ResourceMgrDelegate resMgrDelegate) { + this(conf, resMgrDelegate, new ClientCache(conf, resMgrDelegate)); + } + + /** + * Similar to {@link YARNRunner#YARNRunner(Configuration, ResourceMgrDelegate)} + * but allowing injecting {@link ClientCache}. Enable mocking and testing. + * @param conf the configuration object + * @param resMgrDelegate the resource manager delegate + * @param clientCache the client cache object. + */ + public YARNRunner(Configuration conf, ResourceMgrDelegate resMgrDelegate, + ClientCache clientCache) { this.conf = conf; try { this.resMgrDelegate = resMgrDelegate; - this.clientCache = new ClientCache(this.conf, resMgrDelegate); + this.clientCache = clientCache; this.defaultFileContext = FileContext.getFileContext(this.conf); } catch (UnsupportedFileSystemException ufe) { throw new RuntimeException("Error in instantiating YarnClient", ufe); @@ -210,7 +222,7 @@ public class YARNRunner implements ClientProtocol { // Upload only in security mode: TODO Path applicationTokensFile = - new Path(jobSubmitDir, MRConstants.APPLICATION_TOKENS_FILE); + new Path(jobSubmitDir, MRJobConfig.APPLICATION_TOKENS_FILE); try { ts.writeTokenStorageFile(applicationTokensFile, conf); } catch (IOException e) { @@ -226,7 +238,9 @@ public class YARNRunner implements ClientProtocol { ApplicationReport appMaster = resMgrDelegate .getApplicationReport(applicationId); - String diagnostics = (appMaster == null ? "application report is null" : appMaster.getDiagnostics()); + String diagnostics = + (appMaster == null ? 
+ "application report is null" : appMaster.getDiagnostics()); if (appMaster == null || appMaster.getState() == ApplicationState.FAILED || appMaster.getState() == ApplicationState.KILLED) { throw new IOException("Failed to run job : " + @@ -263,7 +277,7 @@ public class YARNRunner implements ClientProtocol { Map localResources = new HashMap(); - Path jobConfPath = new Path(jobSubmitDir, MRConstants.JOB_CONF_FILE); + Path jobConfPath = new Path(jobSubmitDir, MRJobConfig.JOB_CONF_FILE); URL yarnUrlForJobSubmitDir = ConverterUtils .getYarnUrlFromPath(defaultFileContext.getDefaultFileSystem() @@ -272,13 +286,13 @@ public class YARNRunner implements ClientProtocol { LOG.debug("Creating setup context, jobSubmitDir url is " + yarnUrlForJobSubmitDir); - localResources.put(MRConstants.JOB_CONF_FILE, + localResources.put(MRJobConfig.JOB_CONF_FILE, createApplicationResource(defaultFileContext, jobConfPath)); if (jobConf.get(MRJobConfig.JAR) != null) { - localResources.put(MRConstants.JOB_JAR, + localResources.put(MRJobConfig.JOB_JAR, createApplicationResource(defaultFileContext, - new Path(jobSubmitDir, MRConstants.JOB_JAR))); + new Path(jobSubmitDir, MRJobConfig.JOB_JAR))); } else { // Job jar may be null. For e.g, for pipes, the job jar is the hadoop // mapreduce jar itself which is already on the classpath. @@ -287,10 +301,12 @@ public class YARNRunner implements ClientProtocol { } // TODO gross hack - for (String s : new String[] { "job.split", "job.splitmetainfo", - MRConstants.APPLICATION_TOKENS_FILE }) { + for (String s : new String[] { + MRJobConfig.JOB_SPLIT, + MRJobConfig.JOB_SPLIT_METAINFO, + MRJobConfig.APPLICATION_TOKENS_FILE }) { localResources.put( - MRConstants.JOB_SUBMIT_DIR + "/" + s, + MRJobConfig.JOB_SUBMIT_DIR + "/" + s, createApplicationResource(defaultFileContext, new Path(jobSubmitDir, s))); } @@ -304,22 +320,24 @@ public class YARNRunner implements ClientProtocol { } // Setup the command to run the AM - String javaHome = "$JAVA_HOME"; Vector vargs = new Vector(8); - vargs.add(javaHome + "/bin/java"); - vargs.add("-Dhadoop.root.logger=" - + conf.get(MRJobConfig.MR_AM_LOG_OPTS, - MRJobConfig.DEFAULT_MR_AM_LOG_OPTS) + ",console"); + vargs.add(Environment.JAVA_HOME.$() + "/bin/java"); + + long logSize = TaskLog.getTaskLogLength(new JobConf(conf)); + vargs.add("-Dlog4j.configuration=container-log4j.properties"); + vargs.add("-D" + MRJobConfig.TASK_LOG_DIR + "=" + + ApplicationConstants.LOG_DIR_EXPANSION_VAR); + vargs.add("-D" + MRJobConfig.TASK_LOG_SIZE + "=" + logSize); vargs.add(conf.get(MRJobConfig.MR_AM_COMMAND_OPTS, MRJobConfig.DEFAULT_MR_AM_COMMAND_OPTS)); - vargs.add("org.apache.hadoop.mapreduce.v2.app.MRAppMaster"); - vargs.add(String.valueOf(applicationId.getClusterTimestamp())); - vargs.add(String.valueOf(applicationId.getId())); - vargs.add(ApplicationConstants.AM_FAIL_COUNT_STRING); - vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stdout"); - vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + "/stderr"); + vargs.add(MRJobConfig.APPLICATION_MASTER_CLASS); + vargs.add("1>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + + Path.SEPARATOR + ApplicationConstants.STDOUT); + vargs.add("2>" + ApplicationConstants.LOG_DIR_EXPANSION_VAR + + Path.SEPARATOR + ApplicationConstants.STDERR); + Vector vargsFinal = new Vector(8); // Final commmand @@ -332,15 +350,13 @@ public class YARNRunner implements ClientProtocol { LOG.info("Command to launch container for ApplicationMaster is : " + mergedCommand); - // Setup the environment - Add { job jar, MR app jar } 
to classpath. + // Setup the CLASSPATH in environment + // i.e. add { job jar, CWD, Hadoop jars} to classpath. Map environment = new HashMap(); - MRApps.setInitialClasspath(environment); - MRApps.addToClassPath(environment, MRConstants.JOB_JAR); - MRApps.addToClassPath(environment, - MRConstants.YARN_MAPREDUCE_APP_JAR_PATH); - + MRApps.setClasspath(environment); + // Parse distributed cache - MRApps.setupDistributedCache(jobConf, localResources, environment); + MRApps.setupDistributedCache(jobConf, localResources); // Setup ContainerLaunchContext for AM container ContainerLaunchContext amContainer = @@ -425,9 +441,35 @@ public class YARNRunner implements ClientProtocol { @Override public void killJob(JobID arg0) throws IOException, InterruptedException { - if (!clientCache.getClient(arg0).killJob(arg0)) { - resMgrDelegate.killApplication(TypeConverter.toYarn(arg0).getAppId()); - } + /* check if the status is not running, if not send kill to RM */ + JobStatus status = clientCache.getClient(arg0).getJobStatus(arg0); + if (status.getState() != JobStatus.State.RUNNING) { + resMgrDelegate.killApplication(TypeConverter.toYarn(arg0).getAppId()); + return; + } + + try { + /* send a kill to the AM */ + clientCache.getClient(arg0).killJob(arg0); + long currentTimeMillis = System.currentTimeMillis(); + long timeKillIssued = currentTimeMillis; + while ((currentTimeMillis < timeKillIssued + 10000L) && (status.getState() + != JobStatus.State.KILLED)) { + try { + Thread.sleep(1000L); + } catch(InterruptedException ie) { + /** interrupted, just break */ + break; + } + currentTimeMillis = System.currentTimeMillis(); + status = clientCache.getClient(arg0).getJobStatus(arg0); + } + } catch(IOException io) { + LOG.debug("Error when checking for application status", io); + } + if (status.getState() != JobStatus.State.KILLED) { + resMgrDelegate.killApplication(TypeConverter.toYarn(arg0).getAppId()); + } } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java index e2cb1e05ea7..d90e7216941 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java @@ -68,8 +68,8 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -78,8 +78,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import 
org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; @@ -245,7 +245,7 @@ public class TestClientRedirect { } @Override - public GetNewApplicationIdResponse getNewApplicationId(GetNewApplicationIdRequest request) throws YarnRemoteException { + public GetNewApplicationResponse getNewApplication(GetNewApplicationRequest request) throws YarnRemoteException { return null; } @@ -267,6 +267,13 @@ public class TestClientRedirect { application.setHost(split[0]); application.setRpcPort(Integer.parseInt(split[1])); application.setUser("TestClientRedirect-user"); + application.setName("N/A"); + application.setQueue("N/A"); + application.setStartTime(0); + application.setFinishTime(0); + application.setTrackingUrl("N/A"); + application.setDiagnostics("N/A"); + GetApplicationReportResponse response = recordFactory .newRecordInstance(GetApplicationReportResponse.class); response.setApplicationReport(application); @@ -281,9 +288,9 @@ public class TestClientRedirect { } @Override - public FinishApplicationResponse finishApplication( - FinishApplicationRequest request) throws YarnRemoteException { - return null; + public KillApplicationResponse forceKillApplication( + KillApplicationRequest request) throws YarnRemoteException { + return recordFactory.newRecordInstance(KillApplicationResponse.class); } @Override @@ -444,7 +451,7 @@ public class TestClientRedirect { @Override public KillJobResponse killJob(KillJobRequest request) throws YarnRemoteException { - return null; + return recordFactory.newRecordInstance(KillJobResponse.class); } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java index b7fd6c9475a..5b07d4997d7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientServiceDelegate.java @@ -109,7 +109,7 @@ public class TestClientServiceDelegate { ClientServiceDelegate clientServiceDelegate = getClientServiceDelegate( null, getRMDelegate()); JobStatus jobStatus = clientServiceDelegate.getJobStatus(oldJobId); - Assert.assertEquals("Unknown User", jobStatus.getUsername()); + Assert.assertEquals("N/A", jobStatus.getUsername()); Assert.assertEquals(JobStatus.State.PREP, jobStatus.getState()); //RM has app report and job History Server is not configured @@ -145,6 +145,13 @@ public class TestClientServiceDelegate { .newRecord(ApplicationReport.class); applicationReport.setState(ApplicationState.SUCCEEDED); applicationReport.setUser("root"); + applicationReport.setHost("N/A"); + 
applicationReport.setName("N/A"); + applicationReport.setQueue("N/A"); + applicationReport.setStartTime(0); + applicationReport.setFinishTime(0); + applicationReport.setTrackingUrl("N/A"); + applicationReport.setDiagnostics("N/A"); return applicationReport; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java new file mode 100644 index 00000000000..2bc9030bf85 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/TestYarnClientProtocolProvider.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce; + +import java.io.IOException; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.YARNRunner; +import org.apache.hadoop.mapreduce.protocol.ClientProtocol; +import org.junit.Test; + +public class TestYarnClientProtocolProvider extends TestCase { + + @Test + public void testClusterWithYarnClientProtocolProvider() throws Exception { + + Configuration conf = new Configuration(false); + Cluster cluster = null; + + try { + cluster = new Cluster(conf); + fail("Cluster should not be initialized with out any framework name"); + } catch (IOException e) { + + } + + try { + conf = new Configuration(); + conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME); + cluster = new Cluster(conf); + ClientProtocol client = cluster.getClient(); + assertTrue(client instanceof YARNRunner); + } catch (IOException e) { + + } finally { + if (cluster != null) { + cluster.close(); + } + } + } +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java index fcb2a79fafb..49a63db44ba 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java @@ -43,9 +43,15 @@ import org.apache.hadoop.yarn.service.Service; */ public class MiniMRYarnCluster extends MiniYARNCluster { + public static final String HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME = + 
"hadoop-mapreduce-client-app-0.24.0-SNAPSHOT.jar"; + + public static final String YARN_MAPREDUCE_APP_JAR_PATH = + "$YARN_HOME/modules/" + HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME; + public static final String APPJAR = "../hadoop-mapreduce-client-app/target/" - + MRConstants.HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME; + + HADOOP_MAPREDUCE_CLIENT_APP_JAR_NAME; private static final Log LOG = LogFactory.getLog(MiniMRYarnCluster.class); private JobHistoryServer historyServer; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java index 0a1943c013b..aa832aa1cc2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java @@ -402,7 +402,7 @@ public class TestMRJobs { // both should be reachable via the class loader. Assert.assertNotNull(cl.getResource("distributed.jar.inside2")); Assert.assertNotNull(cl.getResource("distributed.jar.inside3")); - Assert.assertNull(cl.getResource("distributed.jar.inside4")); + Assert.assertNotNull(cl.getResource("distributed.jar.inside4")); // Check that the symlink for the renaming was created in the cwd; File symlinkFile = new File("distributed.first.symlink"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java index bc0dfe5fa4a..346ccd2f0da 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestYARNRunner.java @@ -22,6 +22,7 @@ import static org.mockito.Matchers.any; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.io.File; @@ -36,15 +37,37 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.ClientCache; +import org.apache.hadoop.mapred.ClientServiceDelegate; import org.apache.hadoop.mapred.ResourceMgrDelegate; import org.apache.hadoop.mapred.YARNRunner; import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.JobPriority; +import org.apache.hadoop.mapreduce.JobStatus.State; import org.apache.hadoop.mapreduce.TypeConverter; import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.yarn.api.ClientRMProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; +import 
org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.QueueInfo; +import org.apache.hadoop.yarn.api.records.YarnClusterMetrics; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; @@ -54,9 +77,8 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; /** - * Test if the jobclient shows enough diagnostics - * on a job failure. - * + * Test YarnRunner and make sure the client side plugin works + * fine */ public class TestYARNRunner extends TestCase { private static final Log LOG = LogFactory.getLog(TestYARNRunner.class); @@ -65,18 +87,22 @@ public class TestYARNRunner extends TestCase { private YARNRunner yarnRunner; private ResourceMgrDelegate resourceMgrDelegate; private YarnConfiguration conf; + private ClientCache clientCache; private ApplicationId appId; private JobID jobId; private File testWorkDir = new File("target", TestYARNRunner.class.getName()); private ApplicationSubmissionContext submissionContext; + private ClientServiceDelegate clientDelegate; private static final String failString = "Rejected job"; @Before public void setUp() throws Exception { resourceMgrDelegate = mock(ResourceMgrDelegate.class); conf = new YarnConfiguration(); - yarnRunner = new YARNRunner(conf, resourceMgrDelegate); + clientCache = new ClientCache(conf, resourceMgrDelegate); + clientCache = spy(clientCache); + yarnRunner = new YARNRunner(conf, resourceMgrDelegate, clientCache); yarnRunner = spy(yarnRunner); submissionContext = mock(ApplicationSubmissionContext.class); doAnswer( @@ -101,6 +127,31 @@ public class TestYARNRunner extends TestCase { } + @Test + public void testJobKill() throws Exception { + clientDelegate = mock(ClientServiceDelegate.class); + when(clientDelegate.getJobStatus(any(JobID.class))).thenReturn(new + org.apache.hadoop.mapreduce.JobStatus(jobId, 0f, 0f, 0f, 0f, + State.PREP, JobPriority.HIGH, "tmp", "tmp", "tmp", "tmp")); + when(clientDelegate.killJob(any(JobID.class))).thenReturn(true); + doAnswer( + new Answer() { + @Override + public ClientServiceDelegate answer(InvocationOnMock invocation) + throws Throwable { + return clientDelegate; + } + } + ).when(clientCache).getClient(any(JobID.class)); + yarnRunner.killJob(jobId); + verify(resourceMgrDelegate).killApplication(appId); + when(clientDelegate.getJobStatus(any(JobID.class))).thenReturn(new + 
org.apache.hadoop.mapreduce.JobStatus(jobId, 0f, 0f, 0f, 0f, + State.RUNNING, JobPriority.HIGH, "tmp", "tmp", "tmp", "tmp")); + yarnRunner.killJob(jobId); + verify(clientDelegate).killJob(jobId); + } + @Test public void testJobSubmissionFailure() throws Exception { when(resourceMgrDelegate.submitApplication(any(ApplicationSubmissionContext.class))). @@ -122,4 +173,66 @@ public class TestYARNRunner extends TestCase { assertTrue(io.getLocalizedMessage().contains(failString)); } } + + @Test + public void testResourceMgrDelegate() throws Exception { + /* we not want a mock of resourcemgr deleagte */ + ClientRMProtocol clientRMProtocol = mock(ClientRMProtocol.class); + ResourceMgrDelegate delegate = new ResourceMgrDelegate(conf, clientRMProtocol); + /* make sure kill calls finish application master */ + when(clientRMProtocol.forceKillApplication(any(KillApplicationRequest.class))) + .thenReturn(null); + delegate.killApplication(appId); + verify(clientRMProtocol).forceKillApplication(any(KillApplicationRequest.class)); + + /* make sure getalljobs calls get all applications */ + when(clientRMProtocol.getAllApplications(any(GetAllApplicationsRequest.class))). + thenReturn(recordFactory.newRecordInstance(GetAllApplicationsResponse.class)); + delegate.getAllJobs(); + verify(clientRMProtocol).getAllApplications(any(GetAllApplicationsRequest.class)); + + /* make sure getapplication report is called */ + when(clientRMProtocol.getApplicationReport(any(GetApplicationReportRequest.class))) + .thenReturn(recordFactory.newRecordInstance(GetApplicationReportResponse.class)); + delegate.getApplicationReport(appId); + verify(clientRMProtocol).getApplicationReport(any(GetApplicationReportRequest.class)); + + /* make sure metrics is called */ + GetClusterMetricsResponse clusterMetricsResponse = recordFactory.newRecordInstance + (GetClusterMetricsResponse.class); + clusterMetricsResponse.setClusterMetrics(recordFactory.newRecordInstance( + YarnClusterMetrics.class)); + when(clientRMProtocol.getClusterMetrics(any(GetClusterMetricsRequest.class))) + .thenReturn(clusterMetricsResponse); + delegate.getClusterMetrics(); + verify(clientRMProtocol).getClusterMetrics(any(GetClusterMetricsRequest.class)); + + when(clientRMProtocol.getClusterNodes(any(GetClusterNodesRequest.class))). + thenReturn(recordFactory.newRecordInstance(GetClusterNodesResponse.class)); + delegate.getActiveTrackers(); + verify(clientRMProtocol).getClusterNodes(any(GetClusterNodesRequest.class)); + + GetNewApplicationResponse newAppResponse = recordFactory.newRecordInstance( + GetNewApplicationResponse.class); + newAppResponse.setApplicationId(appId); + when(clientRMProtocol.getNewApplication(any(GetNewApplicationRequest.class))). + thenReturn(newAppResponse); + delegate.getNewJobID(); + verify(clientRMProtocol).getNewApplication(any(GetNewApplicationRequest.class)); + + GetQueueInfoResponse queueInfoResponse = recordFactory.newRecordInstance( + GetQueueInfoResponse.class); + queueInfoResponse.setQueueInfo(recordFactory.newRecordInstance(QueueInfo.class)); + when(clientRMProtocol.getQueueInfo(any(GetQueueInfoRequest.class))). 
+ thenReturn(queueInfoResponse); + delegate.getQueues(); + verify(clientRMProtocol).getQueueInfo(any(GetQueueInfoRequest.class)); + + GetQueueUserAclsInfoResponse aclResponse = recordFactory.newRecordInstance( + GetQueueUserAclsInfoResponse.class); + when(clientRMProtocol.getQueueUserAcls(any(GetQueueUserAclsInfoRequest.class))) + .thenReturn(aclResponse); + delegate.getQueueAclsForCurrentUser(); + verify(clientRMProtocol).getQueueUserAcls(any(GetQueueUserAclsInfoRequest.class)); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml index ab1ffcca988..2a5cef3cbc9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml @@ -88,6 +88,12 @@ hadoop-yarn-server-resourcemanager ${yarn.version} + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + ${yarn.version} + test-jar + org.apache.hadoop hadoop-mapreduce-client-core diff --git a/hadoop-mapreduce-project/hadoop-yarn/README b/hadoop-mapreduce-project/hadoop-yarn/README index 8c4f43454ea..713871ab768 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/README +++ b/hadoop-mapreduce-project/hadoop-yarn/README @@ -30,7 +30,6 @@ clean and test: mvn clean install run selected test after compile: mvn test -Dtest=TestClassName (combined: mvn clean install -Dtest=TestClassName) create runnable binaries after install: mvn assembly:assembly (combined: mvn clean install assembly:assembly) - Eclipse Projects ---------------- http://maven.apache.org/guides/mini/guide-ide-eclipse.html @@ -71,3 +70,16 @@ hadoop-yarn-server - Implementation of the hadoop-yarn-api hadoop-yarn-server-common - APIs shared between resourcemanager and nodemanager hadoop-yarn-server-nodemanager (TaskTracker replacement) hadoop-yarn-server-resourcemanager (JobTracker replacement) + +Utilities for understanding the code +------------------------------------ +Almost all of the yarn components as well as the mapreduce framework use +state-machines for all the data objects. To understand those central pieces of +the code, a visual representation of the state-machines helps much. You can first +convert the state-machines into graphviz(.gv) format by +running: + mvn compile -Pvisualize +Then you can use the dot program for generating directed graphs and convert the above +.gv files to images. 
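A minimal sketch (not part of the patch) of the kind of unit test the new injection-friendly constructors above enable -- ResourceMgrDelegate(YarnConfiguration, ClientRMProtocol) and YARNRunner(Configuration, ResourceMgrDelegate, ClientCache) -- distilled from TestYARNRunner in this diff. The class name and the record-factory creation of an ApplicationId are assumptions; the remaining calls mirror ones visible in the patch.

    // Sketch only -- not patch content. Mirrors TestYARNRunner#testResourceMgrDelegate.
    import static org.mockito.Matchers.any;
    import static org.mockito.Mockito.mock;
    import static org.mockito.Mockito.verify;
    import static org.mockito.Mockito.when;

    import org.apache.hadoop.mapred.ResourceMgrDelegate;
    import org.apache.hadoop.yarn.api.ClientRMProtocol;
    import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
    import org.apache.hadoop.yarn.api.records.ApplicationId;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;
    import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
    import org.junit.Test;

    public class ResourceMgrDelegateInjectionSketch {   // hypothetical class name

      @Test
      public void killApplicationIsForwardedAsForceKill() throws Exception {
        // Mock the RM protocol instead of talking to a real ResourceManager.
        ClientRMProtocol rm = mock(ClientRMProtocol.class);
        when(rm.forceKillApplication(any(KillApplicationRequest.class)))
            .thenReturn(null);

        // The new constructor injects the mocked protocol handle directly.
        ResourceMgrDelegate delegate =
            new ResourceMgrDelegate(new YarnConfiguration(), rm);

        // ApplicationId built the same way the patch builds other records
        // (assumption: it is instantiable through the record factory).
        ApplicationId appId = RecordFactoryProvider.getRecordFactory(null)
            .newRecordInstance(ApplicationId.class);

        delegate.killApplication(appId);

        // killApplication() must now map onto the renamed RPC.
        verify(rm).forceKillApplication(any(KillApplicationRequest.class));
      }
    }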
The graphviz package has the needed dot program and related +utilites.For e.g., to generate png files you can run: + dot -Tpng NodeManager.gv > NodeManager.png diff --git a/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 219fd1eb579..7e34ff5487d 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-mapreduce-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -49,6 +49,10 @@ + + + + diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java index 212ca671c89..99f145fbdc3 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java @@ -37,8 +37,11 @@ public interface ApplicationConstants { public static final String APPLICATION_CLIENT_SECRET_ENV_NAME = "AppClientTokenEnv"; - // TODO: Weird. This is part of AM command line. Instead it should be a env. - public static final String AM_FAIL_COUNT_STRING = ""; + /** + * The environmental variable for APPLICATION_ATTEMPT_ID. Set in + * ApplicationMaster's environment only. + */ + public static final String APPLICATION_ATTEMPT_ID_ENV = "APPLICATION_ATTEMPT_ID"; public static final String CONTAINER_TOKEN_FILE_ENV_NAME = UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION; @@ -46,4 +49,117 @@ public interface ApplicationConstants { public static final String LOCAL_DIR_ENV = "YARN_LOCAL_DIRS"; public static final String LOG_DIR_EXPANSION_VAR = ""; + + public static final String STDERR = "stderr"; + + public static final String STDOUT = "stdout"; + + /** + * Classpath for typical applications. + */ + public static final String[] APPLICATION_CLASSPATH = + new String[] { + "$HADOOP_CONF_DIR", + "$HADOOP_COMMON_HOME/share/hadoop/common/*", + "$HADOOP_COMMON_HOME/share/hadoop/common/lib/*", + "$HADOOP_HDFS_HOME/share/hadoop/hdfs/*", + "$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*", + "$YARN_HOME/modules/*", + "$YARN_HOME/lib/*" + }; + + /** + * Environment for Applications. + * + * Some of the environment variables for applications are final + * i.e. they cannot be modified by the applications. + */ + public enum Environment { + /** + * $USER + * Final, non-modifiable. + */ + USER("USER"), + + /** + * $LOGNAME + * Final, non-modifiable. + */ + LOGNAME("LOGNAME"), + + /** + * $HOME + * Final, non-modifiable. + */ + HOME("HOME"), + + /** + * $PWD + * Final, non-modifiable. + */ + PWD("PWD"), + + /** + * $PATH + */ + PATH("PATH"), + + /** + * $SHELL + */ + SHELL("SHELL"), + + /** + * $JAVA_HOME + */ + JAVA_HOME("JAVA_HOME"), + + /** + * $CLASSPATH + */ + CLASSPATH("CLASSPATH"), + + /** + * $LD_LIBRARY_PATH + */ + LD_LIBRARY_PATH("LD_LIBRARY_PATH"), + + /** + * $HADOOP_CONF_DIR + * Final, non-modifiable. 
+ */ + HADOOP_CONF_DIR("HADOOP_CONF_DIR"), + + /** + * $HADOOP_COMMON_HOME + */ + HADOOP_COMMON_HOME("HADOOP_COMMON_HOME"), + + /** + * $HADOOP_HDFS_HOME + */ + HADOOP_HDFS_HOME("HADOOP_HDFS_HOME"), + + /** + * $YARN_HOME + */ + YARN_HOME("YARN_HOME"); + + private final String variable; + private Environment(String variable) { + this.variable = variable; + } + + public String key() { + return variable; + } + + public String toString() { + return variable; + } + + public String $() { + return "$" + variable; + } + } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ClientRMProtocol.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ClientRMProtocol.java index db4c4790cf0..fb934591354 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ClientRMProtocol.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ClientRMProtocol.java @@ -21,8 +21,8 @@ package org.apache.hadoop.yarn.api; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Stable; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -31,8 +31,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; @@ -62,14 +62,18 @@ public interface ClientRMProtocol { *
    The ResourceManager responds with a new, monotonically * increasing, {@link ApplicationId} which is used by the client to submit * a new application.
    - * + * + *
    The ResourceManager also responds with details such + * as minimum and maximum resource capabilities in the cluster as specified in + * {@link GetNewApplicationResponse}.
    + * * @param request request to get a new ApplicationId * @return new ApplicationId to be used to submit an application * @throws YarnRemoteException * @see #submitApplication(SubmitApplicationRequest) */ - public GetNewApplicationIdResponse getNewApplicationId( - GetNewApplicationIdRequest request) + public GetNewApplicationResponse getNewApplication( + GetNewApplicationRequest request) throws YarnRemoteException; /** @@ -92,7 +96,7 @@ public interface ClientRMProtocol { * @param request request to submit a new application * @return (empty) response on accepting the submission * @throws YarnRemoteException - * @see #getNewApplicationId(GetNewApplicationIdRequest) + * @see #getNewApplication(GetNewApplicationRequest) */ public SubmitApplicationResponse submitApplication( SubmitApplicationRequest request) @@ -102,7 +106,7 @@ public interface ClientRMProtocol { *
    The interface used by clients to request the * ResourceManager to abort submitted application.
    * - *
    The client, via {@link FinishApplicationRequest} provides the + *
    The client, via {@link KillApplicationRequest} provides the * {@link ApplicationId} of the application to be aborted.
    * *
    In secure mode,the ResourceManager verifies access to the @@ -117,8 +121,8 @@ public interface ClientRMProtocol { * @throws YarnRemoteException * @see #getQueueUserAcls(GetQueueUserAclsInfoRequest) */ - public FinishApplicationResponse finishApplication( - FinishApplicationRequest request) + public KillApplicationResponse forceKillApplication( + KillApplicationRequest request) throws YarnRemoteException; /** diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdRequest.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationRequest.java similarity index 91% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdRequest.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationRequest.java index c841070080d..a70989f1aab 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdRequest.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationRequest.java @@ -27,10 +27,10 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; *
The request sent by clients to get a new {@link ApplicationId} for
 * submitting an application.
    * - * @see ClientRMProtocol#getNewApplicationId(GetNewApplicationIdRequest) + * @see ClientRMProtocol#getNewApplication(GetNewApplicationRequest) */ @Public @Stable -public interface GetNewApplicationIdRequest { +public interface GetNewApplicationRequest { } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdResponse.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationResponse.java similarity index 66% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdResponse.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationResponse.java index 93a1ab680b3..4e7a7e565be 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationIdResponse.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNewApplicationResponse.java @@ -24,16 +24,17 @@ import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.ClientRMProtocol; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Resource; /** *
The response sent by the ResourceManager to the client for
 * a request to a new {@link ApplicationId} for submitting applications.
    * - * @see ClientRMProtocol#getNewApplicationId(GetNewApplicationIdRequest) + * @see ClientRMProtocol#getNewApplication(GetNewApplicationRequest) */ @Public @Stable -public interface GetNewApplicationIdResponse { +public interface GetNewApplicationResponse { /** * Get the new ApplicationId allocated by the * ResourceManager. @@ -47,4 +48,30 @@ public interface GetNewApplicationIdResponse { @Private @Unstable public abstract void setApplicationId(ApplicationId applicationId); + + /** + * Get the minimum capability for any {@link Resource} allocated by the + * ResourceManager in the cluster. + * @return minimum capability of allocated resources in the cluster + */ + @Public + @Stable + public Resource getMinimumResourceCapability(); + + @Private + @Unstable + public void setMinimumResourceCapability(Resource capability); + + /** + * Get the maximum capability for any {@link Resource} allocated by the + * ResourceManager in the cluster. + * @return maximum capability of allocated resources in the cluster + */ + @Public + @Stable + public Resource getMaximumResourceCapability(); + + @Private + @Unstable + public void setMaximumResourceCapability(Resource capability); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationRequest.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationRequest.java similarity index 94% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationRequest.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationRequest.java index 023ee3c4ac7..c033e64bb20 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationRequest.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationRequest.java @@ -32,11 +32,11 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; *
The request includes the {@link ApplicationId} of the application to be
 * aborted.
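A similarly hedged sketch of aborting an application through the renamed RPC; the KillAppSketch class name and the externally supplied proxy are assumptions, not part of this patch.

import org.apache.hadoop.yarn.api.ClientRMProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.exceptions.YarnRemoteException;
import org.apache.hadoop.yarn.util.Records;

public class KillAppSketch {
  // Builds the request record, points it at the target application and invokes
  // the RPC this patch renames from finishApplication to forceKillApplication.
  public static void abort(ClientRMProtocol rmClient, ApplicationId appId)
      throws YarnRemoteException {
    KillApplicationRequest request =
        Records.newRecord(KillApplicationRequest.class);
    request.setApplicationId(appId);
    rmClient.forceKillApplication(request); // KillApplicationResponse is currently empty
  }
}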
    * - * @see ClientRMProtocol#finishApplication(FinishApplicationRequest) + * @see ClientRMProtocol#forceKillApplication(KillApplicationRequest) */ @Public @Stable -public interface FinishApplicationRequest { +public interface KillApplicationRequest { /** * Get the ApplicationId of the application to be aborted. * @return ApplicationId of the application to be aborted diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationResponse.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationResponse.java similarity index 91% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationResponse.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationResponse.java index cd0c728e536..2a8d0f06d29 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationResponse.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationResponse.java @@ -28,10 +28,10 @@ import org.apache.hadoop.yarn.api.ClientRMProtocol; * *
Currently it's empty.
    * - * @see ClientRMProtocol#finishApplication(FinishApplicationRequest) + * @see ClientRMProtocol#forceKillApplication(KillApplicationRequest) */ @Public @Stable -public interface FinishApplicationResponse { +public interface KillApplicationResponse { } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationIdResponsePBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationIdResponsePBImpl.java deleted file mode 100644 index 45fefd390ee..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationIdResponsePBImpl.java +++ /dev/null @@ -1,109 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; - - -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ProtoBase; -import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; -import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationIdResponseProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationIdResponseProtoOrBuilder; - - - -public class GetNewApplicationIdResponsePBImpl extends ProtoBase implements GetNewApplicationIdResponse { - GetNewApplicationIdResponseProto proto = GetNewApplicationIdResponseProto.getDefaultInstance(); - GetNewApplicationIdResponseProto.Builder builder = null; - boolean viaProto = false; - - private ApplicationId applicationId = null; - - - public GetNewApplicationIdResponsePBImpl() { - builder = GetNewApplicationIdResponseProto.newBuilder(); - } - - public GetNewApplicationIdResponsePBImpl(GetNewApplicationIdResponseProto proto) { - this.proto = proto; - viaProto = true; - } - - public GetNewApplicationIdResponseProto getProto() { - mergeLocalToProto(); - proto = viaProto ? 
proto : builder.build(); - viaProto = true; - return proto; - } - - private void mergeLocalToBuilder() { - if (applicationId != null) { - builder.setApplicationId(convertToProtoFormat(this.applicationId)); - } - } - - private void mergeLocalToProto() { - if (viaProto) - maybeInitBuilder(); - mergeLocalToBuilder(); - proto = builder.build(); - viaProto = true; - } - - private void maybeInitBuilder() { - if (viaProto || builder == null) { - builder = GetNewApplicationIdResponseProto.newBuilder(proto); - } - viaProto = false; - } - - - @Override - public ApplicationId getApplicationId() { - GetNewApplicationIdResponseProtoOrBuilder p = viaProto ? proto : builder; - if (this.applicationId != null) { - return this.applicationId; - } - if (!p.hasApplicationId()) { - return null; - } - this.applicationId = convertFromProtoFormat(p.getApplicationId()); - return this.applicationId; - } - - @Override - public void setApplicationId(ApplicationId applicationId) { - maybeInitBuilder(); - if (applicationId == null) - builder.clearApplicationId(); - this.applicationId = applicationId; - } - - private ApplicationIdPBImpl convertFromProtoFormat(ApplicationIdProto p) { - return new ApplicationIdPBImpl(p); - } - - private ApplicationIdProto convertToProtoFormat(ApplicationId t) { - return ((ApplicationIdPBImpl)t).getProto(); - } - - - -} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationIdRequestPBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationRequestPBImpl.java similarity index 68% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationIdRequestPBImpl.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationRequestPBImpl.java index 0d318674d56..90eae078397 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationIdRequestPBImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNewApplicationRequestPBImpl.java @@ -19,27 +19,26 @@ package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; import org.apache.hadoop.yarn.api.records.ProtoBase; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationIdRequestProto; - +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationRequestProto; -public class GetNewApplicationIdRequestPBImpl extends ProtoBase implements GetNewApplicationIdRequest { - GetNewApplicationIdRequestProto proto = GetNewApplicationIdRequestProto.getDefaultInstance(); - GetNewApplicationIdRequestProto.Builder builder = null; +public class GetNewApplicationRequestPBImpl extends ProtoBase implements GetNewApplicationRequest { + GetNewApplicationRequestProto proto = GetNewApplicationRequestProto.getDefaultInstance(); + GetNewApplicationRequestProto.Builder builder = null; boolean viaProto = false; - public GetNewApplicationIdRequestPBImpl() { - builder = GetNewApplicationIdRequestProto.newBuilder(); + public GetNewApplicationRequestPBImpl() { + builder = 
GetNewApplicationRequestProto.newBuilder(); } - public GetNewApplicationIdRequestPBImpl(GetNewApplicationIdRequestProto proto) { + public GetNewApplicationRequestPBImpl(GetNewApplicationRequestProto proto) { this.proto = proto; viaProto = true; } - public GetNewApplicationIdRequestProto getProto() { + public GetNewApplicationRequestProto getProto() { proto = viaProto ? proto : builder.build(); viaProto = true; return proto; @@ -47,7 +46,7 @@ public class GetNewApplicationIdRequestPBImpl extends ProtoBase implements GetNewApplicationResponse { + GetNewApplicationResponseProto proto = GetNewApplicationResponseProto.getDefaultInstance(); + GetNewApplicationResponseProto.Builder builder = null; + boolean viaProto = false; + + private ApplicationId applicationId = null; + private Resource minimumResourceCapability = null; + private Resource maximumResourceCapability = null; + + public GetNewApplicationResponsePBImpl() { + builder = GetNewApplicationResponseProto.newBuilder(); + } + + public GetNewApplicationResponsePBImpl(GetNewApplicationResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public GetNewApplicationResponseProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private void mergeLocalToBuilder() { + if (applicationId != null) { + builder.setApplicationId(convertToProtoFormat(this.applicationId)); + } + if (minimumResourceCapability != null) { + builder.setMinimumCapability(convertToProtoFormat(this.minimumResourceCapability)); + } + if (maximumResourceCapability != null) { + builder.setMaximumCapability(convertToProtoFormat(this.maximumResourceCapability)); + } + } + + private void mergeLocalToProto() { + if (viaProto) + maybeInitBuilder(); + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = GetNewApplicationResponseProto.newBuilder(proto); + } + viaProto = false; + } + + + @Override + public ApplicationId getApplicationId() { + if (this.applicationId != null) { + return this.applicationId; + } + + GetNewApplicationResponseProtoOrBuilder p = viaProto ? proto : builder; + if (!p.hasApplicationId()) { + return null; + } + + this.applicationId = convertFromProtoFormat(p.getApplicationId()); + return this.applicationId; + } + + @Override + public void setApplicationId(ApplicationId applicationId) { + maybeInitBuilder(); + if (applicationId == null) + builder.clearApplicationId(); + this.applicationId = applicationId; + } + + @Override + public Resource getMaximumResourceCapability() { + if (this.maximumResourceCapability != null) { + return this.maximumResourceCapability; + } + + GetNewApplicationResponseProtoOrBuilder p = viaProto ? proto : builder; + if (!p.hasMaximumCapability()) { + return null; + } + + this.maximumResourceCapability = convertFromProtoFormat(p.getMaximumCapability()); + return this.maximumResourceCapability; + } + + @Override + public Resource getMinimumResourceCapability() { + if (this.minimumResourceCapability != null) { + return this.minimumResourceCapability; + } + + GetNewApplicationResponseProtoOrBuilder p = viaProto ? 
proto : builder; + if (!p.hasMinimumCapability()) { + return null; + } + + this.minimumResourceCapability = convertFromProtoFormat(p.getMinimumCapability()); + return this.minimumResourceCapability; + } + + @Override + public void setMaximumResourceCapability(Resource capability) { + maybeInitBuilder(); + if(maximumResourceCapability == null) { + builder.clearMaximumCapability(); + } + this.maximumResourceCapability = capability; + } + + @Override + public void setMinimumResourceCapability(Resource capability) { + maybeInitBuilder(); + if(minimumResourceCapability == null) { + builder.clearMinimumCapability(); + } + this.minimumResourceCapability = capability; + } + + private ApplicationIdPBImpl convertFromProtoFormat(ApplicationIdProto p) { + return new ApplicationIdPBImpl(p); + } + + private ApplicationIdProto convertToProtoFormat(ApplicationId t) { + return ((ApplicationIdPBImpl)t).getProto(); + } + + private Resource convertFromProtoFormat(ResourceProto resource) { + return new ResourcePBImpl(resource); + } + + private ResourceProto convertToProtoFormat(Resource resource) { + return ((ResourcePBImpl)resource).getProto(); + } + +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationRequestPBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationRequestPBImpl.java similarity index 74% rename from hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationRequestPBImpl.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationRequestPBImpl.java index 044382bddeb..e2761a090be 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationRequestPBImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationRequestPBImpl.java @@ -19,34 +19,34 @@ package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ProtoBase; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationRequestProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationRequestProtoOrBuilder; -public class FinishApplicationRequestPBImpl extends ProtoBase implements FinishApplicationRequest { - FinishApplicationRequestProto proto = FinishApplicationRequestProto.getDefaultInstance(); - FinishApplicationRequestProto.Builder builder = null; +public class KillApplicationRequestPBImpl extends ProtoBase implements KillApplicationRequest { + KillApplicationRequestProto proto = KillApplicationRequestProto.getDefaultInstance(); + KillApplicationRequestProto.Builder builder = null; boolean viaProto = false; private ApplicationId 
applicationId = null; - public FinishApplicationRequestPBImpl() { - builder = FinishApplicationRequestProto.newBuilder(); + public KillApplicationRequestPBImpl() { + builder = KillApplicationRequestProto.newBuilder(); } - public FinishApplicationRequestPBImpl(FinishApplicationRequestProto proto) { + public KillApplicationRequestPBImpl(KillApplicationRequestProto proto) { this.proto = proto; viaProto = true; } - public FinishApplicationRequestProto getProto() { + public KillApplicationRequestProto getProto() { mergeLocalToProto(); proto = viaProto ? proto : builder.build(); viaProto = true; @@ -69,7 +69,7 @@ public class FinishApplicationRequestPBImpl extends ProtoBase implements FinishApplicationResponse { - FinishApplicationResponseProto proto = FinishApplicationResponseProto.getDefaultInstance(); - FinishApplicationResponseProto.Builder builder = null; +public class KillApplicationResponsePBImpl extends ProtoBase implements KillApplicationResponse { + KillApplicationResponseProto proto = KillApplicationResponseProto.getDefaultInstance(); + KillApplicationResponseProto.Builder builder = null; boolean viaProto = false; - public FinishApplicationResponsePBImpl() { - builder = FinishApplicationResponseProto.newBuilder(); + public KillApplicationResponsePBImpl() { + builder = KillApplicationResponseProto.newBuilder(); } - public FinishApplicationResponsePBImpl(FinishApplicationResponseProto proto) { + public KillApplicationResponsePBImpl(KillApplicationResponseProto proto) { this.proto = proto; viaProto = true; } - public FinishApplicationResponseProto getProto() { + public KillApplicationResponseProto getProto() { proto = viaProto ? proto : builder.build(); viaProto = true; return proto; @@ -47,7 +47,7 @@ public class FinishApplicationResponsePBImpl extends ProtoBasefinish time of the application. + * @return finish time of the application + */ + @Public + @Stable + long getFinishTime(); + + @Private + @Unstable + void setFinishTime(long finishTime); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java index 97c84e4d10a..ff054b22ac5 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/Container.java @@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.api.ContainerManager; *
HTTP uri of the node.
 *     {@link Resource} allocated to the container.
+ *     {@link Priority} at which the container was allocated.
 *     {@link ContainerState} of the container.
 *
  19. * {@link ContainerToken} of the container, used to securely verify @@ -111,6 +112,18 @@ public interface Container extends Comparable { @Private @Unstable void setResource(Resource resource); + + /** + * Get the Priority at which the Container was + * allocated. + * @return Priority at which the Container was + * allocated + */ + Priority getPriority(); + + @Private + @Unstable + void setPriority(Priority priority); /** * Get the current ContainerState of the container. diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java index b1e80fc7598..2ea2ddbcdb2 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ApplicationReportPBImpl.java @@ -240,6 +240,30 @@ implements ApplicationReport { return proto; } + @Override + public long getStartTime() { + ApplicationReportProtoOrBuilder p = viaProto ? proto : builder; + return p.getStartTime(); + } + + @Override + public void setStartTime(long startTime) { + maybeInitBuilder(); + builder.setStartTime(startTime); + } + + @Override + public long getFinishTime() { + ApplicationReportProtoOrBuilder p = viaProto ? proto : builder; + return p.getFinishTime(); + } + + @Override + public void setFinishTime(long finishTime) { + maybeInitBuilder(); + builder.setFinishTime(finishTime); + } + private void mergeLocalToBuilder() { if (this.applicationId != null && !((ApplicationIdPBImpl) this.applicationId).getProto().equals( @@ -279,16 +303,4 @@ implements ApplicationReport { ApplicationIdProto applicationId) { return new ApplicationIdPBImpl(applicationId); } - - @Override - public long getStartTime() { - ApplicationReportProtoOrBuilder p = viaProto ? 
proto : builder; - return p.getStartTime(); - } - - @Override - public void setStartTime(long startTime) { - maybeInitBuilder(); - builder.setStartTime(startTime); - } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java index 388cad0f4d8..39b15e0cefd 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerPBImpl.java @@ -25,6 +25,7 @@ import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.ContainerToken; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.ProtoBase; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; @@ -34,6 +35,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStateProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerTokenProto; import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto; +import org.apache.hadoop.yarn.proto.YarnProtos.PriorityProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.util.ProtoUtils; @@ -48,6 +50,7 @@ public class ContainerPBImpl extends ProtoBase implements Contai private ContainerId containerId = null; private NodeId nodeId = null; private Resource resource = null; + private Priority priority = null; private ContainerToken containerToken = null; private ContainerStatus containerStatus = null; @@ -84,6 +87,11 @@ public class ContainerPBImpl extends ProtoBase implements Contai builder.getResource())) { builder.setResource(convertToProtoFormat(this.resource)); } + if (this.priority != null && + !((PriorityPBImpl) this.priority).getProto().equals( + builder.getPriority())) { + builder.setPriority(convertToProtoFormat(this.priority)); + } if (this.containerToken != null && !((ContainerTokenPBImpl) this.containerToken).getProto().equals( builder.getContainerToken())) { @@ -211,6 +219,29 @@ public class ContainerPBImpl extends ProtoBase implements Contai builder.clearResource(); this.resource = resource; } + + @Override + public Priority getPriority() { + ContainerProtoOrBuilder p = viaProto ? proto : builder; + if (this.priority != null) { + return this.priority; + } + if (!p.hasPriority()) { + return null; + } + this.priority = convertFromProtoFormat(p.getPriority()); + return this.priority; + } + + @Override + public void setPriority(Priority priority) { + maybeInitBuilder(); + if (priority == null) { + builder.clearPriority(); + } + this.priority = priority; + } + @Override public ContainerToken getContainerToken() { ContainerProtoOrBuilder p = viaProto ? 
proto : builder; @@ -285,6 +316,14 @@ public class ContainerPBImpl extends ProtoBase implements Contai return ((ResourcePBImpl)t).getProto(); } + private PriorityPBImpl convertFromProtoFormat(PriorityProto p) { + return new PriorityPBImpl(p); + } + + private PriorityProto convertToProtoFormat(Priority p) { + return ((PriorityPBImpl)p).getProto(); + } + private ContainerTokenPBImpl convertFromProtoFormat(ContainerTokenProto p) { return new ContainerTokenPBImpl(p); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/client_RM_protocol.proto b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/client_RM_protocol.proto index cfb14ff3518..fb5f5f6e741 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/client_RM_protocol.proto +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/client_RM_protocol.proto @@ -24,10 +24,10 @@ option java_generate_equals_and_hash = true; import "yarn_service_protos.proto"; service ClientRMProtocolService { - rpc getNewApplicationId (GetNewApplicationIdRequestProto) returns (GetNewApplicationIdResponseProto); + rpc getNewApplication (GetNewApplicationRequestProto) returns (GetNewApplicationResponseProto); rpc getApplicationReport (GetApplicationReportRequestProto) returns (GetApplicationReportResponseProto); rpc submitApplication (SubmitApplicationRequestProto) returns (SubmitApplicationResponseProto); - rpc finishApplication (FinishApplicationRequestProto) returns (FinishApplicationResponseProto); + rpc forceKillApplication (KillApplicationRequestProto) returns (KillApplicationResponseProto); rpc getClusterMetrics (GetClusterMetricsRequestProto) returns (GetClusterMetricsResponseProto); rpc getAllApplications (GetAllApplicationsRequestProto) returns (GetAllApplicationsResponseProto); rpc getClusterNodes (GetClusterNodesRequestProto) returns (GetClusterNodesResponseProto); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index cdcd1a747b8..704c7109964 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -48,6 +48,10 @@ message ResourceProto { optional int32 memory = 1; } +message PriorityProto { + optional int32 priority = 1; +} + enum ContainerStateProto { C_NEW = 1; C_RUNNING = 2; @@ -66,9 +70,10 @@ message ContainerProto { optional NodeIdProto nodeId = 2; optional string node_http_address = 3; optional ResourceProto resource = 4; - optional ContainerStateProto state = 5; - optional ContainerTokenProto container_token = 6; - optional ContainerStatusProto container_status = 7; + optional PriorityProto priority = 5; + optional ContainerStateProto state = 6; + optional ContainerTokenProto container_token = 7; + optional ContainerStatusProto container_status = 8; } enum ApplicationStateProto { @@ -140,6 +145,7 @@ message ApplicationReportProto { optional string trackingUrl = 11; optional string diagnostics = 12 [default = "N/A"]; optional int64 startTime = 13; + optional int64 finishTime = 14; } message NodeIdProto { @@ -252,10 +258,6 @@ message ContainerStatusProto { //////////////////////////////////////////////////////////////////////// ////// From common////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// -message 
PriorityProto { - optional int32 priority = 1; -} - message StringURLMapProto { optional string key = 1; optional URLProto value = 2; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 753c6b8c9a8..1a992ad578e 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -66,11 +66,13 @@ message AllocateResponseProto { /////// client_RM_Protocol /////////////////////////// ////////////////////////////////////////////////////// -message GetNewApplicationIdRequestProto { +message GetNewApplicationRequestProto { } -message GetNewApplicationIdResponseProto { +message GetNewApplicationResponseProto { optional ApplicationIdProto application_id = 1; + optional ResourceProto minimumCapability = 2; + optional ResourceProto maximumCapability = 3; } message GetApplicationReportRequestProto { @@ -88,11 +90,11 @@ message SubmitApplicationRequestProto { message SubmitApplicationResponseProto { } -message FinishApplicationRequestProto { +message KillApplicationRequestProto { optional ApplicationIdProto application_id = 1; } -message FinishApplicationResponseProto { +message KillApplicationResponseProto { } message GetClusterMetricsRequestProto { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ClientRMProtocolPBClientImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ClientRMProtocolPBClientImpl.java index 8972c656d91..b4f2dc46e0f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ClientRMProtocolPBClientImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ClientRMProtocolPBClientImpl.java @@ -25,8 +25,6 @@ import java.net.InetSocketAddress; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -35,16 +33,16 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import 
org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationRequestPBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllApplicationsRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllApplicationsResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationReportRequestPBImpl; @@ -53,27 +51,28 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterMetricsReque import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterMetricsResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterNodesRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterNodesResponsePBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationIdRequestPBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationIdResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueInfoRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueInfoResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.ipc.ProtoOverHadoopRpcEngine; import org.apache.hadoop.yarn.proto.ClientRMProtocol.ClientRMProtocolService; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllApplicationsRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetApplicationReportRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterMetricsRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterNodesRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationIdRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueUserAclsInfoRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueInfoRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueInfoResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueUserAclsInfoRequestProto; +import 
org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationRequestProto; import com.google.protobuf.ServiceException; @@ -88,11 +87,11 @@ public class ClientRMProtocolPBClientImpl implements ClientRMProtocol { } @Override - public FinishApplicationResponse finishApplication( - FinishApplicationRequest request) throws YarnRemoteException { - FinishApplicationRequestProto requestProto = ((FinishApplicationRequestPBImpl)request).getProto(); + public KillApplicationResponse forceKillApplication( + KillApplicationRequest request) throws YarnRemoteException { + KillApplicationRequestProto requestProto = ((KillApplicationRequestPBImpl)request).getProto(); try { - return new FinishApplicationResponsePBImpl(proxy.finishApplication(null, requestProto)); + return new KillApplicationResponsePBImpl(proxy.forceKillApplication(null, requestProto)); } catch (ServiceException e) { if (e.getCause() instanceof YarnRemoteException) { throw (YarnRemoteException)e.getCause(); @@ -139,11 +138,11 @@ public class ClientRMProtocolPBClientImpl implements ClientRMProtocol { } @Override - public GetNewApplicationIdResponse getNewApplicationId( - GetNewApplicationIdRequest request) throws YarnRemoteException { - GetNewApplicationIdRequestProto requestProto = ((GetNewApplicationIdRequestPBImpl)request).getProto(); + public GetNewApplicationResponse getNewApplication( + GetNewApplicationRequest request) throws YarnRemoteException { + GetNewApplicationRequestProto requestProto = ((GetNewApplicationRequestPBImpl)request).getProto(); try { - return new GetNewApplicationIdResponsePBImpl(proxy.getNewApplicationId(null, requestProto)); + return new GetNewApplicationResponsePBImpl(proxy.getNewApplication(null, requestProto)); } catch (ServiceException e) { if (e.getCause() instanceof YarnRemoteException) { throw (YarnRemoteException)e.getCause(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ClientRMProtocolPBServiceImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ClientRMProtocolPBServiceImpl.java index 35e4be53984..342d864ca5d 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ClientRMProtocolPBServiceImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ClientRMProtocolPBServiceImpl.java @@ -19,17 +19,15 @@ package org.apache.hadoop.yarn.api.impl.pb.service; import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import 
org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationRequestPBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.FinishApplicationResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllApplicationsRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetAllApplicationsResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetApplicationReportRequestPBImpl; @@ -38,18 +36,18 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterMetricsReque import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterMetricsResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterNodesRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetClusterNodesResponsePBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationIdRequestPBImpl; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationIdResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetNewApplicationResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueInfoRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueInfoResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetQueueUserAclsInfoResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.KillApplicationResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.proto.ClientRMProtocol.ClientRMProtocolService.BlockingInterface; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllApplicationsRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetAllApplicationsResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetApplicationReportRequestProto; @@ -58,12 +56,14 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterMetricsRequestPr import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterMetricsResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterNodesRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetClusterNodesResponseProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationIdRequestProto; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationIdResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNewApplicationResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueInfoRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueInfoResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueUserAclsInfoRequestProto; import 
org.apache.hadoop.yarn.proto.YarnServiceProtos.GetQueueUserAclsInfoResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.SubmitApplicationResponseProto; @@ -79,12 +79,12 @@ public class ClientRMProtocolPBServiceImpl implements BlockingInterface { } @Override - public FinishApplicationResponseProto finishApplication(RpcController arg0, - FinishApplicationRequestProto proto) throws ServiceException { - FinishApplicationRequestPBImpl request = new FinishApplicationRequestPBImpl(proto); + public KillApplicationResponseProto forceKillApplication(RpcController arg0, + KillApplicationRequestProto proto) throws ServiceException { + KillApplicationRequestPBImpl request = new KillApplicationRequestPBImpl(proto); try { - FinishApplicationResponse response = real.finishApplication(request); - return ((FinishApplicationResponsePBImpl)response).getProto(); + KillApplicationResponse response = real.forceKillApplication(request); + return ((KillApplicationResponsePBImpl)response).getProto(); } catch (YarnRemoteException e) { throw new ServiceException(e); } @@ -116,13 +116,13 @@ public class ClientRMProtocolPBServiceImpl implements BlockingInterface { } @Override - public GetNewApplicationIdResponseProto getNewApplicationId( - RpcController arg0, GetNewApplicationIdRequestProto proto) + public GetNewApplicationResponseProto getNewApplication( + RpcController arg0, GetNewApplicationRequestProto proto) throws ServiceException { - GetNewApplicationIdRequestPBImpl request = new GetNewApplicationIdRequestPBImpl(proto); + GetNewApplicationRequestPBImpl request = new GetNewApplicationRequestPBImpl(proto); try { - GetNewApplicationIdResponse response = real.getNewApplicationId(request); - return ((GetNewApplicationIdResponsePBImpl)response).getProto(); + GetNewApplicationResponse response = real.getNewApplication(request); + return ((GetNewApplicationResponsePBImpl)response).getProto(); } catch (YarnRemoteException e) { throw new ServiceException(e); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index ba23134170f..cb955af8c43 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -357,6 +357,12 @@ public class YarnConfiguration extends Configuration { public static final String NM_AUX_SERVICE_FMT = NM_PREFIX + "aux-services.%s.class"; + public static final String NM_USER_HOME_DIR = + NM_PREFIX + "user-home-dir"; + + public static final String DEFAULT_NM_USER_HOME_DIR= "/home/"; + + public static final int INVALID_CONTAINER_EXIT_STATUS = -1000; public static final int ABORTED_CONTAINER_EXIT_STATUS = -100; @@ -380,6 +386,6 @@ public class YarnConfiguration extends Configuration { // Use apps manager address to figure out the host for webapp addr = conf.get(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS); String host = ADDR_SPLITTER.split(addr).iterator().next(); - return JOINER.join("http://", host, ":", port, "/"); + 
return JOINER.join("http://", host, ":", port); } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/ProtoOverHadoopRpcEngine.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/ProtoOverHadoopRpcEngine.java index 9a623a1a8a7..9d8b846a3b6 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/ProtoOverHadoopRpcEngine.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/ipc/ProtoOverHadoopRpcEngine.java @@ -320,6 +320,12 @@ public class ProtoOverHadoopRpcEngine implements RpcEngine { + methodName); MethodDescriptor methodDescriptor = service.getDescriptorForType() .findMethodByName(methodName); + if (methodDescriptor == null) { + String msg = "Unknown method " + methodName + " called on " + + protocol + " protocol."; + LOG.warn(msg); + return handleException(new IOException(msg)); + } Message prototype = service.getRequestPrototype(methodDescriptor); Message param = prototype.newBuilderForType() .mergeFrom(rpcRequest.getRequestProto()).build(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java index 2a5244d6512..71c829ac582 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/state/StateMachineFactory.java @@ -20,10 +20,14 @@ package org.apache.hadoop.yarn.state; import java.util.EnumMap; import java.util.HashMap; +import java.util.Iterator; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.Stack; +import org.apache.hadoop.yarn.util.Graph; + /** * State machine topology. * This object is semantically immutable. 
If you have a @@ -441,4 +445,39 @@ final public class StateMachineFactory return currentState; } } + + /** + * Generate a graph represents the state graph of this StateMachine + * @param name graph name + * @return Graph object generated + */ + public Graph generateStateGraph(String name) { + maybeMakeStateMachineTable(); + Graph g = new Graph(name); + for (STATE startState : stateMachineTable.keySet()) { + Map> transitions + = stateMachineTable.get(startState); + for (Entry> entry : + transitions.entrySet()) { + Transition transition = entry.getValue(); + if (transition instanceof StateMachineFactory.SingleInternalArc) { + StateMachineFactory.SingleInternalArc sa + = (StateMachineFactory.SingleInternalArc) transition; + Graph.Node fromNode = g.getNode(startState.toString()); + Graph.Node toNode = g.getNode(sa.postState.toString()); + fromNode.addEdge(toNode, entry.getKey().toString()); + } else if (transition instanceof StateMachineFactory.MultipleInternalArc) { + StateMachineFactory.MultipleInternalArc ma + = (StateMachineFactory.MultipleInternalArc) transition; + Iterator iter = ma.validPostStates.iterator(); + while (iter.hasNext()) { + Graph.Node fromNode = g.getNode(startState.toString()); + Graph.Node toNode = g.getNode(iter.next().toString()); + fromNode.addEdge(toNode, entry.getKey().toString()); + } + } + } + } + return g; + } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java index 4eb63c04470..7ec367292e1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BuilderUtils.java @@ -20,7 +20,9 @@ package org.apache.hadoop.yarn.util; import java.net.URI; import java.util.Comparator; +import java.util.List; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; @@ -184,32 +186,31 @@ public class BuilderUtils { return id; } - public static Container clone(Container c) { - Container container = recordFactory.newRecordInstance(Container.class); - container.setId(c.getId()); - container.setContainerToken(c.getContainerToken()); - container.setNodeId(c.getNodeId()); - container.setNodeHttpAddress(c.getNodeHttpAddress()); - container.setResource(c.getResource()); - container.setState(c.getState()); - return container; + public static NodeId newNodeId(String host, int port) { + NodeId nodeId = recordFactory.newRecordInstance(NodeId.class); + nodeId.setHost(host); + nodeId.setPort(port); + return nodeId; } public static Container newContainer(RecordFactory recordFactory, ApplicationAttemptId appAttemptId, int containerId, NodeId nodeId, - String nodeHttpAddress, Resource resource) { + String nodeHttpAddress, Resource resource, Priority priority) { ContainerId containerID = newContainerId(recordFactory, appAttemptId, containerId); - return newContainer(containerID, nodeId, nodeHttpAddress, resource); + return newContainer(containerID, nodeId, nodeHttpAddress, + resource, priority); } public static Container newContainer(ContainerId containerId, - NodeId nodeId, String nodeHttpAddress, Resource resource) { + NodeId nodeId, String 
nodeHttpAddress, + Resource resource, Priority priority) { Container container = recordFactory.newRecordInstance(Container.class); container.setId(containerId); container.setNodeId(nodeId); container.setNodeHttpAddress(nodeHttpAddress); container.setResource(resource); + container.setPriority(priority); container.setState(ContainerState.NEW); ContainerStatus containerStatus = Records.newRecord(ContainerStatus.class); containerStatus.setContainerId(containerId); @@ -242,7 +243,7 @@ public class BuilderUtils { public static ApplicationReport newApplicationReport( ApplicationId applicationId, String user, String queue, String name, String host, int rpcPort, String clientToken, ApplicationState state, - String diagnostics, String url, long startTime) { + String diagnostics, String url, long startTime, long finishTime) { ApplicationReport report = recordFactory .newRecordInstance(ApplicationReport.class); report.setApplicationId(applicationId); @@ -256,6 +257,7 @@ public class BuilderUtils { report.setDiagnostics(diagnostics); report.setTrackingUrl(url); report.setStartTime(startTime); + report.setFinishTime(finishTime); return report; } @@ -273,5 +275,18 @@ public class BuilderUtils { url.setFile(file); return url; } - + + public static AllocateRequest newAllocateRequest( + ApplicationAttemptId applicationAttemptId, int responseID, + float appProgress, List resourceAsk, + List containersToBeReleased) { + AllocateRequest allocateRequest = recordFactory + .newRecordInstance(AllocateRequest.class); + allocateRequest.setApplicationAttemptId(applicationAttemptId); + allocateRequest.setResponseId(responseID); + allocateRequest.setProgress(appProgress); + allocateRequest.addAllAsks(resourceAsk); + allocateRequest.addAllReleases(containersToBeReleased); + return allocateRequest; + } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java index ab6bd7395dc..6f5e9043192 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ConverterUtils.java @@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.util; import static org.apache.hadoop.yarn.util.StringHelper._split; +import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.text.NumberFormat; @@ -45,6 +46,8 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; public class ConverterUtils { public static final String APPLICATION_PREFIX = "application"; + public static final String CONTAINER_PREFIX = "container"; + public static final String APPLICATION_ATTEMPT_PREFIX = "appattempt"; /** * return a hadoop path from a given url @@ -132,14 +135,12 @@ public class ConverterUtils { } private static ApplicationAttemptId toApplicationAttemptId( - RecordFactory recordFactory, - Iterator it) { - ApplicationId appId = - recordFactory.newRecordInstance(ApplicationId.class); + Iterator it) throws NumberFormatException { + ApplicationId appId = Records.newRecord(ApplicationId.class); appId.setClusterTimestamp(Long.parseLong(it.next())); appId.setId(Integer.parseInt(it.next())); - ApplicationAttemptId appAttemptId = - recordFactory.newRecordInstance(ApplicationAttemptId.class); + ApplicationAttemptId appAttemptId = Records + 
.newRecord(ApplicationAttemptId.class); appAttemptId.setApplicationId(appId); appAttemptId.setAttemptId(Integer.parseInt(it.next())); return appAttemptId; @@ -149,16 +150,35 @@ public class ConverterUtils { return cId.toString(); } - public static ContainerId toContainerId(RecordFactory recordFactory, - String containerIdStr) { + public static ContainerId toContainerId(String containerIdStr) + throws IOException { Iterator it = _split(containerIdStr).iterator(); - it.next(); // prefix. TODO: Validate container prefix - ApplicationAttemptId appAttemptID = - toApplicationAttemptId(recordFactory, it); - ContainerId containerId = - recordFactory.newRecordInstance(ContainerId.class); - containerId.setApplicationAttemptId(appAttemptID); - containerId.setId(Integer.parseInt(it.next())); - return containerId; + if (!it.next().equals(CONTAINER_PREFIX)) { + throw new IOException("Invalid ContainerId prefix: " + containerIdStr); + } + try { + ApplicationAttemptId appAttemptID = toApplicationAttemptId(it); + ContainerId containerId = Records.newRecord(ContainerId.class); + containerId.setApplicationAttemptId(appAttemptID); + containerId.setId(Integer.parseInt(it.next())); + return containerId; + } catch (NumberFormatException n) { + throw new IOException("Invalid ContainerId: " + containerIdStr, n); + } + } + + public static ApplicationAttemptId toApplicationAttemptId( + String applicationAttmeptIdStr) throws IOException { + Iterator it = _split(applicationAttmeptIdStr).iterator(); + if (!it.next().equals(APPLICATION_ATTEMPT_PREFIX)) { + throw new IOException("Invalid AppAttemptId prefix: " + + applicationAttmeptIdStr); + } + try { + return toApplicationAttemptId(it); + } catch (NumberFormatException n) { + throw new IOException("Invalid AppAttemptId: " + + applicationAttmeptIdStr, n); + } } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Graph.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Graph.java new file mode 100644 index 00000000000..aa3604fa87a --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/Graph.java @@ -0,0 +1,210 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.util; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.lang.StringEscapeUtils; + +public class Graph { + public class Edge { + Node from; + Node to; + String label; + + public Edge(Node from, Node to, String info) { + this.from = from; + this.to = to; + this.label = info; + } + + public boolean sameAs(Edge rhs) { + if (this.from == rhs.from && + this.to == rhs.to) { + return true; + } + return false; + } + + public Edge combine(Edge rhs) { + String newlabel = this.label + "," + rhs.label; + return new Edge(this.from, this.to, newlabel); + } + } + + public class Node { + Graph parent; + String id; + List ins; + List outs; + + public Node(String id) { + this.id = id; + this.parent = Graph.this; + this.ins = new ArrayList(); + this.outs = new ArrayList(); + } + + public Graph getParent() { + return parent; + } + + public Node addEdge(Node to, String info) { + Edge e = new Edge(this, to, info); + outs.add(e); + to.ins.add(e); + return this; + } + + public String getUniqueId() { + return Graph.this.name + "." + id; + } + } + + private String name; + private Graph parent; + private Set nodes = new HashSet(); + private Set subgraphs = new HashSet(); + + public Graph(String name, Graph parent) { + this.name = name; + this.parent = parent; + } + + public Graph(String name) { + this(name, null); + } + + public Graph() { + this("graph", null); + } + + public String getName() { + return name; + } + + public Graph getParent() { + return parent; + } + + private Node newNode(String id) { + Node ret = new Node(id); + nodes.add(ret); + return ret; + } + + public Node getNode(String id) { + for (Node node : nodes) { + if (node.id.equals(id)) { + return node; + } + } + return newNode(id); + } + + public Graph newSubGraph(String name) { + Graph ret = new Graph(name, this); + subgraphs.add(ret); + return ret; + } + + public void addSubGraph(Graph graph) { + subgraphs.add(graph); + graph.parent = this; + } + + private static String wrapSafeString(String label) { + if (label.indexOf(',') >= 0) { + if (label.length()>14) { + label = label.replaceAll(",", ",\n"); + } + } + label = "\"" + StringEscapeUtils.escapeJava(label) + "\""; + return label; + } + + public String generateGraphViz(String indent) { + StringBuilder sb = new StringBuilder(); + if (this.parent == null) { + sb.append("digraph " + name + " {\n"); + sb.append(String.format("graph [ label=%s, fontsize=24, fontname=Helvetica];\n", + wrapSafeString(name))); + sb.append("node [fontsize=12, fontname=Helvetica];\n"); + sb.append("edge [fontsize=9, fontcolor=blue, fontname=Arial];\n"); + } else { + sb.append("subgraph cluster_" + name + " {\nlabel=\"" + name + "\"\n"); + } + for (Graph g : subgraphs) { + String ginfo = g.generateGraphViz(indent+" "); + sb.append(ginfo); + sb.append("\n"); + } + for (Node n : nodes) { + sb.append(String.format( + "%s%s [ label = %s ];\n", + indent, + wrapSafeString(n.getUniqueId()), + n.id)); + List combinedOuts = combineEdges(n.outs); + for (Edge e : combinedOuts) { + sb.append(String.format( + "%s%s -> %s [ label = %s ];\n", + indent, + wrapSafeString(e.from.getUniqueId()), + wrapSafeString(e.to.getUniqueId()), + wrapSafeString(e.label))); + } + } + sb.append("}\n"); + return sb.toString(); + } + + public String generateGraphViz() { + return generateGraphViz(""); + } + + public void save(String filepath) throws IOException { + FileWriter fout = 
new FileWriter(filepath); + fout.write(generateGraphViz()); + fout.close(); + } + + public static List combineEdges(List edges) { + List ret = new ArrayList(); + for (Edge edge : edges) { + boolean found = false; + for (int i = 0; i < ret.size(); i++) { + Edge current = ret.get(i); + if (edge.sameAs(current)) { + ret.set(i, current.combine(edge)); + found = true; + break; + } + } + if (!found) { + ret.add(edge); + } + } + return ret; + } +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/VisualizeStateMachine.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/VisualizeStateMachine.java new file mode 100644 index 00000000000..0fb9a48b098 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/VisualizeStateMachine.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.util; + +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.yarn.state.StateMachineFactory; + +public class VisualizeStateMachine { + + /** + * @param classes list of classes which have static field + * stateMachineFactory of type StateMachineFactory + * @return graph represent this StateMachine + */ + public static Graph getGraphFromClasses(String graphName, List classes) + throws Exception { + Graph ret = null; + if (classes.size() != 1) { + ret = new Graph(graphName); + } + for (String className : classes) { + Class clz = Class.forName(className); + Field factoryField = clz.getDeclaredField("stateMachineFactory"); + factoryField.setAccessible(true); + StateMachineFactory factory = (StateMachineFactory) factoryField.get(null); + if (classes.size() == 1) { + return factory.generateStateGraph(graphName); + } + String gname = clz.getSimpleName(); + if (gname.endsWith("Impl")) { + gname = gname.substring(0, gname.length()-4); + } + ret.addSubGraph(factory.generateStateGraph(gname)); + } + return ret; + } + + public static void main(String [] args) throws Exception { + if (args.length < 3) { + System.err.printf("Usage: %s \n", + VisualizeStateMachine.class.getName()); + System.exit(1); + } + String [] classes = args[1].split(","); + ArrayList validClasses = new ArrayList(); + for (String c : classes) { + String vc = c.trim(); + if (vc.length()>0) { + validClasses.add(vc); + } + } + Graph g = getGraphFromClasses(args[0], validClasses); + g.save(args[2]); + } +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java 
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java index ef8ab976ef8..e404fe5a723 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java @@ -84,6 +84,15 @@ public class Dispatcher extends HttpServlet { prepareToExit(); return; } + // if they provide a redirectPath go there instead of going to + // "/" so that filters can differentiate the webapps. + if (uri.equals("/")) { + String redirectPath = webApp.getRedirectPath(); + if (redirectPath != null && !redirectPath.isEmpty()) { + res.sendRedirect(redirectPath); + return; + } + } String method = req.getMethod(); if (method.equals("OPTIONS")) { doOptions(req, res); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java index b9afe81ca85..f83843e97e6 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java @@ -26,6 +26,7 @@ import com.google.inject.Provides; import com.google.inject.servlet.GuiceFilter; import com.google.inject.servlet.ServletModule; +import java.util.ArrayList; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -44,6 +45,9 @@ public abstract class WebApp extends ServletModule { public enum HTTP { GET, POST, HEAD, PUT, DELETE }; private volatile String name; + private volatile List servePathSpecs = new ArrayList(); + // path to redirect to if user goes to "/" + private volatile String redirectPath; private volatile Configuration conf; private volatile HttpServer httpServer; private volatile GuiceFilter guiceFilter; @@ -98,6 +102,22 @@ public abstract class WebApp extends ServletModule { public String name() { return this.name; } + void addServePathSpec(String path) { this.servePathSpecs.add(path); } + + public String[] getServePathSpecs() { + return this.servePathSpecs.toArray(new String[this.servePathSpecs.size()]); + } + + /** + * Set a path to redirect the user to if they just go to "/". For + * instance "/" goes to "/yarn/apps". This allows the filters to + * more easily differentiate the different webapps. 
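* For example, the NodeManager web app below is registered under "/node", so a
* request to "/" is redirected to "/node" (see the WebServer change further down).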
+ * @param path the path to redirect to + */ + void setRedirectPath(String path) { this.redirectPath = path; } + + public String getRedirectPath() { return this.redirectPath; } + void setHostClass(Class cls) { router.setHostClass(cls); } @@ -109,7 +129,10 @@ public abstract class WebApp extends ServletModule { @Override public void configureServlets() { setup(); - serve("/", "/__stop", StringHelper.join('/', name, '*')).with(Dispatcher.class); + serve("/", "/__stop").with(Dispatcher.class); + for (String path : this.servePathSpecs) { + serve(path).with(Dispatcher.class); + } } /** diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java index 85b88d16cc4..b5217999687 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApps.java @@ -113,6 +113,14 @@ public class WebApps { }; } webapp.setName(name); + String basePath = "/" + name; + webapp.setRedirectPath(basePath); + if (basePath.equals("/")) { + webapp.addServePathSpec("/*"); + } else { + webapp.addServePathSpec(basePath); + webapp.addServePathSpec(basePath + "/*"); + } if (conf == null) { conf = new Configuration(); } @@ -142,7 +150,8 @@ public class WebApps { } } HttpServer server = - new HttpServer(name, bindAddress, port, findPort, conf); + new HttpServer(name, bindAddress, port, findPort, conf, + webapp.getServePathSpecs()); server.addGlobalFilter("guice", GuiceFilter.class.getName(), null); webapp.setConf(conf); webapp.setHttpServer(server); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/cluster/.keep b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/cluster/.keep new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/jobhistory/.keep b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/jobhistory/.keep new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/mapreduce/.keep b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/mapreduce/.keep new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/node/.keep b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/node/.keep new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java index 65f6c548fbc..7d233e2d9fc 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/MockApps.java @@ -167,6 +167,16 @@ public class MockApps { // TODO Auto-generated method stub } + @Override + public long getFinishTime() { + // TODO Auto-generated method stub + return 0; + } + @Override + public void setFinishTime(long 
finishTime) { + // TODO Auto-generated method stub + + } }; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java index 58efcc42307..a855cc6f218 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java @@ -25,9 +25,11 @@ import junit.framework.Assert; import org.apache.avro.ipc.Server; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.yarn.api.ClientRMProtocol; import org.apache.hadoop.yarn.api.ContainerManager; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerResponse; import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; @@ -47,6 +49,7 @@ import org.apache.hadoop.yarn.factory.providers.YarnRemoteExceptionFactoryProvid import org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC; import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.ipc.YarnRPC; +import org.apache.hadoop.yarn.util.Records; import org.junit.Test; public class TestRPC { @@ -65,6 +68,35 @@ public class TestRPC { // test(HadoopYarnRPC.class.getName()); // } + @Test + public void testUnknownCall() { + Configuration conf = new Configuration(); + conf.set(YarnConfiguration.IPC_RPC_IMPL, HadoopYarnProtoRPC.class + .getName()); + YarnRPC rpc = YarnRPC.create(conf); + String bindAddr = "localhost:0"; + InetSocketAddress addr = NetUtils.createSocketAddr(bindAddr); + Server server = rpc.getServer(ContainerManager.class, + new DummyContainerManager(), addr, conf, null, 1); + server.start(); + + // Any unrelated protocol would do + ClientRMProtocol proxy = (ClientRMProtocol) rpc.getProxy( + ClientRMProtocol.class, NetUtils.createSocketAddr("localhost:" + + server.getPort()), conf); + + try { + proxy.getNewApplication(Records + .newRecord(GetNewApplicationRequest.class)); + Assert.fail("Excepted RPC call to fail with unknown method."); + } catch (YarnRemoteException e) { + Assert.assertTrue(e.getMessage().matches( + "Unknown method getNewApplication called on.*" + + "org.apache.hadoop.yarn.proto.ClientRMProtocol" + + "\\$ClientRMProtocolService\\$BlockingInterface protocol.")); + } + } + @Test public void testHadoopProtoRPC() throws Exception { test(HadoopYarnProtoRPC.class.getName()); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java new file mode 100644 index 00000000000..3d2a5769097 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java @@ -0,0 +1,54 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.conf; + +import java.net.InetSocketAddress; + +import junit.framework.Assert; + +import org.apache.avro.ipc.Server; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.junit.Test; + +public class TestYarnConfiguration { + + @Test + public void testDefaultRMWebUrl() throws Exception { + YarnConfiguration conf = new YarnConfiguration(); + String rmWebUrl = YarnConfiguration.getRMWebAppURL(conf); + // shouldn't have a "/" on the end of the url as all the other uri routinnes + // specifically add slashes and Jetty doesn't handle double slashes. + Assert.assertEquals("RM Web Url is not correct", "http://0.0.0.0:8088", + rmWebUrl); + } + + @Test + public void testRMWebUrlSpecified() throws Exception { + YarnConfiguration conf = new YarnConfiguration(); + // seems a bit odd but right now we are forcing webapp for RM to be RM_ADDRESS + // for host and use the port from the RM_WEBAPP_ADDRESS + conf.set(YarnConfiguration.RM_WEBAPP_ADDRESS, "footesting:99110"); + conf.set(YarnConfiguration.RM_ADDRESS, "rmtesting:9999"); + String rmWebUrl = YarnConfiguration.getRMWebAppURL(conf); + Assert.assertEquals("RM Web Url is not correct", "http://rmtesting:99110", + rmWebUrl); + } + +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java index db84f32cf64..31b2aaa2ed6 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.webapp; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.yarn.MockApps; import org.apache.hadoop.yarn.webapp.Controller; import org.apache.hadoop.yarn.webapp.WebApp; @@ -148,6 +149,32 @@ public class TestWebApp { app.stop(); } + @Test public void testServePaths() { + WebApp app = WebApps.$for("test", this).start(); + assertEquals("/test", app.getRedirectPath()); + String[] expectedPaths = { "/test", "/test/*" }; + String[] pathSpecs = app.getServePathSpecs(); + + assertEquals(2, pathSpecs.length); + for(int i = 0; i < expectedPaths.length; i++) { + assertTrue(ArrayUtils.contains(pathSpecs, expectedPaths[i])); + } + app.stop(); + } + + @Test public void testServePathsNoName() { + WebApp app = WebApps.$for("", this).start(); + assertEquals("/", app.getRedirectPath()); + String[] expectedPaths = { "/*" }; + String[] pathSpecs = app.getServePathSpecs(); + + assertEquals(1, pathSpecs.length); + for(int i = 0; i < expectedPaths.length; i++) { + assertTrue(ArrayUtils.contains(pathSpecs, expectedPaths[i])); + } + app.stop(); + } + @Test public void testDefaultRoutes() throws Exception { 
WebApp app = WebApps.$for("test", this).start(); String baseUrl = baseUrl(app); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml index 98959644cf6..1d7b9cb2d1f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml @@ -103,6 +103,39 @@ true + + visualize + + false + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2 + + + compile + + java + + + org.apache.hadoop.yarn.util.VisualizeStateMachine + + NodeManager + org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl, + org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl, + org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.LocalizedResource + NodeManager.gv + + + + + + + + diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java index a7e82a2d41a..83872876797 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DefaultContainerExecutor.java @@ -133,8 +133,10 @@ public class DefaultContainerExecutor extends ContainerExecutor { String[] command = new String[] { "bash", "-c", launchDst.toUri().getPath().toString() }; LOG.info("launchContainer: " + Arrays.toString(command)); - shExec = new ShellCommandExecutor(command, - new File(containerWorkDir.toUri().getPath())); + shExec = new ShellCommandExecutor( + command, + new File(containerWorkDir.toUri().getPath()), + container.getLaunchContext().getEnvironment()); // sanitized env launchCommandObjs.put(containerId, shExec); shExec.execute(); } catch (IOException e) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index 97721f72a36..0779d3b1581 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -161,7 +161,11 @@ public class LinuxContainerExecutor extends ContainerExecutor { nmPrivateCotainerScriptPath.toUri().getPath().toString(), nmPrivateTokensPath.toUri().getPath().toString())); String[] commandArray = command.toArray(new String[command.size()]); - ShellCommandExecutor shExec = new ShellCommandExecutor(commandArray); + ShellCommandExecutor shExec = + new ShellCommandExecutor( + commandArray, + null, // NM's cwd + 
container.getLaunchContext().getEnvironment()); // sanitized env launchCommandObjs.put(containerId, shExec); // DEBUG LOG.info("launchContainer: " + Arrays.toString(commandArray)); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 641e74b8018..1b1fd46b9e7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -101,7 +101,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements public synchronized void init(Configuration conf) { this.rmAddress = conf.get(YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, - YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS); + YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_ADDRESS); this.heartBeatInterval = conf.getLong(YarnConfiguration.NM_TO_RM_HEARTBEAT_INTERVAL_MS, YarnConfiguration.DEFAULT_NM_TO_RM_HEARTBEAT_INTERVAL_MS); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 4e02c3adede..8d3f3fe0842 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -158,10 +158,12 @@ public class ContainerImpl implements Container { ContainerEventType.CONTAINER_LAUNCHED, new LaunchTransition()) .addTransition(ContainerState.LOCALIZED, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, - new ExitedWithFailureTransition()) + new ExitedWithFailureTransition(true)) .addTransition(ContainerState.LOCALIZED, ContainerState.LOCALIZED, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) + // TODO race: Can lead to a CONTAINER_LAUNCHED event at state KILLING, + // and a container which will never be killed by the NM. 
.addTransition(ContainerState.LOCALIZED, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, new KillTransition()) @@ -169,16 +171,19 @@ public class ContainerImpl implements Container { .addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_SUCCESS, ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS, - new ExitedWithSuccessTransition()) + new ExitedWithSuccessTransition(true)) .addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, - new ExitedWithFailureTransition()) + new ExitedWithFailureTransition(true)) .addTransition(ContainerState.RUNNING, ContainerState.RUNNING, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) .addTransition(ContainerState.RUNNING, ContainerState.KILLING, ContainerEventType.KILL_CONTAINER, new KillTransition()) + .addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_FAILURE, + ContainerEventType.CONTAINER_KILLED_ON_REQUEST, + new KilledExternallyTransition()) // From CONTAINER_EXITED_WITH_SUCCESS State .addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.DONE, @@ -220,10 +225,10 @@ public class ContainerImpl implements Container { ContainerEventType.KILL_CONTAINER) .addTransition(ContainerState.KILLING, ContainerState.EXITED_WITH_SUCCESS, ContainerEventType.CONTAINER_EXITED_WITH_SUCCESS, - new ExitedWithSuccessTransition()) + new ExitedWithSuccessTransition(false)) .addTransition(ContainerState.KILLING, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_EXITED_WITH_FAILURE, - new ExitedWithFailureTransition()) + new ExitedWithFailureTransition(false)) .addTransition(ContainerState.KILLING, ContainerState.DONE, ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP, @@ -551,18 +556,41 @@ public class ContainerImpl implements Container { } } + @SuppressWarnings("unchecked") // dispatcher not typed static class ExitedWithSuccessTransition extends ContainerTransition { + + boolean clCleanupRequired; + + public ExitedWithSuccessTransition(boolean clCleanupRequired) { + this.clCleanupRequired = clCleanupRequired; + } + @Override public void transition(ContainerImpl container, ContainerEvent event) { + // Set exit code to 0 on success + container.exitCode = 0; + // TODO: Add containerWorkDir to the deletion service. - // Inform the localizer to decrement reference counts and cleanup - // resources. + if (clCleanupRequired) { + container.dispatcher.getEventHandler().handle( + new ContainersLauncherEvent(container, + ContainersLauncherEventType.CLEANUP_CONTAINER)); + } + container.cleanup(); } } + @SuppressWarnings("unchecked") // dispatcher not typed static class ExitedWithFailureTransition extends ContainerTransition { + + boolean clCleanupRequired; + + public ExitedWithFailureTransition(boolean clCleanupRequired) { + this.clCleanupRequired = clCleanupRequired; + } + @Override public void transition(ContainerImpl container, ContainerEvent event) { ContainerExitEvent exitEvent = (ContainerExitEvent) event; @@ -571,12 +599,28 @@ public class ContainerImpl implements Container { // TODO: Add containerWorkDir to the deletion service. // TODO: Add containerOuputDir to the deletion service. - // Inform the localizer to decrement reference counts and cleanup - // resources. 
+ if (clCleanupRequired) { + container.dispatcher.getEventHandler().handle( + new ContainersLauncherEvent(container, + ContainersLauncherEventType.CLEANUP_CONTAINER)); + } + container.cleanup(); } } + static class KilledExternallyTransition extends ExitedWithFailureTransition { + KilledExternallyTransition() { + super(true); + } + + @Override + public void transition(ContainerImpl container, ContainerEvent event) { + super.transition(container, event); + container.diagnostics.append("Killed by external signal\n"); + } + } + static class ResourceFailedTransition implements SingleArcTransition { @Override diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java index 497460d3e7d..43afa4cb85e 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/ContainerLaunch.java @@ -44,6 +44,7 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ApplicationConstants; +import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; @@ -89,7 +90,6 @@ public class ContainerLaunch implements Callable { final Map localResources = container.getLocalizedResources(); String containerIdStr = ConverterUtils.toString(container.getContainerID()); final String user = launchContext.getUser(); - final Map env = launchContext.getEnvironment(); final List command = launchContext.getCommands(); int ret = -1; @@ -109,16 +109,16 @@ public class ContainerLaunch implements Callable { } launchContext.setCommands(newCmds); - Map envs = launchContext.getEnvironment(); - Map newEnvs = new HashMap(envs.size()); - for (Entry entry : envs.entrySet()) { - newEnvs.put( - entry.getKey(), - entry.getValue().replace( + Map environment = launchContext.getEnvironment(); + // Make a copy of env to iterate & do variable expansion + for (Entry entry : environment.entrySet()) { + String value = entry.getValue(); + entry.setValue( + value.replace( ApplicationConstants.LOG_DIR_EXPANSION_VAR, - containerLogDir.toUri().getPath())); + containerLogDir.toUri().getPath()) + ); } - launchContext.setEnvironment(newEnvs); // /////////////////////////// End of variable expansion FileContext lfs = FileContext.getLocalFSFileContext(); @@ -164,11 +164,18 @@ public class ContainerLaunch implements Callable { EnumSet.of(CREATE, OVERWRITE)); // Set the token location too. 
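// (That is, expose ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME so the
// launched process can locate its tokens file in the container work dir; the
// change below only swaps the removed "env" local for the shared "environment"
// map, which is sanitized just below before the launch script is written.)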
- env.put(ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME, new Path( - containerWorkDir, FINAL_CONTAINER_TOKENS_FILE).toUri().getPath()); + environment.put( + ApplicationConstants.CONTAINER_TOKEN_FILE_ENV_NAME, + new Path(containerWorkDir, + FINAL_CONTAINER_TOKENS_FILE).toUri().getPath()); - writeLaunchEnv(containerScriptOutStream, env, localResources, - launchContext.getCommands(), appDirs); + // Sanitize the container's environment + sanitizeEnv(environment, containerWorkDir, appDirs); + + // Write out the environment + writeLaunchEnv(containerScriptOutStream, environment, localResources, + launchContext.getCommands()); + // /////////// End of writing out container-script // /////////// Write out the container-tokens in the nmPrivate space. @@ -275,19 +282,71 @@ public class ContainerLaunch implements Callable { } + private static void putEnvIfNotNull( + Map environment, String variable, String value) { + if (value != null) { + environment.put(variable, value); + } + } + + private static void putEnvIfAbsent( + Map environment, String variable) { + if (environment.get(variable) == null) { + putEnvIfNotNull(environment, variable, System.getenv(variable)); + } + } + + public void sanitizeEnv(Map environment, + Path pwd, List appDirs) { + /** + * Non-modifiable environment variables + */ + + putEnvIfNotNull(environment, Environment.USER.name(), container.getUser()); + + putEnvIfNotNull(environment, + Environment.LOGNAME.name(),container.getUser()); + + putEnvIfNotNull(environment, + Environment.HOME.name(), + conf.get( + YarnConfiguration.NM_USER_HOME_DIR, + YarnConfiguration.DEFAULT_NM_USER_HOME_DIR + ) + ); + + putEnvIfNotNull(environment, Environment.PWD.name(), pwd.toString()); + + putEnvIfNotNull(environment, + Environment.HADOOP_CONF_DIR.name(), + System.getenv(Environment.HADOOP_CONF_DIR.name()) + ); + + putEnvIfNotNull(environment, + ApplicationConstants.LOCAL_DIR_ENV, + StringUtils.join(",", appDirs) + ); + + if (!Shell.WINDOWS) { + environment.put("JVM_PID", "$$"); + } + + /** + * Modifiable environment variables + */ + + putEnvIfAbsent(environment, Environment.JAVA_HOME.name()); + putEnvIfAbsent(environment, Environment.HADOOP_COMMON_HOME.name()); + putEnvIfAbsent(environment, Environment.HADOOP_HDFS_HOME.name()); + putEnvIfAbsent(environment, Environment.YARN_HOME.name()); + + } + private static void writeLaunchEnv(OutputStream out, Map environment, Map resources, - List command, List appDirs) + List command) throws IOException { ShellScriptBuilder sb = new ShellScriptBuilder(); - if (System.getenv("YARN_HOME") != null) { - // TODO: Get from whitelist. 
- sb.env("YARN_HOME", System.getenv("YARN_HOME")); - } - sb.env(ApplicationConstants.LOCAL_DIR_ENV, StringUtils.join(",", appDirs)); - if (!Shell.WINDOWS) { - sb.env("JVM_PID", "$$"); - } if (environment != null) { for (Map.Entry env : environment.entrySet()) { sb.env(env.getKey().toString(), env.getValue().toString()); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsPage.java index 68b0686a254..e0795613b65 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsPage.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsPage.java @@ -31,8 +31,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.factories.RecordFactory; -import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; @@ -56,22 +54,26 @@ public class ContainerLogsPage extends NMView { private final Configuration conf; private final LocalDirAllocator logsSelector; private final Context nmContext; - private final RecordFactory recordFactory; @Inject public ContainersLogsBlock(Configuration conf, Context context) { this.conf = conf; this.logsSelector = new LocalDirAllocator(YarnConfiguration.NM_LOG_DIRS); this.nmContext = context; - this.recordFactory = RecordFactoryProvider.getRecordFactory(conf); } @Override protected void render(Block html) { DIV div = html.div("#content"); - ContainerId containerId = - ConverterUtils.toContainerId(this.recordFactory, $(CONTAINER_ID)); + ContainerId containerId; + try { + containerId = ConverterUtils.toContainerId($(CONTAINER_ID)); + } catch (IOException e) { + div.h1("Invalid containerId " + $(CONTAINER_ID))._(); + return; + } + Container container = this.nmContext.getContainers().get(containerId); if (container == null) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java index 27be38a0299..de76b84e277 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerPage.java @@ -18,20 +18,21 @@ package org.apache.hadoop.yarn.server.nodemanager.webapp; +import static org.apache.hadoop.yarn.util.StringHelper.ujoin; import static 
org.apache.hadoop.yarn.webapp.view.JQueryUI.ACCORDION; import static org.apache.hadoop.yarn.webapp.view.JQueryUI.initID; -import static org.apache.hadoop.yarn.util.StringHelper.ujoin; -import org.apache.hadoop.conf.Configuration; +import java.io.IOException; + import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.factories.RecordFactory; -import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.webapp.SubView; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; +import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.DIV; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; import org.apache.hadoop.yarn.webapp.view.InfoBlock; @@ -53,22 +54,30 @@ public class ContainerPage extends NMView implements NMWebParams { public static class ContainerBlock extends HtmlBlock implements NMWebParams { - private final Configuration conf; private final Context nmContext; - private final RecordFactory recordFactory; @Inject - public ContainerBlock(Configuration conf, Context nmContext) { - this.conf = conf; + public ContainerBlock(Context nmContext) { this.nmContext = nmContext; - this.recordFactory = RecordFactoryProvider.getRecordFactory(this.conf); } @Override protected void render(Block html) { - ContainerId containerID = - ConverterUtils.toContainerId(this.recordFactory, $(CONTAINER_ID)); + ContainerId containerID; + try { + containerID = ConverterUtils.toContainerId($(CONTAINER_ID)); + } catch (IOException e) { + html.p()._("Invalid containerId " + $(CONTAINER_ID))._(); + return; + } + + DIV div = html.div("#content"); Container container = this.nmContext.getContainers().get(containerID); + if (container == null) { + div.h1("Unknown Container. 
Container might have completed, " + + "please go back to the previous page and retry.")._(); + return; + } ContainerStatus containerData = container.cloneAndGetContainerStatus(); int exitCode = containerData.getExitStatus(); String exiStatus = diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java index 307e87eccd6..a043a37f594 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/WebServer.java @@ -57,7 +57,7 @@ public class WebServer extends AbstractService { LOG.info("Instantiating NMWebApp at " + bindAddress); try { this.webApp = - WebApps.$for("yarn", Context.class, this.nmContext) + WebApps.$for("node", Context.class, this.nmContext) .at(bindAddress).with(getConfig()) .start(new NMWebApp(this.resourceView)); } catch (Exception e) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties index 897bca3f414..96108ab9656 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/container-log4j.properties @@ -12,12 +12,12 @@ log4j.threshold=ALL # #Default values -hadoop.yarn.mr.containerLogDir=null -hadoop.yarn.mr.totalLogFileSize=100 +yarn.app.mapreduce.container.log.dir=null +yarn.app.mapreduce.container.log.filesize=100 log4j.appender.CLA=org.apache.hadoop.yarn.ContainerLogAppender -log4j.appender.CLA.containerLogDir=${hadoop.yarn.mr.containerLogDir} -log4j.appender.CLA.totalLogFileSize=${hadoop.yarn.mr.totalLogFileSize} +log4j.appender.CLA.containerLogDir=${yarn.app.mapreduce.container.log.dir} +log4j.appender.CLA.totalLogFileSize=${yarn.app.mapreduce.container.log.filesize} log4j.appender.CLA.layout=org.apache.log4j.PatternLayout log4j.appender.CLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java index 6ee220b674a..44328dbe0aa 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java @@ -105,7 +105,31 @@ public class TestContainerManagerWithLCE extends TestContainerManager { LOG.info("Running testContainerLaunchAndStop"); 
super.testContainerLaunchAndStop(); } + + @Override + public void testContainerLaunchAndExitSuccess() throws IOException, + InterruptedException { + // Don't run the test if the binary is not available. + if (!shouldRunTest()) { + LOG.info("LCE binary path is not passed. Not running the test"); + return; + } + LOG.info("Running testContainerLaunchAndExitSuccess"); + super.testContainerLaunchAndExitSuccess(); + } + @Override + public void testContainerLaunchAndExitFailure() throws IOException, + InterruptedException { + // Don't run the test if the binary is not available. + if (!shouldRunTest()) { + LOG.info("LCE binary path is not passed. Not running the test"); + return; + } + LOG.info("Running testContainerLaunchAndExitFailure"); + super.testContainerLaunchAndExitFailure(); + } + @Override public void testLocalFilesCleanup() throws InterruptedException, IOException { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index 87460d045ce..2de0428cb6b 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -287,7 +287,95 @@ public class TestContainerManager extends BaseContainerManagerTest { exec.signalContainer(user, pid, Signal.NULL)); } + + private void testContainerLaunchAndExit(int exitCode) throws IOException, InterruptedException { + File scriptFile = new File(tmpDir, "scriptFile.sh"); + PrintWriter fileWriter = new PrintWriter(scriptFile); + File processStartFile = + new File(tmpDir, "start_file.txt").getAbsoluteFile(); + fileWriter.write("\numask 0"); // So that start file is readable by the test + fileWriter.write("\necho Hello World! 
> " + processStartFile); + fileWriter.write("\necho $$ >> " + processStartFile); + + // Have script throw an exit code at the end + if (exitCode != 0) { + fileWriter.write("\nexit "+exitCode); + } + + fileWriter.close(); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + + // ////// Construct the Container-id + ContainerId cId = createContainerId(); + containerLaunchContext.setContainerId(cId); + + containerLaunchContext.setUser(user); + + URL resource_alpha = + ConverterUtils.getYarnUrlFromPath(localFS + .makeQualified(new Path(scriptFile.getAbsolutePath()))); + LocalResource rsrc_alpha = + recordFactory.newRecordInstance(LocalResource.class); + rsrc_alpha.setResource(resource_alpha); + rsrc_alpha.setSize(-1); + rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION); + rsrc_alpha.setType(LocalResourceType.FILE); + rsrc_alpha.setTimestamp(scriptFile.lastModified()); + String destinationFile = "dest_file"; + Map localResources = + new HashMap(); + localResources.put(destinationFile, rsrc_alpha); + containerLaunchContext.setLocalResources(localResources); + containerLaunchContext.setUser(containerLaunchContext.getUser()); + List commands = new ArrayList(); + commands.add("/bin/bash"); + commands.add(scriptFile.getAbsolutePath()); + containerLaunchContext.setCommands(commands); + containerLaunchContext.setResource(recordFactory + .newRecordInstance(Resource.class)); + containerLaunchContext.getResource().setMemory(100 * 1024 * 1024); + + StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); + startRequest.setContainerLaunchContext(containerLaunchContext); + containerManager.startContainer(startRequest); + + BaseContainerManagerTest.waitForContainerState(containerManager, cId, + ContainerState.COMPLETE); + + GetContainerStatusRequest gcsRequest = + recordFactory.newRecordInstance(GetContainerStatusRequest.class); + gcsRequest.setContainerId(cId); + ContainerStatus containerStatus = + containerManager.getContainerStatus(gcsRequest).getStatus(); + + // Verify exit status matches exit state of script + Assert.assertEquals(exitCode, + containerStatus.getExitStatus()); + } + + @Test + public void testContainerLaunchAndExitSuccess() throws IOException, InterruptedException { + containerManager.start(); + int exitCode = 0; + + // launch context for a command that will return exit code 0 + // and verify exit code returned + testContainerLaunchAndExit(exitCode); + } + + @Test + public void testContainerLaunchAndExitFailure() throws IOException, InterruptedException { + containerManager.start(); + int exitCode = 50; + + // launch context for a command that will return exit code 0 + // and verify exit code returned + testContainerLaunchAndExit(exitCode); + } + @Test public void testLocalFilesCleanup() throws InterruptedException, IOException { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index 04d400ad18d..48c745457a7 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ 
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -38,8 +38,6 @@ import java.util.Map.Entry; import java.util.Random; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.LocalResource; @@ -135,6 +133,28 @@ public class TestContainer { } } + @Test + @SuppressWarnings("unchecked") // mocked generic + public void testExternalKill() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(13, 314159265358979L, 4344, "yak"); + wc.initContainer(); + wc.localizeResources(); + wc.launchContainer(); + reset(wc.localizerBus); + wc.containerKilledOnRequest(); + assertEquals(ContainerState.EXITED_WITH_FAILURE, + wc.c.getContainerState()); + verifyCleanupCall(wc); + } + finally { + if (wc != null) { + wc.finished(); + } + } + } + @Test @SuppressWarnings("unchecked") // mocked generic public void testCleanupOnFailure() throws Exception { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml index b4c398f70f0..d94f5973144 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml @@ -37,6 +37,20 @@ + + + + maven-jar-plugin + + + + test-jar + + test-compile + + + + maven-antrun-plugin @@ -98,4 +112,41 @@ + + + + visualize + + false + + + + + org.codehaus.mojo + exec-maven-plugin + 1.2 + + + compile + + java + + + org.apache.hadoop.yarn.util.VisualizeStateMachine + + ResourceManager + org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl, + org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl, + org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl, + org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl + ResourceManager.gv + + + + + + + + + diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index 1fc34f0dfd0..6237f8961f0 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -84,7 +84,7 @@ public class AdminService extends AbstractService implements RMAdminProtocol { super.init(conf); String bindAddress = conf.get(YarnConfiguration.RM_ADMIN_ADDRESS, - YarnConfiguration.RM_ADMIN_ADDRESS); + YarnConfiguration.DEFAULT_RM_ADMIN_ADDRESS); masterServiceAddress = NetUtils.createSocketAddr(bindAddress); adminAcl = new AccessControlList( diff --git 
a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index a31bef8af9d..2cf19000985 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -36,8 +36,8 @@ import org.apache.hadoop.security.SecurityInfo; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationResponse; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetAllApplicationsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; @@ -46,8 +46,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesResponse; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; @@ -165,11 +165,17 @@ public class ClientRMService extends AbstractService implements } @Override - public GetNewApplicationIdResponse getNewApplicationId( - GetNewApplicationIdRequest request) throws YarnRemoteException { - GetNewApplicationIdResponse response = recordFactory - .newRecordInstance(GetNewApplicationIdResponse.class); + public GetNewApplicationResponse getNewApplication( + GetNewApplicationRequest request) throws YarnRemoteException { + GetNewApplicationResponse response = recordFactory + .newRecordInstance(GetNewApplicationResponse.class); response.setApplicationId(getNewApplicationId()); + // Pick up min/max resource from scheduler... 
+ response.setMinimumResourceCapability(scheduler + .getMinimumResourceCapability()); + response.setMaximumResourceCapability(scheduler + .getMaximumResourceCapability()); + return response; } @@ -228,8 +234,8 @@ public class ClientRMService extends AbstractService implements @SuppressWarnings("unchecked") @Override - public FinishApplicationResponse finishApplication( - FinishApplicationRequest request) throws YarnRemoteException { + public KillApplicationResponse forceKillApplication( + KillApplicationRequest request) throws YarnRemoteException { ApplicationId applicationId = request.getApplicationId(); @@ -262,8 +268,8 @@ public class ClientRMService extends AbstractService implements RMAuditLogger.logSuccess(callerUGI.getShortUserName(), AuditConstants.KILL_APP_REQUEST, "ClientRMService" , applicationId); - FinishApplicationResponse response = recordFactory - .newRecordInstance(FinishApplicationResponse.class); + KillApplicationResponse response = recordFactory + .newRecordInstance(KillApplicationResponse.class); return response; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index d0cd0a7ff86..3f175a34a0a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -30,6 +30,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.security.ApplicationTokenIdentifier; import org.apache.hadoop.yarn.security.client.ClientToAMSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; @@ -250,13 +251,10 @@ public class RMAppManager implements EventHandler { if (rmContext.getRMApps().putIfAbsent(applicationId, application) != null) { - LOG.info("Application with id " + applicationId + - " is already present! Cannot add a duplicate!"); - // don't send event through dispatcher as it will be handled by app - // already present with this id. - application.handle(new RMAppRejectedEvent(applicationId, - "Application with this id is already present! " + - "Cannot add a duplicate!")); + String message = "Application with id " + applicationId + + " is already present! 
Cannot add a duplicate!"; + LOG.info(message); + throw RPCUtil.getRemoteException(message); } else { this.rmContext.getDispatcher().getEventHandler().handle( new RMAppEvent(applicationId, RMAppEventType.START)); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java index 85cd8825daa..997906a62e4 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java @@ -22,7 +22,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore; @@ -31,7 +30,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; -import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; public class RMContextImpl implements RMContext { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 179b56a4af4..8a56d504d69 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -41,6 +41,7 @@ import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.ApplicationTokenSecretManager; import org.apache.hadoop.yarn.security.client.ClientToAMSecretManager; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store; @@ -97,7 +98,7 @@ public class ResourceManager extends CompositeService implements Recoverable { private ContainerAllocationExpirer containerAllocationExpirer; protected NMLivelinessMonitor nmLivelinessMonitor; protected NodesListManager nodesListManager; - private SchedulerEventDispatcher schedulerDispatcher; + private EventHandler 
schedulerDispatcher; protected RMAppManager rmAppManager; private WebApp webApp; @@ -118,7 +119,7 @@ public class ResourceManager extends CompositeService implements Recoverable { @Override public synchronized void init(Configuration conf) { - this.rmDispatcher = new AsyncDispatcher(); + this.rmDispatcher = createDispatcher(); addIfService(this.rmDispatcher); this.containerAllocationExpirer = new ContainerAllocationExpirer( @@ -137,8 +138,8 @@ public class ResourceManager extends CompositeService implements Recoverable { this.conf = new YarnConfiguration(conf); // Initialize the scheduler this.scheduler = createScheduler(); - this.schedulerDispatcher = new SchedulerEventDispatcher(this.scheduler); - addService(this.schedulerDispatcher); + this.schedulerDispatcher = createSchedulerEventDispatcher(); + addIfService(this.schedulerDispatcher); this.rmDispatcher.register(SchedulerEventType.class, this.schedulerDispatcher); @@ -186,11 +187,22 @@ public class ResourceManager extends CompositeService implements Recoverable { addService(adminService); this.applicationMasterLauncher = createAMLauncher(); + this.rmDispatcher.register(AMLauncherEventType.class, + this.applicationMasterLauncher); + addService(applicationMasterLauncher); super.init(conf); } + protected EventHandler createSchedulerEventDispatcher() { + return new SchedulerEventDispatcher(this.scheduler); + } + + protected Dispatcher createDispatcher() { + return new AsyncDispatcher(); + } + protected void addIfService(Object object) { if (object instanceof Service) { addService((Service) object); @@ -381,7 +393,7 @@ public class ResourceManager extends CompositeService implements Recoverable { } protected void startWepApp() { - webApp = WebApps.$for("yarn", masterService).at( + webApp = WebApps.$for("cluster", masterService).at( conf.get(YarnConfiguration.RM_WEBAPP_ADDRESS, YarnConfiguration.DEFAULT_RM_WEBAPP_ADDRESS)). start(new RMWebApp(this)); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java index b394faa85d2..337f4816890 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java @@ -136,7 +136,7 @@ public class AMLauncher implements Runnable { containerMgrProxy.stopContainer(stopRequest); } - private ContainerManager getContainerMgrProxy( + protected ContainerManager getContainerMgrProxy( final ApplicationId applicationID) throws IOException { Container container = application.getMasterContainer(); @@ -173,23 +173,11 @@ public class AMLauncher implements Runnable { // Construct the actual Container ContainerLaunchContext container = applicationMasterContext.getAMContainerSpec(); - StringBuilder mergedCommand = new StringBuilder(); - String failCount = Integer.toString(application.getAppAttemptId() - .getAttemptId()); - List commandList = new ArrayList(); - for (String str : container.getCommands()) { - // This is out-right wrong. AM FAIL count should be passed via env. 
- String result = - str.replaceFirst(ApplicationConstants.AM_FAIL_COUNT_STRING, - failCount); - mergedCommand.append(result).append(" "); - commandList.add(result); - } - container.setCommands(commandList); - /** add the failed count to the app master command line */ - - LOG.info("Command to launch container " + - containerID + " : " + mergedCommand); + LOG.info("Command to launch container " + + containerID + + " : " + + StringUtils.arrayToString(container.getCommands().toArray( + new String[0]))); // Finalize the container container.setContainerId(containerID); @@ -203,6 +191,11 @@ public class AMLauncher implements Runnable { ContainerLaunchContext container) throws IOException { Map environment = container.getEnvironment(); + + // Set the AppAttemptId to be consumable by the AM. + environment.put(ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV, + application.getAppAttemptId().toString()); + if (UserGroupInformation.isSecurityEnabled()) { // TODO: Security enabled/disabled info should come from RM. diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java index d1ef1d14004..a25a4312b17 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/ApplicationMasterLauncher.java @@ -42,17 +42,16 @@ public class ApplicationMasterLauncher extends AbstractService implements private final BlockingQueue masterEvents = new LinkedBlockingQueue(); - private ApplicationTokenSecretManager applicationTokenSecretManager; + protected ApplicationTokenSecretManager applicationTokenSecretManager; private ClientToAMSecretManager clientToAMSecretManager; - private final RMContext context; + protected final RMContext context; - public ApplicationMasterLauncher(ApplicationTokenSecretManager - applicationTokenSecretManager, ClientToAMSecretManager clientToAMSecretManager, + public ApplicationMasterLauncher( + ApplicationTokenSecretManager applicationTokenSecretManager, + ClientToAMSecretManager clientToAMSecretManager, RMContext context) { super(ApplicationMasterLauncher.class.getName()); this.context = context; - /* register to dispatcher */ - this.context.getDispatcher().register(AMLauncherEventType.class, this); this.launcherPool = new ThreadPoolExecutor(1, 10, 1, TimeUnit.HOURS, new LinkedBlockingQueue()); this.launcherHandlingThread = new LauncherThread(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java index 2e739a98b99..6e63e2248d6 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java +++ 
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java @@ -24,7 +24,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; @@ -33,7 +32,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; * look at {@link RMAppImpl} for its implementation. This interface * exposes methods to access various updates in application status/report. */ -public interface RMApp extends EventHandler{ +public interface RMApp extends EventHandler { /** * The application id for this {@link RMApp}. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Constants.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFailedAttemptEvent.java similarity index 64% rename from hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Constants.java rename to hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFailedAttemptEvent.java index e8a202ed44b..111c6acc41b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Constants.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppFailedAttemptEvent.java @@ -16,12 +16,21 @@ * limitations under the License. 
*/ -package org.apache.hadoop.mapred; +package org.apache.hadoop.yarn.server.resourcemanager.rmapp; -public class Constants { - static final String OUTPUT = "output"; - public static final String HADOOP_WORK_DIR = "HADOOP_WORK_DIR"; - public static final String JOBFILE = "job.xml"; - public static final String STDOUT_LOGFILE_ENV = "STDOUT_LOGFILE_ENV"; - public static final String STDERR_LOGFILE_ENV = "STDERR_LOGFILE_ENV"; +import org.apache.hadoop.yarn.api.records.ApplicationId; + +public class RMAppFailedAttemptEvent extends RMAppEvent { + + private final String diagnostics; + + public RMAppFailedAttemptEvent(ApplicationId appId, RMAppEventType event, + String diagnostics) { + super(appId, event); + this.diagnostics = diagnostics; + } + + public String getDiagnostics() { + return this.diagnostics; + } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 9246d1838c7..94d04a8d127 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -39,6 +39,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService; import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent; import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType; @@ -86,7 +87,8 @@ public class RMAppImpl implements RMApp { private long startTime; private long finishTime; private RMAppAttempt currentAttempt; - + @SuppressWarnings("rawtypes") + private EventHandler handler; private static final FinalTransition FINAL_TRANSITION = new FinalTransition(); private static final StateMachineFactory(RMAppState.NEW) - // TODO - ATTEMPT_KILLED not sent right now but should handle if - // attempt starts sending - // Transitions from NEW state .addTransition(RMAppState.NEW, RMAppState.SUBMITTED, RMAppEventType.START, new StartAppAttemptTransition()) @@ -116,7 +115,7 @@ public class RMAppImpl implements RMApp { .addTransition(RMAppState.SUBMITTED, RMAppState.ACCEPTED, RMAppEventType.APP_ACCEPTED) .addTransition(RMAppState.SUBMITTED, RMAppState.KILLED, - RMAppEventType.KILL, new AppKilledTransition()) + RMAppEventType.KILL, new KillAppAndAttemptTransition()) // Transitions from ACCEPTED state .addTransition(RMAppState.ACCEPTED, RMAppState.RUNNING, @@ -126,7 +125,7 @@ public class RMAppImpl implements RMApp { RMAppEventType.ATTEMPT_FAILED, new AttemptFailedTransition(RMAppState.SUBMITTED)) .addTransition(RMAppState.ACCEPTED, RMAppState.KILLED, - RMAppEventType.KILL, new AppKilledTransition()) + RMAppEventType.KILL, new KillAppAndAttemptTransition()) // Transitions from RUNNING state .addTransition(RMAppState.RUNNING, RMAppState.FINISHED, @@ -136,7 +135,7 @@ public class RMAppImpl implements RMApp { 
RMAppEventType.ATTEMPT_FAILED, new AttemptFailedTransition(RMAppState.SUBMITTED)) .addTransition(RMAppState.RUNNING, RMAppState.KILLED, - RMAppEventType.KILL, new AppKilledTransition()) + RMAppEventType.KILL, new KillAppAndAttemptTransition()) // Transitions from FINISHED state .addTransition(RMAppState.FINISHED, RMAppState.FINISHED, @@ -168,6 +167,7 @@ public class RMAppImpl implements RMApp { this.name = name; this.rmContext = rmContext; this.dispatcher = rmContext.getDispatcher(); + this.handler = dispatcher.getEventHandler(); this.conf = config; this.user = user; this.queue = queue; @@ -310,7 +310,8 @@ public class RMAppImpl implements RMApp { return BuilderUtils.newApplicationReport(this.applicationId, this.user, this.queue, this.name, host, rpcPort, clientToken, createApplicationState(this.stateMachine.getCurrentState()), - this.diagnostics.toString(), trackingUrl, this.startTime); + this.diagnostics.toString(), trackingUrl, + this.startTime, this.finishTime); } finally { this.readLock.unlock(); } @@ -402,7 +403,7 @@ public class RMAppImpl implements RMApp { submissionContext); attempts.put(appAttemptId, attempt); currentAttempt = attempt; - dispatcher.getEventHandler().handle( + handler.handle( new RMAppAttemptEvent(appAttemptId, RMAppAttemptEventType.START)); } @@ -419,13 +420,23 @@ public class RMAppImpl implements RMApp { }; } - private static final class AppKilledTransition extends FinalTransition { + private static class AppKilledTransition extends FinalTransition { + @Override public void transition(RMAppImpl app, RMAppEvent event) { app.diagnostics.append("Application killed by user."); super.transition(app, event); }; } + private static class KillAppAndAttemptTransition extends AppKilledTransition { + @SuppressWarnings("unchecked") + @Override + public void transition(RMAppImpl app, RMAppEvent event) { + app.handler.handle(new RMAppAttemptEvent(app.currentAttempt.getAppAttemptId(), + RMAppAttemptEventType.KILL)); + super.transition(app, event); + } + } private static final class AppRejectedTransition extends FinalTransition{ public void transition(RMAppImpl app, RMAppEvent event) { @@ -449,11 +460,11 @@ public class RMAppImpl implements RMApp { public void transition(RMAppImpl app, RMAppEvent event) { Set nodes = getNodesOnWhichAttemptRan(app); for (NodeId nodeId : nodes) { - app.dispatcher.getEventHandler().handle( + app.handler.handle( new RMNodeCleanAppEvent(nodeId, app.applicationId)); } app.finishTime = System.currentTimeMillis(); - app.dispatcher.getEventHandler().handle( + app.handler.handle( new RMAppManagerEvent(app.applicationId, RMAppManagerEventType.APP_COMPLETED)); }; @@ -470,11 +481,13 @@ public class RMAppImpl implements RMApp { @Override public RMAppState transition(RMAppImpl app, RMAppEvent event) { - + + RMAppFailedAttemptEvent failedEvent = ((RMAppFailedAttemptEvent)event); if (app.attempts.size() == app.maxRetries) { String msg = "Application " + app.getApplicationId() + " failed " + app.maxRetries - + " times. Failing the application."; + + " times due to " + failedEvent.getDiagnostics() + + ". 
Failing the application."; LOG.info(msg); app.diagnostics.append(msg); // Inform the node for app-finish diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java index 70747deacba..aeb3d2af045 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java @@ -36,7 +36,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; * {@link YarnConfiguration#RM_AM_MAX_RETRIES}. For specific * implementation take a look at {@link RMAppAttemptImpl}. */ -public interface RMAppAttempt extends EventHandler{ +public interface RMAppAttempt extends EventHandler { /** * Get the application attempt id for this {@link RMAppAttempt}. @@ -79,7 +79,7 @@ public interface RMAppAttempt extends EventHandler{ * Diagnostics information for the application attempt. * @return diagnostics information for the application attempt. */ - StringBuilder getDiagnostics(); + String getDiagnostics(); /** * Progress for the application attempt. diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 7c6357defab..7f8ff82d6a1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -31,6 +31,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -47,6 +48,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAcquiredEvent; import 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerFinishedEvent; @@ -104,10 +106,10 @@ public class RMAppAttemptImpl implements RMAppAttempt { private Container masterContainer; private float progress = 0; - private String host; + private String host = "N/A"; private int rpcPort; - private String trackingUrl; - private String finalState; + private String trackingUrl = "N/A"; + private String finalState = "N/A"; private final StringBuilder diagnostics = new StringBuilder(); private static final StateMachineFactory { this.httpPort = httpPort; this.totalCapability = capability; this.nodeAddress = hostName + ":" + cmPort; - this.httpAddress = hostName + ":" + httpPort;; + this.httpAddress = hostName + ":" + httpPort; this.node = node; this.nodeHealthStatus.setIsNodeHealthy(true); + this.nodeHealthStatus.setHealthReport("Healthy"); this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis()); this.latestHeartBeatResponse.setResponseId(0); @@ -222,6 +223,18 @@ public class RMNodeImpl implements RMNode, EventHandler { } } + private void setNodeHealthStatus(NodeHealthStatus status) + { + this.writeLock.lock(); + try { + this.nodeHealthStatus.setHealthReport(status.getHealthReport()); + this.nodeHealthStatus.setIsNodeHealthy(status.getIsNodeHealthy()); + this.nodeHealthStatus.setLastHealthReportTime(status.getLastHealthReportTime()); + } finally { + this.writeLock.unlock(); + } + } + @Override public RMNodeState getState() { this.readLock.lock(); @@ -345,7 +358,10 @@ public class RMNodeImpl implements RMNode, EventHandler { // Switch the last heartbeatresponse. rmNode.latestHeartBeatResponse = statusEvent.getLatestResponse(); - if (!statusEvent.getNodeHealthStatus().getIsNodeHealthy()) { + NodeHealthStatus remoteNodeHealthStatus = + statusEvent.getNodeHealthStatus(); + rmNode.setNodeHealthStatus(remoteNodeHealthStatus); + if (!remoteNodeHealthStatus.getIsNodeHealthy()) { // Inform the scheduler rmNode.context.getDispatcher().getEventHandler().handle( new NodeRemovedSchedulerEvent(rmNode)); @@ -392,8 +408,9 @@ public class RMNodeImpl implements RMNode, EventHandler { // Switch the last heartbeatresponse. 
rmNode.latestHeartBeatResponse = statusEvent.getLatestResponse(); - - if (statusEvent.getNodeHealthStatus().getIsNodeHealthy()) { + NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus(); + rmNode.setNodeHealthStatus(remoteNodeHealthStatus); + if (remoteNodeHealthStatus.getIsNodeHealthy()) { rmNode.context.getDispatcher().getEventHandler().handle( new NodeAddedSchedulerEvent(rmNode)); return RMNodeState.RUNNING; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index 61c829507e2..6928cdb19d0 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -32,10 +32,8 @@ import static org.apache.hadoop.metrics2.lib.Interns.info; import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MutableCounterInt; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; -import org.apache.hadoop.yarn.api.records.ApplicationState; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; -import org.apache.hadoop.yarn.util.Self; import static org.apache.hadoop.yarn.server.resourcemanager.resource.Resources.*; import org.slf4j.LoggerFactory; @@ -282,4 +280,56 @@ public class QueueMetrics { parent.unreserveResource(user, res); } } + + public int getAppsSubmitted() { + return appsSubmitted.value(); + } + + public int getAppsRunning() { + return appsRunning.value(); + } + + public int getAppsPending() { + return appsPending.value(); + } + + public int getAppsCompleted() { + return appsCompleted.value(); + } + + public int getAppsKilled() { + return appsKilled.value(); + } + + public int getAppsFailed() { + return appsFailed.value(); + } + + public int getAllocatedGB() { + return allocatedGB.value(); + } + + public int getAllocatedContainers() { + return allocatedContainers.value(); + } + + public int getAvailableGB() { + return availableGB.value(); + } + + public int getPendingGB() { + return pendingGB.value(); + } + + public int getPendingContainers() { + return pendingContainers.value(); + } + + public int getReservedGB() { + return reservedGB.value(); + } + + public int getReservedContainers() { + return reservedContainers.value(); + } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java index b4037aaeaf7..10913e09999 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java +++ 
b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApp.java @@ -207,13 +207,18 @@ public class SchedulerApp { .getDispatcher().getEventHandler(), this.rmContext .getContainerAllocationExpirer()); + // Add it to allContainers list. + newlyAllocatedContainers.add(rmContainer); + liveContainers.put(container.getId(), rmContainer); + // Update consumption and track allocations - + appSchedulingInfo.allocate(type, node, priority, request, container); + Resources.addTo(currentConsumption, container.getResource()); + // Inform the container rmContainer.handle( new RMContainerEvent(container.getId(), RMContainerEventType.START)); - Resources.addTo(currentConsumption, container.getResource()); if (LOG.isDebugEnabled()) { LOG.debug("allocate: applicationAttemptId=" + container.getId().getApplicationAttemptId() @@ -223,12 +228,6 @@ public class SchedulerApp { RMAuditLogger.logSuccess(getUser(), AuditConstants.ALLOC_CONTAINER, "SchedulerApp", getApplicationId(), container.getId()); - - // Add it to allContainers list. - newlyAllocatedContainers.add(rmContainer); - liveContainers.put(container.getId(), rmContainer); - - appSchedulingInfo.allocate(type, node, priority, request, container); return rmContainer; } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 9a3b1c4da35..e67d371ee61 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -1046,19 +1046,20 @@ public class LeafQueue implements CSQueue { } private Container getContainer(RMContainer rmContainer, - SchedulerApp application, SchedulerNode node, Resource capability) { + SchedulerApp application, SchedulerNode node, + Resource capability, Priority priority) { return (rmContainer != null) ? rmContainer.getContainer() : - createContainer(application, node, capability); + createContainer(application, node, capability, priority); } public Container createContainer(SchedulerApp application, SchedulerNode node, - Resource capability) { + Resource capability, Priority priority) { Container container = BuilderUtils.newContainer(this.recordFactory, application.getApplicationAttemptId(), application.getNewContainerId(), - node.getNodeID(), - node.getHttpAddress(), capability); + node.getNodeID(), node.getHttpAddress(), + capability, priority); // If security is enabled, send the container-tokens too. if (UserGroupInformation.isSecurityEnabled()) { @@ -1099,7 +1100,7 @@ public class LeafQueue implements CSQueue { // Create the container if necessary Container container = - getContainer(rmContainer, application, node, capability); + getContainer(rmContainer, application, node, capability, priority); // Can we allocate a container on this node? 
int availableContainers = @@ -1152,14 +1153,17 @@ public class LeafQueue implements CSQueue { private void reserve(SchedulerApp application, Priority priority, SchedulerNode node, RMContainer rmContainer, Container container) { - rmContainer = application.reserve(node, priority, rmContainer, container); - node.reserveResource(application, priority, rmContainer); - // Update reserved metrics if this is the first reservation if (rmContainer == null) { getMetrics().reserveResource( application.getUser(), container.getResource()); } + + // Inform the application + rmContainer = application.reserve(node, priority, rmContainer, container); + + // Update the node + node.reserveResource(application, priority, rmContainer); } private void unreserve(SchedulerApp application, Priority priority, diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/NodeUpdateSchedulerEvent.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/NodeUpdateSchedulerEvent.java index 9f3bc1cce7a..ff51d62d910 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/NodeUpdateSchedulerEvent.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/event/NodeUpdateSchedulerEvent.java @@ -19,10 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.event; import java.util.List; -import java.util.Map; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 752b81ce5de..7a90c5b6fac 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -291,7 +291,7 @@ public class FifoScheduler implements ResourceScheduler { @SuppressWarnings("unchecked") private synchronized void addApplication(ApplicationAttemptId appAttemptId, - String queueName, String user) { + String user) { // TODO: Fix store SchedulerApp schedulerApp = new SchedulerApp(appAttemptId, user, DEFAULT_QUEUE, @@ -528,7 +528,8 @@ public class FifoScheduler implements ResourceScheduler { application.getApplicationAttemptId(), application.getNewContainerId(), node.getRMNode().getNodeID(), - node.getRMNode().getHttpAddress(), capability); + node.getRMNode().getHttpAddress(), + capability, priority); // If security is enabled, send the container-tokens too. 
if (UserGroupInformation.isSecurityEnabled()) { @@ -627,7 +628,7 @@ public class FifoScheduler implements ResourceScheduler { { AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event; addApplication(appAddedEvent.getApplicationAttemptId(), appAddedEvent - .getQueue(), appAddedEvent.getUser()); + .getUser()); } break; case APP_REMOVED: diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java index a621cc10472..1d074e3160b 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java @@ -76,7 +76,7 @@ class NodesPage extends RmView { // TODO: FIXME Vinodkv // td(String.valueOf(ni.getUsedResource().getMemory())). // td(String.valueOf(ni.getAvailableResource().getMemory())). - _(); + td("n/a")._(); } tbody._()._(); } @@ -100,7 +100,7 @@ class NodesPage extends RmView { // rack, nodeid, host, healthStatus, health update ts, health report, // containers, memused, memavail append(", aoColumns:[null, null, null, null, null, null, "). - append("{bSearchable:false},{bSearchable:false},{bSearchable:false}]}"). + append("{sType:'title-numeric', bSearchable:false}]}"). toString(); } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java index 234f93e2f0c..698bc3c933a 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java @@ -102,7 +102,7 @@ public class RmController extends Controller { .getMasterContainer(); if (masterContainer != null) { String url = join("http://", masterContainer.getNodeHttpAddress(), - "/yarn", "/containerlogs/", + "/node", "/containerlogs/", ConverterUtils.toString(masterContainer.getId())); info._("AM container logs:", url, url); } else { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java index 9a9ae2f51cd..727cd1a2323 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java @@ -38,6 
+38,7 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.Records; public class MockAM { @@ -128,7 +129,7 @@ public class MockAM { req.setHostName(resource); req.setNumContainers(containers); Priority pri = Records.newRecord(Priority.class); - pri.setPriority(1); + pri.setPriority(priority); req.setPriority(pri); Resource capability = Records.newRecord(Resource.class); capability.setMemory(memory); @@ -139,11 +140,8 @@ public class MockAM { public AMResponse allocate( List resourceRequest, List releases) throws Exception { - AllocateRequest req = Records.newRecord(AllocateRequest.class); - req.setResponseId(++responseId); - req.setApplicationAttemptId(attemptId); - req.addAllAsks(resourceRequest); - req.addAllReleases(releases); + AllocateRequest req = BuilderUtils.newAllocateRequest(attemptId, + ++responseId, 0F, resourceRequest, releases); AllocateResponse resp = amRMProtocol.allocate(req); return resp.getAMResponse(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 4be27399672..d2a9a11182f 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -22,9 +22,9 @@ import junit.framework.Assert; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.api.ClientRMProtocol; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationIdResponse; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -74,11 +74,17 @@ public class MockRM extends ResourceManager { Assert.assertEquals("App state is not correct (timedout)", finalState, app.getState()); } + + // get new application id + public GetNewApplicationResponse getNewAppId() throws Exception { + ClientRMProtocol client = getClientRMService(); + return client.getNewApplication(Records.newRecord(GetNewApplicationRequest.class)); + } //client public RMApp submitApp(int masterMemory) throws Exception { ClientRMProtocol client = getClientRMService(); - GetNewApplicationIdResponse resp = client.getNewApplicationId(Records.newRecord(GetNewApplicationIdRequest.class)); + GetNewApplicationResponse resp = client.getNewApplication(Records.newRecord(GetNewApplicationRequest.class)); ApplicationId appId = 
resp.getApplicationId(); SubmitApplicationRequest req = Records.newRecord(SubmitApplicationRequest.class); @@ -89,7 +95,7 @@ public class MockRM extends ResourceManager { sub.setUser(""); ContainerLaunchContext clc = Records.newRecord(ContainerLaunchContext.class); - Resource capability = Records.newRecord(Resource.class); + Resource capability = Records.newRecord(Resource.class); capability.setMemory(masterMemory); clc.setResource(capability); sub.setAMContainerSpec(clc); @@ -109,9 +115,9 @@ public class MockRM extends ResourceManager { public void killApp(ApplicationId appId) throws Exception { ClientRMProtocol client = getClientRMService(); - FinishApplicationRequest req = Records.newRecord(FinishApplicationRequest.class); + KillApplicationRequest req = Records.newRecord(KillApplicationRequest.class); req.setApplicationId(appId); - client.finishApplication(req); + client.forceKillApplication(req); } //from AMLauncher @@ -195,6 +201,7 @@ public class MockRM extends ResourceManager { }; } + @Override protected AdminService createAdminService() { return new AdminService(getConfig(), scheduler, getRMContext(), this.nodesListManager){ diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java index a7b5d02c914..72ade5c1da8 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java @@ -45,6 +45,7 @@ import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.exceptions.YarnRemoteException; import org.apache.hadoop.yarn.factories.RecordFactory; @@ -184,7 +185,9 @@ public class NodeManager implements ContainerManager { Container container = BuilderUtils.newContainer(containerLaunchContext.getContainerId(), this.nodeId, nodeHttpAddress, - containerLaunchContext.getResource()); + containerLaunchContext.getResource(), + null // DKDC - Doesn't matter + ); applicationContainers.add(container); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java new file mode 100644 index 00000000000..a12049f9e82 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java @@ -0,0 +1,159 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. 
See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.server.resourcemanager; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.api.ApplicationConstants; +import org.apache.hadoop.yarn.api.ContainerManager; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusResponse; +import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; +import org.apache.hadoop.yarn.api.protocolrecords.StartContainerResponse; +import org.apache.hadoop.yarn.api.protocolrecords.StopContainerRequest; +import org.apache.hadoop.yarn.api.protocolrecords.StopContainerResponse; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.exceptions.YarnRemoteException; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncher; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.log4j.Level; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.junit.Assert; +import org.junit.Test; + +public class TestApplicationMasterLauncher { + + private static final Log LOG = LogFactory + .getLog(TestApplicationMasterLauncher.class); + + private static final class MyContainerManagerImpl implements + ContainerManager { + + boolean launched = false; + boolean cleanedup = false; + String attemptIdAtContainerManager = null; + + @Override + public StartContainerResponse + startContainer(StartContainerRequest request) + throws YarnRemoteException { + LOG.info("Container started by MyContainerManager: " + request); + launched = true; + attemptIdAtContainerManager = request.getContainerLaunchContext() + .getEnvironment().get( + ApplicationConstants.APPLICATION_ATTEMPT_ID_ENV); + return null; + } + + @Override + public StopContainerResponse stopContainer(StopContainerRequest request) + throws YarnRemoteException { + LOG.info("Container cleaned up by MyContainerManager"); + cleanedup = true; + return null; + } + + @Override + public GetContainerStatusResponse getContainerStatus( + GetContainerStatusRequest request) throws YarnRemoteException { + return null; + } + + } + + private static final class MockRMWithCustomAMLauncher extends MockRM { + + private final ContainerManager containerManager; + + public MockRMWithCustomAMLauncher(ContainerManager containerManager) { + 
this.containerManager = containerManager; + } + + @Override + protected ApplicationMasterLauncher createAMLauncher() { + return new ApplicationMasterLauncher(super.appTokenSecretManager, + super.clientToAMSecretManager, getRMContext()) { + @Override + protected Runnable createRunnableLauncher(RMAppAttempt application, + AMLauncherEventType event) { + return new AMLauncher(context, application, event, + applicationTokenSecretManager, clientToAMSecretManager, + getConfig()) { + @Override + protected ContainerManager getContainerMgrProxy( + ApplicationId applicationID) throws IOException { + return containerManager; + } + }; + } + }; + } + } + + @Test + public void testAMLaunchAndCleanup() throws Exception { + Logger rootLogger = LogManager.getRootLogger(); + rootLogger.setLevel(Level.DEBUG); + MyContainerManagerImpl containerManager = new MyContainerManagerImpl(); + MockRMWithCustomAMLauncher rm = new MockRMWithCustomAMLauncher( + containerManager); + rm.start(); + MockNM nm1 = rm.registerNode("h1:1234", 5120); + + RMApp app = rm.submitApp(2000); + + // kick the scheduling + nm1.nodeHeartbeat(true); + + int waitCount = 0; + while (containerManager.launched == false && waitCount++ < 20) { + LOG.info("Waiting for AM Launch to happen.."); + Thread.sleep(1000); + } + Assert.assertTrue(containerManager.launched); + + RMAppAttempt attempt = app.getCurrentAppAttempt(); + ApplicationAttemptId appAttemptId = attempt.getAppAttemptId(); + Assert.assertEquals(appAttemptId.toString(), + containerManager.attemptIdAtContainerManager); + + MockAM am = new MockAM(rm.getRMContext(), rm + .getApplicationMasterService(), appAttemptId); + am.registerAppAttempt(); + am.unregisterAppAttempt(); + + waitCount = 0; + while (containerManager.cleanedup == false && waitCount++ < 20) { + LOG.info("Waiting for AM Cleanup to happen.."); + Thread.sleep(1000); + } + Assert.assertTrue(containerManager.cleanedup); + + am.waitForState(RMAppAttemptState.FINISHED); + rm.stop(); + } +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java index 03941e3625d..3bba11e1fb1 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRM.java @@ -25,6 +25,7 @@ import junit.framework.Assert; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -40,6 +41,20 @@ public class TestRM { private static final Log LOG = LogFactory.getLog(TestRM.class); + @Test + public void testGetNewAppId() throws Exception { + Logger rootLogger = LogManager.getRootLogger(); + rootLogger.setLevel(Level.DEBUG); + MockRM rm = new MockRM(); + rm.start(); + + GetNewApplicationResponse resp = rm.getNewAppId(); + assert (resp.getApplicationId().getId() != 0); + assert (resp.getMinimumResourceCapability().getMemory() > 0); + assert 
(resp.getMaximumResourceCapability().getMemory() > 0); + rm.stop(); + } + @Test public void testAppWithNoContainers() throws Exception { Logger rootLogger = LogManager.getRootLogger(); @@ -119,6 +134,7 @@ public class TestRM { public static void main(String[] args) throws Exception { TestRM t = new TestRM(); + t.testGetNewAppId(); t.testAppWithNoContainers(); t.testAppOnMultiNode(); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java index 60a227bc6d4..536aa672d79 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java @@ -18,12 +18,16 @@ package org.apache.hadoop.yarn.server.resourcemanager; +import static org.junit.Assert.assertNotNull; + import java.io.IOException; +import java.util.Collection; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.yarn.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.recovery.Store; @@ -153,6 +157,23 @@ public class TestResourceManager { LOG.info("--- END: testResourceAllocation ---"); } + + @Test + public void testNodeHealthReportIsNotNull() throws Exception{ + String host1 = "host1"; + final int memory = 4 * 1024; + org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm1 = + registerNode(host1, 1234, 2345, NetworkTopology.DEFAULT_RACK, memory); + nm1.heartbeat(); + nm1.heartbeat(); + Collection values = resourceManager.getRMContext().getRMNodes().values(); + for (RMNode ni : values) + { + NodeHealthStatus nodeHealthStatus = ni.getNodeHealthStatus(); + String healthReport = nodeHealthStatus.getHealthReport(); + assertNotNull(healthReport); + } + } private void checkResourceUsage( org.apache.hadoop.yarn.server.resourcemanager.NodeManager... 
nodes ) { diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java index 61d678ea01c..3bc55473423 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRMRPCResponseId.java @@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.util.BuilderUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -77,13 +78,14 @@ public class TestAMRMRPCResponseId { am.registerAppAttempt(); - AllocateRequest allocateRequest = recordFactory.newRecordInstance(AllocateRequest.class); - allocateRequest.setApplicationAttemptId(attempt.getAppAttemptId()); + AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest(attempt + .getAppAttemptId(), 0, 0F, null, null); AMResponse response = amService.allocate(allocateRequest).getAMResponse(); Assert.assertEquals(1, response.getResponseId()); Assert.assertFalse(response.getReboot()); - allocateRequest.setResponseId(response.getResponseId()); + allocateRequest = BuilderUtils.newAllocateRequest(attempt + .getAppAttemptId(), response.getResponseId(), 0F, null, null); response = amService.allocate(allocateRequest).getAMResponse(); Assert.assertEquals(2, response.getResponseId()); @@ -91,8 +93,9 @@ public class TestAMRMRPCResponseId { response = amService.allocate(allocateRequest).getAMResponse(); Assert.assertEquals(2, response.getResponseId()); - /** try sending old **/ - allocateRequest.setResponseId(0); + /** try sending old request again **/ + allocateRequest = BuilderUtils.newAllocateRequest(attempt + .getAppAttemptId(), 0, 0F, null, null); response = amService.allocate(allocateRequest).getAMResponse(); Assert.assertTrue(response.getReboot()); } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterLauncher.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterLauncher.java deleted file mode 100644 index 8cc948400e1..00000000000 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestApplicationMasterLauncher.java +++ /dev/null @@ -1,193 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. 
The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package org.apache.hadoop.yarn.server.resourcemanager.applicationsmanager; - -import java.util.concurrent.atomic.AtomicInteger; - -import junit.framework.Assert; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ApplicationState; -import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; -import org.apache.hadoop.yarn.event.EventHandler; -import org.apache.hadoop.yarn.factories.RecordFactory; -import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; -import org.apache.hadoop.yarn.security.ApplicationTokenSecretManager; -import org.apache.hadoop.yarn.security.client.ClientToAMSecretManager; -import org.apache.hadoop.yarn.server.resourcemanager.RMContext; -import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; -import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; -import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; -import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.StoreFactory; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptImpl; -import org.apache.hadoop.yarn.util.Records; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; - -/** - * Testing the applications manager launcher. 
- * - */ -public class TestApplicationMasterLauncher { -// private static final Log LOG = LogFactory.getLog(TestApplicationMasterLauncher.class); -// private static RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); -// private ApplicationMasterLauncher amLauncher; -// private DummyEventHandler asmHandle; -// private final ApplicationTokenSecretManager applicationTokenSecretManager = -// new ApplicationTokenSecretManager(); -// private final ClientToAMSecretManager clientToAMSecretManager = -// new ClientToAMSecretManager(); -// -// Object doneLaunching = new Object(); -// AtomicInteger launched = new AtomicInteger(); -// AtomicInteger cleanedUp = new AtomicInteger(); -// private RMContext context = new RMContextImpl(new MemStore(), null, null, -// null); -// -// private Configuration conf = new Configuration(); -// -// private class DummyEventHandler implements EventHandler { -// @Override -// public void handle(ApplicationEvent appEvent) { -// ApplicationEventType event = appEvent.getType(); -// switch (event) { -// case FINISH: -// synchronized(doneLaunching) { -// doneLaunching.notify(); -// } -// break; -// -// default: -// break; -// } -// } -// } -// -// private class DummyLaunch implements Runnable { -// public void run() { -// launched.incrementAndGet(); -// } -// } -// -// private class DummyCleanUp implements Runnable { -// private EventHandler eventHandler; -// -// public DummyCleanUp(EventHandler eventHandler) { -// this.eventHandler = eventHandler; -// } -// public void run() { -// cleanedUp.incrementAndGet(); -// eventHandler.handle(new AMFinishEvent(null, -// ApplicationState.COMPLETED, "", "")); -// } -// } -// -// private class DummyApplicationMasterLauncher extends -// ApplicationMasterLauncher { -// private EventHandler eventHandler; -// -// public DummyApplicationMasterLauncher( -// ApplicationTokenSecretManager applicationTokenSecretManager, -// ClientToAMSecretManager clientToAMSecretManager, -// EventHandler eventHandler) { -// super(applicationTokenSecretManager, clientToAMSecretManager, context); -// this.eventHandler = eventHandler; -// } -// -// @Override -// protected Runnable createRunnableLauncher(RMAppAttempt application, -// AMLauncherEventType event) { -// Runnable r = null; -// switch (event) { -// case LAUNCH: -// r = new DummyLaunch(); -// break; -// case CLEANUP: -// r = new DummyCleanUp(eventHandler); -// default: -// break; -// } -// return r; -// } -// } -// -// @Before -// public void setUp() { -// asmHandle = new DummyEventHandler(); -// amLauncher = new DummyApplicationMasterLauncher(applicationTokenSecretManager, -// clientToAMSecretManager, asmHandle); -// context.getDispatcher().init(conf); -// amLauncher.init(conf); -// context.getDispatcher().start(); -// amLauncher.start(); -// -// } -// -// @After -// public void tearDown() { -// amLauncher.stop(); -// } -// -// @Test -// public void testAMLauncher() throws Exception { -// -// // Creat AppId -// ApplicationId appId = recordFactory -// .newRecordInstance(ApplicationId.class); -// appId.setClusterTimestamp(System.currentTimeMillis()); -// appId.setId(1); -// -// ApplicationAttemptId appAttemptId = Records -// .newRecord(ApplicationAttemptId.class); -// appAttemptId.setApplicationId(appId); -// appAttemptId.setAttemptId(1); -// -// // Create submissionContext -// ApplicationSubmissionContext submissionContext = recordFactory -// .newRecordInstance(ApplicationSubmissionContext.class); -// submissionContext.setApplicationId(appId); -// 
submissionContext.setUser("dummyuser"); -// -// RMAppAttempt appAttempt = new RMAppAttemptImpl(appAttemptId, -// "dummyclienttoken", context, null, submissionContext); -// -// // Tell AMLauncher to launch the appAttempt -// amLauncher.handle(new AMLauncherEvent(AMLauncherEventType.LAUNCH, -// appAttempt)); -// -// // Tell AMLauncher to cleanup the appAttempt -// amLauncher.handle(new AMLauncherEvent(AMLauncherEventType.CLEANUP, -// appAttempt)); -// -// synchronized (doneLaunching) { -// doneLaunching.wait(10000); -// } -// Assert.assertEquals(1, launched.get()); -// Assert.assertEquals(1, cleanedUp.get()); -// } -} \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/InlineDispatcher.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/InlineDispatcher.java index 51eb8cf2ec7..d771a61d864 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/InlineDispatcher.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/resourcetracker/InlineDispatcher.java @@ -1,50 +1,57 @@ /** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ package org.apache.hadoop.yarn.server.resourcemanager.resourcetracker; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.Event; import org.apache.hadoop.yarn.event.EventHandler; -class InlineDispatcher extends AsyncDispatcher { - private class InlineEventHandler implements EventHandler { - private final InlineDispatcher dispatcher; - public InlineEventHandler(InlineDispatcher dispatcher) { - this.dispatcher = dispatcher; - } +public class InlineDispatcher extends AsyncDispatcher { + private static final Log LOG = LogFactory.getLog(InlineDispatcher.class); + + private class TestEventHandler implements EventHandler { @Override public void handle(Event event) { - this.dispatcher.dispatch(event); + dispatch(event); } } - public void dispatch(Event event) { - super.dispatch(event); + @Override + protected void dispatch(Event event) { + LOG.info("Dispatching the event " + event.getClass().getName() + "." + + event.toString()); + + Class type = event.getType().getDeclaringClass(); + if (eventDispatchers.get(type) != null) { + eventDispatchers.get(type).handle(event); + } } @Override public EventHandler getEventHandler() { - return new InlineEventHandler(this); + return new TestEventHandler(); } - + static class EmptyEventHandler implements EventHandler { @Override public void handle(Event event) { - ; // ignore - } + //do nothing + } } } \ No newline at end of file diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java index 4fb6486c2c7..03229c34b48 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java @@ -162,6 +162,7 @@ public class MockRMApp implements RMApp { this.diagnostics = new StringBuilder(diag); } + @Override public void handle(RMAppEvent event) { } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index 56b3f4b18af..24408821e2d 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -1,26 +1,27 @@ /** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. 
The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.yarn.server.resourcemanager.rmapp; import static org.mockito.Mockito.mock; import java.io.IOException; +import java.util.List; import junit.framework.Assert; @@ -32,46 +33,62 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; -import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.security.ApplicationTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; -import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.ApplicationsStore.ApplicationStore; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore; +import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEventType; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; + import org.junit.Before; +import org.junit.After; import org.junit.Test; public class TestRMAppTransitions { - private static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class); - + static final Log LOG = LogFactory.getLog(TestRMAppTransitions.class); + private RMContext rmContext; private static int maxRetries = 4; private static int appId = 1; + private AsyncDispatcher rmDispatcher; // 
ignore all the RM application attempt events private static final class TestApplicationAttemptEventDispatcher implements - EventHandler { + EventHandler { - public TestApplicationAttemptEventDispatcher() { + private final RMContext rmContext; + public TestApplicationAttemptEventDispatcher(RMContext rmContext) { + this.rmContext = rmContext; } @Override public void handle(RMAppAttemptEvent event) { + ApplicationId appId = event.getApplicationAttemptId().getApplicationId(); + RMApp rmApp = this.rmContext.getRMApps().get(appId); + if (rmApp != null) { + try { + rmApp.getRMAppAttempt(event.getApplicationAttemptId()).handle(event); + } catch (Throwable t) { + LOG.error("Error in handling event type " + event.getType() + + " for application " + appId, t); + } + } } } // handle all the RM application events - same as in ResourceManager.java private static final class TestApplicationEventDispatcher implements - EventHandler { + EventHandler { private final RMContext rmContext; public TestApplicationEventDispatcher(RMContext rmContext) { @@ -95,19 +112,23 @@ public class TestRMAppTransitions { @Before public void setUp() throws Exception { + AsyncDispatcher rmDispatcher = new AsyncDispatcher(); Configuration conf = new Configuration(); - Dispatcher rmDispatcher = new AsyncDispatcher(); + rmDispatcher = new InlineDispatcher(); - ContainerAllocationExpirer containerAllocationExpirer = mock(ContainerAllocationExpirer.class); + ContainerAllocationExpirer containerAllocationExpirer = + mock(ContainerAllocationExpirer.class); AMLivelinessMonitor amLivelinessMonitor = mock(AMLivelinessMonitor.class); this.rmContext = new RMContextImpl(new MemStore(), rmDispatcher, - containerAllocationExpirer, amLivelinessMonitor); + containerAllocationExpirer, amLivelinessMonitor); rmDispatcher.register(RMAppAttemptEventType.class, - new TestApplicationAttemptEventDispatcher()); + new TestApplicationAttemptEventDispatcher(this.rmContext)); rmDispatcher.register(RMAppEventType.class, new TestApplicationEventDispatcher(rmContext)); + rmDispatcher.init(conf); + rmDispatcher.start(); } protected RMApp createNewTestApp() { @@ -122,22 +143,23 @@ public class TestRMAppTransitions { String clientTokenStr = "bogusstring"; ApplicationStore appStore = mock(ApplicationStore.class); YarnScheduler scheduler = mock(YarnScheduler.class); - ApplicationMasterService masterService = new ApplicationMasterService(rmContext, - new ApplicationTokenSecretManager(), scheduler); + ApplicationMasterService masterService = + new ApplicationMasterService(rmContext, + new ApplicationTokenSecretManager(), scheduler); RMApp application = new RMAppImpl(applicationId, rmContext, - conf, name, user, - queue, submissionContext, clientTokenStr, - appStore, scheduler, - masterService); + conf, name, user, + queue, submissionContext, clientTokenStr, + appStore, scheduler, + masterService); testAppStartState(applicationId, user, name, queue, application); return application; } // Test expected newly created app state - private static void testAppStartState(ApplicationId applicationId, String user, - String name, String queue, RMApp application) { + private static void testAppStartState(ApplicationId applicationId, + String user, String name, String queue, RMApp application) { Assert.assertTrue("application start time is not greater then 0", application.getStartTime() > 0); Assert.assertTrue("application start time is before currentTime", @@ -191,6 +213,14 @@ public class TestRMAppTransitions { "Application killed by user.", diag.toString()); } + private static 
void assertAppAndAttemptKilled(RMApp application) { + assertKilled(application); + /* also check if the attempt is killed */ + Assert.assertEquals( RMAppAttemptState.KILLED, + application.getCurrentAppAttempt().getAppAttemptState() + ); + } + private static void assertFailed(RMApp application, String regex) { assertTimesAtFinish(application); assertAppState(RMAppState.FAILED, application); @@ -202,7 +232,8 @@ public class TestRMAppTransitions { protected RMApp testCreateAppSubmitted() throws IOException { RMApp application = createNewTestApp(); // NEW => SUBMITTED event RMAppEventType.START - RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.START); + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), RMAppEventType.START); application.handle(event); assertStartTimeSet(application); assertAppState(RMAppState.SUBMITTED, application); @@ -212,7 +243,9 @@ public class TestRMAppTransitions { protected RMApp testCreateAppAccepted() throws IOException { RMApp application = testCreateAppSubmitted(); // SUBMITTED => ACCEPTED event RMAppEventType.APP_ACCEPTED - RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.APP_ACCEPTED); + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), + RMAppEventType.APP_ACCEPTED); application.handle(event); assertStartTimeSet(application); assertAppState(RMAppState.ACCEPTED, application); @@ -222,7 +255,9 @@ public class TestRMAppTransitions { protected RMApp testCreateAppRunning() throws IOException { RMApp application = testCreateAppAccepted(); // ACCEPTED => RUNNING event RMAppEventType.ATTEMPT_REGISTERED - RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_REGISTERED); + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), + RMAppEventType.ATTEMPT_REGISTERED); application.handle(event); assertStartTimeSet(application); assertAppState(RMAppState.RUNNING, application); @@ -232,7 +267,9 @@ public class TestRMAppTransitions { protected RMApp testCreateAppFinished() throws IOException { RMApp application = testCreateAppRunning(); // RUNNING => FINISHED event RMAppEventType.ATTEMPT_FINISHED - RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_FINISHED); + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), + RMAppEventType.ATTEMPT_FINISHED); application.handle(event); assertAppState(RMAppState.FINISHED, application); assertTimesAtFinish(application); @@ -251,7 +288,8 @@ public class TestRMAppTransitions { RMApp application = createNewTestApp(); // NEW => KILLED event RMAppEventType.KILL - RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); application.handle(event); assertKilled(application); } @@ -263,7 +301,8 @@ public class TestRMAppTransitions { RMApp application = createNewTestApp(); // NEW => FAILED event RMAppEventType.APP_REJECTED String rejectedText = "Test Application Rejected"; - RMAppEvent event = new RMAppRejectedEvent(application.getApplicationId(), rejectedText); + RMAppEvent event = + new RMAppRejectedEvent(application.getApplicationId(), rejectedText); application.handle(event); assertFailed(application, rejectedText); } @@ -275,7 +314,8 @@ public class TestRMAppTransitions { RMApp application = testCreateAppSubmitted(); // SUBMITTED => FAILED event RMAppEventType.APP_REJECTED String rejectedText = "app rejected"; - RMAppEvent 
event = new RMAppRejectedEvent(application.getApplicationId(), rejectedText); + RMAppEvent event = + new RMAppRejectedEvent(application.getApplicationId(), rejectedText); application.handle(event); assertFailed(application, rejectedText); } @@ -287,8 +327,9 @@ public class TestRMAppTransitions { RMApp application = testCreateAppAccepted(); // SUBMITTED => KILLED event RMAppEventType.KILL RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + this.rmContext.getRMApps().putIfAbsent(application.getApplicationId(), application); application.handle(event); - assertKilled(application); + assertAppAndAttemptKilled(application); } @Test @@ -298,18 +339,26 @@ public class TestRMAppTransitions { RMApp application = testCreateAppAccepted(); // ACCEPTED => ACCEPTED event RMAppEventType.RMAppEventType.ATTEMPT_FAILED for (int i=1; i FAILED event RMAppEventType.RMAppEventType.ATTEMPT_FAILED after max retries - RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED); + // ACCEPTED => FAILED event RMAppEventType.RMAppEventType.ATTEMPT_FAILED + // after max retries + String message = "Test fail"; + RMAppEvent event = + new RMAppFailedAttemptEvent(application.getApplicationId(), + RMAppEventType.ATTEMPT_FAILED, message); application.handle(event); - assertFailed(application, ".*Failing the application.*"); + assertFailed(application, ".*" + message + ".*Failing the application.*"); } @Test @@ -318,7 +367,8 @@ public class TestRMAppTransitions { RMApp application = testCreateAppAccepted(); // ACCEPTED => KILLED event RMAppEventType.KILL - RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); application.handle(event); assertKilled(application); } @@ -329,7 +379,8 @@ public class TestRMAppTransitions { RMApp application = testCreateAppRunning(); // RUNNING => KILLED event RMAppEventType.KILL - RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); application.handle(event); assertKilled(application); } @@ -341,25 +392,35 @@ public class TestRMAppTransitions { RMApp application = testCreateAppRunning(); RMAppAttempt appAttempt = application.getCurrentAppAttempt(); int expectedAttemptId = 1; - Assert.assertEquals(expectedAttemptId, appAttempt.getAppAttemptId().getAttemptId()); + Assert.assertEquals(expectedAttemptId, + appAttempt.getAppAttemptId().getAttemptId()); // RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED for (int i=1; i FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED after max retries - RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED); + // RUNNING => FAILED/RESTARTING event RMAppEventType.ATTEMPT_FAILED + // after max retries + RMAppEvent event = + new RMAppFailedAttemptEvent(application.getApplicationId(), + RMAppEventType.ATTEMPT_FAILED, ""); application.handle(event); assertFailed(application, ".*Failing the application.*"); @@ -376,7 +437,8 @@ public class TestRMAppTransitions { RMApp application = testCreateAppFinished(); // FINISHED => FINISHED event RMAppEventType.KILL - RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); application.handle(event); 
assertTimesAtFinish(application); assertAppState(RMAppState.FINISHED, application); @@ -392,25 +454,32 @@ public class TestRMAppTransitions { RMApp application = testCreateAppRunning(); // RUNNING => KILLED event RMAppEventType.KILL - RMAppEvent event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); + RMAppEvent event = + new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); application.handle(event); assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); // KILLED => KILLED event RMAppEventType.ATTEMPT_FINISHED - event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_FINISHED); + event = + new RMAppEvent(application.getApplicationId(), + RMAppEventType.ATTEMPT_FINISHED); application.handle(event); assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); // KILLED => KILLED event RMAppEventType.ATTEMPT_FAILED - event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_FAILED); + event = + new RMAppFailedAttemptEvent(application.getApplicationId(), + RMAppEventType.ATTEMPT_FAILED, ""); application.handle(event); assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); // KILLED => KILLED event RMAppEventType.ATTEMPT_KILLED - event = new RMAppEvent(application.getApplicationId(), RMAppEventType.ATTEMPT_KILLED); + event = + new RMAppEvent(application.getApplicationId(), + RMAppEventType.ATTEMPT_KILLED); application.handle(event); assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java new file mode 100644 index 00000000000..03a4ba07441 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -0,0 +1,403 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ +package org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.mockito.Matchers.*; +import static org.mockito.Mockito.*; + +import java.util.Collections; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.MockApps; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.event.AsyncDispatcher; +import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.server.resourcemanager.ApplicationMasterService; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEvent; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemStore; +import org.apache.hadoop.yarn.server.resourcemanager.resourcetracker.InlineDispatcher; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRejectedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptContainerAllocatedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptLaunchFailedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRejectedEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestRMAppAttemptTransitions { + + private static final Log LOG = + LogFactory.getLog(TestRMAppAttemptTransitions.class); + + private static final String EMPTY_DIAGNOSTICS = ""; + + private RMContext rmContext; + private YarnScheduler scheduler; + private ApplicationMasterService masterService; + private ApplicationMasterLauncher applicationMasterLauncher; + + private RMApp application; + private RMAppAttempt applicationAttempt; + + private final class TestApplicationAttemptEventDispatcher implements + EventHandler { + + @Override + public void handle(RMAppAttemptEvent event) { + ApplicationAttemptId appID = event.getApplicationAttemptId(); + 
assertEquals(applicationAttempt.getAppAttemptId(), appID); + try { + applicationAttempt.handle(event); + } catch (Throwable t) { + LOG.error("Error in handling event type " + event.getType() + + " for application " + appID, t); + } + } + } + + // handle all the RM application events - same as in ResourceManager.java + private final class TestApplicationEventDispatcher implements + EventHandler { + @Override + public void handle(RMAppEvent event) { + assertEquals(application.getApplicationId(), event.getApplicationId()); + try { + application.handle(event); + } catch (Throwable t) { + LOG.error("Error in handling event type " + event.getType() + + " for application " + application.getApplicationId(), t); + } + } + } + + private final class TestSchedulerEventDispatcher implements + EventHandler { + @Override + public void handle(SchedulerEvent event) { + scheduler.handle(event); + } + } + + private final class TestAMLauncherEventDispatcher implements + EventHandler { + @Override + public void handle(AMLauncherEvent event) { + applicationMasterLauncher.handle(event); + } + } + + private static int appId = 1; + + @Before + public void setUp() throws Exception { + InlineDispatcher rmDispatcher = new InlineDispatcher(); + + ContainerAllocationExpirer containerAllocationExpirer = + mock(ContainerAllocationExpirer.class); + AMLivelinessMonitor amLivelinessMonitor = mock(AMLivelinessMonitor.class); + rmContext = new RMContextImpl(new MemStore(), rmDispatcher, + containerAllocationExpirer, amLivelinessMonitor); + + scheduler = mock(YarnScheduler.class); + masterService = mock(ApplicationMasterService.class); + applicationMasterLauncher = mock(ApplicationMasterLauncher.class); + + rmDispatcher.register(RMAppAttemptEventType.class, + new TestApplicationAttemptEventDispatcher()); + + rmDispatcher.register(RMAppEventType.class, + new TestApplicationEventDispatcher()); + + rmDispatcher.register(SchedulerEventType.class, + new TestSchedulerEventDispatcher()); + + rmDispatcher.register(AMLauncherEventType.class, + new TestAMLauncherEventDispatcher()); + + rmDispatcher.init(new Configuration()); + rmDispatcher.start(); + + + ApplicationId applicationId = MockApps.newAppID(appId++); + ApplicationAttemptId applicationAttemptId = + MockApps.newAppAttemptID(applicationId, 0); + + final String user = MockApps.newUserName(); + final String queue = MockApps.newQueue(); + ApplicationSubmissionContext submissionContext = + mock(ApplicationSubmissionContext.class); + when(submissionContext.getUser()).thenReturn(user); + when(submissionContext.getQueue()).thenReturn(queue); + ContainerLaunchContext amContainerSpec = mock(ContainerLaunchContext.class); + Resource resource = mock(Resource.class); + when(amContainerSpec.getResource()).thenReturn(resource); + when(submissionContext.getAMContainerSpec()).thenReturn(amContainerSpec); + + application = mock(RMApp.class); + applicationAttempt = + new RMAppAttemptImpl(applicationAttemptId, null, rmContext, scheduler, + masterService, submissionContext); + when(application.getCurrentAppAttempt()).thenReturn(applicationAttempt); + when(application.getApplicationId()).thenReturn(applicationId); + + testAppAttemptNewState(); + } + + @After + public void tearDown() throws Exception { + ((AsyncDispatcher)this.rmContext.getDispatcher()).stop(); + } + + + /** + * {@link RMAppAttemptState#NEW} + */ + private void testAppAttemptNewState() { + assertEquals(RMAppAttemptState.NEW, + applicationAttempt.getAppAttemptState()); + assertEquals(0, applicationAttempt.getDiagnostics().length()); + 
assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); + assertNull(applicationAttempt.getMasterContainer()); + assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); + assertEquals(0, applicationAttempt.getRanNodes().size()); + } + + /** + * {@link RMAppAttemptState#SUBMITTED} + */ + private void testAppAttemptSubmittedState() { + assertEquals(RMAppAttemptState.SUBMITTED, + applicationAttempt.getAppAttemptState()); + assertEquals(0, applicationAttempt.getDiagnostics().length()); + assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); + assertNull(applicationAttempt.getMasterContainer()); + assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); + assertEquals(0, applicationAttempt.getRanNodes().size()); + + // Check events + verify(masterService). + registerAppAttempt(applicationAttempt.getAppAttemptId()); + verify(scheduler).handle(any(AppAddedSchedulerEvent.class)); + } + + /** + * {@link RMAppAttemptState#SUBMITTED} -> {@link RMAppAttemptState#FAILED} + */ + private void testAppAttemptSubmittedToFailedState(String diagnostics) { + assertEquals(RMAppAttemptState.FAILED, + applicationAttempt.getAppAttemptState()); + assertEquals(diagnostics, applicationAttempt.getDiagnostics()); + assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); + assertNull(applicationAttempt.getMasterContainer()); + assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); + assertEquals(0, applicationAttempt.getRanNodes().size()); + + // Check events + verify(application).handle(any(RMAppRejectedEvent.class)); + } + + /** + * {@link RMAppAttemptState#KILLED} + */ + private void testAppAttemptKilledState(Container amContainer, + String diagnostics) { + assertEquals(RMAppAttemptState.KILLED, + applicationAttempt.getAppAttemptState()); + assertEquals(diagnostics, applicationAttempt.getDiagnostics()); + assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); + assertEquals(amContainer, applicationAttempt.getMasterContainer()); + assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); + assertEquals(0, applicationAttempt.getRanNodes().size()); + } + + /** + * {@link RMAppAttemptState#SCHEDULED} + */ + private void testAppAttemptScheduledState() { + assertEquals(RMAppAttemptState.SCHEDULED, + applicationAttempt.getAppAttemptState()); + assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); + assertNull(applicationAttempt.getMasterContainer()); + assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); + assertEquals(0, applicationAttempt.getRanNodes().size()); + + // Check events + verify(application).handle(any(RMAppEvent.class)); + verify(scheduler). + allocate(any(ApplicationAttemptId.class), + any(List.class), any(List.class)); + } + + /** + * {@link RMAppAttemptState#ALLOCATED} + */ + private void testAppAttemptAllocatedState(Container amContainer) { + assertEquals(RMAppAttemptState.ALLOCATED, + applicationAttempt.getAppAttemptState()); + assertEquals(amContainer, applicationAttempt.getMasterContainer()); + + // Check events + verify(applicationMasterLauncher).handle(any(AMLauncherEvent.class)); + verify(scheduler, times(2)). 
+ allocate( + any(ApplicationAttemptId.class), any(List.class), any(List.class)); + } + + /** + * {@link RMAppAttemptState#FAILED} + */ + private void testAppAttemptFailedState(Container container, + String diagnostics) { + assertEquals(RMAppAttemptState.FAILED, + applicationAttempt.getAppAttemptState()); + assertEquals(diagnostics, applicationAttempt.getDiagnostics()); + assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); + assertEquals(container, applicationAttempt.getMasterContainer()); + assertEquals(0.0, (double)applicationAttempt.getProgress(), 0.0001); + assertEquals(0, applicationAttempt.getRanNodes().size()); + + // Check events + verify(application, times(2)).handle(any(RMAppFailedAttemptEvent.class)); + } + + private void submitApplicationAttempt() { + ApplicationAttemptId appAttemptId = applicationAttempt.getAppAttemptId(); + applicationAttempt.handle( + new RMAppAttemptEvent(appAttemptId, RMAppAttemptEventType.START)); + testAppAttemptSubmittedState(); + } + + private void scheduleApplicationAttempt() { + submitApplicationAttempt(); + applicationAttempt.handle( + new RMAppAttemptEvent( + applicationAttempt.getAppAttemptId(), + RMAppAttemptEventType.APP_ACCEPTED)); + testAppAttemptScheduledState(); + } + + private Container allocateApplicationAttempt() { + scheduleApplicationAttempt(); + + // Mock the allocation of AM container + Container container = mock(Container.class); + Allocation allocation = mock(Allocation.class); + when(allocation.getContainers()). + thenReturn(Collections.singletonList(container)); + when( + scheduler.allocate( + any(ApplicationAttemptId.class), + any(List.class), + any(List.class))). + thenReturn(allocation); + + applicationAttempt.handle( + new RMAppAttemptContainerAllocatedEvent( + applicationAttempt.getAppAttemptId(), + container)); + + testAppAttemptAllocatedState(container); + + return container; + } + + @Test + public void testNewToKilled() { + applicationAttempt.handle( + new RMAppAttemptEvent( + applicationAttempt.getAppAttemptId(), + RMAppAttemptEventType.KILL)); + testAppAttemptKilledState(null, EMPTY_DIAGNOSTICS); + } + + @Test + public void testSubmittedToFailed() { + submitApplicationAttempt(); + String message = "Rejected"; + applicationAttempt.handle( + new RMAppAttemptRejectedEvent( + applicationAttempt.getAppAttemptId(), message)); + testAppAttemptSubmittedToFailedState(message); + } + + @Test + public void testSubmittedToKilled() { + submitApplicationAttempt(); + applicationAttempt.handle( + new RMAppAttemptEvent( + applicationAttempt.getAppAttemptId(), + RMAppAttemptEventType.KILL)); + testAppAttemptKilledState(null, EMPTY_DIAGNOSTICS); + } + + @Test + public void testScheduledToKilled() { + scheduleApplicationAttempt(); + applicationAttempt.handle( + new RMAppAttemptEvent( + applicationAttempt.getAppAttemptId(), + RMAppAttemptEventType.KILL)); + testAppAttemptKilledState(null, EMPTY_DIAGNOSTICS); + } + + @Test + public void testAllocatedToKilled() { + Container amContainer = allocateApplicationAttempt(); + applicationAttempt.handle( + new RMAppAttemptEvent( + applicationAttempt.getAppAttemptId(), + RMAppAttemptEventType.KILL)); + testAppAttemptKilledState(amContainer, EMPTY_DIAGNOSTICS); + } + + @Test + public void testAllocatedToFailed() { + Container amContainer = allocateApplicationAttempt(); + String diagnostics = "Launch Failed"; + applicationAttempt.handle( + new RMAppAttemptLaunchFailedEvent( + applicationAttempt.getAppAttemptId(), + diagnostics)); + testAppAttemptFailedState(amContainer, diagnostics); + 
} + +} diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index 3ea01003320..639daf9e5ac 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -135,7 +135,8 @@ public class TestLeafQueue { Container container = TestUtils.getMockContainer( containerId, ((SchedulerNode)(invocation.getArguments()[1])).getNodeID(), - (Resource)(invocation.getArguments()[2])); + (Resource)(invocation.getArguments()[2]), + ((Priority)invocation.getArguments()[3])); return container; } } @@ -143,7 +144,9 @@ public class TestLeafQueue { when(queue).createContainer( any(SchedulerApp.class), any(SchedulerNode.class), - any(Resource.class)); + any(Resource.class), + any(Priority.class) + ); // 2. Stub out LeafQueue.parent.completedContainer CSQueue parent = queue.getParent(); @@ -202,6 +205,8 @@ public class TestLeafQueue { assertEquals(1*GB, a.getUsedResources().getMemory()); assertEquals(1*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); + assertEquals(0, a.getMetrics().getReservedGB()); + assertEquals(1, a.getMetrics().getAllocatedGB()); // Also 2nd -> minCapacity = 1024 since (.1 * 8G) < minAlloc, also // you can get one container more than user-limit @@ -209,12 +214,16 @@ public class TestLeafQueue { assertEquals(2*GB, a.getUsedResources().getMemory()); assertEquals(2*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); + assertEquals(0, a.getMetrics().getReservedGB()); + assertEquals(2, a.getMetrics().getAllocatedGB()); // Can't allocate 3rd due to user-limit a.assignContainers(clusterResource, node_0); assertEquals(2*GB, a.getUsedResources().getMemory()); assertEquals(2*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); + assertEquals(0, a.getMetrics().getReservedGB()); + assertEquals(2, a.getMetrics().getAllocatedGB()); // Bump up user-limit-factor, now allocate should work a.setUserLimitFactor(10); @@ -222,12 +231,16 @@ public class TestLeafQueue { assertEquals(3*GB, a.getUsedResources().getMemory()); assertEquals(3*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); + assertEquals(0, a.getMetrics().getReservedGB()); + assertEquals(3, a.getMetrics().getAllocatedGB()); // One more should work, for app_1, due to user-limit-factor a.assignContainers(clusterResource, node_0); assertEquals(4*GB, a.getUsedResources().getMemory()); assertEquals(3*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemory()); + assertEquals(0, a.getMetrics().getReservedGB()); + assertEquals(4, a.getMetrics().getAllocatedGB()); // Test max-capacity // Now - no more allocs since we are at max-cap @@ -236,6 +249,8 @@ public class TestLeafQueue { assertEquals(4*GB, 
a.getUsedResources().getMemory()); assertEquals(3*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemory()); + assertEquals(0, a.getMetrics().getReservedGB()); + assertEquals(4, a.getMetrics().getAllocatedGB()); // Release each container from app_0 for (RMContainer rmContainer : app_0.getLiveContainers()) { @@ -245,6 +260,8 @@ public class TestLeafQueue { assertEquals(1*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(1*GB, app_1.getCurrentConsumption().getMemory()); + assertEquals(0, a.getMetrics().getReservedGB()); + assertEquals(1, a.getMetrics().getAllocatedGB()); // Release each container from app_1 for (RMContainer rmContainer : app_1.getLiveContainers()) { @@ -254,6 +271,8 @@ public class TestLeafQueue { assertEquals(0*GB, a.getUsedResources().getMemory()); assertEquals(0*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); + assertEquals(0, a.getMetrics().getReservedGB()); + assertEquals(0, a.getMetrics().getAllocatedGB()); } @Test @@ -473,6 +492,8 @@ public class TestLeafQueue { assertEquals(1*GB, a.getUsedResources().getMemory()); assertEquals(1*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); + assertEquals(0, a.getMetrics().getReservedGB()); + assertEquals(1, a.getMetrics().getAllocatedGB()); // Also 2nd -> minCapacity = 1024 since (.1 * 8G) < minAlloc, also // you can get one container more than user-limit @@ -480,6 +501,8 @@ public class TestLeafQueue { assertEquals(2*GB, a.getUsedResources().getMemory()); assertEquals(2*GB, app_0.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); + assertEquals(0, a.getMetrics().getReservedGB()); + assertEquals(2, a.getMetrics().getAllocatedGB()); // Now, reservation should kick in for app_1 a.assignContainers(clusterResource, node_0); @@ -488,6 +511,8 @@ public class TestLeafQueue { assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); assertEquals(4*GB, app_1.getCurrentReservation().getMemory()); assertEquals(2*GB, node_0.getUsedResource().getMemory()); + assertEquals(4, a.getMetrics().getReservedGB()); + assertEquals(2, a.getMetrics().getAllocatedGB()); // Now free 1 container from app_0 i.e. 
1G a.completedContainer(clusterResource, app_0, node_0, @@ -498,6 +523,8 @@ public class TestLeafQueue { assertEquals(0*GB, app_1.getCurrentConsumption().getMemory()); assertEquals(4*GB, app_1.getCurrentReservation().getMemory()); assertEquals(1*GB, node_0.getUsedResource().getMemory()); + assertEquals(4, a.getMetrics().getReservedGB()); + assertEquals(1, a.getMetrics().getAllocatedGB()); // Now finish another container from app_0 and fulfill the reservation a.completedContainer(clusterResource, app_0, node_0, @@ -508,6 +535,8 @@ public class TestLeafQueue { assertEquals(4*GB, app_1.getCurrentConsumption().getMemory()); assertEquals(0*GB, app_1.getCurrentReservation().getMemory()); assertEquals(4*GB, node_0.getUsedResource().getMemory()); + assertEquals(0, a.getMetrics().getReservedGB()); + assertEquals(4, a.getMetrics().getAllocatedGB()); } @Test diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java index 84dbbac8676..8459e51d5c2 100644 --- a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestUtils.java @@ -161,11 +161,13 @@ public class TestUtils { } public static Container getMockContainer( - ContainerId containerId, NodeId nodeId, Resource resource) { + ContainerId containerId, NodeId nodeId, + Resource resource, Priority priority) { Container container = mock(Container.class); when(container.getId()).thenReturn(containerId); when(container.getNodeId()).thenReturn(nodeId); when(container.getResource()).thenReturn(resource); + when(container.getPriority()).thenReturn(priority); return container; } } diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java new file mode 100644 index 00000000000..e0583a20075 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.resourcemanager.webapp; + +import java.io.PrintWriter; + +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodesPage.NodesBlock; +import org.apache.hadoop.yarn.webapp.test.WebAppTests; +import org.junit.Test; +import org.mockito.Mockito; + +/** + * This tests the NodesPage block table that it should contain the table body + * data for all the columns in the table as specified in the header. + */ +public class TestNodesPage { + + @Test + public void testNodesBlockRender() throws Exception { + int numberOfRacks = 2; + int numberOfNodesPerRack = 2; + // Number of Actual Table Headers for NodesPage.NodesBlock might change in + // future. In that case this value should be adjusted to the new value. + int numberOfActualTableHeaders = 7; + + PrintWriter writer = WebAppTests.testBlock( + NodesBlock.class, + RMContext.class, + TestRMWebApp.mockRMContext(3, numberOfRacks, numberOfNodesPerRack, + 8 * TestRMWebApp.GiB)).getInstance(PrintWriter.class); + + Mockito.verify(writer, Mockito.times(numberOfActualTableHeaders)).print( + " release = new ArrayList(); - AllocateRequest allocateRequest = - recordFactory.newRecordInstance(AllocateRequest.class); - allocateRequest.setApplicationAttemptId(appAttempt.getAppAttemptId()); - allocateRequest.setResponseId(0); - allocateRequest.addAllAsks(ask); - allocateRequest.addAllReleases(release); + AllocateRequest allocateRequest = BuilderUtils.newAllocateRequest( + appAttempt.getAppAttemptId(), 0, 0F, ask, release); List allocatedContainers = scheduler.allocate(allocateRequest) .getAMResponse().getAllocatedContainers(); diff --git a/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/SingleCluster.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/SingleCluster.apt.vm new file mode 100644 index 00000000000..affb277b7ff --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/SingleCluster.apt.vm @@ -0,0 +1,180 @@ +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. +~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + Hadoop MapReduce Next Generation ${project.version} - Setting up a Single Node Cluster. + --- + --- + ${maven.build.timestamp} + +Hadoop MapReduce Next Generation - Setting up a Single Node Cluster. + + \[ {{{./index.html}Go Back}} \] + +* Mapreduce Tarball + + You should be able to obtain the MapReduce tarball from the release. + If not, you should be able to create a tarball from the source. + ++---+ +$ mvn clean install -DskipTests +$ cd hadoop-mapreduce-project +$ mvn clean install assembly:assembly ++---+ + <> You will need protoc installed of version 2.4.1 or greater. + + To ignore the native builds in mapreduce you can use <<<-P-cbuild>>> argument + for maven. The tarball should be available in <<>> directory. + + +* Setting up the environment. 
+ + Assuming you have installed hadoop-common/hadoop-hdfs and exported + <<$HADOOP_COMMON_HOME>>/<<$HADOOP_COMMON_HOME>>, untar hadoop mapreduce + tarball and set environment variable <<$HADOOP_MAPRED_HOME>> to the + untarred directory. Set <<$YARN_HOME>> the same as <<$HADOOP_MAPRED_HOME>>. + + <> The following instructions assume you have hdfs running. + +* Setting up Configuration. + + To start the ResourceManager and NodeManager, you will have to update the configs. + Assuming your $HADOOP_CONF_DIR is the configuration directory and has the installed + configs for HDFS and <<>>. There are 2 config files you will have to setup + <<>> and <<>>. + +** Setting up <<>> + + Add the following configs to your <<>>. + ++---+ + + mapreduce.cluster.temp.dir + + No description + true + + + + mapreduce.cluster.local.dir + + No description + true + ++---+ + +** Setting up <<>> + +Add the following configs to your <<>> + ++---+ + + yarn.resourcemanager.resource-tracker.address + host:port + host is the hostname of the resource manager and + port is the port on which the NodeManagers contact the Resource Manager. + + + + + yarn.resourcemanager.scheduler.address + host:port + host is the hostname of the resourcemanager and port is the port + on which the Applications in the cluster talk to the Resource Manager. + + + + + yarn.resourcemanager.scheduler.class + org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler + In case you do not want to use the default scheduler + + + + yarn.resourcemanager.address + host:port + the host is the hostname of the ResourceManager and the port is the port on + which the clients can talk to the Resource Manager. + + + + yarn.nodemanager.local-dirs + + the local directories used by the nodemanager + + + + yarn.nodemanager.address + 0.0.0.0:port + the nodemanagers bind to this port + + + + yarn.nodemanager.resource.memory-gb + 10 + the amount of memory on the NodeManager in GB + + + + yarn.nodemanager.remote-app-log-dir + /app-logs + directory on hdfs where the application logs are moved to + + + + yarn.nodemanager.log-dirs + + the directories used by Nodemanagers as log directories + + + + yarn.nodemanager.aux-services + mapreduce.shuffle + shuffle service that needs to be set for Map Reduce to run + ++---+ + +* Create Symlinks. + + You will have to create the following symlinks: + ++---+ +$ cd $HADOOP_COMMON_HOME/share/hadoop/common/lib/ +$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-app-*-SNAPSHOT.jar . +$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-jobclient-*-SNAPSHOT.jar . +$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-common-*-SNAPSHOT.jar . +$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-shuffle-*-SNAPSHOT.jar . +$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-mapreduce-client-core-*-SNAPSHOT.jar . +$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-common-*-SNAPSHOT.jar . +$ ln -s $HADOOP_MAPRED_HOME/modules/hadoop-yarn-api-*-SNAPSHOT.jar . ++---+ +* Running daemons. + + Assuming that the environment variables <<$HADOOP_COMMON_HOME>>, <<$HADOOP_HDFS_HOME>>, <<$HADOO_MAPRED_HOME>>, + <<$YARN_HOME>>, <<$JAVA_HOME>> and <<$HADOOP_CONF_DIR>> have been set appropriately. + Set $<<$YARN_CONF_DIR>> the same as $<> + + Run ResourceManager and NodeManager as: + ++---+ +$ cd $HADOOP_MAPRED_HOME +$ bin/yarn-daemon.sh start resourcemanager +$ bin/yarn-daemon.sh start nodemanager ++---+ + + You should be up and running. 
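  The configuration keys listed earlier can also be applied programmatically,
  which is convenient when embedding a single-node setup in tests. Below is a
  minimal sketch using the stock org.apache.hadoop.conf.Configuration API; the
  host names, port numbers and directory paths are illustrative placeholders,
  not values prescribed by this document.

+---+
import org.apache.hadoop.conf.Configuration;

public class YarnSiteSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Addresses used to reach the ResourceManager (hosts/ports are illustrative).
    conf.set("yarn.resourcemanager.address", "rmhost:8040");
    conf.set("yarn.resourcemanager.scheduler.address", "rmhost:8030");
    conf.set("yarn.resourcemanager.resource-tracker.address", "rmhost:8025");
    // NodeManager scratch and log directories (illustrative paths).
    conf.set("yarn.nodemanager.local-dirs", "/tmp/nm-local-dir");
    conf.set("yarn.nodemanager.log-dirs", "/tmp/nm-log-dir");
    // Shuffle auxiliary service required for MapReduce jobs to run.
    conf.set("yarn.nodemanager.aux-services", "mapreduce.shuffle");
  }
}
+---+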
You can run randomwriter as: + ++---+ +$ $HADOOP_COMMON_HOME/bin/hadoop jar hadoop-examples.jar randomwriter out ++---+ + +Good luck. diff --git a/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm new file mode 100644 index 00000000000..db9fe870349 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/src/site/apt/index.apt.vm @@ -0,0 +1,39 @@ +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. +~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + Hadoop MapReduce Next Generation ${project.version} + --- + --- + ${maven.build.timestamp} + +Hadoop MapReduce Next Generation + +* Architecture + + The new architecture introduced in 0.23, divides the two major functions + of the JobTracker, resource management and job scheduling/monitoring, into separate + components. + The new ResourceManager manages the global assignment of compute resources to applications + and the per-application ApplicationMaster manages the application’s scheduling and coordination. + An application is either a single job in the classic MapReduce jobs or a DAG of such jobs. + The ResourceManager and per-machine NodeManager server, which manages the user processes on that + machine, form the computation fabric. The per-application ApplicationMaster is, in effect, a + framework specific library and is tasked with negotiating resources from the ResourceManager + and working with the NodeManager(s) to execute and monitor the tasks. 
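  To make the ResourceManager/ApplicationMaster split concrete, the sketch
  below shows one allocate round-trip an ApplicationMaster performs against the
  ResourceManager. It is modeled on the BuilderUtils.newAllocateRequest(...)
  call appearing earlier in this patch's ResourceManager test changes; the
  AMRMProtocol parameter type, the import locations and the surrounding
  scaffolding are assumptions made for illustration, not part of the patch.

+---+
import java.util.List;

import org.apache.hadoop.yarn.api.AMRMProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.util.BuilderUtils;

public class AllocateSketch {
  // One heartbeat: ask for new resources, release containers that are no
  // longer needed, and collect the containers the ResourceManager has granted.
  static List<Container> heartbeat(AMRMProtocol resourceManager,
      ApplicationAttemptId appAttemptId, List<ResourceRequest> ask,
      List<ContainerId> release) throws Exception {
    // responseId 0 and progress 0f mirror the test code in this patch.
    AllocateRequest request =
        BuilderUtils.newAllocateRequest(appAttemptId, 0, 0f, ask, release);
    return resourceManager.allocate(request).getAMResponse()
        .getAllocatedContainers();
  }
}
+---+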
+ +* User Documentation + + * {{{./SingleCluster.html}SingleCluster}} + + * {{{./apidocs/index.html}JavaDocs}} + diff --git a/hadoop-mapreduce-project/hadoop-yarn/src/site/site.xml b/hadoop-mapreduce-project/hadoop-yarn/src/site/site.xml new file mode 100644 index 00000000000..35a75cb2e55 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/src/site/site.xml @@ -0,0 +1,34 @@ + + + + + + +   + + + + org.apache.maven.skins + maven-stylus-skin + 1.1 + + + + + + + + + diff --git a/hadoop-mapreduce-project/src/contrib/fairscheduler/ivy.xml b/hadoop-mapreduce-project/src/contrib/fairscheduler/ivy.xml index e927032d7db..0b910158df5 100644 --- a/hadoop-mapreduce-project/src/contrib/fairscheduler/ivy.xml +++ b/hadoop-mapreduce-project/src/contrib/fairscheduler/ivy.xml @@ -48,9 +48,9 @@ + rev="${hadoop-hdfs.version}" conf="common->default"/> + rev="${hadoop-hdfs.version}" conf="test->default"> 0) { LOG.warn("-file option is deprecated, please use generic option" + " -files instead."); - StringBuilder unpackRegex = new StringBuilder( - config_.getPattern(MRJobConfig.JAR_UNPACK_PATTERN, - JobConf.UNPACK_JAR_PATTERN_DEFAULT).pattern()); + + String fileList = null; for (String file : values) { packageFiles_.add(file); - String fname = new File(file).getName(); - unpackRegex.append("|(?:").append(Pattern.quote(fname)).append(")"); + try { + URI pathURI = new URI(file); + Path path = new Path(pathURI); + FileSystem localFs = FileSystem.getLocal(config_); + String finalPath = path.makeQualified(localFs).toString(); + fileList = fileList == null ? finalPath : fileList + "," + finalPath; + } catch (Exception e) { + throw new IllegalArgumentException(e); + } } - config_.setPattern(MRJobConfig.JAR_UNPACK_PATTERN, - Pattern.compile(unpackRegex.toString())); + config_.set("tmpfiles", config_.get("tmpfiles", "") + + (fileList == null ? "" : fileList)); validate(packageFiles_); } - + String fsName = cmdLine.getOptionValue("dfs"); if (null != fsName){ LOG.warn("-dfs option is deprecated, please use -fs instead."); diff --git a/hadoop-mapreduce-project/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh b/hadoop-mapreduce-project/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh index 8ac5b61a5d3..2a32cbd1c9d 100644 --- a/hadoop-mapreduce-project/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh +++ b/hadoop-mapreduce-project/src/contrib/vaidya/src/java/org/apache/hadoop/vaidya/vaidya.sh @@ -31,17 +31,78 @@ script=`basename "$this"` bin=`cd "$bin"; pwd` this="$bin/$script" -# Check if HADOOP_PREFIX AND JAVA_HOME is set. -if [ -z $HADOOP_PREFIX ] ; then - echo "HADOOP_PREFIX environment variable not defined" +# Check if HADOOP_HOME AND JAVA_HOME is set. 
+if [ -z "$HADOOP_HOME" ] && [ -z "$HADOOP_PREFIX" ] ; then + echo "HADOOP_HOME or HADOOP_PREFIX environment variable should be defined" exit -1; fi -if [ -z $JAVA_HOME ] ; then +if [ -z "$JAVA_HOME" ] ; then echo "JAVA_HOME environment variable not defined" exit -1; fi -hadoopVersion=`$HADOOP_PREFIX/bin/hadoop version | grep Hadoop | awk '{print $2}'` +if [ -z "$HADOOP_PREFIX" ]; then + hadoopVersion=`$HADOOP_HOME/bin/hadoop version | awk 'BEGIN { RS = "" ; FS = "\n" } ; { print $1 }' | awk '{print $2}'` +else + hadoopVersion=`$HADOOP_PREFIX/bin/hadoop version | awk 'BEGIN { RS = "" ; FS = "\n" } ; { print $1 }' | awk '{print $2}'` +fi -$JAVA_HOME/bin/java -Xmx1024m -classpath $HADOOP_PREFIX/hadoop-${hadoopVersion}-core.jar:$HADOOP_PREFIX/contrib/vaidya/hadoop-${hadoopVersion}-vaidya.jar:$HADOOP_PREFIX/lib/commons-logging-1.0.4.jar:${CLASSPATH} org.apache.hadoop.vaidya.postexdiagnosis.PostExPerformanceDiagnoser $@ +# so that filenames w/ spaces are handled correctly in loops below +IFS= + +# for releases, add core hadoop jar to CLASSPATH +if [ -e $HADOOP_PREFIX/share/hadoop/hadoop-core-* ]; then + for f in $HADOOP_PREFIX/share/hadoop/hadoop-core-*.jar; do + CLASSPATH=${CLASSPATH}:$f; + done + + # add libs to CLASSPATH + for f in $HADOOP_PREFIX/share/hadoop/lib/*.jar; do + CLASSPATH=${CLASSPATH}:$f; + done +else + # tarball layout + if [ -e $HADOOP_HOME/hadoop-core-* ]; then + for f in $HADOOP_HOME/hadoop-core-*.jar; do + CLASSPATH=${CLASSPATH}:$f; + done + fi + if [ -e $HADOOP_HOME/build/hadoop-core-* ]; then + for f in $HADOOP_HOME/build/hadoop-core-*.jar; do + CLASSPATH=${CLASSPATH}:$f; + done + fi + for f in $HADOOP_HOME/lib/*.jar; do + CLASSPATH=${CLASSPATH}:$f; + done + + if [ -d "$HADOOP_HOME/build/ivy/lib/Hadoop/common" ]; then + for f in $HADOOP_HOME/build/ivy/lib/Hadoop/common/*.jar; do + CLASSPATH=${CLASSPATH}:$f; + done + fi +fi + +# Set the Vaidya home +if [ -d "$HADOOP_PREFIX/share/hadoop/contrib/vaidya/" ]; then + VAIDYA_HOME=$HADOOP_PREFIX/share/hadoop/contrib/vaidya/ +fi +if [ -d "$HADOOP_HOME/contrib/vaidya" ]; then + VAIDYA_HOME=$HADOOP_HOME/contrib/vaidya/ +fi +if [ -d "$HADOOP_HOME/build/contrib/vaidya" ]; then + VAIDYA_HOME=$HADOOP_HOME/build/contrib/vaidya/ +fi + +# add user-specified CLASSPATH last +if [ "$HADOOP_USER_CLASSPATH_FIRST" = "" ] && [ "$HADOOP_CLASSPATH" != "" ]; then + CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH} +fi + +# restore ordinary behaviour +unset IFS + +echo "$CLASSPATH" + +$JAVA_HOME/bin/java -Xmx1024m -classpath $VAIDYA_HOME/hadoop-vaidya-${hadoopVersion}.jar:${CLASSPATH} org.apache.hadoop.vaidya.postexdiagnosis.PostExPerformanceDiagnoser $@ diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTrackerClientProtocolProvider.java b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTrackerClientProtocolProvider.java index 42c958d77c1..c695816e414 100644 --- a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTrackerClientProtocolProvider.java +++ b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTrackerClientProtocolProvider.java @@ -37,26 +37,30 @@ public class JobTrackerClientProtocolProvider extends ClientProtocolProvider { @Override public ClientProtocol create(Configuration conf) throws IOException { String framework = conf.get(MRConfig.FRAMEWORK_NAME); - if (framework != null && !framework.equals("classic")) { + if (!MRConfig.CLASSIC_FRAMEWORK_NAME.equals(framework)) { return null; } String tracker = conf.get(JTConfig.JT_IPC_ADDRESS, "local"); if (!"local".equals(tracker)) { return 
createRPCProxy(JobTracker.getAddress(conf), conf); + } else { + throw new IOException("Invalid \"" + JTConfig.JT_IPC_ADDRESS + + "\" configuration value for JobTracker: \"" + + tracker + "\""); } - return null; } @Override - public ClientProtocol create(InetSocketAddress addr, Configuration conf) throws IOException { + public ClientProtocol create(InetSocketAddress addr, Configuration conf) + throws IOException { return createRPCProxy(addr, conf); } - + private ClientProtocol createRPCProxy(InetSocketAddress addr, Configuration conf) throws IOException { return (ClientProtocol) RPC.getProxy(ClientProtocol.class, - ClientProtocol.versionID, addr, UserGroupInformation.getCurrentUser(), - conf, NetUtils.getSocketFactory(conf, ClientProtocol.class)); + ClientProtocol.versionID, addr, UserGroupInformation.getCurrentUser(), + conf, NetUtils.getSocketFactory(conf, ClientProtocol.class)); } @Override diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/LocalClientProtocolProvider.java b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/LocalClientProtocolProvider.java index 68d10bc4d00..d09b222ee9b 100644 --- a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/LocalClientProtocolProvider.java +++ b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/LocalClientProtocolProvider.java @@ -37,11 +37,16 @@ public class LocalClientProtocolProvider extends ClientProtocolProvider { if (framework != null && !framework.equals("local")) { return null; } - if ("local".equals(conf.get(JTConfig.JT_IPC_ADDRESS, "local"))) { + String tracker = conf.get(JTConfig.JT_IPC_ADDRESS, "local"); + if ("local".equals(tracker)) { conf.setInt("mapreduce.job.maps", 1); return new LocalJobRunner(conf); + } else { + + throw new IOException("Invalid \"" + JTConfig.JT_IPC_ADDRESS + + "\" configuration value for LocalJobRunner : \"" + + tracker + "\""); } - return null; } @Override diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java index 7581f8bc7be..86980bb73d6 100644 --- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java +++ b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/MiniMRCluster.java @@ -382,6 +382,7 @@ public class MiniMRCluster { UserGroupInformation ugi) { JobConf result = new JobConf(conf); FileSystem.setDefaultUri(result, namenode); + result.set(MRConfig.FRAMEWORK_NAME, MRConfig.CLASSIC_FRAMEWORK_NAME); result.set(JTConfig.JT_IPC_ADDRESS, "localhost:"+jobTrackerPort); result.set(JTConfig.JT_HTTP_ADDRESS, "127.0.0.1:" + jobTrackerInfoPort); diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/QueueManagerTestUtils.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/QueueManagerTestUtils.java index dee6f57b72f..4cb0fee616c 100644 --- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/QueueManagerTestUtils.java +++ b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/QueueManagerTestUtils.java @@ -24,6 +24,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.Cluster; import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.QueueState; import org.apache.hadoop.mapreduce.SleepJob; import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig; @@ -314,6 +315,7 @@ public class 
QueueManagerTestUtils { final long reduceSleepTime, boolean shouldComplete, String userInfo, String queueName, Configuration clientConf) throws IOException, InterruptedException, ClassNotFoundException { + clientConf.set(MRConfig.FRAMEWORK_NAME, MRConfig.CLASSIC_FRAMEWORK_NAME); clientConf.set(JTConfig.JT_IPC_ADDRESS, "localhost:" + miniMRCluster.getJobTrackerPort()); UserGroupInformation ugi; diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestMiniMRClasspath.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestMiniMRClasspath.java index 911aa2cf7c2..2563902d4bc 100644 --- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestMiniMRClasspath.java +++ b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestMiniMRClasspath.java @@ -55,6 +55,7 @@ public class TestMiniMRClasspath extends TestCase { file.close(); } FileSystem.setDefaultUri(conf, fs.getUri()); + conf.set(JTConfig.FRAMEWORK_NAME, JTConfig.CLASSIC_FRAMEWORK_NAME); conf.set(JTConfig.JT_IPC_ADDRESS, jobTracker); conf.setJobName("wordcount"); conf.setInputFormat(TextInputFormat.class); @@ -121,6 +122,7 @@ public class TestMiniMRClasspath extends TestCase { file.close(); } FileSystem.setDefaultUri(conf, uri); + conf.set(JTConfig.FRAMEWORK_NAME, JTConfig.CLASSIC_FRAMEWORK_NAME); conf.set(JTConfig.JT_IPC_ADDRESS, jobTracker); conf.setJobName("wordcount"); conf.setInputFormat(TextInputFormat.class); diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestSpecialCharactersInOutputPath.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestSpecialCharactersInOutputPath.java index dc3355bb4b8..5e510094ced 100644 --- a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestSpecialCharactersInOutputPath.java +++ b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapred/TestSpecialCharactersInOutputPath.java @@ -27,13 +27,14 @@ import junit.framework.TestCase; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.lib.IdentityMapper; import org.apache.hadoop.mapred.lib.IdentityReducer; +import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig; import org.apache.hadoop.util.Progressable; @@ -67,6 +68,7 @@ public class TestSpecialCharactersInOutputPath extends TestCase { // use WordCount example FileSystem.setDefaultUri(conf, fileSys); + conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.CLASSIC_FRAMEWORK_NAME); conf.set(JTConfig.JT_IPC_ADDRESS, jobTracker); conf.setJobName("foo"); diff --git a/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java new file mode 100644 index 00000000000..a9044e24308 --- /dev/null +++ b/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/mapreduce/TestClientProtocolProviderImpls.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapreduce; + +import java.io.IOException; + +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapred.LocalJobRunner; +import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig; +import org.junit.Test; + +public class TestClientProtocolProviderImpls extends TestCase { + + @Test + public void testClusterWithLocalClientProvider() throws Exception { + + Configuration conf = new Configuration(); + + try { + conf.set(MRConfig.FRAMEWORK_NAME, "incorrect"); + new Cluster(conf); + fail("Cluster should not be initialized with incorrect framework name"); + } catch (IOException e) { + + } + + try { + conf.set(MRConfig.FRAMEWORK_NAME, "local"); + conf.set(JTConfig.JT_IPC_ADDRESS, "127.0.0.1:0"); + + new Cluster(conf); + fail("Cluster with Local Framework name should use local JT address"); + } catch (IOException e) { + + } + + try { + conf.set(JTConfig.JT_IPC_ADDRESS, "local"); + Cluster cluster = new Cluster(conf); + assertTrue(cluster.getClient() instanceof LocalJobRunner); + cluster.close(); + } catch (IOException e) { + + } + } + + @Test + public void testClusterWithJTClientProvider() throws Exception { + + Configuration conf = new Configuration(); + try { + conf.set(MRConfig.FRAMEWORK_NAME, "incorrect"); + new Cluster(conf); + fail("Cluster should not be initialized with incorrect framework name"); + + } catch (IOException e) { + + } + + try { + conf.set(MRConfig.FRAMEWORK_NAME, "classic"); + conf.set(JTConfig.JT_IPC_ADDRESS, "local"); + new Cluster(conf); + fail("Cluster with classic Framework name shouldnot use local JT address"); + + } catch (IOException e) { + + } + + try { + conf = new Configuration(); + conf.set(MRConfig.FRAMEWORK_NAME, "classic"); + conf.set(JTConfig.JT_IPC_ADDRESS, "127.0.0.1:0"); + Cluster cluster = new Cluster(conf); + cluster.close(); + } catch (IOException e) { + + } + } + +} diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index a1114a13664..ec342060fe2 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -76,6 +76,9 @@ https://repository.apache.org/content/repositories/snapshots 1.0.3 + + ${project.build.directory}/test-dir + ${test.build.dir} @@ -559,6 +562,25 @@ + + org.apache.maven.plugins + maven-antrun-plugin + + + create-testdirs + validate + + run + + + + + + + + + + org.apache.maven.plugins maven-compiler-plugin From 90727b82e077e9ac80b77a37d3dc29d128da8c7b Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 6 Oct 2011 16:14:50 +0000 Subject: [PATCH 012/177] HDFS-2407. 
getServerDefaults and getStats don't check operation category (atm) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1179685 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hadoop/hdfs/server/namenode/NameNodeRpcServer.java | 6 ++++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 91d58c04a27..21d9b7d9db7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -7,3 +7,5 @@ branch is merged. HDFS-2179. Add fencing framework and mechanisms for NameNode HA. (todd) HDFS-1974. Introduce active and standy states to the namenode. (suresh) + +HDFS-2407. getServerDefaults and getStats don't check operation category (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 5b0d7a79cfd..d79614f7d43 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -319,6 +319,7 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public FsServerDefaults getServerDefaults() throws IOException { + nn.checkOperation(OperationCategory.READ); return namesystem.getServerDefaults(); } @@ -634,8 +635,9 @@ class NameNodeRpcServer implements NamenodeProtocols { return namesystem.getFileInfo(src, false); } - @Override - public long[] getStats() { + @Override // ClientProtocol + public long[] getStats() throws IOException { + nn.checkOperation(OperationCategory.READ); return namesystem.getStats(); } From 8b4f497af85b49519da2e05e8269db6c4e9d621f Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 6 Oct 2011 23:26:14 +0000 Subject: [PATCH 013/177] HDFS-1973. HA: HDFS clients must handle namenode failover and switch over to the new active namenode. (atm) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1179896 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../main/java/org/apache/hadoop/fs/Hdfs.java | 3 +- .../org/apache/hadoop/hdfs/DFSClient.java | 58 ++++++-- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 1 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 16 +- .../hadoop/hdfs/DistributedFileSystem.java | 3 +- .../hadoop/hdfs/protocol/ClientProtocol.java | 5 +- .../hadoop/hdfs/server/namenode/NameNode.java | 2 +- .../ha/ConfiguredFailoverProxyProvider.java | 140 ++++++++++++++++++ .../hadoop/hdfs/TestDFSClientFailover.java | 96 ++++++++++++ 10 files changed, 301 insertions(+), 25 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 21d9b7d9db7..4a847593381 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -9,3 +9,5 @@ HDFS-2179. 
Add fencing framework and mechanisms for NameNode HA. (todd) HDFS-1974. Introduce active and standy states to the namenode. (suresh) HDFS-2407. getServerDefaults and getStats don't check operation category (atm) + +HDFS-1973. HA: HDFS clients must handle namenode failover and switch over to the new active namenode. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/Hdfs.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/Hdfs.java index 7772ad97928..5232ea9c9c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/Hdfs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/Hdfs.java @@ -80,8 +80,7 @@ public class Hdfs extends AbstractFileSystem { throw new IOException("Incomplete HDFS URI, no host: " + theUri); } - InetSocketAddress namenode = NameNode.getAddress(theUri.getAuthority()); - this.dfs = new DFSClient(namenode, conf, getStatistics()); + this.dfs = new DFSClient(theUri, conf, getStatistics()); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 41fc6510743..6b306df7810 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -1,4 +1,3 @@ - /** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -26,11 +25,11 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; import java.net.Socket; +import java.net.URI; import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; - import javax.net.SocketFactory; import org.apache.commons.logging.Log; @@ -87,6 +86,9 @@ import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.retry.FailoverProxyProvider; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.ipc.Client; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RemoteException; @@ -96,6 +98,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.ReflectionUtils; /******************************************************** * DFSClient can connect to a Hadoop Filesystem and @@ -199,7 +202,7 @@ public class DFSClient implements java.io.Closeable { */ private final Map filesBeingWritten = new HashMap(); - + /** * Same as this(NameNode.getAddress(conf), conf); * @see #DFSClient(InetSocketAddress, Configuration) @@ -209,12 +212,16 @@ public class DFSClient implements java.io.Closeable { public DFSClient(Configuration conf) throws IOException { this(NameNode.getAddress(conf), conf); } + + public DFSClient(InetSocketAddress address, Configuration conf) throws IOException { + this(NameNode.getUri(address), conf); + } /** * Same as this(nameNodeAddr, conf, null); * @see #DFSClient(InetSocketAddress, Configuration, org.apache.hadoop.fs.FileSystem.Statistics) */ - public DFSClient(InetSocketAddress nameNodeAddr, Configuration conf + public DFSClient(URI nameNodeAddr, 
Configuration conf ) throws IOException { this(nameNodeAddr, conf, null); } @@ -223,17 +230,17 @@ public class DFSClient implements java.io.Closeable { * Same as this(nameNodeAddr, null, conf, stats); * @see #DFSClient(InetSocketAddress, ClientProtocol, Configuration, org.apache.hadoop.fs.FileSystem.Statistics) */ - public DFSClient(InetSocketAddress nameNodeAddr, Configuration conf, + public DFSClient(URI nameNodeAddr, Configuration conf, FileSystem.Statistics stats) throws IOException { this(nameNodeAddr, null, conf, stats); } - + /** * Create a new DFSClient connected to the given nameNodeAddr or rpcNamenode. * Exactly one of nameNodeAddr or rpcNamenode must be null. */ - DFSClient(InetSocketAddress nameNodeAddr, ClientProtocol rpcNamenode, + DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode, Configuration conf, FileSystem.Statistics stats) throws IOException { // Copy only the required DFSClient configuration @@ -246,20 +253,45 @@ public class DFSClient implements java.io.Closeable { // The hdfsTimeout is currently the same as the ipc timeout this.hdfsTimeout = Client.getTimeout(conf); this.ugi = UserGroupInformation.getCurrentUser(); - final String authority = nameNodeAddr == null? "null": - nameNodeAddr.getHostName() + ":" + nameNodeAddr.getPort(); + + final String authority = nameNodeUri == null? "null": nameNodeUri.getAuthority(); this.leaserenewer = LeaseRenewer.getInstance(authority, ugi, this); this.clientName = leaserenewer.getClientName(dfsClientConf.taskId); + this.socketCache = new SocketCache(dfsClientConf.socketCacheCapacity); - if (nameNodeAddr != null && rpcNamenode == null) { - this.namenode = DFSUtil.createNamenode(nameNodeAddr, conf); - } else if (nameNodeAddr == null && rpcNamenode != null) { + + Class failoverProxyProviderClass = getFailoverProxyProviderClass(authority, conf); + + if (nameNodeUri != null && failoverProxyProviderClass != null) { + FailoverProxyProvider failoverProxyProvider = (FailoverProxyProvider) + ReflectionUtils.newInstance(failoverProxyProviderClass, conf); + this.namenode = (ClientProtocol)RetryProxy.create(ClientProtocol.class, + failoverProxyProvider, RetryPolicies.failoverOnNetworkException(1)); + } else if (nameNodeUri != null && rpcNamenode == null) { + this.namenode = DFSUtil.createNamenode(NameNode.getAddress(nameNodeUri), conf); + } else if (nameNodeUri == null && rpcNamenode != null) { //This case is used for testing. this.namenode = rpcNamenode; } else { throw new IllegalArgumentException( "Expecting exactly one of nameNodeAddr and rpcNamenode being null: " - + "nameNodeAddr=" + nameNodeAddr + ", rpcNamenode=" + rpcNamenode); + + "nameNodeAddr=" + nameNodeUri + ", rpcNamenode=" + rpcNamenode); + } + } + + private Class getFailoverProxyProviderClass(String authority, Configuration conf) + throws IOException { + String configKey = DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." 
+ authority; + try { + return conf.getClass(configKey, null); + } catch (RuntimeException e) { + if (e.getCause() instanceof ClassNotFoundException) { + throw new IOException("Could not load failover proxy provider class " + + conf.get(configKey) + " which is configured for authority " + authority, + e); + } else { + throw e; + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index f92064239c0..fdf38ed7066 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -46,6 +46,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_POLICY_DEFAULT = "DEFAULT"; public static final String DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY = "dfs.client.socketcache.capacity"; public static final int DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT = 16; + public static final String DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX = "dfs.client.failover.proxy.provider"; public static final String DFS_NAMENODE_BACKUP_ADDRESS_KEY = "dfs.namenode.backup.address"; public static final String DFS_NAMENODE_BACKUP_ADDRESS_DEFAULT = "localhost:50100"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 6ae4a13952a..8a1baf2b68b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -617,15 +617,19 @@ public class DFSUtil { } /** Create a {@link NameNode} proxy */ - public static ClientProtocol createNamenode( InetSocketAddress nameNodeAddr, + public static ClientProtocol createNamenode(InetSocketAddress nameNodeAddr, Configuration conf) throws IOException { - return createNamenode(createRPCNamenode(nameNodeAddr, conf, - UserGroupInformation.getCurrentUser())); - + return createNamenode(nameNodeAddr, conf, UserGroupInformation.getCurrentUser()); + } + + /** Create a {@link NameNode} proxy */ + public static ClientProtocol createNamenode(InetSocketAddress nameNodeAddr, + Configuration conf, UserGroupInformation ugi) throws IOException { + return createNamenode(createRPCNamenode(nameNodeAddr, conf, ugi)); } /** Create a {@link NameNode} proxy */ - static ClientProtocol createRPCNamenode(InetSocketAddress nameNodeAddr, + public static ClientProtocol createRPCNamenode(InetSocketAddress nameNodeAddr, Configuration conf, UserGroupInformation ugi) throws IOException { return (ClientProtocol)RPC.getProxy(ClientProtocol.class, @@ -634,7 +638,7 @@ public class DFSUtil { } /** Create a {@link NameNode} proxy */ - static ClientProtocol createNamenode(ClientProtocol rpcNamenode) + public static ClientProtocol createNamenode(ClientProtocol rpcNamenode) throws IOException { RetryPolicy createPolicy = RetryPolicies.retryUpToMaximumCountWithFixedSleep( 5, HdfsConstants.LEASE_SOFTLIMIT_PERIOD, TimeUnit.MILLISECONDS); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index 4d12efe5fcf..52343c3834b 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -106,8 +106,7 @@ public class DistributedFileSystem extends FileSystem { throw new IOException("Incomplete HDFS URI, no host: "+ uri); } - InetSocketAddress namenode = NameNode.getAddress(uri.getAuthority()); - this.dfs = new DFSClient(namenode, conf, statistics); + this.dfs = new DFSClient(uri, conf, statistics); this.uri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" + uri.getAuthority()); this.workingDir = getHomeDirectory(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index e69a2727b45..262c1e3e04d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException; import org.apache.hadoop.hdfs.server.namenode.SafeModeException; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.retry.Idempotent; import org.apache.hadoop.ipc.VersionedProtocol; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.KerberosInfo; @@ -99,6 +100,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws IOException If an I/O error occurred */ @Nullable + @Idempotent public LocatedBlocks getBlockLocations(String src, long offset, long length) @@ -249,7 +251,7 @@ public interface ClientProtocol extends VersionedProtocol { UnresolvedLinkException, IOException; /** - * The client can give up on a blcok by calling abandonBlock(). + * The client can give up on a block by calling abandonBlock(). * The client can then * either obtain a new block, or complete or abandon the file. * Any partial writes to the block will be discarded. 
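The client-side failover added by this patch is driven purely by configuration: DFSClient looks up a FailoverProxyProvider class under the dfs.client.failover.proxy.provider prefix and wraps it in a retry proxy, and the ConfiguredFailoverProxyProvider introduced below reads its list of NameNodes from dfs.ha.namenode.addresses. The following is a minimal usage sketch modeled on the new TestDFSClientFailover; the logical service name and the NameNode hosts/ports are illustrative assumptions.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;

public class ClientFailoverConfigSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // RPC addresses of the two NameNodes the provider will try
    // (hosts and ports are illustrative).
    conf.set(ConfiguredFailoverProxyProvider.CONFIGURED_NAMENODE_ADDRESSES,
        "hdfs://nn1.example.com:8020,hdfs://nn2.example.com:8020");
    // Bind the provider to a logical authority; "my-ha-service" is made up.
    conf.set("dfs.client.failover.proxy.provider.my-ha-service",
        ConfiguredFailoverProxyProvider.class.getName());
    // Clients address the logical service instead of a single NameNode;
    // on a network fault the retry proxy fails over to the other address.
    FileSystem fs = FileSystem.get(new URI("hdfs://my-ha-service"), conf);
    System.out.println(fs.getUri());
    fs.close();
  }
}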
@@ -721,6 +723,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws IOException If an I/O error occurred */ @Nullable + @Idempotent public HdfsFileStatus getFileInfo(String src) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 0efa268e313..2b7c765ef4c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -267,7 +267,7 @@ public class NameNode { * @param filesystemURI * @return address of file system */ - static InetSocketAddress getAddress(URI filesystemURI) { + public static InetSocketAddress getAddress(URI filesystemURI) { String authority = filesystemURI.getAuthority(); if (authority == null) { throw new IllegalArgumentException(String.format( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java new file mode 100644 index 00000000000..987f345ae7c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -0,0 +1,140 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.io.retry.FailoverProxyProvider; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.security.UserGroupInformation; + +/** + * A FailoverProxyProvider implementation which allows one to configure two URIs + * to connect to during fail-over. The first configured address is tried first, + * and on a fail-over event the other address is tried. 
+ */ +public class ConfiguredFailoverProxyProvider implements FailoverProxyProvider, + Configurable { + + public static final String CONFIGURED_NAMENODE_ADDRESSES + = "dfs.ha.namenode.addresses"; + + private static final Log LOG = + LogFactory.getLog(ConfiguredFailoverProxyProvider.class); + + private Configuration conf; + private int currentProxyIndex = 0; + private List proxies = new ArrayList(); + private UserGroupInformation ugi; + + @Override + public Class getInterface() { + return ClientProtocol.class; + } + + /** + * Lazily initialize the RPC proxy object. + */ + @Override + public synchronized Object getProxy() { + AddressRpcProxyPair current = proxies.get(currentProxyIndex); + if (current.namenode == null) { + try { + current.namenode = DFSUtil.createRPCNamenode(current.address, conf, ugi); + } catch (IOException e) { + LOG.error("Failed to create RPC proxy to NameNode", e); + throw new RuntimeException(e); + } + } + return current.namenode; + } + + @Override + public synchronized void performFailover(Object currentProxy) { + currentProxyIndex = (currentProxyIndex + 1) % proxies.size(); + } + + @Override + public synchronized Configuration getConf() { + return conf; + } + + @Override + public synchronized void setConf(Configuration conf) { + this.conf = conf; + try { + ugi = UserGroupInformation.getCurrentUser(); + + Collection addresses = conf.getTrimmedStringCollection( + CONFIGURED_NAMENODE_ADDRESSES); + if (addresses == null || addresses.size() == 0) { + throw new RuntimeException(this.getClass().getSimpleName() + + " is configured but " + CONFIGURED_NAMENODE_ADDRESSES + + " is not set."); + } + for (String address : addresses) { + proxies.add(new AddressRpcProxyPair( + NameNode.getAddress(new URI(address).getAuthority()))); + } + } catch (IOException e) { + throw new RuntimeException(e); + } catch (URISyntaxException e) { + throw new RuntimeException("Malformed URI set in " + + CONFIGURED_NAMENODE_ADDRESSES, e); + } + } + + /** + * A little pair object to store the address and connected RPC proxy object to + * an NN. Note that {@link AddressRpcProxyPair#namenode} may be null. + */ + private static class AddressRpcProxyPair { + public InetSocketAddress address; + public ClientProtocol namenode; + + public AddressRpcProxyPair(InetSocketAddress address) { + this.address = address; + } + } + + /** + * Close all the proxy objects which have been opened over the lifetime of + * this proxy provider. + */ + @Override + public synchronized void close() throws IOException { + for (AddressRpcProxyPair proxy : proxies) { + if (proxy.namenode != null) { + RPC.stopProxy(proxy.namenode); + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java new file mode 100644 index 00000000000..5ac38c6a8fa --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX; + +import java.io.IOException; +import java.io.OutputStream; +import java.net.InetSocketAddress; +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestDFSClientFailover { + + private static final Path TEST_FILE = new Path("/tmp/failover-test-file"); + private static final int FILE_LENGTH_TO_VERIFY = 100; + + private Configuration conf = new Configuration(); + private MiniDFSCluster cluster; + + @Before + public void setUpCluster() throws IOException { + cluster = new MiniDFSCluster.Builder(conf).numNameNodes(2).build(); + cluster.waitActive(); + } + + @After + public void tearDownCluster() throws IOException { + cluster.shutdown(); + } + + // TODO(HA): This test should probably be made to fail if a client fails over + // to talk to an NN with a different block pool id. Once failover between + // active/standy in a single block pool is implemented, this test should be + // changed to exercise that. + @Test + public void testDfsClientFailover() throws IOException, URISyntaxException { + final String nameServiceId = "name-service-uri"; + InetSocketAddress nnAddr1 = cluster.getNameNode(0).getNameNodeAddress(); + InetSocketAddress nnAddr2 = cluster.getNameNode(1).getNameNodeAddress(); + + ClientProtocol nn1 = DFSUtil.createNamenode(nnAddr1, conf); + ClientProtocol nn2 = DFSUtil.createNamenode(nnAddr2, conf); + + DFSClient dfsClient1 = new DFSClient(null, nn1, conf, null); + DFSClient dfsClient2 = new DFSClient(null, nn2, conf, null); + + OutputStream out1 = dfsClient1.create(TEST_FILE.toString(), false); + OutputStream out2 = dfsClient2.create(TEST_FILE.toString(), false); + AppendTestUtil.write(out1, 0, FILE_LENGTH_TO_VERIFY); + AppendTestUtil.write(out2, 0, FILE_LENGTH_TO_VERIFY); + out1.close(); + out2.close(); + + String address1 = "hdfs://" + nnAddr1.getHostName() + ":" + nnAddr1.getPort(); + String address2 = "hdfs://" + nnAddr2.getHostName() + ":" + nnAddr2.getPort(); + conf.set(ConfiguredFailoverProxyProvider.CONFIGURED_NAMENODE_ADDRESSES, + address1 + "," + address2); + + conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + nameServiceId, + ConfiguredFailoverProxyProvider.class.getName()); + + FileSystem fs = FileSystem.get(new URI("hdfs://" + nameServiceId), conf); + + AppendTestUtil.check(fs, TEST_FILE, FILE_LENGTH_TO_VERIFY); + cluster.getNameNode(0).stop(); + AppendTestUtil.check(fs, TEST_FILE, FILE_LENGTH_TO_VERIFY); + + fs.close(); + } + +} \ No newline at end of file From f00198b16c529bafeb8460427f12de69401941c3 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Tue, 11 Oct 2011 20:44:34 +0000 Subject: [PATCH 014/177] HDFS-2301. 
Start/stop appropriate namenode services when transition to active and standby states. Contributed by Suresh Srinivas. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1182080 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/BackupNode.java | 9 +- .../hdfs/server/namenode/FSNamesystem.java | 121 ++++++++++---- .../hadoop/hdfs/server/namenode/NameNode.java | 153 ++++++++++++------ .../hdfs/server/namenode/ha/ActiveState.java | 28 +++- .../hdfs/server/namenode/ha/HAContext.java | 30 ++++ .../hdfs/server/namenode/ha/HAState.java | 27 ++-- .../hdfs/server/namenode/ha/StandbyState.java | 26 ++- 8 files changed, 274 insertions(+), 122 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 4a847593381..66266911ead 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -11,3 +11,5 @@ HDFS-1974. Introduce active and standy states to the namenode. (suresh) HDFS-2407. getServerDefaults and getStats don't check operation category (atm) HDFS-1973. HA: HDFS clients must handle namenode failover and switch over to the new active namenode. (atm) + +HDFS-2301. Start/stop appropriate namenode services when transition to active and standby states. (suresh) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index 8a736572c0f..1e8be5b7075 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -81,13 +81,13 @@ public class BackupNode extends NameNode { // Common NameNode methods implementation for backup node. ///////////////////////////////////////////////////// @Override // NameNode - protected InetSocketAddress getRpcServerAddress(Configuration conf) throws IOException { + protected InetSocketAddress getRpcServerAddress(Configuration conf) { String addr = conf.get(BN_ADDRESS_NAME_KEY, BN_ADDRESS_DEFAULT); return NetUtils.createSocketAddr(addr); } @Override - protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) throws IOException { + protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) { String addr = conf.get(BN_SERVICE_RPC_ADDRESS_KEY); if (addr == null || addr.isEmpty()) { return null; @@ -135,11 +135,6 @@ public class BackupNode extends NameNode { CommonConfigurationKeys.FS_TRASH_INTERVAL_DEFAULT); NamespaceInfo nsInfo = handshake(conf); super.initialize(conf); - // Backup node should never do lease recovery, - // therefore lease hard limit should never expire. 
- namesystem.leaseManager.setLeasePeriod( - HdfsConstants.LEASE_SOFTLIMIT_PERIOD, Long.MAX_VALUE); - clusterId = nsInfo.getClusterID(); blockPoolId = nsInfo.getBlockPoolID(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 4851796cea4..29e76f7db02 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -130,7 +130,6 @@ import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.DelegationKey; import org.apache.hadoop.util.Daemon; -import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.VersionInfo; import org.mortbay.util.ajax.JSON; @@ -347,28 +346,30 @@ public class FSNamesystem implements Namesystem, FSClusterStats, dir.imageLoadComplete(); } - void activateSecretManager() throws IOException { + void startSecretManager() throws IOException { if (dtSecretManager != null) { dtSecretManager.startThreads(); } } - /** - * Activate FSNamesystem daemons. + void stopSecretManager() { + if (dtSecretManager != null) { + dtSecretManager.stopThreads(); + } + } + + /** + * Start services common to both active and standby states + * @throws IOException */ - void activate(Configuration conf) throws IOException { + void startCommonServices(Configuration conf) throws IOException { this.registerMBean(); // register the MBean for the FSNamesystemState - writeLock(); try { nnResourceChecker = new NameNodeResourceChecker(conf); checkAvailableResources(); - setBlockTotal(); blockManager.activate(conf); - - this.lmthread = new Daemon(leaseManager.new Monitor()); - lmthread.start(); this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); nnrmthread.start(); } finally { @@ -378,7 +379,70 @@ public class FSNamesystem implements Namesystem, FSClusterStats, registerMXBean(); DefaultMetricsSystem.instance().register(this); } + + /** + * Stop services common to both active and standby states + * @throws IOException + */ + void stopCommonServices() { + writeLock(); + try { + if (blockManager != null) blockManager.close(); + if (nnrmthread != null) nnrmthread.interrupt(); + } finally { + writeUnlock(); + } + } + + /** + * Start services required in active state + * @throws IOException + */ + void startActiveServices() throws IOException { + LOG.info("Starting services required for active state"); + writeLock(); + try { + startSecretManager(); + lmthread = new Daemon(leaseManager.new Monitor()); + lmthread.start(); + } finally { + writeUnlock(); + } + } + + /** + * Start services required in active state + * @throws InterruptedException + */ + void stopActiveServices() { + LOG.info("Stopping services started for active state"); + writeLock(); + try { + stopSecretManager(); + if (lmthread != null) { + try { + lmthread.interrupt(); + lmthread.join(3000); + } catch (InterruptedException ie) { + LOG.warn("Encountered exception ", ie); + } + lmthread = null; + } + } finally { + writeUnlock(); + } + } + + /** Start services required in standby state */ + void startStandbyServices() { + LOG.info("Starting services required for standby state"); + } + /** Stop services required in standby state */ + void stopStandbyServices() { + LOG.info("Stopping services 
started for standby state"); + } + public static Collection getNamespaceDirs(Configuration conf) { return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY); } @@ -502,7 +566,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } /** - * Version of {@see #getNamespaceInfo()} that is not protected by a lock. + * Version of @see #getNamespaceInfo() that is not protected by a lock. */ NamespaceInfo unprotectedGetNamespaceInfo() { return new NamespaceInfo(dir.fsImage.getStorage().getNamespaceID(), @@ -519,23 +583,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void close() { fsRunning = false; try { - if (blockManager != null) blockManager.close(); + stopCommonServices(); if (smmthread != null) smmthread.interrupt(); - if (dtSecretManager != null) dtSecretManager.stopThreads(); - if (nnrmthread != null) nnrmthread.interrupt(); - } catch (Exception e) { - LOG.warn("Exception shutting down FSNamesystem", e); } finally { // using finally to ensure we also wait for lease daemon try { - if (lmthread != null) { - lmthread.interrupt(); - lmthread.join(3000); - } + stopActiveServices(); + stopStandbyServices(); if (dir != null) { dir.close(); } - } catch (InterruptedException ie) { } catch (IOException ie) { LOG.error("Error closing FSDirectory", ie); IOUtils.cleanup(LOG, dir); @@ -1386,7 +1443,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { lb = startFileInternal(src, null, holder, clientMachine, EnumSet.of(CreateFlag.APPEND), - false, blockManager.maxReplication, (long)0); + false, blockManager.maxReplication, 0); } finally { writeUnlock(); } @@ -1469,7 +1526,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, fileLength = pendingFile.computeContentSummary().getLength(); blockSize = pendingFile.getPreferredBlockSize(); clientNode = pendingFile.getClientNode(); - replication = (int)pendingFile.getReplication(); + replication = pendingFile.getReplication(); } finally { writeUnlock(); } @@ -2264,7 +2321,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } Lease reassignLeaseInternal(Lease lease, String src, String newHolder, - INodeFileUnderConstruction pendingFile) throws IOException { + INodeFileUnderConstruction pendingFile) { assert hasWriteLock(); pendingFile.setClientName(newHolder); return leaseManager.reassignLease(lease, src, newHolder); @@ -2869,13 +2926,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @return true if in safe mode */ private synchronized boolean isOn() { - try { - assert isConsistent() : " SafeMode: Inconsistent filesystem state: " - + "Total num of blocks, active blocks, or " - + "total safe blocks don't match."; - } catch(IOException e) { - System.err.print(StringUtils.stringifyException(e)); - } + assert isConsistent() : " SafeMode: Inconsistent filesystem state: " + + "Total num of blocks, active blocks, or " + + "total safe blocks don't match."; return this.reached >= 0; } @@ -3029,7 +3082,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, this.blockTotal = total; this.blockThreshold = (int) (blockTotal * threshold); this.blockReplQueueThreshold = - (int) (((double) blockTotal) * replQueueThreshold); + (int) (blockTotal * replQueueThreshold); checkMode(); } @@ -3039,7 +3092,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @param replication current replication */ private synchronized void incrementSafeBlockCount(short replication) { - if ((int)replication == safeReplication) + if (replication == 
safeReplication) this.blockSafe++; checkMode(); } @@ -3172,7 +3225,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Checks consistency of the class state. * This is costly and currently called only in assert. */ - private boolean isConsistent() throws IOException { + private boolean isConsistent() { if (blockTotal == -1 && blockSafe == -1) { return true; // manual safe mode } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 2b7c765ef4c..f8a375001d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -38,15 +38,13 @@ import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Trash; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import org.apache.hadoop.hdfs.DFSUtil; -import org.apache.hadoop.hdfs.HDFSPolicyProvider; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocol.UnregisteredNodeException; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; -import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; +import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; import org.apache.hadoop.hdfs.server.namenode.ha.HAState; import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; @@ -55,9 +53,6 @@ import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; -import org.apache.hadoop.hdfs.server.protocol.NodeRegistration; -import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.ipc.Server; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.RefreshUserMappingsProtocol; @@ -172,19 +167,18 @@ public class NameNode { } } - - public static final int DEFAULT_PORT = 8020; - public static final Log LOG = LogFactory.getLog(NameNode.class.getName()); public static final Log stateChangeLog = LogFactory.getLog("org.apache.hadoop.hdfs.StateChange"); public static final HAState ACTIVE_STATE = new ActiveState(); public static final HAState STANDBY_STATE = new StandbyState(); protected FSNamesystem namesystem; + protected final Configuration conf; protected NamenodeRole role; private HAState state; private final boolean haEnabled; + private final HAContext haContext; /** httpServer */ @@ -313,12 +307,11 @@ public class NameNode { * Given a configuration get the address of the service rpc server * If the service rpc is not configured returns null */ - protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) - throws IOException { + protected InetSocketAddress getServiceRpcServerAddress(Configuration conf) { return NameNode.getServiceAddress(conf, false); } - protected InetSocketAddress getRpcServerAddress(Configuration conf) throws IOException { + 
protected InetSocketAddress getRpcServerAddress(Configuration conf) { return getAddress(conf); } @@ -396,7 +389,7 @@ public class NameNode { throw e; } - activate(conf); + startCommonServices(conf); } /** @@ -430,19 +423,11 @@ public class NameNode { } } - /** - * Activate name-node servers and threads. - */ - void activate(Configuration conf) throws IOException { - if ((isRole(NamenodeRole.NAMENODE)) - && (UserGroupInformation.isSecurityEnabled())) { - namesystem.activateSecretManager(); - } - namesystem.activate(conf); + /** Start the services common to active and standby states */ + private void startCommonServices(Configuration conf) throws IOException { + namesystem.startCommonServices(conf); startHttpServer(conf); rpcServer.start(); - startTrashEmptier(conf); - plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY, ServicePlugin.class); for (ServicePlugin p: plugins) { @@ -452,13 +437,28 @@ public class NameNode { LOG.warn("ServicePlugin " + p + " could not be started", t); } } - LOG.info(getRole() + " up at: " + rpcServer.getRpcAddress()); if (rpcServer.getServiceRpcAddress() != null) { - LOG.info(getRole() + " service server is up at: " + rpcServer.getServiceRpcAddress()); + LOG.info(getRole() + " service server is up at: " + + rpcServer.getServiceRpcAddress()); } } - + + private void stopCommonServices() { + if(namesystem != null) namesystem.close(); + if(rpcServer != null) rpcServer.stop(); + if (plugins != null) { + for (ServicePlugin p : plugins) { + try { + p.stop(); + } catch (Throwable t) { + LOG.warn("ServicePlugin " + p + " could not be stopped", t); + } + } + } + stopHttpServer(); + } + private void startTrashEmptier(Configuration conf) throws IOException { long trashInterval = conf.getLong(CommonConfigurationKeys.FS_TRASH_INTERVAL_KEY, @@ -470,11 +470,26 @@ public class NameNode { this.emptier.start(); } + private void stopTrashEmptier() { + if (this.emptier != null) { + emptier.interrupt(); + emptier = null; + } + } + private void startHttpServer(final Configuration conf) throws IOException { httpServer = new NameNodeHttpServer(conf, this, getHttpServerAddress(conf)); httpServer.start(); setHttpServerAddress(conf); } + + private void stopHttpServer() { + try { + if (httpServer != null) httpServer.stop(); + } catch (Exception e) { + LOG.error("Exception while stopping httpserver", e); + } + } /** * Start NameNode. @@ -509,18 +524,28 @@ public class NameNode { protected NameNode(Configuration conf, NamenodeRole role) throws IOException { + this.conf = conf; this.role = role; this.haEnabled = DFSUtil.isHAEnabled(conf); - this.state = !haEnabled ? 
ACTIVE_STATE : STANDBY_STATE; + this.haContext = new NameNodeHAContext(); try { initializeGenericKeys(conf, getNameServiceId(conf)); initialize(conf); + if (!haEnabled) { + state = ACTIVE_STATE; + } else { + state = STANDBY_STATE;; + } + state.enterState(haContext); } catch (IOException e) { this.stop(); throw e; } catch (HadoopIllegalArgumentException e) { this.stop(); throw e; + } catch (ServiceFailedException e) { + this.stop(); + throw new IOException("Service failed to start", e); } } @@ -532,6 +557,7 @@ public class NameNode { try { this.rpcServer.join(); } catch (InterruptedException ie) { + LOG.info("Caught interrupted exception ", ie); } } @@ -544,23 +570,12 @@ public class NameNode { return; stopRequested = true; } - if (plugins != null) { - for (ServicePlugin p : plugins) { - try { - p.stop(); - } catch (Throwable t) { - LOG.warn("ServicePlugin " + p + " could not be stopped", t); - } - } - } try { - if (httpServer != null) httpServer.stop(); - } catch (Exception e) { - LOG.error("Exception while stopping httpserver", e); + state.exitState(haContext); + } catch (ServiceFailedException e) { + LOG.warn("Encountered exception while exiting state ", e); } - if(namesystem != null) namesystem.close(); - if(emptier != null) emptier.interrupt(); - if(rpcServer != null) rpcServer.stop(); + stopCommonServices(); if (metrics != null) { metrics.shutdown(); } @@ -876,27 +891,61 @@ public class NameNode { if (!haEnabled) { throw new ServiceFailedException("HA for namenode is not enabled"); } - state.setState(this, ACTIVE_STATE); + state.setState(haContext, ACTIVE_STATE); } synchronized void transitionToStandby() throws ServiceFailedException { if (!haEnabled) { throw new ServiceFailedException("HA for namenode is not enabled"); } - state.setState(this, STANDBY_STATE); + state.setState(haContext, STANDBY_STATE); } /** Check if an operation of given category is allowed */ protected synchronized void checkOperation(final OperationCategory op) throws UnsupportedActionException { - state.checkOperation(this, op); + state.checkOperation(haContext, op); } - public synchronized HAState getState() { - return state; - } - - public synchronized void setState(final HAState s) { - state = s; + /** + * Class used as expose {@link NameNode} as context to {@link HAState} + * + * TODO:HA + * When entering and exiting state, on failing to start services, + * appropriate action is needed todo either shutdown the node or recover + * from failure. 
+ */ + private class NameNodeHAContext implements HAContext { + @Override + public void setState(HAState s) { + state = s; + } + + @Override + public HAState getState() { + return state; + } + + @Override + public void startActiveServices() throws IOException { + namesystem.startActiveServices(); + startTrashEmptier(conf); + } + + @Override + public void stopActiveServices() throws IOException { + namesystem.stopActiveServices(); + stopTrashEmptier(); + } + + @Override + public void startStandbyServices() throws IOException { + // TODO:HA Start reading editlog from active + } + + @Override + public void stopStandbyServices() throws IOException { + // TODO:HA Stop reading editlog from active + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java index 1cf24f7f23a..e00df208d7b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; @@ -27,33 +30,42 @@ import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException; * service and handles operations of type {@link OperationCategory#WRITE} and * {@link OperationCategory#READ}. */ +@InterfaceAudience.Private public class ActiveState extends HAState { public ActiveState() { super("active"); } @Override - public void checkOperation(NameNode nn, OperationCategory op) + public void checkOperation(HAContext context, OperationCategory op) throws UnsupportedActionException { return; // Other than journal all operations are allowed in active state } @Override - public void setState(NameNode nn, HAState s) throws ServiceFailedException { + public void setState(HAContext context, HAState s) throws ServiceFailedException { if (s == NameNode.STANDBY_STATE) { - setStateInternal(nn, s); + setStateInternal(context, s); return; } - super.setState(nn, s); + super.setState(context, s); } @Override - protected void enterState(NameNode nn) throws ServiceFailedException { - // TODO:HA + public void enterState(HAContext context) throws ServiceFailedException { + try { + context.startActiveServices(); + } catch (IOException e) { + throw new ServiceFailedException("Failed to start active services", e); + } } @Override - protected void exitState(NameNode nn) throws ServiceFailedException { - // TODO:HA + public void exitState(HAContext context) throws ServiceFailedException { + try { + context.stopActiveServices(); + } catch (IOException e) { + throw new ServiceFailedException("Failed to stop active services", e); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java new file mode 100644 index 00000000000..58d7773d514 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java @@ -0,0 +1,30 @@ +package org.apache.hadoop.hdfs.server.namenode.ha; 
+ +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Context that is to be used by {@link HAState} for getting/setting the + * current state and performing required operations. + */ +@InterfaceAudience.Private +public interface HAContext { + /** Set the state of the context to given {@code state} */ + public void setState(HAState state); + + /** Get the state from the context */ + public HAState getState(); + + /** Start the services required in active state */ + public void startActiveServices() throws IOException; + + /** Stop the services when exiting active state */ + public void stopActiveServices() throws IOException; + + /** Start the services required in standby state */ + public void startStandbyServices() throws IOException; + + /** Stop the services when exiting standby state */ + public void stopStandbyServices() throws IOException; +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java index 1828f9c83db..6ee516c4cab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.ha.ServiceFailedException; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException; @@ -44,38 +43,38 @@ abstract public class HAState { * @param s new state * @throws ServiceFailedException on failure to transition to new state. */ - protected final void setStateInternal(final NameNode nn, final HAState s) + protected final void setStateInternal(final HAContext context, final HAState s) throws ServiceFailedException { - exitState(nn); - nn.setState(s); - s.enterState(nn); + exitState(context); + context.setState(s); + s.enterState(context); } /** * Method to be overridden by subclasses to perform steps necessary for * entering a state. - * @param nn Namenode + * @param context HA context * @throws ServiceFailedException on failure to enter the state. */ - protected abstract void enterState(final NameNode nn) + public abstract void enterState(final HAContext context) throws ServiceFailedException; /** * Method to be overridden by subclasses to perform steps necessary for * exiting a state. - * @param nn Namenode + * @param context HA context * @throws ServiceFailedException on failure to enter the state. */ - protected abstract void exitState(final NameNode nn) + public abstract void exitState(final HAContext context) throws ServiceFailedException; /** * Move from the existing state to a new state - * @param nn Namenode + * @param context HA context * @param s new state * @throws ServiceFailedException on failure to transition to new state. */ - public void setState(NameNode nn, HAState s) throws ServiceFailedException { + public void setState(HAContext context, HAState s) throws ServiceFailedException { if (this == s) { // Aleady in the new state return; } @@ -85,15 +84,15 @@ abstract public class HAState { /** * Check if an operation is supported in a given state. 
- * @param nn Namenode + * @param context HA context * @param op Type of the operation. * @throws UnsupportedActionException if a given type of operation is not * supported in this state. */ - public void checkOperation(final NameNode nn, final OperationCategory op) + public void checkOperation(final HAContext context, final OperationCategory op) throws UnsupportedActionException { String msg = "Operation category " + op + " is not supported in state " - + nn.getState(); + + context.getState(); throw new UnsupportedActionException(msg); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java index b63866dc713..a329934f582 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java @@ -17,6 +17,9 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.server.namenode.NameNode; @@ -31,28 +34,37 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode; * * It does not handle read/write/checkpoint operations. */ +@InterfaceAudience.Private public class StandbyState extends HAState { public StandbyState() { super("standby"); } @Override - public void setState(NameNode nn, HAState s) throws ServiceFailedException { + public void setState(HAContext context, HAState s) throws ServiceFailedException { if (s == NameNode.ACTIVE_STATE) { - setStateInternal(nn, s); + setStateInternal(context, s); return; } - super.setState(nn, s); + super.setState(context, s); } @Override - protected void enterState(NameNode nn) throws ServiceFailedException { - // TODO:HA + public void enterState(HAContext context) throws ServiceFailedException { + try { + context.startStandbyServices(); + } catch (IOException e) { + throw new ServiceFailedException("Failed to start standby services", e); + } } @Override - protected void exitState(NameNode nn) throws ServiceFailedException { - // TODO:HA + public void exitState(HAContext context) throws ServiceFailedException { + try { + context.stopStandbyServices(); + } catch (IOException e) { + throw new ServiceFailedException("Failed to stop standby services", e); + } } } From 7ca7832158333e4ddcd6914596ff7d781c9283fe Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 13 Oct 2011 00:09:55 +0000 Subject: [PATCH 015/177] HDFS-2231. Configuration changes for HA namenode. Contributed by Suresh Srinivas. 
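An illustrative sketch of the new configuration model (not part of the patch itself; the nameservice id, namenode ids, hostnames and example class are made up): per-namenode settings are looked up through keys suffixed as key.<nameserviceId>.<namenodeId>, built with DFSUtil.addKeySuffixes(), and HA is treated as enabled whenever dfs.ha.namenodes lists at least one namenode id.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;

// Hypothetical example class; shows how a two-NameNode HA setup for a single
// nameservice ("ns1") could be expressed with the keys added by this patch.
public class HAConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new HdfsConfiguration();
    conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, "ns1");
    conf.set(DFSConfigKeys.DFS_HA_NAMENODES_KEY, "nn1,nn2"); // non-empty list => HA enabled
    conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1");   // pin this node's id explicitly
    // Per-namenode RPC addresses use the suffixed key form, e.g.
    // dfs.namenode.rpc-address.ns1.nn1 (hostnames below are made up).
    conf.set(DFSUtil.addKeySuffixes(
        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn1"),
        "host1.example.com:8020");
    conf.set(DFSUtil.addKeySuffixes(
        DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn2"),
        "host2.example.com:8020");
    // dfs.ha.namenodes is non-empty, so HA is reported as enabled; the explicit
    // dfs.ha.namenode.id short-circuits the local-address matching in HAUtil.
    System.out.println(HAUtil.isHAEnabled(conf));    // true
    System.out.println(HAUtil.getNameNodeId(conf));  // nn1
  }
}

Without dfs.ha.namenode.id set, HAUtil.getNameNodeId() instead resolves the id by matching the suffixed RPC address keys against the local node's address, in the same way DFSUtil resolves the nameservice id for federation.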
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1182626 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 12 +- .../java/org/apache/hadoop/hdfs/DFSUtil.java | 325 ++++++++++++------ .../java/org/apache/hadoop/hdfs/HAUtil.java | 91 +++++ .../hadoop/hdfs/server/namenode/NameNode.java | 12 +- .../apache/hadoop/hdfs/MiniDFSCluster.java | 8 +- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 21 +- .../hdfs/server/namenode/TestCheckpoint.java | 4 +- .../apache/hadoop/hdfs/tools/TestGetConf.java | 2 +- 9 files changed, 344 insertions(+), 133 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 66266911ead..33ef72552bf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -13,3 +13,5 @@ HDFS-2407. getServerDefaults and getStats don't check operation category (atm) HDFS-1973. HA: HDFS clients must handle namenode failover and switch over to the new active namenode. (atm) HDFS-2301. Start/stop appropriate namenode services when transition to active and standby states. (suresh) + +HDFS-2231. Configuration changes for HA namenode. (suresh) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index fdf38ed7066..075de4b986c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -245,10 +245,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_DEFAULT_MAX_CORRUPT_FILES_RETURNED_KEY = "dfs.corruptfilesreturned.max"; public static final int DFS_DEFAULT_MAX_CORRUPT_FILES_RETURNED = 500; - // HA related configuration - public static final String DFS_HA_NAMENODE_IDS_KEY = "dfs.ha.namenode.ids"; - public static final String DFS_HA_NAMENODE_IDS_DEFAULT = ""; - // property for fsimage compression public static final String DFS_IMAGE_COMPRESS_KEY = "dfs.image.compress"; public static final boolean DFS_IMAGE_COMPRESS_DEFAULT = false; @@ -279,11 +275,15 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY = "dfs.namenode.name.cache.threshold"; public static final int DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT = 10; - public static final String DFS_FEDERATION_NAMESERVICES = "dfs.federation.nameservices"; - public static final String DFS_FEDERATION_NAMESERVICE_ID = "dfs.federation.nameservice.id"; + public static final String DFS_FEDERATION_NAMESERVICES = "dfs.federation.nameservices"; + public static final String DFS_FEDERATION_NAMESERVICE_ID = "dfs.federation.nameservice.id"; public static final String DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY = "dfs.namenode.resource.check.interval"; public static final int DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT = 5000; public static final String DFS_NAMENODE_DU_RESERVED_KEY = "dfs.namenode.resource.du.reserved"; public static final long DFS_NAMENODE_DU_RESERVED_DEFAULT = 1024 * 1024 * 100; // 100 MB public static final String DFS_NAMENODE_CHECKED_VOLUMES_KEY = "dfs.namenode.resource.checked.volumes"; + + // HA 
related configuration + public static final String DFS_HA_NAMENODES_KEY = "dfs.ha.namenodes"; + public static final String DFS_HA_NAMENODE_ID_KEY = "dfs.ha.namenode.id"; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 8a1baf2b68b..cdcca9e3d33 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -18,13 +18,7 @@ package org.apache.hadoop.hdfs; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_FEDERATION_NAMESERVICES; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY; - +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.InetSocketAddress; @@ -90,13 +84,20 @@ public class DFSUtil { a.isDecommissioned() ? 1 : -1; } }; + /** + * Address matcher for matching an address to local address + */ + static final AddressMatcher LOCAL_ADDRESS_MATCHER = new AddressMatcher() { + public boolean match(InetSocketAddress s) { + return NetUtils.isLocalAddress(s.getAddress()); + }; + }; /** * Whether the pathname is valid. Currently prohibits relative paths, * and names which contain a ":" or "/" */ public static boolean isValidName(String src) { - // Path must be absolute. if (!src.startsWith(Path.SEPARATOR)) { return false; @@ -298,6 +299,18 @@ public class DFSUtil { public static Collection getNameServiceIds(Configuration conf) { return conf.getStringCollection(DFS_FEDERATION_NAMESERVICES); } + + /** + * Namenode HighAvailability related configuration. + * Returns collection of namenode Ids from the configuration. One logical id + * for each namenode in the in the HA setup. + * + * @param conf configuration + * @return collection of namenode Ids + */ + public static Collection getNameNodeIds(Configuration conf) { + return conf.getStringCollection(DFS_HA_NAMENODES_KEY); + } /** * Given a list of keys in the order of preference, returns a value @@ -312,9 +325,7 @@ public class DFSUtil { Configuration conf, String... keys) { String value = null; for (String key : keys) { - if (keySuffix != null) { - key += "." + keySuffix; - } + key = addSuffix(key, keySuffix); value = conf.get(key); if (value != null) { break; @@ -326,6 +337,37 @@ public class DFSUtil { return value; } + /** Add non empty and non null suffix to a key */ + private static String addSuffix(String key, String suffix) { + if (suffix == null || suffix.length() == 0) { + return key; + } + if (!suffix.startsWith(".")) { + key += "."; + } + return key += suffix; + } + + /** Concatenate list of suffix strings '.' separated */ + private static String concatSuffixes(String... 
suffixes) { + if (suffixes == null) { + return null; + } + String ret = ""; + for (int i = 0; i < suffixes.length - 1; i++) { + ret = addSuffix(ret, suffixes[i]); + } + return addSuffix(ret, suffixes[suffixes.length - 1]); + } + + /** + * Return configuration key of format key.suffix1.suffix2...suffixN + */ + public static String addKeySuffixes(String key, String... suffixes) { + String keySuffix = concatSuffixes(suffixes); + return addSuffix(key, keySuffix); + } + /** * Returns list of InetSocketAddress for a given set of keys. * @param conf configuration @@ -336,19 +378,38 @@ public class DFSUtil { private static List getAddresses(Configuration conf, String defaultAddress, String... keys) { Collection nameserviceIds = getNameServiceIds(conf); + Collection namenodeIds = getNameNodeIds(conf); List isas = new ArrayList(); - // Configuration with a single namenode - if (nameserviceIds == null || nameserviceIds.isEmpty()) { + final boolean federationEnabled = nameserviceIds != null + && !nameserviceIds.isEmpty(); + final boolean haEnabled = namenodeIds != null + && !namenodeIds.isEmpty(); + + // Configuration with no federation and ha, return default address + if (!federationEnabled && !haEnabled) { String address = getConfValue(defaultAddress, null, conf, keys); if (address == null) { return null; } isas.add(NetUtils.createSocketAddr(address)); - } else { - // Get the namenodes for all the configured nameServiceIds - for (String nameserviceId : nameserviceIds) { - String address = getConfValue(null, nameserviceId, conf, keys); + return isas; + } + + if (!federationEnabled) { + nameserviceIds = new ArrayList(); + nameserviceIds.add(null); + } + if (!haEnabled) { + namenodeIds = new ArrayList(); + namenodeIds.add(null); + } + + // Get configuration suffixed with nameserviceId and/or namenodeId + for (String nameserviceId : nameserviceIds) { + for (String nnId : namenodeIds) { + String keySuffix = concatSuffixes(nameserviceId, nnId); + String address = getConfValue(null, keySuffix, conf, keys); if (address == null) { return null; } @@ -431,12 +492,12 @@ public class DFSUtil { } /** - * Given the InetSocketAddress for any configured communication with a - * namenode, this method returns the corresponding nameservice ID, - * by doing a reverse lookup on the list of nameservices until it - * finds a match. + * Given the InetSocketAddress this method returns the nameservice Id + * corresponding to the key with matching address, by doing a reverse + * lookup on the list of nameservices until it finds a match. + * * If null is returned, client should try {@link #isDefaultNamenodeAddress} - * to check pre-Federated configurations. + * to check pre-Federation, non-HA configurations. * Since the process of resolving URIs to Addresses is slightly expensive, * this utility method should not be used in performance-critical routines. * @@ -453,58 +514,43 @@ public class DFSUtil { * not the NameServiceId-suffixed keys. * @return nameserviceId, or null if no match found */ - public static String getNameServiceIdFromAddress(Configuration conf, - InetSocketAddress address, String... keys) { - Collection nameserviceIds = getNameServiceIds(conf); - + public static String getNameServiceIdFromAddress(final Configuration conf, + final InetSocketAddress address, String... 
keys) { // Configuration with a single namenode and no nameserviceId - if (nameserviceIds == null || nameserviceIds.isEmpty()) { - // client should try {@link isDefaultNamenodeAddress} instead + if (!isFederationEnabled(conf)) { return null; - } - // Get the candidateAddresses for all the configured nameServiceIds - for (String nameserviceId : nameserviceIds) { - for (String key : keys) { - String candidateAddress = conf.get( - getNameServiceIdKey(key, nameserviceId)); - if (candidateAddress != null - && address.equals(NetUtils.createSocketAddr(candidateAddress))) - return nameserviceId; - } - } - // didn't find a match - // client should try {@link isDefaultNamenodeAddress} instead - return null; + } + String[] ids = getSuffixIDs(conf, address, keys); + return (ids != null && ids.length > 0) ? ids[0] : null; } - + /** - * return server http or https address from the configuration + * return server http or https address from the configuration for a + * given namenode rpc address. * @param conf - * @param namenode - namenode address + * @param namenodeAddr - namenode RPC address * @param httpsAddress -If true, and if security is enabled, returns server * https address. If false, returns server http address. * @return server http or https address */ public static String getInfoServer( - InetSocketAddress namenode, Configuration conf, boolean httpsAddress) { + InetSocketAddress namenodeAddr, Configuration conf, boolean httpsAddress) { String httpAddress = null; - - String httpAddressKey = (UserGroupInformation.isSecurityEnabled() - && httpsAddress) ? DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_KEY - : DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY; - String httpAddressDefault = (UserGroupInformation.isSecurityEnabled() - && httpsAddress) ? DFSConfigKeys.DFS_NAMENODE_HTTPS_ADDRESS_DEFAULT - : DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_DEFAULT; - if(namenode != null) { + boolean securityOn = UserGroupInformation.isSecurityEnabled(); + String httpAddressKey = (securityOn && httpsAddress) ? + DFS_NAMENODE_HTTPS_ADDRESS_KEY : DFS_NAMENODE_HTTP_ADDRESS_KEY; + String httpAddressDefault = (securityOn && httpsAddress) ? + DFS_NAMENODE_HTTPS_ADDRESS_DEFAULT : DFS_NAMENODE_HTTP_ADDRESS_DEFAULT; + if (namenodeAddr != null) { // if non-default namenode, try reverse look up // the nameServiceID if it is available String nameServiceId = DFSUtil.getNameServiceIdFromAddress( - conf, namenode, + conf, namenodeAddr, DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); if (nameServiceId != null) { - httpAddress = conf.get(DFSUtil.getNameServiceIdKey( + httpAddress = conf.get(DFSUtil.addKeySuffixes( httpAddressKey, nameServiceId)); } } @@ -512,7 +558,6 @@ public class DFSUtil { if (httpAddress == null) { httpAddress = conf.get(httpAddressKey, httpAddressDefault); } - return httpAddress; } @@ -548,30 +593,27 @@ public class DFSUtil { return false; } - /** - * @return key specific to a nameserviceId from a generic key - */ - public static String getNameServiceIdKey(String key, String nameserviceId) { - return key + "." + nameserviceId; - } - /** * Sets the node specific setting into generic configuration key. Looks up - * value of "key.nameserviceId" and if found sets that value into generic key - * in the conf. Note that this only modifies the runtime conf. + * value of "key.nameserviceId.namenodeId" and if found sets that value into + * generic key in the conf. Note that this only modifies the runtime conf. 
* * @param conf * Configuration object to lookup specific key and to set the value * to the key passed. Note the conf object is modified. * @param nameserviceId - * nameservice Id to construct the node specific key. + * nameservice Id to construct the node specific key. Pass null if + * federation is not configuration. + * @param nnId + * namenode Id to construct the node specific key. Pass null if + * HA is not configured. * @param keys * The key for which node specific value is looked up */ public static void setGenericConf(Configuration conf, - String nameserviceId, String... keys) { + String nameserviceId, String nnId, String... keys) { for (String key : keys) { - String value = conf.get(getNameServiceIdKey(key, nameserviceId)); + String value = conf.get(addKeySuffixes(key, nameserviceId, nnId)); if (value != null) { conf.set(key, value); } @@ -580,12 +622,12 @@ public class DFSUtil { /** Return used as percentage of capacity */ public static float getPercentUsed(long used, long capacity) { - return capacity <= 0 ? 100 : ((float)used * 100.0f)/(float)capacity; + return capacity <= 0 ? 100 : (used * 100.0f)/capacity; } /** Return remaining as percentage of capacity */ public static float getPercentRemaining(long remaining, long capacity) { - return capacity <= 0 ? 0 : ((float)remaining * 100.0f)/(float)capacity; + return capacity <= 0 ? 0 : (remaining * 100.0f)/capacity; } /** @@ -687,23 +729,21 @@ public class DFSUtil { UserGroupInformation ticket = UserGroupInformation .createRemoteUser(locatedBlock.getBlock().getLocalBlock().toString()); ticket.addToken(locatedBlock.getBlockToken()); - return (ClientDatanodeProtocol)RPC.getProxy(ClientDatanodeProtocol.class, + return RPC.getProxy(ClientDatanodeProtocol.class, ClientDatanodeProtocol.versionID, addr, ticket, confWithNoIpcIdle, NetUtils.getDefaultSocketFactory(conf), socketTimeout); } /** - * Returns true if HA for namenode is configured. - * @param conf Configuration - * @return true if HA is configured in the configuration; else false. + * Returns true if federation configuration is enabled */ - public static boolean isHAEnabled(Configuration conf) { - // TODO:HA configuration changes pending - return false; + public static boolean isFederationEnabled(Configuration conf) { + Collection collection = getNameServiceIds(conf); + return collection != null && collection.size() != 0; } /** - * Get name service Id for the {@link NameNode} based on namenode RPC address + * Get nameservice Id for the {@link NameNode} based on namenode RPC address * matching the local node address. */ public static String getNamenodeNameServiceId(Configuration conf) { @@ -711,7 +751,7 @@ public class DFSUtil { } /** - * Get name service Id for the BackupNode based on backup node RPC address + * Get nameservice Id for the BackupNode based on backup node RPC address * matching the local node address. */ public static String getBackupNameServiceId(Configuration conf) { @@ -719,7 +759,7 @@ public class DFSUtil { } /** - * Get name service Id for the secondary node based on secondary http address + * Get nameservice Id for the secondary node based on secondary http address * matching the local node address. */ public static String getSecondaryNameServiceId(Configuration conf) { @@ -732,12 +772,12 @@ public class DFSUtil { * * If {@link DFSConfigKeys#DFS_FEDERATION_NAMESERVICE_ID} is not specifically * configured, this method determines the nameservice Id by matching the local - * nodes address with the configured addresses. 
When a match is found, it + * node's address with the configured addresses. When a match is found, it * returns the nameservice Id from the corresponding configuration key. * * @param conf Configuration * @param addressKey configuration key to get the address. - * @return name service Id on success, null on failure. + * @return nameservice Id on success, null if federation is not configured. * @throws HadoopIllegalArgumentException on error */ private static String getNameServiceId(Configuration conf, String addressKey) { @@ -745,33 +785,104 @@ public class DFSUtil { if (nameserviceId != null) { return nameserviceId; } - - Collection ids = getNameServiceIds(conf); - if (ids == null || ids.size() == 0) { - // Not federation configuration, hence no nameservice Id + if (!isFederationEnabled(conf)) { return null; } - - // Match the rpc address with that of local address - int found = 0; - for (String id : ids) { - String addr = conf.get(getNameServiceIdKey(addressKey, id)); - InetSocketAddress s = NetUtils.createSocketAddr(addr); - if (NetUtils.isLocalAddress(s.getAddress())) { - nameserviceId = id; - found++; - } - } - if (found > 1) { // Only one address must match the local address - throw new HadoopIllegalArgumentException( - "Configuration has multiple RPC addresses that matches " - + "the local node's address. Please configure the system with " - + "the parameter " + DFS_FEDERATION_NAMESERVICE_ID); - } - if (found == 0) { - throw new HadoopIllegalArgumentException("Configuration address " - + addressKey + " is missing in configuration with name service Id"); + nameserviceId = getSuffixIDs(conf, addressKey, LOCAL_ADDRESS_MATCHER)[0]; + if (nameserviceId == null) { + String msg = "Configuration " + addressKey + " must be suffixed with" + + " nameserviceId for federation configuration."; + throw new HadoopIllegalArgumentException(msg); } return nameserviceId; } + + /** + * Returns nameservice Id and namenode Id when the local host matches the + * configuration parameter {@code addressKey}.. + * + * @param conf Configuration + * @param addressKey configuration key corresponding to the address. + * @param matcher matching criteria for matching the address + * @return Array with nameservice Id and namenode Id on success. First element + * in the array is nameservice Id and second element is namenode Id. + * Null value indicates that the configuration does not have the the + * Id. 
+ * @throws HadoopIllegalArgumentException on error + */ + static String[] getSuffixIDs(final Configuration conf, final String addressKey, + final AddressMatcher matcher) { + Collection nsIds = getNameServiceIds(conf); + boolean federationEnabled = true; + if (nsIds == null || nsIds.size() == 0) { + federationEnabled = false; // federation not configured + nsIds = new ArrayList(); + nsIds.add(null); + } + + boolean haEnabled = true; + Collection nnIds = getNameNodeIds(conf); + if (nnIds == null || nnIds.size() == 0) { + haEnabled = false; // HA not configured + nnIds = new ArrayList(); + nnIds.add(null); + } + + // Match the address from addressKey.nsId.nnId based on the given matcher + String nameserviceId = null; + String namenodeId = null; + int found = 0; + for (String nsId : nsIds) { + for (String nnId : nnIds) { + String key = addKeySuffixes(addressKey, nsId, nnId); + String addr = conf.get(key); + InetSocketAddress s = null; + try { + s = NetUtils.createSocketAddr(addr); + } catch (Exception e) { + continue; + } + if (matcher.match(s)) { + nameserviceId = nsId; + namenodeId = nnId; + found++; + } + } + } + if (found > 1) { // Only one address must match the local address + String msg = "Configuration has multiple addresses that match " + + "local node's address. Please configure the system with " + + (federationEnabled ? DFS_FEDERATION_NAMESERVICE_ID : "") + + (haEnabled ? (" and " + DFS_HA_NAMENODE_ID_KEY) : ""); + throw new HadoopIllegalArgumentException(msg); + } + return new String[] { nameserviceId, namenodeId }; + } + + /** + * For given set of {@code keys} adds nameservice Id and or namenode Id + * and returns {nameserviceId, namenodeId} when address match is found. + * @see #getSuffixIDs(Configuration, String, AddressMatcher) + */ + static String[] getSuffixIDs(final Configuration conf, + final InetSocketAddress address, final String... keys) { + AddressMatcher matcher = new AddressMatcher() { + @Override + public boolean match(InetSocketAddress s) { + return address.equals(s); + } + }; + + for (String key : keys) { + String[] ids = getSuffixIDs(conf, key, matcher); + if (ids != null && (ids [0] != null || ids[1] != null)) { + return ids; + } + } + return null; + } + + private interface AddressMatcher { + public boolean match(InetSocketAddress s); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java new file mode 100644 index 00000000000..3824a7439f8 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; + +import java.net.InetSocketAddress; +import java.util.Collection; + +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.conf.Configuration; + +public class HAUtil { + private HAUtil() { /* Hidden constructor */ } + + /** + * Returns true if HA for namenode is configured. + * + * @param conf Configuration + * @return true if HA is configured in the configuration; else false. + */ + public static boolean isHAEnabled(Configuration conf) { + Collection collection = DFSUtil.getNameNodeIds(conf); + return collection != null && !collection.isEmpty(); + } + + /** + * Get the namenode Id by matching the {@code addressKey} + * with the the address of the local node. + * + * If {@link DFSConfigKeys#DFS_HA_NAMENODE_ID_KEY} is not specifically + * configured, this method determines the namenode Id by matching the local + * node's address with the configured addresses. When a match is found, it + * returns the namenode Id from the corresponding configuration key. + * + * @param conf Configuration + * @return namenode Id on success, null on failure. + * @throws HadoopIllegalArgumentException on error + */ + public static String getNameNodeId(Configuration conf) { + String namenodeId = conf.get(DFS_HA_NAMENODE_ID_KEY); + if (namenodeId != null) { + return namenodeId; + } + if (!isHAEnabled(conf)) { + return null; + } + namenodeId = DFSUtil.getSuffixIDs(conf, DFS_NAMENODE_RPC_ADDRESS_KEY, + DFSUtil.LOCAL_ADDRESS_MATCHER)[1]; + if (namenodeId == null) { + String msg = "Configuration " + DFS_NAMENODE_RPC_ADDRESS_KEY + + " must be suffixed with" + " namenodeId for HA configuration."; + throw new HadoopIllegalArgumentException(msg); + } + return namenodeId; + } + + /** + * Similar to + * {@link DFSUtil#getNameServiceIdFromAddress(Configuration, + * InetSocketAddress, String...)} + */ + public static String getNameNodeIdFromAddress(final Configuration conf, + final InetSocketAddress address, String... 
keys) { + // Configuration with a single namenode and no nameserviceId + if (!isHAEnabled(conf)) { + return null; + } + + String[] ids = DFSUtil.getSuffixIDs(conf, address, keys); + if (ids != null && ids.length > 1) { + return ids[1]; + } + return null; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index f8a375001d8..456f8d6b75e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -38,6 +38,7 @@ import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Trash; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.HdfsConstants; @@ -526,7 +527,7 @@ public class NameNode { throws IOException { this.conf = conf; this.role = role; - this.haEnabled = DFSUtil.isHAEnabled(conf); + this.haEnabled = HAUtil.isHAEnabled(conf); this.haContext = new NameNodeHAContext(); try { initializeGenericKeys(conf, getNameServiceId(conf)); @@ -841,15 +842,18 @@ public class NameNode { * Configuration object to lookup specific key and to set the value * to the key passed. Note the conf object is modified * @param nameserviceId name service Id - * @see DFSUtil#setGenericConf(Configuration, String, String...) + * @see DFSUtil#setGenericConf(Configuration, String, String, String...) */ public static void initializeGenericKeys(Configuration conf, String nameserviceId) { - if ((nameserviceId == null) || nameserviceId.isEmpty()) { + String namenodeId = HAUtil.getNameNodeId(conf); + if ((nameserviceId == null || nameserviceId.isEmpty()) && + (namenodeId == null || namenodeId.isEmpty())) { return; } - DFSUtil.setGenericConf(conf, nameserviceId, NAMESERVICE_SPECIFIC_KEYS); + DFSUtil.setGenericConf(conf, nameserviceId, namenodeId, + NAMESERVICE_SPECIFIC_KEYS); if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) { URI defaultUri = URI.create(HdfsConstants.HDFS_URI_SCHEME + "://" + conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 8ebae4e57af..f6cf5a0c877 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -587,11 +587,11 @@ public class MiniDFSCluster { private static void initFederatedNamenodeAddress(Configuration conf, String nameserviceId, int nnPort) { // Set nameserviceId specific key - String key = DFSUtil.getNameServiceIdKey( + String key = DFSUtil.addKeySuffixes( DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, nameserviceId); conf.set(key, "127.0.0.1:0"); - key = DFSUtil.getNameServiceIdKey( + key = DFSUtil.addKeySuffixes( DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, nameserviceId); conf.set(key, "127.0.0.1:" + nnPort); } @@ -644,10 +644,10 @@ public class MiniDFSCluster { conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID, nameserviceId); NameNode nn = createNameNode(nnIndex, conf, numDataNodes, manageNameDfsDirs, format, 
operation, clusterId); - conf.set(DFSUtil.getNameServiceIdKey( + conf.set(DFSUtil.addKeySuffixes( DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, nameserviceId), NameNode .getHostPortString(nn.getNameNodeAddress())); - conf.set(DFSUtil.getNameServiceIdKey( + conf.set(DFSUtil.addKeySuffixes( DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, nameserviceId), NameNode .getHostPortString(nn.getHttpAddress())); DFSUtil.setGenericConf(conf, nameserviceId, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index f154ff7d203..d9c64f70be0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -86,7 +86,7 @@ public class TestDFSUtil { private Configuration setupAddress(String key) { HdfsConfiguration conf = new HdfsConfiguration(); conf.set(DFS_FEDERATION_NAMESERVICES, "nn1"); - conf.set(DFSUtil.getNameServiceIdKey(key, "nn1"), "localhost:9000"); + conf.set(DFSUtil.addKeySuffixes(key, "nn1"), "localhost:9000"); return conf; } @@ -102,7 +102,7 @@ public class TestDFSUtil { } /** - * Test {@link DFSUtil#getNameNodeNameServiceId(Configuration)} to ensure + * Test {@link DFSUtil#getNamenodeNameServiceId(Configuration)} to ensure * nameserviceId for namenode is determined based on matching the address with * local node's address */ @@ -135,7 +135,7 @@ public class TestDFSUtil { } /** - * Test {@link DFSUtil#getNameServiceId(Configuration, String))} to ensure + * Test {@link DFSUtil#getNamenodeNameServiceId(Configuration)} to ensure * exception is thrown when multiple rpc addresses match the local node's * address */ @@ -143,9 +143,9 @@ public class TestDFSUtil { public void testGetNameServiceIdException() { HdfsConfiguration conf = new HdfsConfiguration(); conf.set(DFS_FEDERATION_NAMESERVICES, "nn1,nn2"); - conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), "localhost:9000"); - conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), "localhost:9001"); DFSUtil.getNamenodeNameServiceId(conf); fail("Expected exception is not thrown"); @@ -178,9 +178,9 @@ public class TestDFSUtil { final String NN1_ADDRESS = "localhost:9000"; final String NN2_ADDRESS = "localhost:9001"; final String NN3_ADDRESS = "localhost:9002"; - conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn1"), NN1_ADDRESS); - conf.set(DFSUtil.getNameServiceIdKey(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), NN2_ADDRESS); Collection nnAddresses = DFSUtil @@ -247,7 +247,7 @@ public class TestDFSUtil { * copied to generic keys when the namenode starts. 
*/ @Test - public void testConfModification() throws IOException { + public void testConfModification() { final HdfsConfiguration conf = new HdfsConfiguration(); conf.set(DFS_FEDERATION_NAMESERVICES, "nn1"); conf.set(DFS_FEDERATION_NAMESERVICE_ID, "nn1"); @@ -256,7 +256,7 @@ public class TestDFSUtil { // Set the nameservice specific keys with nameserviceId in the config key for (String key : NameNode.NAMESERVICE_SPECIFIC_KEYS) { // Note: value is same as the key - conf.set(DFSUtil.getNameServiceIdKey(key, nameserviceId), key); + conf.set(DFSUtil.addKeySuffixes(key, nameserviceId), key); } // Initialize generic keys from specific keys @@ -282,18 +282,21 @@ public class TestDFSUtil { DFSUtil.getNNServiceRpcAddresses(conf); fail("Expected IOException is not thrown"); } catch (IOException expected) { + /** Expected */ } try { DFSUtil.getBackupNodeAddresses(conf); fail("Expected IOException is not thrown"); } catch (IOException expected) { + /** Expected */ } try { DFSUtil.getSecondaryNameNodeAddresses(conf); fail("Expected IOException is not thrown"); } catch (IOException expected) { + /** Expected */ } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index f88343e0a8f..6eef234bbf5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -1088,9 +1088,9 @@ public class TestCheckpoint extends TestCase { snConf2.set(DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, ""); // Set the nameserviceIds - snConf1.set(DFSUtil.getNameServiceIdKey( + snConf1.set(DFSUtil.addKeySuffixes( DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, nameserviceId1), nn1); - snConf2.set(DFSUtil.getNameServiceIdKey( + snConf2.set(DFSUtil.addKeySuffixes( DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, nameserviceId2), nn2); SecondaryNameNode secondary1 = startSecondaryNameNode(snConf1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java index d7be23a97aa..39e8e20a0df 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java @@ -72,7 +72,7 @@ public class TestGetConf { String[] values = new String[nameServiceIdCount]; for (int i = 0; i < nameServiceIdCount; i++, portOffset++) { String nsID = getNameServiceId(i); - String specificKey = DFSUtil.getNameServiceIdKey(key, nsID); + String specificKey = DFSUtil.addKeySuffixes(key, nsID); values[i] = "nn" + i + ":" + portOffset; conf.set(specificKey, values[i]); } From ab7e7dfd7e557ff17f177ae445c27133d81e8b1a Mon Sep 17 00:00:00 2001 From: Jitendra Nath Pandey Date: Mon, 17 Oct 2011 20:42:32 +0000 Subject: [PATCH 016/177] Merged r1185354 from trunk for HDFS-2188. 
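A minimal sketch of the new construction path (illustrative only; the helper class and directory paths are made up, and it assumes the same package as FSEditLog since the constructor is package-private): FSEditLog now receives the edits directories as a list of URIs instead of deriving them from NNStorage, and internally maps each URI to a StorageDirectory backed by a FileJournalManager.

package org.apache.hadoop.hdfs.server.namenode;

import java.io.File;
import java.net.URI;
import java.util.Collection;

import com.google.common.collect.ImmutableList;
import org.apache.hadoop.conf.Configuration;

// Hypothetical helper, loosely modeled on FSImageTestUtil.createStandaloneEditLog().
class EditLogFromUrisSketch {
  static FSEditLog createEditLog(Configuration conf, NNStorage storage) {
    Collection<URI> editsDirs = ImmutableList.of(
        new File("/data/1/dfs/name/edits").toURI(),  // made-up local paths
        new File("/data/2/dfs/name/edits").toURI());
    // The constructor resolves each URI through NNStorage.getStorageDirectory(uri)
    // and adds a FileJournalManager for every directory it finds; if the list were
    // empty it would fall back to storage.getEditsDirectories().
    return new FSEditLog(conf, storage, editsDirs);
  }
}

Taking URIs rather than pulling directories out of NNStorage keeps the edit log's journal list independent of local image storage, which points toward journal types that are not plain local directories (for example, edits shared between an active and a standby namenode).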
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1185363 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../hdfs/server/namenode/FSEditLog.java | 58 +++++++++++++++++-- .../hadoop/hdfs/server/namenode/FSImage.java | 5 +- .../hdfs/server/namenode/NNStorage.java | 26 ++++++++- .../hdfs/server/namenode/FSImageTestUtil.java | 20 +++++-- .../hdfs/server/namenode/TestEditLog.java | 20 +++++-- 6 files changed, 112 insertions(+), 20 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index be5f7a3f3ce..b9bc4a22ab0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -97,6 +97,9 @@ Trunk (unreleased changes) HDFS-2298. Fix TestDfsOverAvroRpc by changing ClientProtocol to not include multiple methods of the same name. (cutting) + HDFS-2188. Make FSEditLog create its journals from a list of URIs rather + than NNStorage. (Ivan Kelly via jitendra) + Release 0.23.0 - Unreleased INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 4a41a2cbd65..aac2a35592e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -18,10 +18,11 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.apache.hadoop.hdfs.server.common.Util.now; - +import java.net.URI; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; import org.apache.commons.logging.Log; @@ -42,9 +43,11 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.token.delegation.DelegationKey; +import org.apache.hadoop.conf.Configuration; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; /** * FSEditLog maintains a log of the namespace modifications. @@ -122,23 +125,68 @@ public class FSEditLog { } }; + final private Collection editsDirs; + + /** + * Construct FSEditLog with default configuration, taking editDirs from NNStorage + * @param storage Storage object used by namenode + */ + @VisibleForTesting FSEditLog(NNStorage storage) { + this(new Configuration(), storage, Collections.emptyList()); + } + + /** + * Constructor for FSEditLog. Add underlying journals are constructed, but + * no streams are opened until open() is called. + * + * @param conf The namenode configuration + * @param storage Storage object used by namenode + * @param editsDirs List of journals to use + */ + FSEditLog(Configuration conf, NNStorage storage, Collection editsDirs) { isSyncRunning = false; this.storage = storage; metrics = NameNode.getNameNodeMetrics(); lastPrintTime = now(); + + if (editsDirs.isEmpty()) { + // if this is the case, no edit dirs have been explictly configured + // image dirs are to be used for edits too + try { + editsDirs = Lists.newArrayList(storage.getEditsDirectories()); + } catch (IOException ioe) { + // cannot get list from storage, so the empty editsDirs + // will be assigned. 
an error will be thrown on first use + // of the editlog, as no journals will exist + } + this.editsDirs = editsDirs; + } else { + this.editsDirs = Lists.newArrayList(editsDirs); + } this.journalSet = new JournalSet(); - for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.EDITS)) { - journalSet.add(new FileJournalManager(sd)); + for (URI u : this.editsDirs) { + StorageDirectory sd = storage.getStorageDirectory(u); + if (sd != null) { + journalSet.add(new FileJournalManager(sd)); + } } - + if (journalSet.isEmpty()) { LOG.error("No edits directories configured!"); } state = State.BETWEEN_LOG_SEGMENTS; } - + + /** + * Get the list of URIs the editlog is using for storage + * @return collection of URIs in use by the edit log + */ + Collection getEditURIs() { + return editsDirs; + } + /** * Initialize the output stream for logging, opening the first * log segment. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 325e4b04ca9..a9429417134 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -120,7 +120,7 @@ public class FSImage implements Closeable { storage.setRestoreFailedStorage(true); } - this.editLog = new FSEditLog(storage); + this.editLog = new FSEditLog(conf, storage, editsDirs); archivalManager = new NNStorageRetentionManager(conf, storage, editLog); } @@ -150,8 +150,7 @@ public class FSImage implements Closeable { "NameNode formatting should be performed before reading the image"; Collection imageDirs = storage.getImageDirectories(); - Collection editsDirs = storage.getEditsDirectories(); - + Collection editsDirs = editLog.getEditURIs(); // none of the data dirs exist if((imageDirs.size() == 0 || editsDirs.size() == 0) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java index 82096252ac4..a7fa7fb4252 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java @@ -59,6 +59,7 @@ import org.apache.hadoop.net.DNS; import com.google.common.base.Preconditions; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; /** * NNStorage is responsible for management of the StorageDirectories used by @@ -154,7 +155,9 @@ public class NNStorage extends Storage implements Closeable { storageDirs = new CopyOnWriteArrayList(); - setStorageDirectories(imageDirs, editsDirs); + // this may modify the editsDirs, so copy before passing in + setStorageDirectories(imageDirs, + Lists.newArrayList(editsDirs)); } @Override // Storage @@ -298,6 +301,27 @@ public class NNStorage extends Storage implements Closeable { } } + /** + * Return the storage directory corresponding to the passed URI + * @param uri URI of a storage directory + * @return The matching storage directory or null if none found + */ + StorageDirectory getStorageDirectory(URI uri) { + try { + uri = Util.fileAsURI(new File(uri)); + Iterator it = dirIterator(); + for (; it.hasNext(); ) { + StorageDirectory sd = it.next(); + if 
(Util.fileAsURI(sd.getRoot()).equals(uri)) { + return sd; + } + } + } catch (IOException ioe) { + LOG.warn("Error converting file to URI", ioe); + } + return null; + } + /** * Checks the consistency of a URI, in particular if the scheme * is specified and is supported by a concrete implementation diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java index 39e7db17dc7..aec77ab12ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java @@ -41,10 +41,13 @@ import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFil import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.hdfs.util.MD5FileUtils; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.conf.Configuration; import org.mockito.Mockito; +import org.mockito.Matchers; import com.google.common.base.Joiner; import com.google.common.collect.Lists; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.common.io.Files; @@ -105,7 +108,7 @@ public abstract class FSImageTestUtil { Mockito.doReturn(type) .when(sd).getStorageDirType(); Mockito.doReturn(currentDir).when(sd).getCurrentDir(); - + Mockito.doReturn(currentDir).when(sd).getRoot(); Mockito.doReturn(mockFile(true)).when(sd).getVersionFile(); Mockito.doReturn(mockFile(false)).when(sd).getPreviousDir(); return sd; @@ -127,7 +130,8 @@ public abstract class FSImageTestUtil { // Version file should always exist doReturn(mockFile(true)).when(sd).getVersionFile(); - + doReturn(mockFile(true)).when(sd).getRoot(); + // Previous dir optionally exists doReturn(mockFile(previousExists)) .when(sd).getPreviousDir(); @@ -142,6 +146,7 @@ public abstract class FSImageTestUtil { doReturn(files).when(mockDir).listFiles(); doReturn(mockDir).when(sd).getCurrentDir(); + return sd; } @@ -169,11 +174,16 @@ public abstract class FSImageTestUtil { assertTrue(logDir.mkdirs() || logDir.exists()); Files.deleteDirectoryContents(logDir); NNStorage storage = Mockito.mock(NNStorage.class); - List sds = Lists.newArrayList( - FSImageTestUtil.mockStorageDirectory(logDir, NameNodeDirType.EDITS)); + StorageDirectory sd + = FSImageTestUtil.mockStorageDirectory(logDir, NameNodeDirType.EDITS); + List sds = Lists.newArrayList(sd); Mockito.doReturn(sds).when(storage).dirIterable(NameNodeDirType.EDITS); + Mockito.doReturn(sd).when(storage) + .getStorageDirectory(Matchers.anyObject()); - return new FSEditLog(storage); + return new FSEditLog(new Configuration(), + storage, + ImmutableList.of(logDir.toURI())); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index 123810c9dc4..5c14ab3061e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import 
org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; +import org.apache.hadoop.hdfs.server.common.Util; import org.apache.hadoop.hdfs.server.namenode.EditLogFileInputStream; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; @@ -861,8 +862,11 @@ public class TestEditLog extends TestCase { * The syntax [1,] specifies an in-progress log starting at * txid 1. */ - private NNStorage mockStorageWithEdits(String... editsDirSpecs) { + private NNStorage mockStorageWithEdits(String... editsDirSpecs) throws IOException { List sds = Lists.newArrayList(); + List uris = Lists.newArrayList(); + + NNStorage storage = Mockito.mock(NNStorage.class); for (String dirSpec : editsDirSpecs) { List files = Lists.newArrayList(); String[] logSpecs = dirSpec.split("\\|"); @@ -878,13 +882,17 @@ public class TestEditLog extends TestCase { Long.valueOf(m.group(2)))); } } - sds.add(FSImageTestUtil.mockStorageDirectory( + StorageDirectory sd = FSImageTestUtil.mockStorageDirectory( NameNodeDirType.EDITS, false, - files.toArray(new String[0]))); - } - - NNStorage storage = Mockito.mock(NNStorage.class); + files.toArray(new String[0])); + sds.add(sd); + URI u = URI.create("file:///storage"+ Math.random()); + Mockito.doReturn(sd).when(storage).getStorageDirectory(u); + uris.add(u); + } + Mockito.doReturn(sds).when(storage).dirIterable(NameNodeDirType.EDITS); + Mockito.doReturn(uris).when(storage).getEditsDirectories(); return storage; } From ebb6cc60c421286d9121ad907e6d68fa4ee3cf93 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 27 Oct 2011 22:10:58 +0000 Subject: [PATCH 017/177] HDFS-2507. Allow saveNamespace operations to be canceled. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1190059 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../hadoop/hdfs/server/namenode/FSImage.java | 138 ++++++++++++------ .../hdfs/server/namenode/FSImageFormat.java | 18 ++- .../hdfs/server/namenode/FSNamesystem.java | 21 +++ .../SaveNamespaceCancelledException.java | 28 ++++ .../server/namenode/SaveNamespaceContext.java | 98 +++++++++++++ .../apache/hadoop/hdfs/util/MD5FileUtils.java | 2 +- .../server/namenode/TestSaveNamespace.java | 106 ++++++++++++-- 8 files changed, 352 insertions(+), 61 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceCancelledException.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index b9bc4a22ab0..a239cc99e1e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -788,6 +788,8 @@ Release 0.23.0 - Unreleased HDFS-2363. Move datanodes size printing from FSNamesystem.metasave(..) to BlockManager. (Uma Maheswara Rao G via szetszwo) + HDFS-2507. Allow saveNamespace operations to be canceled. (todd) + OPTIMIZATIONS HDFS-1458. 
Improve checkpoint performance by avoiding unnecessary image diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index a9429417134..a6af3eb8e87 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -83,7 +83,9 @@ public class FSImage implements Closeable { final private Configuration conf; - private final NNStorageRetentionManager archivalManager; + private final NNStorageRetentionManager archivalManager; + + private SaveNamespaceContext curSaveNamespaceContext = null; /** @@ -715,14 +717,15 @@ public class FSImage implements Closeable { /** * Save the contents of the FS image to the file. */ - void saveFSImage(FSNamesystem source, StorageDirectory sd, long txid) + void saveFSImage(SaveNamespaceContext context, StorageDirectory sd) throws IOException { + long txid = context.getTxId(); File newFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE_NEW, txid); File dstFile = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE, txid); - FSImageFormat.Saver saver = new FSImageFormat.Saver(); + FSImageFormat.Saver saver = new FSImageFormat.Saver(context); FSImageCompression compression = FSImageCompression.createCompression(conf); - saver.save(newFile, txid, source, compression); + saver.save(newFile, compression); MD5FileUtils.saveMD5File(dstFile, saver.getSavedDigest()); storage.setMostRecentCheckpointTxId(txid); @@ -740,25 +743,24 @@ public class FSImage implements Closeable { * and writing it out. */ private class FSImageSaver implements Runnable { + private final SaveNamespaceContext context; private StorageDirectory sd; - private List errorSDs; - private final long txid; - private final FSNamesystem source; - - FSImageSaver(FSNamesystem source, StorageDirectory sd, - List errorSDs, long txid) { - this.source = source; + + public FSImageSaver(SaveNamespaceContext context, StorageDirectory sd) { + this.context = context; this.sd = sd; - this.errorSDs = errorSDs; - this.txid = txid; } - + public void run() { try { - saveFSImage(source, sd, txid); + saveFSImage(context, sd); + } catch (SaveNamespaceCancelledException snce) { + LOG.info("Cancelled image saving for " + sd.getRoot() + + ": " + snce.getMessage()); + // don't report an error on the storage dir! } catch (Throwable t) { LOG.error("Unable to save image for " + sd.getRoot(), t); - errorSDs.add(sd); + context.reportErrorOnStorageDirectory(sd); } } @@ -784,7 +786,7 @@ public class FSImage implements Closeable { * Save the contents of the FS image to a new image file in each of the * current storage directories. */ - void saveNamespace(FSNamesystem source) throws IOException { + synchronized void saveNamespace(FSNamesystem source) throws IOException { assert editLog != null : "editLog must be initialized"; storage.attemptRestoreRemovedStorage(); @@ -800,46 +802,71 @@ public class FSImage implements Closeable { } finally { if (editLogWasOpen) { editLog.startLogSegment(imageTxId + 1, true); - // Take this opportunity to note the current transaction + // Take this opportunity to note the current transaction. + // Even if the namespace save was cancelled, this marker + // is only used to determine what transaction ID is required + // for startup. So, it doesn't hurt to update it unnecessarily. 
storage.writeTransactionIdFileToStorage(imageTxId + 1); } } } - protected void saveFSImageInAllDirs(FSNamesystem source, long txid) - throws IOException { + void cancelSaveNamespace(String reason) + throws InterruptedException { + SaveNamespaceContext ctx = curSaveNamespaceContext; + if (ctx != null) { + ctx.cancel(reason); // waits until complete + } + } + + + protected synchronized void saveFSImageInAllDirs(FSNamesystem source, long txid) + throws IOException { if (storage.getNumStorageDirs(NameNodeDirType.IMAGE) == 0) { throw new IOException("No image directories available!"); } - List errorSDs = - Collections.synchronizedList(new ArrayList()); - - List saveThreads = new ArrayList(); - // save images into current - for (Iterator it - = storage.dirIterator(NameNodeDirType.IMAGE); it.hasNext();) { - StorageDirectory sd = it.next(); - FSImageSaver saver = new FSImageSaver(source, sd, errorSDs, txid); - Thread saveThread = new Thread(saver, saver.toString()); - saveThreads.add(saveThread); - saveThread.start(); - } - waitForThreads(saveThreads); - saveThreads.clear(); - storage.reportErrorsOnDirectories(errorSDs); - - if (storage.getNumStorageDirs(NameNodeDirType.IMAGE) == 0) { - throw new IOException( - "Failed to save in any storage directories while saving namespace."); - } - - renameCheckpoint(txid); + SaveNamespaceContext ctx = new SaveNamespaceContext( + source, txid); + curSaveNamespaceContext = ctx; - // Since we now have a new checkpoint, we can clean up some - // old edit logs and checkpoints. - purgeOldStorage(); + try { + List saveThreads = new ArrayList(); + // save images into current + for (Iterator it + = storage.dirIterator(NameNodeDirType.IMAGE); it.hasNext();) { + StorageDirectory sd = it.next(); + FSImageSaver saver = new FSImageSaver(ctx, sd); + Thread saveThread = new Thread(saver, saver.toString()); + saveThreads.add(saveThread); + saveThread.start(); + } + waitForThreads(saveThreads); + saveThreads.clear(); + storage.reportErrorsOnDirectories(ctx.getErrorSDs()); + + if (storage.getNumStorageDirs(NameNodeDirType.IMAGE) == 0) { + throw new IOException( + "Failed to save in any storage directories while saving namespace."); + } + if (ctx.isCancelled()) { + deleteCancelledCheckpoint(txid); + ctx.checkCancelled(); // throws + assert false : "should have thrown above!"; + } + + renameCheckpoint(txid); + + // Since we now have a new checkpoint, we can clean up some + // old edit logs and checkpoints. + purgeOldStorage(); + } finally { + // Notify any threads waiting on the checkpoint to be canceled + // that it is complete. + ctx.markComplete(); + ctx = null; + } } /** @@ -873,6 +900,24 @@ public class FSImage implements Closeable { } if(al != null) storage.reportErrorsOnDirectories(al); } + + /** + * Deletes the checkpoint file in every storage directory, + * since the checkpoint was cancelled. 
+ */ + private void deleteCancelledCheckpoint(long txid) throws IOException { + ArrayList al = Lists.newArrayList(); + + for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.IMAGE)) { + File ckpt = NNStorage.getStorageFile(sd, NameNodeFile.IMAGE_NEW, txid); + if (ckpt.exists() && !ckpt.delete()) { + LOG.warn("Unable to delete cancelled checkpoint in " + sd); + al.add(sd); + } + } + storage.reportErrorsOnDirectories(al); + } + private void renameCheckpointInDir(StorageDirectory sd, long txid) throws IOException { @@ -1055,4 +1100,5 @@ public class FSImage implements Closeable { public synchronized long getLastAppliedTxId() { return lastAppliedTxId; } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java index c178e048b59..e029b240229 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormat.java @@ -508,6 +508,7 @@ class FSImageFormat { * functions may be used to retrieve information about the file that was written. */ static class Saver { + private final SaveNamespaceContext context; /** Set to true once an image has been written */ private boolean saved = false; @@ -529,6 +530,11 @@ class FSImageFormat { throw new IllegalStateException("FSImageSaver has already saved an image"); } } + + + Saver(SaveNamespaceContext context) { + this.context = context; + } /** * Return the MD5 checksum of the image file that was saved. @@ -539,12 +545,11 @@ class FSImageFormat { } void save(File newFile, - long txid, - FSNamesystem sourceNamesystem, FSImageCompression compression) throws IOException { checkNotSaved(); + final FSNamesystem sourceNamesystem = context.getSourceNamesystem(); FSDirectory fsDir = sourceNamesystem.dir; long startTime = now(); // @@ -565,7 +570,7 @@ class FSImageFormat { .getNamespaceID()); out.writeLong(fsDir.rootDir.numItemsInTree()); out.writeLong(sourceNamesystem.getGenerationStamp()); - out.writeLong(txid); + out.writeLong(context.getTxId()); // write compression info and set up compressed stream out = compression.writeHeaderAndWrapStream(fos); @@ -581,10 +586,12 @@ class FSImageFormat { saveImage(strbuf, fsDir.rootDir, out); // save files under construction sourceNamesystem.saveFilesUnderConstruction(out); + context.checkCancelled(); sourceNamesystem.saveSecretManagerState(out); strbuf = null; - + context.checkCancelled(); out.flush(); + context.checkCancelled(); fout.getChannel().force(true); } finally { out.close(); @@ -603,9 +610,10 @@ class FSImageFormat { * This is a recursive procedure, which first saves all children of * a current directory and then moves inside the sub-directories. 
*/ - private static void saveImage(ByteBuffer currentDirName, + private void saveImage(ByteBuffer currentDirName, INodeDirectory current, DataOutputStream out) throws IOException { + context.checkCancelled(); List children = current.getChildrenRaw(); if (children == null || children.isEmpty()) return; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 29e76f7db02..ff590c15280 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -2785,6 +2785,27 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } + /** + * Cancel an ongoing saveNamespace operation and wait for its + * threads to exit, if one is currently in progress. + * + * If no such operation is in progress, this call does nothing. + * + * @param reason a reason to be communicated to the caller saveNamespace + * @throws IOException + */ + void cancelSaveNamespace(String reason) throws IOException { + readLock(); + try { + checkSuperuserPrivilege(); + getFSImage().cancelSaveNamespace(reason); + } catch (InterruptedException e) { + throw new IOException(e); + } finally { + readUnlock(); + } + } + /** * Enables/Disables/Checks restoring failed storage replicas if the storage becomes available again. * Requires superuser privilege. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceCancelledException.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceCancelledException.java new file mode 100644 index 00000000000..2731275f261 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceCancelledException.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; + +class SaveNamespaceCancelledException extends IOException { + private static final long serialVersionUID = 1L; + + SaveNamespaceCancelledException(String cancelReason) { + super(cancelReason); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java new file mode 100644 index 00000000000..c5c0c06d0e5 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceContext.java @@ -0,0 +1,98 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.CountDownLatch; + +import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; + +import com.google.common.base.Preconditions; + +/** + * Context for an ongoing SaveNamespace operation. This class + * allows cancellation, and also is responsible for accumulating + * failed storage directories. + */ +class SaveNamespaceContext { + private final FSNamesystem sourceNamesystem; + private final long txid; + private final List errorSDs = + Collections.synchronizedList(new ArrayList()); + + /** + * If the operation has been canceled, set to the reason why + * it has been canceled (eg standby moving to active) + */ + private volatile String cancelReason = null; + + private CountDownLatch completionLatch = new CountDownLatch(1); + + SaveNamespaceContext( + FSNamesystem sourceNamesystem, + long txid) { + this.sourceNamesystem = sourceNamesystem; + this.txid = txid; + } + + FSNamesystem getSourceNamesystem() { + return sourceNamesystem; + } + + long getTxId() { + return txid; + } + + void reportErrorOnStorageDirectory(StorageDirectory sd) { + errorSDs.add(sd); + } + + List getErrorSDs() { + return errorSDs; + } + + /** + * Requests that the current saveNamespace operation be + * canceled if it is still running. 
+ * @param reason the reason why cancellation is requested + * @throws InterruptedException + */ + void cancel(String reason) throws InterruptedException { + this.cancelReason = reason; + completionLatch.await(); + } + + void markComplete() { + Preconditions.checkState(completionLatch.getCount() == 1, + "Context already completed!"); + completionLatch.countDown(); + } + + void checkCancelled() throws SaveNamespaceCancelledException { + if (cancelReason != null) { + throw new SaveNamespaceCancelledException( + cancelReason); + } + } + + boolean isCancelled() { + return cancelReason != null; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java index 0d691378ba2..c010e2730a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/MD5FileUtils.java @@ -42,7 +42,7 @@ public abstract class MD5FileUtils { private static final Log LOG = LogFactory.getLog( MD5FileUtils.class); - private static final String MD5_SUFFIX = ".md5"; + public static final String MD5_SUFFIX = ".md5"; private static final Pattern LINE_REGEX = Pattern.compile("([0-9a-f]{32}) [ \\*](.+)"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java index 9365c6ef049..13e256d78cf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java @@ -29,6 +29,10 @@ import static org.mockito.Mockito.spy; import java.io.File; import java.io.IOException; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -44,6 +48,9 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; +import org.apache.hadoop.hdfs.util.MD5FileUtils; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.apache.log4j.Level; import org.junit.Test; import org.mockito.Mockito; @@ -124,22 +131,25 @@ public class TestSaveNamespace { case SAVE_SECOND_FSIMAGE_RTE: // The spy throws a RuntimeException when writing to the second directory doAnswer(new FaultySaveImage(true)). - when(spyImage).saveFSImage(Mockito.eq(fsn), - (StorageDirectory)anyObject(), anyLong()); + when(spyImage).saveFSImage( + (SaveNamespaceContext)anyObject(), + (StorageDirectory)anyObject()); shouldFail = false; break; case SAVE_SECOND_FSIMAGE_IOE: // The spy throws an IOException when writing to the second directory doAnswer(new FaultySaveImage(false)). 
- when(spyImage).saveFSImage(Mockito.eq(fsn), - (StorageDirectory)anyObject(), anyLong()); + when(spyImage).saveFSImage( + (SaveNamespaceContext)anyObject(), + (StorageDirectory)anyObject()); shouldFail = false; break; case SAVE_ALL_FSIMAGES: // The spy throws IOException in all directories doThrow(new RuntimeException("Injected")). - when(spyImage).saveFSImage(Mockito.eq(fsn), - (StorageDirectory)anyObject(), anyLong()); + when(spyImage).saveFSImage( + (SaveNamespaceContext)anyObject(), + (StorageDirectory)anyObject()); shouldFail = true; break; case WRITE_STORAGE_ALL: @@ -363,9 +373,9 @@ public class TestSaveNamespace { FSNamesystem.getNamespaceEditsDirs(conf)); doThrow(new IOException("Injected fault: saveFSImage")). - when(spyImage).saveFSImage( - Mockito.eq(fsn), (StorageDirectory)anyObject(), - Mockito.anyLong()); + when(spyImage).saveFSImage( + (SaveNamespaceContext)anyObject(), + (StorageDirectory)anyObject()); try { doAnEdit(fsn, 1); @@ -479,6 +489,84 @@ public class TestSaveNamespace { } } + @Test(timeout=20000) + public void testCancelSaveNamespace() throws Exception { + Configuration conf = getConf(); + NameNode.initMetrics(conf, NamenodeRole.NAMENODE); + DFSTestUtil.formatNameNode(conf); + FSNamesystem fsn = FSNamesystem.loadFromDisk(conf); + + // Replace the FSImage with a spy + final FSImage image = fsn.dir.fsImage; + NNStorage storage = image.getStorage(); + storage.close(); // unlock any directories that FSNamesystem's initialization may have locked + storage.setStorageDirectories( + FSNamesystem.getNamespaceDirs(conf), + FSNamesystem.getNamespaceEditsDirs(conf)); + + FSNamesystem spyFsn = spy(fsn); + final FSNamesystem finalFsn = spyFsn; + DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG); + doAnswer(delayer).when(spyFsn).getGenerationStamp(); + + ExecutorService pool = Executors.newFixedThreadPool(2); + + try { + doAnEdit(fsn, 1); + + // Save namespace + fsn.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + try { + Future saverFuture = pool.submit(new Callable() { + @Override + public Void call() throws Exception { + image.saveNamespace(finalFsn); + return null; + } + }); + + // Wait until saveNamespace calls getGenerationStamp + delayer.waitForCall(); + // then cancel the saveNamespace + Future cancelFuture = pool.submit(new Callable() { + public Void call() throws Exception { + image.cancelSaveNamespace("cancelled"); + return null; + } + }); + // give the cancel call time to run + Thread.sleep(500); + + // allow saveNamespace to proceed - it should check the cancel flag after + // this point and throw an exception + delayer.proceed(); + + cancelFuture.get(); + saverFuture.get(); + fail("saveNamespace did not fail even though cancelled!"); + } catch (Throwable t) { + GenericTestUtils.assertExceptionContains( + "SaveNamespaceCancelledException", t); + } + LOG.info("Successfully cancelled a saveNamespace"); + + + // Check that we have only the original image and not any + // cruft left over from half-finished images + FSImageTestUtil.logStorageContents(LOG, storage); + for (StorageDirectory sd : storage.dirIterable(null)) { + File curDir = sd.getCurrentDir(); + GenericTestUtils.assertGlobEquals(curDir, "fsimage_.*", + NNStorage.getImageFileName(0), + NNStorage.getImageFileName(0) + MD5FileUtils.MD5_SUFFIX); + } + } finally { + if (fsn != null) { + fsn.close(); + } + } + } + private void doAnEdit(FSNamesystem fsn, int id) throws IOException { // Make an edit fsn.mkdirs( From 180646dea33785f8b4cc71482d099595b8c7da9d Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: 
Thu, 27 Oct 2011 23:15:07 +0000 Subject: [PATCH 018/177] HDFS-2418. Change ConfiguredFailoverProxyProvider to take advantage of HDFS-2231. (atm) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1190078 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 48 ++++++++++++++++--- .../java/org/apache/hadoop/hdfs/HAUtil.java | 2 +- .../ha/ConfiguredFailoverProxyProvider.java | 23 ++------- .../hadoop/hdfs/TestDFSClientFailover.java | 24 +++++++--- 5 files changed, 66 insertions(+), 33 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 33ef72552bf..9b9851bc96d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -15,3 +15,5 @@ HDFS-1973. HA: HDFS clients must handle namenode failover and switch over to the HDFS-2301. Start/stop appropriate namenode services when transition to active and standby states. (suresh) HDFS-2231. Configuration changes for HA namenode. (suresh) + +HDFS-2418. Change ConfiguredFailoverProxyProvider to take advantage of HDFS-2231. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index cdcca9e3d33..719486f8913 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -406,18 +406,52 @@ public class DFSUtil { } // Get configuration suffixed with nameserviceId and/or namenodeId - for (String nameserviceId : nameserviceIds) { - for (String nnId : namenodeIds) { - String keySuffix = concatSuffixes(nameserviceId, nnId); - String address = getConfValue(null, keySuffix, conf, keys); - if (address == null) { - return null; + if (federationEnabled && haEnabled) { + for (String nameserviceId : nameserviceIds) { + for (String nnId : namenodeIds) { + String keySuffix = concatSuffixes(nameserviceId, nnId); + String address = getConfValue(null, keySuffix, conf, keys); + if (address != null) { + isas.add(NetUtils.createSocketAddr(address)); + } } - isas.add(NetUtils.createSocketAddr(address)); + } + } else if (!federationEnabled && haEnabled) { + for (String nnId : namenodeIds) { + String address = getConfValue(null, nnId, conf, keys); + if (address != null) { + isas.add(NetUtils.createSocketAddr(address)); + } + } + } else if (federationEnabled && !haEnabled) { + for (String nameserviceId : nameserviceIds) { + String address = getConfValue(null, nameserviceId, conf, keys); + if (address != null) { + isas.add(NetUtils.createSocketAddr(address)); + } } } return isas; } + + /** + * Returns list of InetSocketAddress corresponding to HA NN RPC addresses from + * the configuration. 
+ * + * @param conf configuration + * @return list of InetSocketAddresses + * @throws IOException if no addresses are configured + */ + public static List getHaNnRpcAddresses( + Configuration conf) throws IOException { + List addressList = getAddresses(conf, null, + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); + if (addressList == null) { + throw new IOException("Incorrect configuration: HA name node addresses " + + DFS_NAMENODE_RPC_ADDRESS_KEY + " is not configured."); + } + return addressList; + } /** * Returns list of InetSocketAddress corresponding to backup node rpc diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 3824a7439f8..2ba01f55015 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -64,7 +64,7 @@ public class HAUtil { DFSUtil.LOCAL_ADDRESS_MATCHER)[1]; if (namenodeId == null) { String msg = "Configuration " + DFS_NAMENODE_RPC_ADDRESS_KEY + - " must be suffixed with" + " namenodeId for HA configuration."; + " must be suffixed with" + namenodeId + " for HA configuration."; throw new HadoopIllegalArgumentException(msg); } return namenodeId; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index 987f345ae7c..0c180c08d7d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -19,8 +19,6 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import java.io.IOException; import java.net.InetSocketAddress; -import java.net.URI; -import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -31,7 +29,6 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.ClientProtocol; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.io.retry.FailoverProxyProvider; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.security.UserGroupInformation; @@ -44,9 +41,6 @@ import org.apache.hadoop.security.UserGroupInformation; public class ConfiguredFailoverProxyProvider implements FailoverProxyProvider, Configurable { - public static final String CONFIGURED_NAMENODE_ADDRESSES - = "dfs.ha.namenode.addresses"; - private static final Log LOG = LogFactory.getLog(ConfiguredFailoverProxyProvider.class); @@ -93,22 +87,13 @@ public class ConfiguredFailoverProxyProvider implements FailoverProxyProvider, try { ugi = UserGroupInformation.getCurrentUser(); - Collection addresses = conf.getTrimmedStringCollection( - CONFIGURED_NAMENODE_ADDRESSES); - if (addresses == null || addresses.size() == 0) { - throw new RuntimeException(this.getClass().getSimpleName() + - " is configured but " + CONFIGURED_NAMENODE_ADDRESSES + - " is not set."); - } - for (String address : addresses) { - proxies.add(new AddressRpcProxyPair( - NameNode.getAddress(new URI(address).getAuthority()))); + Collection addresses = DFSUtil.getHaNnRpcAddresses( + 
conf); + for (InetSocketAddress address : addresses) { + proxies.add(new AddressRpcProxyPair(address)); } } catch (IOException e) { throw new RuntimeException(e); - } catch (URISyntaxException e) { - throw new RuntimeException("Malformed URI set in " + - CONFIGURED_NAMENODE_ADDRESSES, e); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java index 5ac38c6a8fa..3b50252bf4c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hdfs; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; import java.io.IOException; import java.io.OutputStream; @@ -59,9 +61,16 @@ public class TestDFSClientFailover { // changed to exercise that. @Test public void testDfsClientFailover() throws IOException, URISyntaxException { - final String nameServiceId = "name-service-uri"; + final String logicalNameNodeId = "ha-nn-uri"; InetSocketAddress nnAddr1 = cluster.getNameNode(0).getNameNodeAddress(); InetSocketAddress nnAddr2 = cluster.getNameNode(1).getNameNodeAddress(); + String nameServiceId1 = DFSUtil.getNameServiceIdFromAddress(conf, nnAddr1, + DFS_NAMENODE_RPC_ADDRESS_KEY); + String nameServiceId2 = DFSUtil.getNameServiceIdFromAddress(conf, nnAddr2, + DFS_NAMENODE_RPC_ADDRESS_KEY); + + String nameNodeId1 = "nn1"; + String nameNodeId2 = "nn2"; ClientProtocol nn1 = DFSUtil.createNamenode(nnAddr1, conf); ClientProtocol nn2 = DFSUtil.createNamenode(nnAddr2, conf); @@ -78,13 +87,16 @@ public class TestDFSClientFailover { String address1 = "hdfs://" + nnAddr1.getHostName() + ":" + nnAddr1.getPort(); String address2 = "hdfs://" + nnAddr2.getHostName() + ":" + nnAddr2.getPort(); - conf.set(ConfiguredFailoverProxyProvider.CONFIGURED_NAMENODE_ADDRESSES, - address1 + "," + address2); - - conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + nameServiceId, + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, + nameServiceId1, nameNodeId1), address1); + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, + nameServiceId2, nameNodeId2), address2); + + conf.set(DFS_HA_NAMENODES_KEY, nameNodeId1 + "," + nameNodeId2); + conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + logicalNameNodeId, ConfiguredFailoverProxyProvider.class.getName()); - FileSystem fs = FileSystem.get(new URI("hdfs://" + nameServiceId), conf); + FileSystem fs = FileSystem.get(new URI("hdfs://" + logicalNameNodeId), conf); AppendTestUtil.check(fs, TEST_FILE, FILE_LENGTH_TO_VERIFY); cluster.getNameNode(0).stop(); From efb2d93f7739c98600378a9c13019fa9d0b0d6a7 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 27 Oct 2011 23:26:44 +0000 Subject: [PATCH 019/177] HDFS-2393. Mark appropriate methods of ClientProtocol with the idempotent annotation. 
(atm) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1190088 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hadoop/hdfs/protocol/ClientProtocol.java | 33 ++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 9b9851bc96d..be8708c5520 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -17,3 +17,5 @@ HDFS-2301. Start/stop appropriate namenode services when transition to active an HDFS-2231. Configuration changes for HA namenode. (suresh) HDFS-2418. Change ConfiguredFailoverProxyProvider to take advantage of HDFS-2231. (atm) + +HDFS-2393. Mark appropriate methods of ClientProtocol with the idempotent annotation. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index 262c1e3e04d..83cc3f99cae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -112,6 +112,7 @@ public interface ClientProtocol extends VersionedProtocol { * @return a set of server default configuration values * @throws IOException */ + @Idempotent public FsServerDefaults getServerDefaults() throws IOException; /** @@ -215,6 +216,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException if src contains a symlink * @throws IOException If an I/O error occurred */ + @Idempotent public boolean setReplication(String src, short replication) throws AccessControlException, DSQuotaExceededException, FileNotFoundException, SafeModeException, UnresolvedLinkException, @@ -229,6 +231,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException If src contains a symlink * @throws IOException If an I/O error occurred */ + @Idempotent public void setPermission(String src, FsPermission permission) throws AccessControlException, FileNotFoundException, SafeModeException, UnresolvedLinkException, IOException; @@ -246,6 +249,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException If src contains a symlink * @throws IOException If an I/O error occurred */ + @Idempotent public void setOwner(String src, String username, String groupname) throws AccessControlException, FileNotFoundException, SafeModeException, UnresolvedLinkException, IOException; @@ -318,6 +322,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException If src contains a symlink * @throws IOException If an I/O error occurred */ + @Idempotent public LocatedBlock getAdditionalDatanode(final String src, final ExtendedBlock blk, final DatanodeInfo[] existings, final DatanodeInfo[] excludes, final int numAdditionalNodes, final String clientName @@ -355,6 +360,7 @@ public interface ClientProtocol extends VersionedProtocol { * locations on datanodes). 
* @param blocks Array of located blocks to report */ + @Idempotent public void reportBadBlocks(LocatedBlock[] blocks) throws IOException; /////////////////////////////////////// @@ -472,6 +478,7 @@ public interface ClientProtocol extends VersionedProtocol { * RunTimeExceptions: * @throws InvalidPathException If src is invalid */ + @Idempotent public boolean mkdirs(String src, FsPermission masked, boolean createParent) throws AccessControlException, FileAlreadyExistsException, FileNotFoundException, NSQuotaExceededException, @@ -492,6 +499,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException If src contains a symlink * @throws IOException If an I/O error occurred */ + @Idempotent public DirectoryListing getListing(String src, byte[] startAfter, boolean needLocation) @@ -521,6 +529,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws AccessControlException permission denied * @throws IOException If an I/O error occurred */ + @Idempotent public void renewLease(String clientName) throws AccessControlException, IOException; @@ -533,6 +542,7 @@ public interface ClientProtocol extends VersionedProtocol { * @return true if the file is already closed * @throws IOException */ + @Idempotent public boolean recoverLease(String src, String clientName) throws IOException; public int GET_STATS_CAPACITY_IDX = 0; @@ -544,7 +554,7 @@ public interface ClientProtocol extends VersionedProtocol { /** * Get a set of statistics about the filesystem. - * Right now, only three values are returned. + * Right now, only seven values are returned. *
<ul>
 * <li> [0] contains the total storage capacity of the system, in bytes.</li>
 * <li> [1] contains the total used space of the system, in bytes.</li>
    • @@ -557,6 +567,7 @@ public interface ClientProtocol extends VersionedProtocol { * Use public constants like {@link #GET_STATS_CAPACITY_IDX} in place of * actual numbers to index into the array. */ + @Idempotent public long[] getStats() throws IOException; /** @@ -565,6 +576,7 @@ public interface ClientProtocol extends VersionedProtocol { * Return live datanodes if type is LIVE; dead datanodes if type is DEAD; * otherwise all datanodes if type is ALL. */ + @Idempotent public DatanodeInfo[] getDatanodeReport(HdfsConstants.DatanodeReportType type) throws IOException; @@ -575,6 +587,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws IOException * @throws UnresolvedLinkException if the path contains a symlink. */ + @Idempotent public long getPreferredBlockSize(String filename) throws IOException, UnresolvedLinkException; @@ -667,6 +680,7 @@ public interface ClientProtocol extends VersionedProtocol { * * @throws IOException */ + //TODO(HA): Should this be @Idempotent? public void finalizeUpgrade() throws IOException; /** @@ -677,6 +691,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws IOException */ @Nullable + //TODO(HA): Should this be @Idempotent? public UpgradeStatusReport distributedUpgradeProgress(UpgradeAction action) throws IOException; @@ -690,9 +705,9 @@ public interface ClientProtocol extends VersionedProtocol { * all corrupt files, call this method repeatedly and each time pass in the * cookie returned from the previous call. */ - public CorruptFileBlocks - listCorruptFileBlocks(String path, String cookie) - throws IOException; + @Idempotent + public CorruptFileBlocks listCorruptFileBlocks(String path, String cookie) + throws IOException; /** * Dumps namenode data structures into specified file. If the file @@ -709,6 +724,7 @@ public interface ClientProtocol extends VersionedProtocol { * @param bandwidth Blanacer bandwidth in bytes per second for this datanode. * @throws IOException */ + //TODO(HA): Should this be @Idempotent? public void setBalancerBandwidth(long bandwidth) throws IOException; /** @@ -739,6 +755,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException if src contains a symlink * @throws IOException If an I/O error occurred */ + @Idempotent public HdfsFileStatus getFileLinkInfo(String src) throws AccessControlException, UnresolvedLinkException, IOException; @@ -751,6 +768,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException if path contains a symlink. * @throws IOException If an I/O error occurred */ + @Idempotent public ContentSummary getContentSummary(String path) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException; @@ -776,6 +794,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException if the path contains a symlink. * @throws IOException If an I/O error occurred */ + @Idempotent public void setQuota(String path, long namespaceQuota, long diskspaceQuota) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException; @@ -791,6 +810,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException if src contains a symlink. 
* @throws IOException If an I/O error occurred */ + @Idempotent public void fsync(String src, String client) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException; @@ -810,6 +830,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException if src contains a symlink. * @throws IOException If an I/O error occurred */ + @Idempotent public void setTimes(String src, long mtime, long atime) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException; @@ -848,6 +869,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws FileNotFoundException If path does not exist * @throws IOException If an I/O error occurred */ + @Idempotent public String getLinkTarget(String path) throws AccessControlException, FileNotFoundException, IOException; @@ -863,6 +885,7 @@ public interface ClientProtocol extends VersionedProtocol { * @return a located block with a new generation stamp and an access token * @throws IOException if any error occurs */ + @Idempotent public LocatedBlock updateBlockForPipeline(ExtendedBlock block, String clientName) throws IOException; @@ -886,6 +909,7 @@ public interface ClientProtocol extends VersionedProtocol { * @return Token * @throws IOException */ + @Idempotent public Token getDelegationToken(Text renewer) throws IOException; @@ -896,6 +920,7 @@ public interface ClientProtocol extends VersionedProtocol { * @return the new expiration time * @throws IOException */ + @Idempotent public long renewDelegationToken(Token token) throws IOException; From b4992f671d36e35fd874958ffbc9e66abc29a725 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 28 Oct 2011 21:24:04 +0000 Subject: [PATCH 020/177] HADOOP-7774. HA: Administrative CLI to control HA daemons. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1190584 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 1 + .../java/org/apache/hadoop/ha/HAAdmin.java | 204 ++++++++++++++++++ .../org/apache/hadoop/ha/TestHAAdmin.java | 123 +++++++++++ 3 files changed, 328 insertions(+) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index f2073093752..3207e70c384 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -5,3 +5,4 @@ branch is merged. ------------------------------ HADOOP-7455. HA: Introduce HA Service Protocol Interface. (suresh) +HADOOP-7774. HA: Administrative CLI to control HA daemons. (todd) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java new file mode 100644 index 00000000000..b880311da41 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ha; + +import java.io.IOException; +import java.io.PrintStream; +import java.net.InetSocketAddress; +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import com.google.common.collect.ImmutableMap; + +/** + * A command-line tool for making calls in the HAServiceProtocol. + * For example,. this can be used to force a daemon to standby or active + * mode, or to trigger a health-check. + */ +@InterfaceAudience.Private +public class HAAdmin extends Configured implements Tool { + + private static Map USAGE = + ImmutableMap.builder() + .put("-transitionToActive", + new UsageInfo("", "Transitions the daemon into Active state")) + .put("-transitionToStandby", + new UsageInfo("", "Transitions the daemon into Passive state")) + .put("-checkHealth", + new UsageInfo("", + "Requests that the daemon perform a health check.\n" + + "The HAAdmin tool will exit with a non-zero exit code\n" + + "if the check fails.")) + .put("-help", + new UsageInfo("", "Displays help on the specified command")) + .build(); + + /** Output stream for errors, for use in tests */ + PrintStream errOut = System.err; + PrintStream out = System.out; + + private static void printUsage(PrintStream errOut) { + errOut.println("Usage: java HAAdmin"); + for (Map.Entry e : USAGE.entrySet()) { + String cmd = e.getKey(); + UsageInfo usage = e.getValue(); + + errOut.println(" [" + cmd + " " + usage.args + "]"); + } + errOut.println(); + ToolRunner.printGenericCommandUsage(errOut); + } + + private static void printUsage(PrintStream errOut, String cmd) { + UsageInfo usage = USAGE.get(cmd); + if (usage == null) { + throw new RuntimeException("No usage for cmd " + cmd); + } + errOut.println("Usage: java HAAdmin [" + cmd + " " + usage.args + "]"); + } + + private int transitionToActive(final String[] argv) + throws IOException, ServiceFailedException { + if (argv.length != 2) { + errOut.println("transitionToActive: incorrect number of arguments"); + printUsage(errOut, "-transitionToActive"); + return -1; + } + + HAServiceProtocol proto = getProtocol(argv[1]); + proto.transitionToActive(); + return 0; + } + + + private int transitionToStandby(final String[] argv) + throws IOException, ServiceFailedException { + if (argv.length != 2) { + errOut.println("transitionToStandby: incorrect number of arguments"); + printUsage(errOut, "-transitionToStandby"); + return -1; + } + + HAServiceProtocol proto = getProtocol(argv[1]); + proto.transitionToStandby(); + return 0; + } + + private int checkHealth(final String[] argv) + throws IOException, ServiceFailedException { + if (argv.length != 2) { + errOut.println("checkHealth: incorrect number of arguments"); + printUsage(errOut, "-checkHealth"); + return -1; + } + + 
HAServiceProtocol proto = getProtocol(argv[1]); + try { + proto.monitorHealth(); + } catch (HealthCheckFailedException e) { + errOut.println("Health check failed: " + e.getLocalizedMessage()); + return 1; + } + return 0; + } + + /** + * Return a proxy to the specified target host:port. + */ + protected HAServiceProtocol getProtocol(String target) + throws IOException { + InetSocketAddress addr = NetUtils.createSocketAddr(target); + return (HAServiceProtocol)RPC.getProxy( + HAServiceProtocol.class, HAServiceProtocol.versionID, + addr, getConf()); + } + + + @Override + public int run(String[] argv) throws Exception { + if (argv.length < 1) { + printUsage(errOut); + return -1; + } + + int i = 0; + String cmd = argv[i++]; + + if (!cmd.startsWith("-")) { + errOut.println("Bad command '" + cmd + "': expected command starting with '-'"); + printUsage(errOut); + return -1; + } + + if ("-transitionToActive".equals(cmd)) { + return transitionToActive(argv); + } else if ("-transitionToStandby".equals(cmd)) { + return transitionToStandby(argv); + } else if ("-checkHealth".equals(cmd)) { + return checkHealth(argv); + } else if ("-help".equals(cmd)) { + return help(argv); + } else { + errOut.println(cmd.substring(1) + ": Unknown command"); + printUsage(errOut); + return -1; + } + } + + private int help(String[] argv) { + if (argv.length != 2) { + printUsage(errOut, "-help"); + return -1; + } + String cmd = argv[1]; + if (!cmd.startsWith("-")) { + cmd = "-" + cmd; + } + UsageInfo usageInfo = USAGE.get(cmd); + if (usageInfo == null) { + errOut.println(cmd + ": Unknown command"); + printUsage(errOut); + return -1; + } + + errOut .println(cmd + " [" + usageInfo.args + "]: " + usageInfo.help); + return 1; + } + + public static void main(String[] argv) throws Exception { + int res = ToolRunner.run(new HAAdmin(), argv); + System.exit(res); + } + + + private static class UsageInfo { + private final String args; + private final String help; + + public UsageInfo(String args, String help) { + this.args = args; + this.help = help; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java new file mode 100644 index 00000000000..3cddbbe8a21 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java @@ -0,0 +1,123 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ha; + +import static org.junit.Assert.*; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; + +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.Log; +import org.apache.hadoop.conf.Configuration; + +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import com.google.common.base.Charsets; +import com.google.common.base.Joiner; + +public class TestHAAdmin { + private static final Log LOG = LogFactory.getLog(TestHAAdmin.class); + + private HAAdmin tool; + private ByteArrayOutputStream errOutBytes = new ByteArrayOutputStream(); + private String errOutput; + private HAServiceProtocol mockProtocol; + + @Before + public void setup() { + mockProtocol = Mockito.mock(HAServiceProtocol.class); + tool = new HAAdmin() { + @Override + protected HAServiceProtocol getProtocol(String target) throws IOException { + return mockProtocol; + } + }; + tool.setConf(new Configuration()); + tool.errOut = new PrintStream(errOutBytes); + } + + private void assertOutputContains(String string) { + if (!errOutput.contains(string)) { + fail("Expected output to contain '" + string + "' but was:\n" + + errOutput); + } + } + + @Test + public void testAdminUsage() throws Exception { + assertEquals(-1, runTool()); + assertOutputContains("Usage:"); + assertOutputContains("-transitionToActive"); + + assertEquals(-1, runTool("badCommand")); + assertOutputContains("Bad command 'badCommand'"); + + assertEquals(-1, runTool("-badCommand")); + assertOutputContains("badCommand: Unknown"); + + // valid command but not enough arguments + assertEquals(-1, runTool("-transitionToActive")); + assertOutputContains("transitionToActive: incorrect number of arguments"); + assertEquals(-1, runTool("-transitionToActive", "x", "y")); + assertOutputContains("transitionToActive: incorrect number of arguments"); +} + + @Test + public void testHelp() throws Exception { + assertEquals(-1, runTool("-help")); + assertEquals(1, runTool("-help", "transitionToActive")); + assertOutputContains("Transitions the daemon into Active"); + } + + @Test + public void testTransitionToActive() throws Exception { + assertEquals(0, runTool("-transitionToActive", "xxx")); + Mockito.verify(mockProtocol).transitionToActive(); + } + + @Test + public void testTransitionToStandby() throws Exception { + assertEquals(0, runTool("-transitionToStandby", "xxx")); + Mockito.verify(mockProtocol).transitionToStandby(); + } + + @Test + public void testCheckHealth() throws Exception { + assertEquals(0, runTool("-checkHealth", "xxx")); + Mockito.verify(mockProtocol).monitorHealth(); + + Mockito.doThrow(new HealthCheckFailedException("fake health check failure")) + .when(mockProtocol).monitorHealth(); + assertEquals(1, runTool("-checkHealth", "xxx")); + assertOutputContains("Health check failed: fake health check failure"); + } + + private Object runTool(String ... args) throws Exception { + errOutBytes.reset(); + LOG.info("Running: HAAdmin " + Joiner.on(" ").join(args)); + int ret = tool.run(args); + errOutput = new String(errOutBytes.toByteArray(), Charsets.UTF_8); + LOG.info("Output:\n" + errOutput); + return ret; + } + +} From 3ab295994a4e7870a1f68d742d26c3ac44546fa5 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 1 Nov 2011 01:29:59 +0000 Subject: [PATCH 021/177] HDFS-2523. Small NN fixes to include HAServiceProtocol and prevent NPE on shutdown. Contributed by Todd Lipcon. 
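To make the intended use of the new HAAdmin tool concrete, here is a small usage sketch (not part of either patch): it drives HAAdmin through ToolRunner exactly as its own main() does, asking a daemon for a health check. The address nn1.example.com:8021 is a placeholder, not a value taken from this branch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.ha.HAAdmin;
import org.apache.hadoop.util.ToolRunner;

public class HAAdminUsageSketch {
  public static void main(String[] args) throws Exception {
    // Exit code 0 if the daemon reports healthy, 1 if monitorHealth() threw
    // HealthCheckFailedException, -1 on usage errors (see HAAdmin above).
    int ret = ToolRunner.run(new Configuration(), new HAAdmin(),
        new String[] { "-checkHealth", "nn1.example.com:8021" });
    System.exit(ret);
  }
}

Equivalent invocations with -transitionToActive or -transitionToStandby flip the daemon's HA state, which is what TestHAAdmin above verifies against a mocked HAServiceProtocol.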
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1195753 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../java/org/apache/hadoop/hdfs/server/namenode/NameNode.java | 4 +++- .../apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index be8708c5520..37e6e4acac3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -19,3 +19,5 @@ HDFS-2231. Configuration changes for HA namenode. (suresh) HDFS-2418. Change ConfiguredFailoverProxyProvider to take advantage of HDFS-2231. (atm) HDFS-2393. Mark appropriate methods of ClientProtocol with the idempotent annotation. (atm) + +HDFS-2523. Small NN fixes to include HAServiceProtocol and prevent NPE on shutdown. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 456f8d6b75e..4eb080105f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -572,7 +572,9 @@ public class NameNode { stopRequested = true; } try { - state.exitState(haContext); + if (state != null) { + state.exitState(haContext); + } } catch (ServiceFailedException e) { LOG.warn("Encountered exception while exiting state ", e); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index d79614f7d43..6546b8fe06b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -39,6 +39,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; +import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HealthCheckFailedException; import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.HDFSPolicyProvider; @@ -156,6 +157,7 @@ class NameNodeRpcServer implements NamenodeProtocols { this.server.addProtocol(RefreshAuthorizationPolicyProtocol.class, this); this.server.addProtocol(RefreshUserMappingsProtocol.class, this); this.server.addProtocol(GetUserMappingsProtocol.class, this); + this.server.addProtocol(HAServiceProtocol.class, this); // set service-level authorization security policy @@ -225,6 +227,8 @@ class NameNodeRpcServer implements NamenodeProtocols { return RefreshUserMappingsProtocol.versionID; } else if (protocol.equals(GetUserMappingsProtocol.class.getName())){ return GetUserMappingsProtocol.versionID; + } else if (protocol.equals(HAServiceProtocol.class.getName())) { + return HAServiceProtocol.versionID; } else { throw new IOException("Unknown protocol to name node: " + protocol); } From 6c710f5f80f3360bbae03aa3fb808b40fbdd97d9 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 17 Nov 2011 18:46:25 +0000 Subject: 
[PATCH 022/177] Merge trunk into HA branch. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1203320 13f79535-47bb-0310-9956-ffa450edef68 From 5f5b3bbdcd091fc6d14401eebc9c41204bad2325 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 24 Nov 2011 00:55:37 +0000 Subject: [PATCH 023/177] HDFS-2577. NN fails to start since it tries to start secret manager in safemode. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1205689 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/FSNamesystem.java | 22 ++++--- .../hdfs/server/namenode/LeaseManager.java | 59 +++++++++++++++---- .../org/apache/hadoop/hdfs/util/RwLock.java | 3 + .../hdfs/server/namenode/NameNodeAdapter.java | 2 +- 5 files changed, 65 insertions(+), 23 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 37e6e4acac3..6530cdcc8c3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -21,3 +21,5 @@ HDFS-2418. Change ConfiguredFailoverProxyProvider to take advantage of HDFS-2231 HDFS-2393. Mark appropriate methods of ClientProtocol with the idempotent annotation. (atm) HDFS-2523. Small NN fixes to include HAServiceProtocol and prevent NPE on shutdown. (todd) + +HDFS-2577. NN fails to start since it tries to start secret manager in safemode. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index fc56db4752d..3f933a8ffaf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -258,7 +258,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LeaseManager leaseManager = new LeaseManager(this); - Daemon lmthread = null; // LeaseMonitor thread Daemon smmthread = null; // SafeModeMonitor thread Daemon nnrmthread = null; // NamenodeResourceMonitor thread @@ -450,9 +449,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LOG.info("Starting services required for active state"); writeLock(); try { - startSecretManager(); - lmthread = new Daemon(leaseManager.new Monitor()); - lmthread.start(); + if (UserGroupInformation.isSecurityEnabled()) { + startSecretManager(); + } + leaseManager.startMonitor(); } finally { writeUnlock(); } @@ -467,14 +467,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { stopSecretManager(); - if (lmthread != null) { - try { - lmthread.interrupt(); - lmthread.join(3000); - } catch (InterruptedException ie) { - LOG.warn("Encountered exception ", ie); - } - lmthread = null; + if (leaseManager != null) { + leaseManager.stopMonitor(); } } finally { writeUnlock(); @@ -542,6 +536,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, this.fsLock.writeLock().lock(); } @Override + public void writeLockInterruptibly() throws InterruptedException { + this.fsLock.writeLock().lockInterruptibly(); + } + @Override public void writeUnlock() { this.fsLock.writeLock().unlock(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java index 44857739b30..c5ea2d09b63 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java @@ -34,6 +34,10 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.util.Daemon; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; import static org.apache.hadoop.hdfs.server.common.Util.now; @@ -82,6 +86,9 @@ public class LeaseManager { // private SortedMap sortedLeasesByPath = new TreeMap(); + private Daemon lmthread; + private volatile boolean shouldRunMonitor; + LeaseManager(FSNamesystem fsnamesystem) {this.fsnamesystem = fsnamesystem;} Lease getLease(String holder) { @@ -367,18 +374,18 @@ public class LeaseManager { /** Check leases periodically. */ public void run() { - for(; fsnamesystem.isRunning(); ) { - fsnamesystem.writeLock(); + for(; shouldRunMonitor && fsnamesystem.isRunning(); ) { try { - if (!fsnamesystem.isInSafeMode()) { - checkLeases(); + fsnamesystem.writeLockInterruptibly(); + try { + if (!fsnamesystem.isInSafeMode()) { + checkLeases(); + } + } finally { + fsnamesystem.writeUnlock(); } - } finally { - fsnamesystem.writeUnlock(); - } - - - try { + + Thread.sleep(HdfsServerConstants.NAMENODE_LEASE_RECHECK_INTERVAL); } catch(InterruptedException ie) { if (LOG.isDebugEnabled()) { @@ -437,4 +444,36 @@ public class LeaseManager { + "\n sortedLeasesByPath=" + sortedLeasesByPath + "\n}"; } + + void startMonitor() { + Preconditions.checkState(lmthread == null, + "Lease Monitor already running"); + shouldRunMonitor = true; + lmthread = new Daemon(new Monitor()); + lmthread.start(); + } + + void stopMonitor() { + if (lmthread != null) { + shouldRunMonitor = false; + try { + lmthread.interrupt(); + lmthread.join(3000); + } catch (InterruptedException ie) { + LOG.warn("Encountered exception ", ie); + } + lmthread = null; + } + } + + /** + * Trigger the currently-running Lease monitor to re-check + * its leases immediately. This is for use by unit tests. + */ + @VisibleForTesting + void triggerMonitorCheckNow() { + Preconditions.checkState(lmthread != null, + "Lease monitor is not running"); + lmthread.interrupt(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java index cd88963e3da..8a0f9923640 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/RwLock.java @@ -30,6 +30,9 @@ public interface RwLock { /** Acquire write lock. */ public void writeLock(); + + /** Acquire write lock, unless interrupted while waiting */ + void writeLockInterruptibly() throws InterruptedException; /** Release write lock. 
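The shape of the HDFS-2577 change is easier to see in isolation. The sketch below is not from the patch; it shows the pattern the rewritten LeaseManager monitor follows against the RwLock interface: a volatile run flag, an interruptible write lock released in a finally block, and thread interruption used as the shutdown signal. The two-second sleep stands in for NAMENODE_LEASE_RECHECK_INTERVAL.

import org.apache.hadoop.hdfs.util.RwLock;

public class MonitorLoopSketch implements Runnable {
  private final RwLock lock;
  private volatile boolean shouldRun = true;   // cleared by stop(), like stopMonitor()

  public MonitorLoopSketch(RwLock lock) { this.lock = lock; }

  @Override
  public void run() {
    while (shouldRun) {
      try {
        lock.writeLockInterruptibly();         // can be interrupted while blocked
        try {
          // periodic work done under the write lock (e.g. checkLeases())
        } finally {
          lock.writeUnlock();
        }
        Thread.sleep(2000);                    // placeholder recheck interval
      } catch (InterruptedException ie) {
        // interrupt() from shutdown; the loop condition decides whether to exit
      }
    }
  }

  public void stop() { shouldRun = false; }    // caller then interrupts the thread
}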
*/ public void writeUnlock(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index acbd7d4ee09..fd52e901abd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -78,7 +78,7 @@ public class NameNodeAdapter { /** Set the softLimit and hardLimit of client lease periods. */ public static void setLeasePeriod(final FSNamesystem namesystem, long soft, long hard) { getLeaseManager(namesystem).setLeasePeriod(soft, hard); - namesystem.lmthread.interrupt(); + namesystem.leaseManager.triggerMonitorCheckNow(); } public static String getLeaseHolderForPath(NameNode namenode, String path) { From 9146ad23f3f1af7c5547fba08e2a867cee49e015 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 29 Nov 2011 02:27:45 +0000 Subject: [PATCH 024/177] HDFS-2582. Scope dfs.ha.namenodes config by nameservice. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1207738 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 300 ++++++++++-------- .../java/org/apache/hadoop/hdfs/HAUtil.java | 35 +- .../hadoop/hdfs/server/balancer/Balancer.java | 13 +- .../server/balancer/NameNodeConnector.java | 17 +- .../hadoop/hdfs/server/datanode/DataNode.java | 24 +- .../server/namenode/ClusterJspHelper.java | 17 +- .../hadoop/hdfs/server/namenode/NameNode.java | 7 +- .../ha/ConfiguredFailoverProxyProvider.java | 10 +- .../org/apache/hadoop/hdfs/tools/GetConf.java | 26 +- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 84 ++++- .../hdfs/server/balancer/TestBalancer.java | 5 +- .../TestBalancerWithMultipleNameNodes.java | 4 +- .../apache/hadoop/hdfs/tools/TestGetConf.java | 23 +- 14 files changed, 358 insertions(+), 209 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 6530cdcc8c3..0c854a5056e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -23,3 +23,5 @@ HDFS-2393. Mark appropriate methods of ClientProtocol with the idempotent annota HDFS-2523. Small NN fixes to include HAServiceProtocol and prevent NPE on shutdown. (todd) HDFS-2577. NN fails to start since it tries to start secret manager in safemode. (todd) + +HDFS-2582. 
Scope dfs.ha.namenodes config by nameservice (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index f35f4910395..fcb7d4cb891 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -24,10 +24,11 @@ import java.io.UnsupportedEncodingException; import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; -import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.Comparator; import java.util.List; +import java.util.Map; import java.util.Random; import java.util.StringTokenizer; @@ -45,11 +46,14 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.namenode.NameNode; -import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.security.UserGroupInformation; +import com.google.common.base.Joiner; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + @InterfaceAudience.Private public class DFSUtil { private DFSUtil() { /* Hidden constructor */ } @@ -288,10 +292,22 @@ public class DFSUtil { /** * Returns collection of nameservice Ids from the configuration. * @param conf configuration - * @return collection of nameservice Ids + * @return collection of nameservice Ids, or null if not specified */ public static Collection getNameServiceIds(Configuration conf) { - return conf.getStringCollection(DFS_FEDERATION_NAMESERVICES); + return conf.getTrimmedStringCollection(DFS_FEDERATION_NAMESERVICES); + } + + /** + * @return coll if it is non-null and non-empty. Otherwise, + * returns a list with a single null value. + */ + private static Collection emptyAsSingletonNull(Collection coll) { + if (coll == null || coll.isEmpty()) { + return Collections.singletonList(null); + } else { + return coll; + } } /** @@ -300,12 +316,14 @@ public class DFSUtil { * for each namenode in the in the HA setup. * * @param conf configuration + * @param nsId the nameservice ID to look at, or null for non-federated * @return collection of namenode Ids */ - public static Collection getNameNodeIds(Configuration conf) { - return conf.getStringCollection(DFS_HA_NAMENODES_KEY); + static Collection getNameNodeIds(Configuration conf, String nsId) { + String key = addSuffix(DFS_HA_NAMENODES_KEY, nsId); + return conf.getTrimmedStringCollection(key); } - + /** * Given a list of keys in the order of preference, returns a value * for the key in the given order from the configuration. @@ -333,13 +351,12 @@ public class DFSUtil { /** Add non empty and non null suffix to a key */ private static String addSuffix(String key, String suffix) { - if (suffix == null || suffix.length() == 0) { + if (suffix == null || suffix.isEmpty()) { return key; } - if (!suffix.startsWith(".")) { - key += "."; - } - return key += suffix; + assert !suffix.startsWith(".") : + "suffix '" + suffix + "' should not already have '.' prepended."; + return key + "." + suffix; } /** Concatenate list of suffix strings '.' 
separated */ @@ -347,11 +364,7 @@ public class DFSUtil { if (suffixes == null) { return null; } - String ret = ""; - for (int i = 0; i < suffixes.length - 1; i++) { - ret = addSuffix(ret, suffixes[i]); - } - return addSuffix(ret, suffixes[suffixes.length - 1]); + return Joiner.on(".").skipNulls().join(suffixes); } /** @@ -363,69 +376,44 @@ public class DFSUtil { } /** - * Returns list of InetSocketAddress for a given set of keys. + * Returns the configured address for all NameNodes in the cluster. * @param conf configuration - * @param defaultAddress default address to return in case key is not found + * @param defaultAddress default address to return in case key is not found. * @param keys Set of keys to look for in the order of preference - * @return list of InetSocketAddress corresponding to the key + * @return a map(nameserviceId to map(namenodeId to InetSocketAddress)) */ - private static List getAddresses(Configuration conf, + private static Map> + getAddresses(Configuration conf, String defaultAddress, String... keys) { Collection nameserviceIds = getNameServiceIds(conf); - Collection namenodeIds = getNameNodeIds(conf); - List isas = new ArrayList(); + + // Look for configurations of the form [.][.] + // across all of the configured nameservices and namenodes. + Map> ret = Maps.newHashMap(); + for (String nsId : emptyAsSingletonNull(nameserviceIds)) { + Map isas = + getAddressesForNameserviceId(conf, nsId, defaultAddress, keys); + if (!isas.isEmpty()) { + ret.put(nsId, isas); + } + } + return ret; + } - final boolean federationEnabled = nameserviceIds != null - && !nameserviceIds.isEmpty(); - final boolean haEnabled = namenodeIds != null - && !namenodeIds.isEmpty(); - - // Configuration with no federation and ha, return default address - if (!federationEnabled && !haEnabled) { - String address = getConfValue(defaultAddress, null, conf, keys); - if (address == null) { - return null; - } - isas.add(NetUtils.createSocketAddr(address)); - return isas; - } - - if (!federationEnabled) { - nameserviceIds = new ArrayList(); - nameserviceIds.add(null); - } - if (!haEnabled) { - namenodeIds = new ArrayList(); - namenodeIds.add(null); - } - - // Get configuration suffixed with nameserviceId and/or namenodeId - if (federationEnabled && haEnabled) { - for (String nameserviceId : nameserviceIds) { - for (String nnId : namenodeIds) { - String keySuffix = concatSuffixes(nameserviceId, nnId); - String address = getConfValue(null, keySuffix, conf, keys); - if (address != null) { - isas.add(NetUtils.createSocketAddr(address)); - } - } - } - } else if (!federationEnabled && haEnabled) { - for (String nnId : namenodeIds) { - String address = getConfValue(null, nnId, conf, keys); - if (address != null) { - isas.add(NetUtils.createSocketAddr(address)); - } - } - } else if (federationEnabled && !haEnabled) { - for (String nameserviceId : nameserviceIds) { - String address = getConfValue(null, nameserviceId, conf, keys); - if (address != null) { - isas.add(NetUtils.createSocketAddr(address)); - } + private static Map getAddressesForNameserviceId( + Configuration conf, String nsId, String defaultValue, + String[] keys) { + Collection nnIds = getNameNodeIds(conf, nsId); + Map ret = Maps.newHashMap(); + for (String nnId : emptyAsSingletonNull(nnIds)) { + String suffix = concatSuffixes(nsId, nnId); + String address = getConfValue(defaultValue, suffix, conf, keys); + if (address != null) { + InetSocketAddress isa = NetUtils.createSocketAddr(address); + ret.put(nnId, isa); } } - return isas; + return ret; } /** 
@@ -436,15 +424,9 @@ public class DFSUtil { * @return list of InetSocketAddresses * @throws IOException if no addresses are configured */ - public static List getHaNnRpcAddresses( - Configuration conf) throws IOException { - List addressList = getAddresses(conf, null, - DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); - if (addressList == null) { - throw new IOException("Incorrect configuration: HA name node addresses " - + DFS_NAMENODE_RPC_ADDRESS_KEY + " is not configured."); - } - return addressList; + public static Map> getHaNnRpcAddresses( + Configuration conf) { + return getAddresses(conf, null, DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); } /** @@ -455,11 +437,11 @@ public class DFSUtil { * @return list of InetSocketAddresses * @throws IOException on error */ - public static List getBackupNodeAddresses( + public static Map> getBackupNodeAddresses( Configuration conf) throws IOException { - List addressList = getAddresses(conf, + Map> addressList = getAddresses(conf, null, DFS_NAMENODE_BACKUP_ADDRESS_KEY); - if (addressList == null) { + if (addressList.isEmpty()) { throw new IOException("Incorrect configuration: backup node address " + DFS_NAMENODE_BACKUP_ADDRESS_KEY + " is not configured."); } @@ -474,11 +456,11 @@ public class DFSUtil { * @return list of InetSocketAddresses * @throws IOException on error */ - public static List getSecondaryNameNodeAddresses( + public static Map> getSecondaryNameNodeAddresses( Configuration conf) throws IOException { - List addressList = getAddresses(conf, null, + Map> addressList = getAddresses(conf, null, DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY); - if (addressList == null) { + if (addressList.isEmpty()) { throw new IOException("Incorrect configuration: secondary namenode address " + DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY + " is not configured."); } @@ -498,7 +480,7 @@ public class DFSUtil { * @return list of InetSocketAddress * @throws IOException on error */ - public static List getNNServiceRpcAddresses( + public static Map> getNNServiceRpcAddresses( Configuration conf) throws IOException { // Use default address as fall back String defaultAddress; @@ -508,9 +490,10 @@ public class DFSUtil { defaultAddress = null; } - List addressList = getAddresses(conf, defaultAddress, + Map> addressList = + getAddresses(conf, defaultAddress, DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); - if (addressList == null) { + if (addressList.isEmpty()) { throw new IOException("Incorrect configuration: namenode address " + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY + " or " + DFS_NAMENODE_RPC_ADDRESS_KEY @@ -519,6 +502,77 @@ public class DFSUtil { return addressList; } + /** + * Flatten the given map, as returned by other functions in this class, + * into a flat list of {@link ConfiguredNNAddress} instances. + */ + public static List flattenAddressMap( + Map> map) { + List ret = Lists.newArrayList(); + + for (Map.Entry> entry : + map.entrySet()) { + String nsId = entry.getKey(); + Map nnMap = entry.getValue(); + for (Map.Entry e2 : nnMap.entrySet()) { + String nnId = e2.getKey(); + InetSocketAddress addr = e2.getValue(); + + ret.add(new ConfiguredNNAddress(nsId, nnId, addr)); + } + } + return ret; + } + + /** + * Format the given map, as returned by other functions in this class, + * into a string suitable for debugging display. The format of this string + * should not be considered an interface, and is liable to change. 
+ */ + public static String addressMapToString( + Map> map) { + StringBuilder b = new StringBuilder(); + for (Map.Entry> entry : + map.entrySet()) { + String nsId = entry.getKey(); + Map nnMap = entry.getValue(); + b.append("Nameservice <").append(nsId).append(">:").append("\n"); + for (Map.Entry e2 : nnMap.entrySet()) { + b.append(" NN ID ").append(e2.getKey()) + .append(" => ").append(e2.getValue()).append("\n"); + } + } + return b.toString(); + } + + /** + * Represent one of the NameNodes configured in the cluster. + */ + public static class ConfiguredNNAddress { + private final String nameserviceId; + private final String namenodeId; + private final InetSocketAddress addr; + + private ConfiguredNNAddress(String nameserviceId, String namenodeId, + InetSocketAddress addr) { + this.nameserviceId = nameserviceId; + this.namenodeId = namenodeId; + this.addr = addr; + } + + public String getNameserviceId() { + return nameserviceId; + } + + public String getNamenodeId() { + return namenodeId; + } + + public InetSocketAddress getAddress() { + return addr; + } + } + /** * Given the InetSocketAddress this method returns the nameservice Id * corresponding to the key with matching address, by doing a reverse @@ -545,11 +599,8 @@ public class DFSUtil { public static String getNameServiceIdFromAddress(final Configuration conf, final InetSocketAddress address, String... keys) { // Configuration with a single namenode and no nameserviceId - if (!isFederationEnabled(conf)) { - return null; - } String[] ids = getSuffixIDs(conf, address, keys); - return (ids != null && ids.length > 0) ? ids[0] : null; + return (ids != null) ? ids[0] : null; } /** @@ -715,14 +766,6 @@ public class DFSUtil { ClientDatanodeProtocolTranslatorR23(datanodeid, conf, socketTimeout, locatedBlock); } - - /** - * Returns true if federation configuration is enabled - */ - public static boolean isFederationEnabled(Configuration conf) { - Collection collection = getNameServiceIds(conf); - return collection != null && collection.size() != 0; - } /** Create {@link ClientDatanodeProtocol} proxy using kerberos ticket */ static ClientDatanodeProtocol createClientDatanodeProtocolProxy( @@ -783,16 +826,9 @@ public class DFSUtil { if (nameserviceId != null) { return nameserviceId; } - if (!isFederationEnabled(conf)) { - return null; - } - nameserviceId = getSuffixIDs(conf, addressKey, LOCAL_ADDRESS_MATCHER)[0]; - if (nameserviceId == null) { - String msg = "Configuration " + addressKey + " must be suffixed with" + - " nameserviceId for federation configuration."; - throw new HadoopIllegalArgumentException(msg); - } - return nameserviceId; + String nnId = conf.get(DFS_HA_NAMENODE_ID_KEY); + + return getSuffixIDs(conf, addressKey, null, nnId, LOCAL_ADDRESS_MATCHER)[0]; } /** @@ -801,6 +837,8 @@ public class DFSUtil { * * @param conf Configuration * @param addressKey configuration key corresponding to the address. + * @param knownNsId only look at configs for the given nameservice, if not-null + * @param knownNNId only look at configs for the given namenode, if not null * @param matcher matching criteria for matching the address * @return Array with nameservice Id and namenode Id on success. First element * in the array is nameservice Id and second element is namenode Id. 
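As a reading aid (not part of the patch), this is the consumption pattern the new map-returning accessors are designed for, matching what ClusterJspHelper and GetConf do later in this change: flatten the nameservice-to-namenode map and walk the ConfiguredNNAddress entries. It assumes a configuration that actually defines NameNode RPC addresses; otherwise getNNServiceRpcAddresses throws IOException.

import java.net.InetSocketAddress;
import java.util.Map;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class AddressMapSketch {
  public static void main(String[] args) throws Exception {
    Map<String, Map<String, InetSocketAddress>> map =
        DFSUtil.getNNServiceRpcAddresses(new HdfsConfiguration());
    for (ConfiguredNNAddress cnn : DFSUtil.flattenAddressMap(map)) {
      // nameserviceId and namenodeId are null for non-federated / non-HA setups
      System.out.println(cnn.getNameserviceId() + "/" + cnn.getNamenodeId()
          + " -> " + cnn.getAddress());
    }
  }
}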
@@ -809,29 +847,23 @@ public class DFSUtil { * @throws HadoopIllegalArgumentException on error */ static String[] getSuffixIDs(final Configuration conf, final String addressKey, + String knownNsId, String knownNNId, final AddressMatcher matcher) { - Collection nsIds = getNameServiceIds(conf); - boolean federationEnabled = true; - if (nsIds == null || nsIds.size() == 0) { - federationEnabled = false; // federation not configured - nsIds = new ArrayList(); - nsIds.add(null); - } - - boolean haEnabled = true; - Collection nnIds = getNameNodeIds(conf); - if (nnIds == null || nnIds.size() == 0) { - haEnabled = false; // HA not configured - nnIds = new ArrayList(); - nnIds.add(null); - } - - // Match the address from addressKey.nsId.nnId based on the given matcher String nameserviceId = null; String namenodeId = null; int found = 0; - for (String nsId : nsIds) { - for (String nnId : nnIds) { + + Collection nsIds = getNameServiceIds(conf); + for (String nsId : emptyAsSingletonNull(nsIds)) { + if (knownNsId != null && !knownNsId.equals(nsId)) { + continue; + } + + Collection nnIds = getNameNodeIds(conf, nsId); + for (String nnId : emptyAsSingletonNull(nnIds)) { + if (knownNNId != null && !knownNNId.equals(nnId)) { + continue; + } String key = addKeySuffixes(addressKey, nsId, nnId); String addr = conf.get(key); InetSocketAddress s = null; @@ -850,8 +882,8 @@ public class DFSUtil { if (found > 1) { // Only one address must match the local address String msg = "Configuration has multiple addresses that match " + "local node's address. Please configure the system with " - + (federationEnabled ? DFS_FEDERATION_NAMESERVICE_ID : "") - + (haEnabled ? (" and " + DFS_HA_NAMENODE_ID_KEY) : ""); + + DFS_FEDERATION_NAMESERVICE_ID + " and " + + DFS_HA_NAMENODE_ID_KEY; throw new HadoopIllegalArgumentException(msg); } return new String[] { nameserviceId, namenodeId }; @@ -872,7 +904,7 @@ public class DFSUtil { }; for (String key : keys) { - String[] ids = getSuffixIDs(conf, key, matcher); + String[] ids = getSuffixIDs(conf, key, null, null, matcher); if (ids != null && (ids [0] != null || ids[1] != null)) { return ids; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 2ba01f55015..6952871ccd1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import java.net.InetSocketAddress; import java.util.Collection; +import java.util.Map; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; @@ -29,14 +30,18 @@ public class HAUtil { private HAUtil() { /* Hidden constructor */ } /** - * Returns true if HA for namenode is configured. + * Returns true if HA for namenode is configured for the given nameservice * * @param conf Configuration + * @param nsId nameservice, or null if no federated NS is configured * @return true if HA is configured in the configuration; else false. 
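For orientation only (not from the patch): the per-nameservice key layout that the reworked isHAEnabled(conf, nsId) inspects. The nameservice and namenode names and host:ports below are made up, and the keys are written out literally rather than via the DFSConfigKeys constants used in the code above.

import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class HAEnabledSketch {
  public static void main(String[] args) {
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.set("dfs.federation.nameservices", "ns1");
    conf.set("dfs.ha.namenodes.ns1", "nn1,nn2");
    conf.set("dfs.namenode.rpc-address.ns1.nn1", "host1.example.com:8020");
    conf.set("dfs.namenode.rpc-address.ns1.nn2", "host2.example.com:8020");
    // true: nameservice "ns1" has more than one NameNode configured
    System.out.println(HAUtil.isHAEnabled(conf, "ns1"));
  }
}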
*/ - public static boolean isHAEnabled(Configuration conf) { - Collection collection = DFSUtil.getNameNodeIds(conf); - return collection != null && !collection.isEmpty(); + public static boolean isHAEnabled(Configuration conf, String nsId) { + Map> addresses = + DFSUtil.getHaNnRpcAddresses(conf); + if (addresses == null) return false; + Map nnMap = addresses.get(nsId); + return nnMap != null && nnMap.size() > 1; } /** @@ -52,22 +57,21 @@ public class HAUtil { * @return namenode Id on success, null on failure. * @throws HadoopIllegalArgumentException on error */ - public static String getNameNodeId(Configuration conf) { - String namenodeId = conf.get(DFS_HA_NAMENODE_ID_KEY); + public static String getNameNodeId(Configuration conf, String nsId) { + String namenodeId = conf.getTrimmed(DFS_HA_NAMENODE_ID_KEY); if (namenodeId != null) { return namenodeId; } - if (!isHAEnabled(conf)) { - return null; - } - namenodeId = DFSUtil.getSuffixIDs(conf, DFS_NAMENODE_RPC_ADDRESS_KEY, - DFSUtil.LOCAL_ADDRESS_MATCHER)[1]; - if (namenodeId == null) { + + String suffixes[] = DFSUtil.getSuffixIDs(conf, DFS_NAMENODE_RPC_ADDRESS_KEY, + nsId, null, DFSUtil.LOCAL_ADDRESS_MATCHER); + if (suffixes == null) { String msg = "Configuration " + DFS_NAMENODE_RPC_ADDRESS_KEY + " must be suffixed with" + namenodeId + " for HA configuration."; throw new HadoopIllegalArgumentException(msg); } - return namenodeId; + + return suffixes[1]; } /** @@ -78,14 +82,11 @@ public class HAUtil { public static String getNameNodeIdFromAddress(final Configuration conf, final InetSocketAddress address, String... keys) { // Configuration with a single namenode and no nameserviceId - if (!isHAEnabled(conf)) { - return null; - } - String[] ids = DFSUtil.getSuffixIDs(conf, address, keys); if (ids != null && ids.length > 1) { return ids[1]; } return null; } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java index a0146e75a87..bc7c13a9147 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java @@ -39,6 +39,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -1379,7 +1380,8 @@ public class Balancer { * for each namenode, * execute a {@link Balancer} to work through all datanodes once. 
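A minimal sketch (not part of the patch) of driving the reworked Balancer.run over every configured NameNode, mirroring what TestBalancer and TestBalancerWithMultipleNameNodes do further down. It is declared in the balancer package because run and Parameters are package-visible, and it relies on DFSUtil.getNNServiceRpcAddresses for the namenode map.

package org.apache.hadoop.hdfs.server.balancer;

import java.net.InetSocketAddress;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSUtil;

class BalancerRunSketch {
  /** Run one balancing pass against every NameNode found in the config. */
  static int balanceOnce(Configuration conf) throws Exception {
    Map<String, Map<String, InetSocketAddress>> namenodes =
        DFSUtil.getNNServiceRpcAddresses(conf);
    // Parameters.DEFALUT is the identifier exactly as spelled on this branch.
    return Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf);
  }
}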
*/ - static int run(List namenodes, final Parameters p, + static int run(Map> namenodes, + final Parameters p, Configuration conf) throws IOException, InterruptedException { final long sleeptime = 2000*conf.getLong( DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, @@ -1393,8 +1395,10 @@ public class Balancer { final List connectors = new ArrayList(namenodes.size()); try { - for(InetSocketAddress isa : namenodes) { - connectors.add(new NameNodeConnector(isa, conf)); + for(Entry> entry : + namenodes.entrySet()) { + connectors.add( + new NameNodeConnector(entry.getValue().values(), conf)); } boolean done = false; @@ -1476,7 +1480,8 @@ public class Balancer { try { checkReplicationPolicyCompatibility(conf); - final List namenodes = DFSUtil.getNNServiceRpcAddresses(conf); + final Map> namenodes = + DFSUtil.getNNServiceRpcAddresses(conf); return Balancer.run(namenodes, parse(args), conf); } catch (IOException e) { System.out.println(e + ". Exiting ..."); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java index 634efdf5b3b..f43a41ea388 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetAddress; import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Collection; import java.util.EnumSet; import java.util.HashMap; import java.util.Map; @@ -53,6 +55,9 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Daemon; +import com.google.common.collect.Collections2; +import com.google.common.collect.Lists; + /** * The class provides utilities for {@link Balancer} to access a NameNode */ @@ -75,12 +80,14 @@ class NameNodeConnector { private BlockTokenSecretManager blockTokenSecretManager; private Daemon keyupdaterthread; // AccessKeyUpdater thread - NameNodeConnector(InetSocketAddress namenodeAddress, Configuration conf - ) throws IOException { - this.namenodeAddress = namenodeAddress; - this.namenode = createNamenode(namenodeAddress, conf); + NameNodeConnector(Collection haNNs, + Configuration conf) throws IOException { + InetSocketAddress nn = Lists.newArrayList(haNNs).get(0); + // TODO(HA): need to deal with connecting to HA NN pair here + this.namenodeAddress = nn; + this.namenode = createNamenode(nn, conf); this.client = DFSUtil.createNamenode(conf); - this.fs = FileSystem.get(NameNode.getUri(namenodeAddress), conf); + this.fs = FileSystem.get(NameNode.getUri(nn), conf); final NamespaceInfo namespaceinfo = namenode.versionRequest(); this.blockpoolID = namespaceinfo.getBlockPoolID(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 65ccba80dcd..87a62f4e57c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -77,6 +77,7 @@ import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; 
+import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -92,6 +93,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress; import org.apache.hadoop.hdfs.HDFSPolicyProvider; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.protocol.Block; @@ -168,6 +170,8 @@ import org.apache.hadoop.util.VersionInfo; import org.mortbay.util.ajax.JSON; import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; /********************************************************** @@ -251,8 +255,14 @@ public class DataNode extends Configured bpMapping = new HashMap(); nameNodeThreads = new HashMap(); - List isas = DFSUtil.getNNServiceRpcAddresses(conf); - for(InetSocketAddress isa : isas) { + Map> map = + DFSUtil.getNNServiceRpcAddresses(conf); + for (Entry> entry : + map.entrySet()) { + List nnList = Lists.newArrayList(entry.getValue().values()); + // TODO(HA) when HDFS-1971 (dual BRs) is done, pass all of the NNs + // to BPOS + InetSocketAddress isa = nnList.get(0); BPOfferService bpos = new BPOfferService(isa, DataNode.this); nameNodeThreads.put(bpos.getNNSocketAddress(), bpos); } @@ -333,8 +343,16 @@ public class DataNode extends Configured throws IOException { LOG.info("Refresh request received for nameservices: " + conf.get(DFS_FEDERATION_NAMESERVICES)); - List newAddresses = + + // TODO(HA): need to update this for multiple NNs per nameservice + // For now, just list all of the NNs into this set + Map> newAddressMap = DFSUtil.getNNServiceRpcAddresses(conf); + Set newAddresses = Sets.newHashSet(); + for (ConfiguredNNAddress cnn : DFSUtil.flattenAddressMap(newAddressMap)) { + newAddresses.add(cnn.getAddress()); + } + List toShutdown = new ArrayList(); List toStart = new ArrayList(); synchronized (refreshNamenodesLock) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ClusterJspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ClusterJspHelper.java index 10601b17235..3ffc852667d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ClusterJspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ClusterJspHelper.java @@ -39,6 +39,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress; import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates; import org.apache.hadoop.util.StringUtils; import org.codehaus.jackson.JsonNode; @@ -66,9 +67,10 @@ class ClusterJspHelper { ClusterStatus generateClusterHealthReport() { ClusterStatus cs = new ClusterStatus(); Configuration conf = new Configuration(); - List isas = null; + List nns = null; try { - isas = DFSUtil.getNNServiceRpcAddresses(conf); + nns = DFSUtil.flattenAddressMap( + DFSUtil.getNNServiceRpcAddresses(conf)); } catch (Exception e) { // Could not build cluster status cs.setError(e); @@ -76,7 +78,8 @@ class ClusterJspHelper { } // Process each namenode and add it to ClusterStatus - for (InetSocketAddress isa : isas) { + for (ConfiguredNNAddress cnn : nns) { + 
InetSocketAddress isa = cnn.getAddress(); NamenodeMXBeanHelper nnHelper = null; try { nnHelper = new NamenodeMXBeanHelper(isa, conf); @@ -102,9 +105,10 @@ class ClusterJspHelper { DecommissionStatus generateDecommissioningReport() { String clusterid = ""; Configuration conf = new Configuration(); - List isas = null; + List cnns = null; try { - isas = DFSUtil.getNNServiceRpcAddresses(conf); + cnns = DFSUtil.flattenAddressMap( + DFSUtil.getNNServiceRpcAddresses(conf)); } catch (Exception e) { // catch any exception encountered other than connecting to namenodes DecommissionStatus dInfo = new DecommissionStatus(clusterid, e); @@ -122,7 +126,8 @@ class ClusterJspHelper { new HashMap(); List unreportedNamenode = new ArrayList(); - for (InetSocketAddress isa : isas) { + for (ConfiguredNNAddress cnn : cnns) { + InetSocketAddress isa = cnn.getAddress(); NamenodeMXBeanHelper nnHelper = null; try { nnHelper = new NamenodeMXBeanHelper(isa, conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 4eb080105f0..f411a4adbb1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -527,10 +527,11 @@ public class NameNode { throws IOException { this.conf = conf; this.role = role; - this.haEnabled = HAUtil.isHAEnabled(conf); + String nsId = getNameServiceId(conf); + this.haEnabled = HAUtil.isHAEnabled(conf, nsId); this.haContext = new NameNodeHAContext(); try { - initializeGenericKeys(conf, getNameServiceId(conf)); + initializeGenericKeys(conf, nsId); initialize(conf); if (!haEnabled) { state = ACTIVE_STATE; @@ -848,7 +849,7 @@ public class NameNode { */ public static void initializeGenericKeys(Configuration conf, String nameserviceId) { - String namenodeId = HAUtil.getNameNodeId(conf); + String namenodeId = HAUtil.getNameNodeId(conf, nameserviceId); if ((nameserviceId == null || nameserviceId.isEmpty()) && (namenodeId == null || namenodeId.isEmpty())) { return; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index d002fde1844..483d9eb6230 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -22,6 +22,7 @@ import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -89,9 +90,14 @@ public class ConfiguredFailoverProxyProvider implements FailoverProxyProvider, try { ugi = UserGroupInformation.getCurrentUser(); - Collection addresses = DFSUtil.getHaNnRpcAddresses( + Map> map = DFSUtil.getHaNnRpcAddresses( conf); - for (InetSocketAddress address : addresses) { + // TODO(HA): currently hardcoding the nameservice used by MiniDFSCluster. + // We need to somehow communicate this into the proxy provider. 
+ String nsId = "nameserviceId1"; + Map addressesInNN = map.get(nsId); + + for (InetSocketAddress address : addressesInNN.values()) { proxies.add(new AddressRpcProxyPair(address)); } } catch (IOException e) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetConf.java index b9631430d74..ae544c2be04 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetConf.java @@ -21,12 +21,15 @@ import java.io.IOException; import java.io.PrintStream; import java.net.InetSocketAddress; import java.security.PrivilegedExceptionAction; +import java.util.Collection; import java.util.List; +import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; @@ -155,7 +158,7 @@ public class GetConf extends Configured implements Tool { static class NameNodesCommandHandler extends CommandHandler { @Override int doWorkInternal(GetConf tool) throws IOException { - tool.printList(DFSUtil.getNNServiceRpcAddresses(tool.getConf())); + tool.printMap(DFSUtil.getNNServiceRpcAddresses(tool.getConf())); return 0; } } @@ -166,7 +169,7 @@ public class GetConf extends Configured implements Tool { static class BackupNodesCommandHandler extends CommandHandler { @Override public int doWorkInternal(GetConf tool) throws IOException { - tool.printList(DFSUtil.getBackupNodeAddresses(tool.getConf())); + tool.printMap(DFSUtil.getBackupNodeAddresses(tool.getConf())); return 0; } } @@ -177,7 +180,7 @@ public class GetConf extends Configured implements Tool { static class SecondaryNameNodesCommandHandler extends CommandHandler { @Override public int doWorkInternal(GetConf tool) throws IOException { - tool.printList(DFSUtil.getSecondaryNameNodeAddresses(tool.getConf())); + tool.printMap(DFSUtil.getSecondaryNameNodeAddresses(tool.getConf())); return 0; } } @@ -191,9 +194,11 @@ public class GetConf extends Configured implements Tool { @Override public int doWorkInternal(GetConf tool) throws IOException { Configuration config = tool.getConf(); - List rpclist = DFSUtil.getNNServiceRpcAddresses(config); - if (rpclist != null) { - for (InetSocketAddress rpc : rpclist) { + List cnnlist = DFSUtil.flattenAddressMap( + DFSUtil.getNNServiceRpcAddresses(config)); + if (!cnnlist.isEmpty()) { + for (ConfiguredNNAddress cnn : cnnlist) { + InetSocketAddress rpc = cnn.getAddress(); tool.printOut(rpc.getHostName()+":"+rpc.getPort()); } return 0; @@ -223,10 +228,13 @@ public class GetConf extends Configured implements Tool { void printOut(String message) { out.println(message); } - - void printList(List list) { + + void printMap(Map> map) { StringBuilder buffer = new StringBuilder(); - for (InetSocketAddress address : list) { + + List cnns = DFSUtil.flattenAddressMap(map); + for (ConfiguredNNAddress cnn : cnns) { + InetSocketAddress address = cnn.getAddress(); if (buffer.length() > 0) { buffer.append(" "); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index d9c64f70be0..5fb5bd70e8d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -28,6 +28,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Iterator; import java.util.List; +import java.util.Map; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; @@ -183,14 +184,19 @@ public class TestDFSUtil { conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "nn2"), NN2_ADDRESS); - Collection nnAddresses = DFSUtil + Map> nnMap = DFSUtil .getNNServiceRpcAddresses(conf); - assertEquals(2, nnAddresses.size()); - Iterator iterator = nnAddresses.iterator(); - InetSocketAddress addr = iterator.next(); + assertEquals(2, nnMap.size()); + + Map nn1Map = nnMap.get("nn1"); + assertEquals(1, nn1Map.size()); + InetSocketAddress addr = nn1Map.get(null); assertEquals("localhost", addr.getHostName()); assertEquals(9000, addr.getPort()); - addr = iterator.next(); + + Map nn2Map = nnMap.get("nn2"); + assertEquals(1, nn2Map.size()); + addr = nn2Map.get(null); assertEquals("localhost", addr.getHostName()); assertEquals(9001, addr.getPort()); @@ -237,9 +243,14 @@ public class TestDFSUtil { conf.set(FS_DEFAULT_NAME_KEY, hdfs_default); // If DFS_FEDERATION_NAMESERVICES is not set, verify that // default namenode address is returned. - List addrList = DFSUtil.getNNServiceRpcAddresses(conf); - assertEquals(1, addrList.size()); - assertEquals(9999, addrList.get(0).getPort()); + Map> addrMap = + DFSUtil.getNNServiceRpcAddresses(conf); + assertEquals(1, addrMap.size()); + + Map defaultNsMap = addrMap.get(null); + assertEquals(1, defaultNsMap.size()); + + assertEquals(9999, defaultNsMap.get(null).getPort()); } /** @@ -279,22 +290,28 @@ public class TestDFSUtil { public void testEmptyConf() { HdfsConfiguration conf = new HdfsConfiguration(false); try { - DFSUtil.getNNServiceRpcAddresses(conf); - fail("Expected IOException is not thrown"); + Map> map = + DFSUtil.getNNServiceRpcAddresses(conf); + fail("Expected IOException is not thrown, result was: " + + DFSUtil.addressMapToString(map)); } catch (IOException expected) { /** Expected */ } try { - DFSUtil.getBackupNodeAddresses(conf); - fail("Expected IOException is not thrown"); + Map> map = + DFSUtil.getBackupNodeAddresses(conf); + fail("Expected IOException is not thrown, result was: " + + DFSUtil.addressMapToString(map)); } catch (IOException expected) { /** Expected */ } try { - DFSUtil.getSecondaryNameNodeAddresses(conf); - fail("Expected IOException is not thrown"); + Map> map = + DFSUtil.getSecondaryNameNodeAddresses(conf); + fail("Expected IOException is not thrown, result was: " + + DFSUtil.addressMapToString(map)); } catch (IOException expected) { /** Expected */ } @@ -310,5 +327,44 @@ public class TestDFSUtil { String httpport = DFSUtil.getInfoServer(null, conf, false); assertEquals("0.0.0.0:50070", httpport); } + + @Test + public void testHANameNodesWithFederation() { + HdfsConfiguration conf = new HdfsConfiguration(); + + final String NS1_NN1_HOST = "ns1-nn1.example.com:8020"; + final String NS1_NN2_HOST = "ns1-nn2.example.com:8020"; + final String NS2_NN1_HOST = "ns2-nn1.example.com:8020"; + final String NS2_NN2_HOST = "ns2-nn2.example.com:8020"; + + // Two nameservices, each with two NNs. 
+ conf.set(DFS_FEDERATION_NAMESERVICES, "ns1,ns2"); + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, "ns1"), + "ns1-nn1,ns1-nn2"); + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, "ns2"), + "ns2-nn1,ns2-nn2"); + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "ns1-nn1"), + NS1_NN1_HOST); + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "ns1-nn2"), + NS1_NN2_HOST); + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_RPC_ADDRESS_KEY, "ns2", "ns2-nn1"), + NS2_NN1_HOST); + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_RPC_ADDRESS_KEY, "ns2", "ns2-nn2"), + NS2_NN2_HOST); + + Map> map = + DFSUtil.getHaNnRpcAddresses(conf); + System.err.println("TestHANameNodesWithFederation:\n" + + DFSUtil.addressMapToString(map)); + + assertEquals(NS1_NN1_HOST, map.get("ns1").get("ns1-nn1").toString()); + assertEquals(NS1_NN2_HOST, map.get("ns1").get("ns1-nn2").toString()); + assertEquals(NS2_NN1_HOST, map.get("ns2").get("ns2-nn1").toString()); + assertEquals(NS2_NN2_HOST, map.get("ns2").get("ns2-nn2").toString()); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java index 34cd784bd04..84235112aa5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java @@ -22,6 +22,7 @@ import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.Random; import java.util.concurrent.TimeoutException; @@ -330,8 +331,8 @@ public class TestBalancer extends TestCase { waitForHeartBeat(totalUsedSpace, totalCapacity); // start rebalancing - final List namenodes =new ArrayList(); - namenodes.add(NameNode.getServiceAddress(conf, true)); + Map> namenodes = + DFSUtil.getNNServiceRpcAddresses(conf); final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf); assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java index 6ca0ffe7b31..151614b14b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.Random; import org.apache.commons.logging.Log; @@ -157,7 +158,8 @@ public class TestBalancerWithMultipleNameNodes { LOG.info("BALANCER 1"); // start rebalancing - final List namenodes = DFSUtil.getNNServiceRpcAddresses(s.conf); + final Map> namenodes = + DFSUtil.getNNServiceRpcAddresses(s.conf); final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, s.conf); Assert.assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java index 39e8e20a0df..4553543d73d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestGetConf.java @@ -24,6 +24,7 @@ import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Map; import java.util.StringTokenizer; import static org.junit.Assert.*; @@ -32,6 +33,7 @@ import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.tools.GetConf; @@ -80,13 +82,13 @@ public class TestGetConf { } /* - * Convert list of InetSocketAddress to string array with each address - * represented as "host:port" + * Convert the map returned from DFSUtil functions to an array of + * addresses represented as "host:port" */ - private String[] toStringArray(List list) { + private String[] toStringArray(List list) { String[] ret = new String[list.size()]; for (int i = 0; i < list.size(); i++) { - ret[i] = NameNode.getHostPortString(list.get(i)); + ret[i] = NameNode.getHostPortString(list.get(i).getAddress()); } return ret; } @@ -94,8 +96,8 @@ public class TestGetConf { /** * Using DFSUtil methods get the list of given {@code type} of address */ - private List getAddressListFromConf(TestType type, - HdfsConfiguration conf) throws IOException { + private Map> getAddressListFromConf( + TestType type, HdfsConfiguration conf) throws IOException { switch (type) { case NAMENODE: return DFSUtil.getNNServiceRpcAddresses(conf); @@ -161,7 +163,7 @@ public class TestGetConf { * @param expected, expected addresses */ private void getAddressListFromTool(TestType type, HdfsConfiguration conf, - boolean checkPort, List expected) throws Exception { + boolean checkPort, List expected) throws Exception { String out = getAddressListFromTool(type, conf, expected.size() != 0); List values = new ArrayList(); @@ -176,7 +178,8 @@ public class TestGetConf { // Convert expected list to String[] of hosts int i = 0; String[] expectedHosts = new String[expected.size()]; - for (InetSocketAddress addr : expected) { + for (ConfiguredNNAddress cnn : expected) { + InetSocketAddress addr = cnn.getAddress(); if (!checkPort) { expectedHosts[i++] = addr.getHostName(); }else { @@ -191,7 +194,9 @@ public class TestGetConf { private void verifyAddresses(HdfsConfiguration conf, TestType type, boolean checkPort, String... expected) throws Exception { // Ensure DFSUtil returned the right set of addresses - List list = getAddressListFromConf(type, conf); + Map> map = + getAddressListFromConf(type, conf); + List list = DFSUtil.flattenAddressMap(map); String[] actual = toStringArray(list); Arrays.sort(actual); Arrays.sort(expected); From bb91ebb53c1a41e47755b2008deee67c11b79d7e Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 30 Nov 2011 06:49:30 +0000 Subject: [PATCH 025/177] HDFS-2591. MiniDFSCluster support to mix and match federation with HA. Contributed by Todd Lipcon. 
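As a rough sketch of what this patch enables, the new MiniDFSNNTopology builder added below lets a test describe one HA nameservice and one single-NN federated nameservice in the same mini cluster. Class and method names are taken from the hunks that follow; the nameservice and namenode IDs and the standalone wrapper class are illustrative assumptions, and because the shared-edits directory is still a TODO at this point in the branch (HDFS-1971), the HA half shows the intended API rather than a fully working configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf;
import org.apache.hadoop.hdfs.MiniDFSNNTopology.NSConf;

public class MixedHAFederationSketch {
  public static void main(String[] args) throws Exception {
    // ns1 is an HA pair; ns2 is a plain federated nameservice with one NN.
    // Ports are left at their default of 0 so each NN binds an ephemeral
    // port, as simpleFederatedTopology() does.
    MiniDFSNNTopology topology = new MiniDFSNNTopology()
        .addNameservice(new NSConf("ns1")
            .addNN(new NNConf("nn1"))
            .addNN(new NNConf("nn2")))
        .addNameservice(new NSConf("ns2")
            .addNN(new NNConf(null)));

    MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration())
        .nnTopology(topology)   // replaces the removed numNameNodes()/federation() builder calls
        .numDataNodes(1)
        .build();
    try {
      cluster.waitActive();
    } finally {
      cluster.shutdown();
    }
  }
}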
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1208297 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 6 + .../apache/hadoop/hdfs/MiniDFSCluster.java | 323 +++++++++++------- .../apache/hadoop/hdfs/MiniDFSNNTopology.java | 157 +++++++++ .../hadoop/hdfs/TestDFSClientFailover.java | 22 +- .../apache/hadoop/hdfs/TestDecommission.java | 6 +- .../hadoop/hdfs/TestMiniDFSCluster.java | 3 +- .../security/token/block/TestBlockToken.java | 2 +- .../TestBalancerWithMultipleNameNodes.java | 7 +- .../server/datanode/TestDataNodeExit.java | 7 +- .../TestDataNodeMultipleRegistrations.java | 25 +- .../server/datanode/TestDeleteBlockPool.java | 11 +- .../TestMulitipleNNDataBlockScanner.java | 17 +- .../server/datanode/TestRefreshNamenodes.java | 13 +- .../hdfs/server/namenode/NameNodeAdapter.java | 7 + .../hdfs/server/namenode/TestCheckpoint.java | 6 +- .../hdfs/server/namenode/TestStartup.java | 3 +- 17 files changed, 449 insertions(+), 168 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 0c854a5056e..ce0fd6c3a19 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -25,3 +25,5 @@ HDFS-2523. Small NN fixes to include HAServiceProtocol and prevent NPE on shutdo HDFS-2577. NN fails to start since it tries to start secret manager in safemode. (todd) HDFS-2582. Scope dfs.ha.namenodes config by nameservice (todd) + +HDFS-2591. MiniDFSCluster support to mix and match federation with HA (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index fcb7d4cb891..3c4fd9fd4e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -571,6 +571,12 @@ public class DFSUtil { public InetSocketAddress getAddress() { return addr; } + + @Override + public String toString() { + return "ConfiguredNNAddress[nsId=" + nameserviceId + ";" + + "nnId=" + namenodeId + ";addr=" + addr + "]"; + } } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index df913b37d5a..cde68487563 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -32,6 +32,7 @@ import java.nio.channels.FileChannel; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Collection; +import java.util.List; import java.util.Random; import org.apache.commons.logging.Log; @@ -41,7 +42,13 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; + import static org.apache.hadoop.hdfs.DFSConfigKeys.*; + +import org.apache.hadoop.ha.HAServiceProtocol; +import org.apache.hadoop.ha.ServiceFailedException; +import 
org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; import org.apache.hadoop.hdfs.protocol.ClientProtocol; @@ -65,6 +72,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.tools.DFSAdmin; +import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.net.DNSToSwitchMapping; import org.apache.hadoop.net.NetUtils; @@ -77,6 +85,11 @@ import org.apache.hadoop.tools.GetUserMappingsProtocol; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.ToolRunner; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.io.Files; + /** * This class creates a single-process DFS cluster for junit testing. * The data directories for non-simulated DFS are under the testing directory. @@ -102,7 +115,6 @@ public class MiniDFSCluster { private int nameNodePort = 0; private int nameNodeHttpPort = 0; private final Configuration conf; - private int numNameNodes = 1; private int numDataNodes = 1; private boolean format = true; private boolean manageNameDfsDirs = true; @@ -114,21 +126,12 @@ public class MiniDFSCluster { private String clusterId = null; private boolean waitSafeMode = true; private boolean setupHostsFile = false; - private boolean federation = false; + private MiniDFSNNTopology nnTopology = null; public Builder(Configuration conf) { this.conf = conf; } - /** - * default false - non federated cluster - * @param val - * @return Builder object - */ - public Builder federation (boolean val){ - this.federation = val; - return this; - } /** * Default: 0 */ @@ -145,14 +148,6 @@ public class MiniDFSCluster { return this; } - /** - * Default: 1 - */ - public Builder numNameNodes(int val) { - this.numNameNodes = val; - return this; - } - /** * Default: 1 */ @@ -242,6 +237,16 @@ public class MiniDFSCluster { return this; } + /** + * Default: a single namenode. + * See {@link MiniDFSNNTopology#simpleFederatedTopology(int)} to set up + * federated nameservices + */ + public Builder nnTopology(MiniDFSNNTopology topology) { + this.nnTopology = topology; + return this; + } + /** * Construct the actual MiniDFSCluster */ @@ -254,15 +259,17 @@ public class MiniDFSCluster { * Used by builder to create and return an instance of MiniDFSCluster */ private MiniDFSCluster(Builder builder) throws IOException { - LOG.info("starting cluster with " + builder.numNameNodes + " namenodes."); - nameNodes = new NameNodeInfo[builder.numNameNodes]; - // try to determine if in federation mode - if(builder.numNameNodes > 1) - builder.federation = true; + if (builder.nnTopology == null) { + // If no topology is specified, build a single NN. 
+ builder.nnTopology = MiniDFSNNTopology.simpleSingleNN( + builder.nameNodePort, builder.nameNodeHttpPort); + } + + LOG.info("starting cluster with " + + builder.nnTopology.countNameNodes() + " namenodes."); + nameNodes = new NameNodeInfo[builder.nnTopology.countNameNodes()]; - initMiniDFSCluster(builder.nameNodePort, - builder.nameNodeHttpPort, - builder.conf, + initMiniDFSCluster(builder.conf, builder.numDataNodes, builder.format, builder.manageNameDfsDirs, @@ -274,7 +281,7 @@ public class MiniDFSCluster { builder.clusterId, builder.waitSafeMode, builder.setupHostsFile, - builder.federation); + builder.nnTopology); } public class DataNodeProperties { @@ -296,8 +303,8 @@ public class MiniDFSCluster { new ArrayList(); private File base_dir; private File data_dir; - private boolean federation = false; private boolean waitSafeMode = true; + private boolean federation; /** * Stores the information related to a namenode in the cluster @@ -488,22 +495,23 @@ public class MiniDFSCluster { String[] racks, String hosts[], long[] simulatedCapacities) throws IOException { this.nameNodes = new NameNodeInfo[1]; // Single namenode in the cluster - initMiniDFSCluster(nameNodePort, 0, conf, numDataNodes, format, + initMiniDFSCluster(conf, numDataNodes, format, manageNameDfsDirs, manageDataDfsDirs, operation, racks, hosts, - simulatedCapacities, null, true, false, false); + simulatedCapacities, null, true, false, + MiniDFSNNTopology.simpleSingleNN(nameNodePort, 0)); } - private void initMiniDFSCluster(int nameNodePort, int nameNodeHttpPort, + private void initMiniDFSCluster( Configuration conf, int numDataNodes, boolean format, boolean manageNameDfsDirs, boolean manageDataDfsDirs, StartupOption operation, String[] racks, String[] hosts, long[] simulatedCapacities, String clusterId, - boolean waitSafeMode, boolean setupHostsFile, boolean federation) + boolean waitSafeMode, boolean setupHostsFile, + MiniDFSNNTopology nnTopology) throws IOException { this.conf = conf; base_dir = new File(determineDfsBaseDir()); data_dir = new File(base_dir, "data"); - this.federation = federation; this.waitSafeMode = waitSafeMode; // use alternate RPC engine if spec'd @@ -538,28 +546,9 @@ public class MiniDFSCluster { conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, StaticMapping.class, DNSToSwitchMapping.class); - Collection nameserviceIds = DFSUtil.getNameServiceIds(conf); - if(nameserviceIds.size() > 1) - federation = true; - - if (!federation) { - conf.set(FS_DEFAULT_NAME_KEY, "127.0.0.1:" + nameNodePort); - conf.set(DFS_NAMENODE_HTTP_ADDRESS_KEY, "127.0.0.1:" - + nameNodeHttpPort); - NameNode nn = createNameNode(0, conf, numDataNodes, manageNameDfsDirs, - format, operation, clusterId); - nameNodes[0] = new NameNodeInfo(nn, conf); - FileSystem.setDefaultUri(conf, getURI(0)); - } else { - if (nameserviceIds.isEmpty()) { - for (int i = 0; i < nameNodes.length; i++) { - nameserviceIds.add(NAMESERVICE_ID_PREFIX + i); - } - } - initFederationConf(conf, nameserviceIds, numDataNodes, nameNodePort); - createFederationNamenodes(conf, nameserviceIds, manageNameDfsDirs, format, - operation, clusterId); - } + federation = nnTopology.isFederated(); + createNameNodesAndSetConf( + nnTopology, manageNameDfsDirs, format, operation, clusterId, conf); if (format) { if (data_dir.exists() && !FileUtil.fullyDelete(data_dir)) { @@ -575,51 +564,91 @@ public class MiniDFSCluster { ProxyUsers.refreshSuperUserGroupsConfiguration(conf); } - /** Initialize configuration for federated cluster */ - private static void 
initFederationConf(Configuration conf, - Collection nameserviceIds, int numDataNodes, int nnPort) { - String nameserviceIdList = ""; - for (String nameserviceId : nameserviceIds) { - // Create comma separated list of nameserviceIds - if (nameserviceIdList.length() > 0) { - nameserviceIdList += ","; - } - nameserviceIdList += nameserviceId; - initFederatedNamenodeAddress(conf, nameserviceId, nnPort); - nnPort = nnPort == 0 ? 0 : nnPort + 2; + private void createNameNodesAndSetConf(MiniDFSNNTopology nnTopology, + boolean manageNameDfsDirs, boolean format, StartupOption operation, + String clusterId, + Configuration conf) throws IOException { + Preconditions.checkArgument(nnTopology.countNameNodes() > 0, + "empty NN topology: no namenodes specified!"); + + if (!federation && nnTopology.countNameNodes() == 1) { + NNConf onlyNN = nnTopology.getOnlyNameNode(); + // we only had one NN, set DEFAULT_NAME for it + conf.set(FS_DEFAULT_NAME_KEY, "127.0.0.1:" + onlyNN.getIpcPort()); } - conf.set(DFS_FEDERATION_NAMESERVICES, nameserviceIdList); - } - - /* For federated namenode initialize the address:port */ - private static void initFederatedNamenodeAddress(Configuration conf, - String nameserviceId, int nnPort) { - // Set nameserviceId specific key - String key = DFSUtil.addKeySuffixes( - DFS_NAMENODE_HTTP_ADDRESS_KEY, nameserviceId); - conf.set(key, "127.0.0.1:0"); - - key = DFSUtil.addKeySuffixes( - DFS_NAMENODE_RPC_ADDRESS_KEY, nameserviceId); - conf.set(key, "127.0.0.1:" + nnPort); - } - - private void createFederationNamenodes(Configuration conf, - Collection nameserviceIds, boolean manageNameDfsDirs, - boolean format, StartupOption operation, String clusterId) - throws IOException { - // Create namenodes in the cluster + int nnCounter = 0; - for (String nameserviceId : nameserviceIds) { - createFederatedNameNode(nnCounter++, conf, numDataNodes, manageNameDfsDirs, - format, operation, clusterId, nameserviceId); + List nsIds = Lists.newArrayList(); + for (MiniDFSNNTopology.NSConf nameservice : nnTopology.getNameservices()) { + String nsId = nameservice.getId(); + nsIds.add(nameservice.getId()); + + Preconditions.checkArgument( + !federation || nsId != null, + "if there is more than one NS, they must have names"); + + // First set up the configuration which all of the NNs + // need to have - have to do this a priori before starting + // *any* of the NNs, so they know to come up in standby. + List nnIds = Lists.newArrayList(); + // Iterate over the NNs in this nameservice + for (NNConf nn : nameservice.getNNs()) { + nnIds.add(nn.getNnId()); + + initNameNodeAddress(conf, nameservice.getId(), nn); + } + + // If HA is enabled on this nameservice, enumerate all the namenodes + // in the configuration. Also need to set a shared edits dir + if (nnIds.size() > 1) { + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, nameservice.getId()), + Joiner.on(",").join(nnIds)); + if (manageNameDfsDirs) { + URI sharedEditsUri = fileAsURI(new File(base_dir, "shared-edits-" + + nnCounter + "-through-" + (nnCounter+nnIds.size()-1))); + // TODO in HDFS-1971: conf.set(DFS_NAMENODE_SHARED_EDITS_DIR_KEY, sharedEditsUri.toString()); + } + } + + // Now start all the NNs in this nameservice. + int i = 0; + for (NNConf nn : nameservice.getNNs()) { + initNameNodeConf(conf, nsId, nn.getNnId(), manageNameDfsDirs, nnCounter); + + boolean formatThisOne = format; + if (format && i++ > 0) { + // Don't format the second NN in an HA setup - that + // would result in it having a different clusterID, + // block pool ID, etc. 
Instead, copy the name dirs + // from the first one. + formatThisOne = false; + copyNameDirs(getConfiguration(nnCounter - 1), conf); + } + + createNameNode(nnCounter++, conf, numDataNodes, formatThisOne, + operation, clusterId, nsId, nn.getNnId()); + } + } + if (federation) { + // If we have more than one nameservice, need to enumerate them in the + // config. + conf.set(DFS_FEDERATION_NAMESERVICES, Joiner.on(",").join(nsIds)); + } + } - private NameNode createNameNode(int nnIndex, Configuration conf, - int numDataNodes, boolean manageNameDfsDirs, boolean format, - StartupOption operation, String clusterId) + private void initNameNodeConf(Configuration conf, + String nameserviceId, String nnId, + boolean manageNameDfsDirs, int nnIndex) throws IOException { + if (nameserviceId != null) { + conf.set(DFS_FEDERATION_NAMESERVICE_ID, nameserviceId); + } + if (nnId != null) { + conf.set(DFS_HA_NAMENODE_ID_KEY, nnId); + } + if (manageNameDfsDirs) { conf.set(DFS_NAMENODE_NAME_DIR_KEY, fileAsURI(new File(base_dir, "name" + (2*nnIndex + 1)))+","+ @@ -628,7 +657,49 @@ public class MiniDFSCluster { fileAsURI(new File(base_dir, "namesecondary" + (2*nnIndex + 1)))+","+ fileAsURI(new File(base_dir, "namesecondary" + (2*nnIndex + 2)))); } - + } + + private void copyNameDirs(Configuration srcConf, Configuration dstConf) + throws IOException { + Collection srcDirs = FSNamesystem.getNamespaceDirs(srcConf); + Collection dstDirs = FSNamesystem.getNamespaceDirs(dstConf); + URI srcDir = Lists.newArrayList(srcDirs).get(0); + FileSystem dstFS = FileSystem.getLocal(dstConf).getRaw(); + for (URI dstDir : dstDirs) { + Preconditions.checkArgument(!dstDir.equals(srcDir)); + Files.deleteRecursively(new File(dstDir)); + LOG.info("Copying namedir from primary node dir " + + srcDir + " to " + dstDir); + FileUtil.copy( + new File(srcDir), + dstFS, new Path(dstDir), false, dstConf); + } + } + + /** + * Initialize the address and port for this NameNode. In the + * non-federated case, the nameservice and namenode ID may be + * null. + */ + private static void initNameNodeAddress(Configuration conf, + String nameserviceId, NNConf nnConf) { + // Set NN-specific specific key + String key = DFSUtil.addKeySuffixes( + DFS_NAMENODE_HTTP_ADDRESS_KEY, nameserviceId, + nnConf.getNnId()); + conf.set(key, "127.0.0.1:" + nnConf.getHttpPort()); + + key = DFSUtil.addKeySuffixes( + DFS_NAMENODE_RPC_ADDRESS_KEY, nameserviceId, + nnConf.getNnId()); + conf.set(key, "127.0.0.1:" + nnConf.getIpcPort()); + } + + private void createNameNode(int nnIndex, Configuration conf, + int numDataNodes, boolean format, StartupOption operation, + String clusterId, String nameserviceId, + String nnId) + throws IOException { // Format and clean out DataNode directories if (format) { DFSTestUtil.formatNameNode(conf); @@ -642,23 +713,17 @@ public class MiniDFSCluster { operation == StartupOption.FORMAT || operation == StartupOption.REGULAR) ? 
new String[] {} : new String[] {operation.getName()}; - return NameNode.createNameNode(args, conf); - } - - private void createFederatedNameNode(int nnIndex, Configuration conf, - int numDataNodes, boolean manageNameDfsDirs, boolean format, - StartupOption operation, String clusterId, String nameserviceId) - throws IOException { - conf.set(DFS_FEDERATION_NAMESERVICE_ID, nameserviceId); - NameNode nn = createNameNode(nnIndex, conf, numDataNodes, manageNameDfsDirs, - format, operation, clusterId); + NameNode nn = NameNode.createNameNode(args, conf); + + // After the NN has started, set back the bound ports into + // the conf conf.set(DFSUtil.addKeySuffixes( - DFS_NAMENODE_RPC_ADDRESS_KEY, nameserviceId), NameNode + DFS_NAMENODE_RPC_ADDRESS_KEY, nameserviceId, nnId), NameNode .getHostPortString(nn.getNameNodeAddress())); conf.set(DFSUtil.addKeySuffixes( - DFS_NAMENODE_HTTP_ADDRESS_KEY, nameserviceId), NameNode + DFS_NAMENODE_HTTP_ADDRESS_KEY, nameserviceId, nnId), NameNode .getHostPortString(nn.getHttpAddress())); - DFSUtil.setGenericConf(conf, nameserviceId, + DFSUtil.setGenericConf(conf, nameserviceId, nnId, DFS_NAMENODE_HTTP_ADDRESS_KEY); nameNodes[nnIndex] = new NameNodeInfo(nn, new Configuration(conf)); } @@ -1110,6 +1175,7 @@ public class MiniDFSCluster { LOG.info("Shutting down the Mini HDFS Cluster"); shutdownDataNodes(); for (NameNodeInfo nnInfo : nameNodes) { + if (nnInfo == null) continue; NameNode nameNode = nnInfo.nameNode; if (nameNode != null) { nameNode.stop(); @@ -1380,14 +1446,7 @@ public class MiniDFSCluster { return false; } long[] sizes; - try { - sizes = nameNode.getRpcServer().getStats(); - } catch (IOException ioe) { - // This method above should never throw. - // It only throws IOE since it is exposed via RPC - throw (AssertionError)(new AssertionError("Unexpected IOE thrown: " - + StringUtils.stringifyException(ioe)).initCause(ioe)); - } + sizes = NameNodeAdapter.getStats(nameNode.getNamesystem()); boolean isUp = false; synchronized (this) { isUp = ((!nameNode.isInSafeMode() || !waitSafeMode) && sizes[0] != 0); @@ -1497,6 +1556,22 @@ public class MiniDFSCluster { public Collection getNameEditsDirs(int nnIndex) { return FSNamesystem.getNamespaceEditsDirs(nameNodes[nnIndex].conf); } + + private HAServiceProtocol getHaServiceClient(int nnIndex) throws IOException { + InetSocketAddress addr = nameNodes[nnIndex].nameNode.getServiceRpcAddress(); + return RPC.getProxy(HAServiceProtocol.class, + HAServiceProtocol.versionID, addr, conf); + } + + public void transitionToActive(int nnIndex) throws IOException, + ServiceFailedException { + getHaServiceClient(nnIndex).transitionToActive(); + } + + public void transitionToStandby(int nnIndex) throws IOException, + ServiceFailedException { + getHaServiceClient(nnIndex).transitionToActive(); + } /** Wait until the given namenode gets registration from all the datanodes */ public void waitActive(int nnIndex) throws IOException { @@ -1504,6 +1579,7 @@ public class MiniDFSCluster { return; } InetSocketAddress addr = nameNodes[nnIndex].nameNode.getServiceRpcAddress(); + assert addr.getPort() != 0; DFSClient client = new DFSClient(addr, conf); // ensure all datanodes have registered and sent heartbeat to the namenode @@ -1902,7 +1978,7 @@ public class MiniDFSCluster { throws IOException { if(!federation) throw new IOException("cannot add namenode to non-federated cluster"); - + int nnIndex = nameNodes.length; int numNameNodes = nameNodes.length + 1; NameNodeInfo[] newlist = new NameNodeInfo[numNameNodes]; @@ -1913,10 +1989,13 @@ public 
class MiniDFSCluster { String nameserviceIds = conf.get(DFS_FEDERATION_NAMESERVICES); nameserviceIds += "," + nameserviceId; conf.set(DFS_FEDERATION_NAMESERVICES, nameserviceIds); - - initFederatedNamenodeAddress(conf, nameserviceId, namenodePort); - createFederatedNameNode(nnIndex, conf, numDataNodes, true, true, null, - null, nameserviceId); + + String nnId = null; + initNameNodeAddress(conf, nameserviceId, + new NNConf(nnId).setIpcPort(namenodePort)); + initNameNodeConf(conf, nameserviceId, nnId, true, nnIndex); + createNameNode(nnIndex, conf, numDataNodes, true, null, null, + nameserviceId, nnId); // Refresh datanodes with the newly started namenode for (DataNodeProperties dn : dataNodes) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java new file mode 100644 index 00000000000..b8f50842eea --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +/** + * This class is used to specify the setup of namenodes when instantiating + * a MiniDFSCluster. It consists of a set of nameservices, each of which + * may have one or more namenodes (in the case of HA) + */ +@InterfaceAudience.LimitedPrivate({"HBase", "HDFS", "Hive", "MapReduce", "Pig"}) +@InterfaceStability.Unstable +public class MiniDFSNNTopology { + private final List nameservices = Lists.newArrayList(); + private boolean federation; + + public MiniDFSNNTopology() { + } + + /** + * Set up a simple non-federated non-HA NN. + */ + public static MiniDFSNNTopology simpleSingleNN( + int nameNodePort, int nameNodeHttpPort) { + return new MiniDFSNNTopology() + .addNameservice(new MiniDFSNNTopology.NSConf(null) + .addNN(new MiniDFSNNTopology.NNConf(null) + .setHttpPort(nameNodeHttpPort) + .setIpcPort(nameNodePort))); + } + + /** + * Set up federated cluster with the given number of nameservices, each + * of which has only a single NameNode. 
+ */ + public static MiniDFSNNTopology simpleFederatedTopology( + int numNameservices) { + MiniDFSNNTopology topology = new MiniDFSNNTopology(); + for (int i = 1; i <= numNameservices; i++) { + topology.addNameservice(new MiniDFSNNTopology.NSConf("ns" + i) + .addNN(new MiniDFSNNTopology.NNConf(null))); + } + topology.setFederation(true); + return topology; + } + + public MiniDFSNNTopology setFederation(boolean federation) { + this.federation = federation; + return this; + } + + public MiniDFSNNTopology addNameservice(NSConf nameservice) { + Preconditions.checkArgument(!nameservice.getNNs().isEmpty(), + "Must have at least one NN in a nameservice"); + this.nameservices.add(nameservice); + return this; + } + + public int countNameNodes() { + int count = 0; + for (NSConf ns : nameservices) { + count += ns.nns.size(); + } + return count; + } + + public NNConf getOnlyNameNode() { + Preconditions.checkState(countNameNodes() == 1, + "must have exactly one NN!"); + return nameservices.get(0).getNNs().get(0); + } + + public boolean isFederated() { + return nameservices.size() > 1 || federation; + } + + public List getNameservices() { + return nameservices; + } + + public static class NSConf { + private final String id; + private final List nns = Lists.newArrayList(); + + public NSConf(String id) { + this.id = id; + } + + public NSConf addNN(NNConf nn) { + this.nns.add(nn); + return this; + } + + public String getId() { + return id; + } + + public List getNNs() { + return nns; + } + } + + public static class NNConf { + private String nnId; + private int httpPort; + private int ipcPort; + + public NNConf(String nnId) { + this.nnId = nnId; + } + + String getNnId() { + return nnId; + } + + int getIpcPort() { + return ipcPort; + } + + int getHttpPort() { + return httpPort; + } + + public NNConf setHttpPort(int httpPort) { + this.httpPort = httpPort; + return this; + } + + public NNConf setIpcPort(int ipcPort) { + this.ipcPort = ipcPort; + return this; + } + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java index 3b50252bf4c..b144a8087c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java @@ -46,7 +46,9 @@ public class TestDFSClientFailover { @Before public void setUpCluster() throws IOException { - cluster = new MiniDFSCluster.Builder(conf).numNameNodes(2).build(); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) + .build(); cluster.waitActive(); } @@ -61,7 +63,6 @@ public class TestDFSClientFailover { // changed to exercise that. 
@Test public void testDfsClientFailover() throws IOException, URISyntaxException { - final String logicalNameNodeId = "ha-nn-uri"; InetSocketAddress nnAddr1 = cluster.getNameNode(0).getNameNodeAddress(); InetSocketAddress nnAddr2 = cluster.getNameNode(1).getNameNodeAddress(); String nameServiceId1 = DFSUtil.getNameServiceIdFromAddress(conf, nnAddr1, @@ -69,9 +70,6 @@ public class TestDFSClientFailover { String nameServiceId2 = DFSUtil.getNameServiceIdFromAddress(conf, nnAddr2, DFS_NAMENODE_RPC_ADDRESS_KEY); - String nameNodeId1 = "nn1"; - String nameNodeId2 = "nn2"; - ClientProtocol nn1 = DFSUtil.createNamenode(nnAddr1, conf); ClientProtocol nn2 = DFSUtil.createNamenode(nnAddr2, conf); @@ -85,14 +83,22 @@ public class TestDFSClientFailover { out1.close(); out2.close(); + String nsId = "nameserviceId1"; + + final String logicalNameNodeId = "ha-nn-uri"; + String nameNodeId1 = "nn1"; + String nameNodeId2 = "nn2"; + String address1 = "hdfs://" + nnAddr1.getHostName() + ":" + nnAddr1.getPort(); String address2 = "hdfs://" + nnAddr2.getHostName() + ":" + nnAddr2.getPort(); conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, - nameServiceId1, nameNodeId1), address1); + nsId, nameNodeId1), address1); conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, - nameServiceId2, nameNodeId2), address2); + nsId, nameNodeId2), address2); - conf.set(DFS_HA_NAMENODES_KEY, nameNodeId1 + "," + nameNodeId2); + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, nsId); + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, nsId), + nameNodeId1 + "," + nameNodeId2); conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + logicalNameNodeId, ConfiguredFailoverProxyProvider.class.getName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java index faf7efd5364..6997ebc2e71 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java @@ -279,7 +279,8 @@ public class TestDecommission { * @throws IOException */ private void startCluster(int numNameNodes, int numDatanodes, Configuration conf) throws IOException { - cluster = new MiniDFSCluster.Builder(conf).numNameNodes(numNameNodes) + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(numNameNodes)) .numDataNodes(numDatanodes).build(); cluster.waitActive(); for (int i = 0; i < numNameNodes; i++) { @@ -507,7 +508,8 @@ public class TestDecommission { InterruptedException { conf.set(DFSConfigKeys.DFS_HOSTS, hostsFile.toUri().getPath()); int numDatanodes = 1; - cluster = new MiniDFSCluster.Builder(conf).numNameNodes(numNameNodes) + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(numNameNodes)) .numDataNodes(numDatanodes).setupHostsFile(true).build(); cluster.waitActive(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java index 4e3152385cf..5a3524495b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java @@ -41,7 +41,8 @@ public class TestMiniDFSCluster { protected File 
testDataDir; @Before public void setUp() { - testDataPath = System.getProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA); + testDataPath = System.getProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA, + "build/test/data"); testDataDir = new File(new File(testDataPath).getParentFile(), "miniclusters"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java index fd9c91d88c4..d543dd85c17 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java @@ -370,7 +370,7 @@ public class TestBlockToken { Configuration conf = new HdfsConfiguration(); conf.setBoolean(DFSConfigKeys.DFS_BLOCK_ACCESS_TOKEN_ENABLE_KEY, true); conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 512); - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numNameNodes(1) + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(1).build(); cluster.waitActive(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java index 151614b14b3..743efe9ebdb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; @@ -254,7 +255,7 @@ public class TestBalancerWithMultipleNameNodes { { LOG.info("UNEVEN 1"); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) - .numNameNodes(nNameNodes) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) .numDataNodes(nDataNodes) .racks(racks) .simulatedCapacities(capacities) @@ -275,7 +276,7 @@ public class TestBalancerWithMultipleNameNodes { { LOG.info("UNEVEN 10"); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) - .numNameNodes(nNameNodes) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(nNameNodes)) .numDataNodes(nDataNodes) .racks(racks) .simulatedCapacities(capacities) @@ -329,7 +330,7 @@ public class TestBalancerWithMultipleNameNodes { LOG.info("RUN_TEST -1"); final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) - .numNameNodes(nNameNodes) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(nNameNodes)) .numDataNodes(nDataNodes) .racks(racks) .simulatedCapacities(capacities) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeExit.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeExit.java index b7a10177c13..0faa5b1d05b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeExit.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeExit.java @@ -28,6 +28,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -36,7 +37,6 @@ import org.junit.Test; * Tests if DataNode process exits if all Block Pool services exit. */ public class TestDataNodeExit { - private static int BASEPORT = 9923; private static long WAIT_TIME_IN_MILLIS = 10; Configuration conf; MiniDFSCluster cluster = null; @@ -46,8 +46,9 @@ public class TestDataNodeExit { conf = new HdfsConfiguration(); conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 100); conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 100); - cluster = new MiniDFSCluster.Builder(conf).numNameNodes(3) - .nameNodePort(BASEPORT).build(); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(3)) + .build(); for (int i = 0; i < 3; i++) { cluster.waitActive(i); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java index 7b26f4e805a..cc82682ec4b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java @@ -30,6 +30,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.datanode.FSDataset.VolumeInfo; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; @@ -56,8 +57,9 @@ public class TestDataNodeMultipleRegistrations { */ @Test public void test2NNRegistration() throws IOException { - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numNameNodes(2) - .nameNodePort(9928).build(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) + .build(); try { cluster.waitActive(); NameNode nn1 = cluster.getNameNode(0); @@ -180,8 +182,9 @@ public class TestDataNodeMultipleRegistrations { @Test public void testClusterIdMismatch() throws IOException { - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numNameNodes(2). - nameNodePort(9928).build(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) + .build(); try { cluster.waitActive(); @@ -216,25 +219,27 @@ public class TestDataNodeMultipleRegistrations { Configuration conf = new HdfsConfiguration(); // start Federated cluster and add a node. - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numNameNodes(2). 
- nameNodePort(9928).build(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) + .build(); Assert.assertNotNull(cluster); Assert.assertEquals("(1)Should be 2 namenodes", 2, cluster.getNumNameNodes()); // add a node - cluster.addNameNode(conf, 9929); + cluster.addNameNode(conf, 0); Assert.assertEquals("(1)Should be 3 namenodes", 3, cluster.getNumNameNodes()); cluster.shutdown(); // 2. start with Federation flag set conf = new HdfsConfiguration(); - cluster = new MiniDFSCluster.Builder(conf).federation(true). - nameNodePort(9928).build(); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(1)) + .build(); Assert.assertNotNull(cluster); Assert.assertEquals("(2)Should be 1 namenodes", 1, cluster.getNumNameNodes()); // add a node - cluster.addNameNode(conf, 9929); + cluster.addNameNode(conf, 0); Assert.assertEquals("(2)Should be 2 namenodes", 2, cluster.getNumNameNodes()); cluster.shutdown(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDeleteBlockPool.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDeleteBlockPool.java index 0b0ca7bd742..2ff075c8ad7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDeleteBlockPool.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDeleteBlockPool.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.tools.DFSAdmin; import org.junit.Test; @@ -47,8 +48,9 @@ public class TestDeleteBlockPool { try { conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, "namesServerId1,namesServerId2"); - cluster = new MiniDFSCluster.Builder(conf).federation(true).numNameNodes( - 2).numDataNodes(2).build(); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) + .numDataNodes(2).build(); cluster.waitActive(); @@ -155,8 +157,9 @@ public class TestDeleteBlockPool { try { conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, "namesServerId1,namesServerId2"); - cluster = new MiniDFSCluster.Builder(conf).federation(true).numNameNodes( - 2).numDataNodes(1).build(); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) + .numDataNodes(1).build(); cluster.waitActive(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestMulitipleNNDataBlockScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestMulitipleNNDataBlockScanner.java index 1b9a19c6499..8441e184068 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestMulitipleNNDataBlockScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestMulitipleNNDataBlockScanner.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.junit.Test; @@ -41,12 +42,13 @@ public class TestMulitipleNNDataBlockScanner { String bpids[] = new 
String[3]; FileSystem fs[] = new FileSystem[3]; - public void setUp(int port) throws IOException { + public void setUp() throws IOException { conf = new HdfsConfiguration(); conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 100); conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 100); - cluster = new MiniDFSCluster.Builder(conf).numNameNodes(3).nameNodePort( - port).build(); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(3)) + .build(); for (int i = 0; i < 3; i++) { cluster.waitActive(i); } @@ -65,7 +67,7 @@ public class TestMulitipleNNDataBlockScanner { @Test public void testDataBlockScanner() throws IOException, InterruptedException { - setUp(9923); + setUp(); try { DataNode dn = cluster.getDataNodes().get(0); for (int i = 0; i < 3; i++) { @@ -89,9 +91,10 @@ public class TestMulitipleNNDataBlockScanner { @Test public void testBlockScannerAfterRefresh() throws IOException, InterruptedException { - setUp(9933); + setUp(); try { - Configuration conf = new HdfsConfiguration(cluster.getConfiguration(0)); + Configuration dnConf = cluster.getDataNodes().get(0).getConf(); + Configuration conf = new HdfsConfiguration(dnConf); StringBuilder namenodesBuilder = new StringBuilder(); String bpidToShutdown = cluster.getNamesystem(2).getBlockPoolId(); @@ -140,7 +143,7 @@ public class TestMulitipleNNDataBlockScanner { @Test public void testBlockScannerAfterRestart() throws IOException, InterruptedException { - setUp(9943); + setUp(); try { cluster.restartDataNode(0); cluster.waitActive(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java index 150f1178406..1360cad5caf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java @@ -26,6 +26,9 @@ import java.net.InetSocketAddress; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf; +import org.apache.hadoop.hdfs.MiniDFSNNTopology.NSConf; import org.junit.Test; /** @@ -43,9 +46,13 @@ public class TestRefreshNamenodes { Configuration conf = new Configuration(); MiniDFSCluster cluster = null; try { - conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, "namesServerId1"); - cluster = new MiniDFSCluster.Builder(conf).federation(true). 
- numNameNodes(1).nameNodePort(nnPort1).build(); + MiniDFSNNTopology topology = new MiniDFSNNTopology() + .addNameservice(new NSConf("ns1").addNN( + new NNConf(null).setIpcPort(nnPort1))) + .setFederation(true); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(topology) + .build(); DataNode dn = cluster.getDataNodes().get(0); assertEquals(1, dn.getAllBpOs().length); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index fd52e901abd..2f14331c9d9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -97,4 +97,11 @@ public class NameNodeAdapter { ns.readUnlock(); } } + + /** + * Return the FSNamesystem stats + */ + public static long[] getStats(final FSNamesystem fsn) { + return fsn.getStats(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index 65169fa0119..7fe193e913b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hdfs.DFSUtil.ErrorSimulator; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; @@ -1072,8 +1073,9 @@ public class TestCheckpoint extends TestCase { String nameserviceId2 = "ns2"; conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, nameserviceId1 + "," + nameserviceId2); - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numNameNodes(2) - .nameNodePort(9928).build(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) + .build(); Configuration snConf1 = new HdfsConfiguration(cluster.getConfiguration(0)); Configuration snConf2 = new HdfsConfiguration(cluster.getConfiguration(1)); InetSocketAddress nn1RpcAddress = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java index ceb6261db4b..90c648fb199 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestStartup.java @@ -514,11 +514,10 @@ public class TestStartup extends TestCase { InetAddress inetAddress = InetAddress.getByAddress(b); list.add(inetAddress.getHostName()); writeConfigFile(localFileSys, hostsFile, list); - int numNameNodes = 1; int numDatanodes = 1; try { - cluster = new MiniDFSCluster.Builder(conf).numNameNodes(numNameNodes) + cluster = new MiniDFSCluster.Builder(conf) 
.numDataNodes(numDatanodes).setupHostsFile(true).build(); cluster.waitActive(); From f87a4b40bc99e76602a75906df31747cfdbff78a Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 30 Nov 2011 21:46:22 +0000 Subject: [PATCH 026/177] HDFS-1975. Support for sharing the namenode state from active to standby. Contributed by Jitendra Nath Pandey, Aaron T Myers, and Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1208813 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 1 + .../hdfs/protocol/BlockListAsLongs.java | 14 +- .../hadoop/hdfs/server/common/Storage.java | 2 +- .../hdfs/server/namenode/BackupImage.java | 2 +- .../hdfs/server/namenode/Checkpointer.java | 2 +- .../namenode/EditLogBackupInputStream.java | 5 + .../namenode/EditLogFileInputStream.java | 13 +- .../server/namenode/EditLogInputStream.java | 12 +- .../hdfs/server/namenode/FSEditLog.java | 91 ++++++-- .../hadoop/hdfs/server/namenode/FSImage.java | 41 +++- .../hdfs/server/namenode/FSNamesystem.java | 139 +++++++++++- .../server/namenode/FileJournalManager.java | 10 +- .../hdfs/server/namenode/NNStorage.java | 22 +- .../hadoop/hdfs/server/namenode/NameNode.java | 9 +- .../server/namenode/NameNodeRpcServer.java | 45 +++- .../namenode/PendingDataNodeMessages.java | 201 ++++++++++++++++++ .../server/namenode/ha/EditLogTailer.java | 142 +++++++++++++ .../server/protocol/DatanodeProtocol.java | 3 +- .../src/main/resources/hdfs-default.xml | 12 ++ .../apache/hadoop/hdfs/MiniDFSCluster.java | 7 +- .../apache/hadoop/hdfs/MiniDFSNNTopology.java | 11 + .../hdfs/server/namenode/CreateEditsLog.java | 2 +- .../hdfs/server/namenode/FSImageTestUtil.java | 4 +- .../hdfs/server/namenode/NameNodeAdapter.java | 30 +++ .../hdfs/server/namenode/TestEditLog.java | 18 +- .../server/namenode/TestFSEditLogLoader.java | 2 +- .../server/namenode/ha/TestEditLogTailer.java | 128 +++++++++++ 28 files changed, 904 insertions(+), 66 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index ce0fd6c3a19..e2975e85cff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -27,3 +27,5 @@ HDFS-2577. NN fails to start since it tries to start secret manager in safemode. HDFS-2582. Scope dfs.ha.namenodes config by nameservice (todd) HDFS-2591. MiniDFSCluster support to mix and match federation with HA (todd) + +HDFS-1975. Support for sharing the namenode state from active to standby. 
(jitendra, atm, todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 9f774d00aa8..511adcfb170 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -162,6 +162,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_HTTPS_ADDRESS_DEFAULT = "0.0.0.0:" + DFS_NAMENODE_HTTPS_PORT_DEFAULT; public static final String DFS_NAMENODE_NAME_DIR_KEY = "dfs.namenode.name.dir"; public static final String DFS_NAMENODE_EDITS_DIR_KEY = "dfs.namenode.edits.dir"; + public static final String DFS_NAMENODE_SHARED_EDITS_DIR_KEY = "dfs.namenode.shared.edits.dir"; public static final String DFS_CLIENT_READ_PREFETCH_SIZE_KEY = "dfs.client.read.prefetch.size"; public static final String DFS_CLIENT_RETRY_WINDOW_BASE= "dfs.client.retry.window.base"; public static final String DFS_METRICS_SESSION_ID_KEY = "dfs.metrics.session-id"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockListAsLongs.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockListAsLongs.java index e1006a65d43..58af5fd50c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockListAsLongs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/BlockListAsLongs.java @@ -40,7 +40,7 @@ import org.apache.hadoop.hdfs.server.datanode.ReplicaInfo; * - followed by the invalid replica represented with three -1s; * - followed by the under-construction replica list where each replica is * represented by 4 longs: three for the block id, length, generation - * stamp, and the forth for the replica state. + * stamp, and the fourth for the replica state. */ @InterfaceAudience.Private @InterfaceStability.Evolving @@ -304,4 +304,16 @@ public class BlockListAsLongs implements Iterable { blockList[idx+1] = -1; blockList[idx+2] = -1; } + + public long getMaxGsInBlockList() { + long maxGs = -1; + Iterator iter = getBlockReportIterator(); + while (iter.hasNext()) { + Block b = iter.next(); + if (b.getGenerationStamp() > maxGs) { + maxGs = b.getGenerationStamp(); + } + } + return maxGs; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java index 4c11973d4ae..027ce7cc1a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java @@ -568,7 +568,7 @@ public abstract class Storage extends StorageInfo { *

Locking is not supported by all file systems. * E.g., NFS does not consistently support exclusive locks. * - * If locking is supported we guarantee exculsive access to the + *
      If locking is supported we guarantee exclusive access to the * storage directory. Otherwise, no guarantee is given. * * @throws IOException if locking fails diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java index fc1fe14af78..4de70367a5a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java @@ -345,7 +345,7 @@ public class BackupImage extends FSImage { synchronized void namenodeStartedLogSegment(long txid) throws IOException { LOG.info("NameNode started a new log segment at txid " + txid); - if (editLog.isOpen()) { + if (editLog.isOpenForWrite()) { if (editLog.getLastWrittenTxId() == txid - 1) { // We are in sync with the NN, so end and finalize the current segment editLog.endCurrentLogSegment(false); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java index 39d2abaee75..6fcf3b17a7d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java @@ -286,7 +286,7 @@ class Checkpointer extends Daemon { log.getStartTxId(), log.getEndTxId()); if (log.getStartTxId() > dstImage.getLastAppliedTxId()) { editsStreams.add(new EditLogFileInputStream(f, log.getStartTxId(), - log.getEndTxId())); + log.getEndTxId(), true)); } } LOG.info("Checkpointer about to load edits from " + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java index 974697d9271..68bcdba6edf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java @@ -133,4 +133,9 @@ class EditLogBackupInputStream extends EditLogInputStream { public long getLastTxId() throws IOException { return HdfsConstants.INVALID_TXID; } + + @Override + boolean isInProgress() { + return true; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java index d05c4fe3d53..719ef781006 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java @@ -41,6 +41,7 @@ class EditLogFileInputStream extends EditLogInputStream { private final int logVersion; private final FSEditLogOp.Reader reader; private final FSEditLogLoader.PositionTrackingInputStream tracker; + private final boolean isInProgress; /** * Open an EditLogInputStream for the given file. 
@@ -53,7 +54,7 @@ class EditLogFileInputStream extends EditLogInputStream { */ EditLogFileInputStream(File name) throws LogHeaderCorruptException, IOException { - this(name, HdfsConstants.INVALID_TXID, HdfsConstants.INVALID_TXID); + this(name, HdfsConstants.INVALID_TXID, HdfsConstants.INVALID_TXID, false); } /** @@ -66,8 +67,8 @@ class EditLogFileInputStream extends EditLogInputStream { * @throws IOException if an actual IO error occurs while reading the * header */ - EditLogFileInputStream(File name, long firstTxId, long lastTxId) - throws LogHeaderCorruptException, IOException { + EditLogFileInputStream(File name, long firstTxId, long lastTxId, + boolean isInProgress) throws LogHeaderCorruptException, IOException { file = name; fStream = new FileInputStream(name); @@ -84,6 +85,7 @@ class EditLogFileInputStream extends EditLogInputStream { reader = new FSEditLogOp.Reader(in, logVersion); this.firstTxId = firstTxId; this.lastTxId = lastTxId; + this.isInProgress = isInProgress; } @Override @@ -132,6 +134,11 @@ class EditLogFileInputStream extends EditLogInputStream { return file.length(); } + @Override + boolean isInProgress() { + return isInProgress; + } + @Override public String toString() { return getName(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java index c6f850542f8..c66977c0717 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java @@ -20,6 +20,9 @@ package org.apache.hadoop.hdfs.server.namenode; import java.io.Closeable; import java.io.IOException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + /** * A generic abstract class to support reading edits log data from * persistent storage. @@ -27,7 +30,9 @@ import java.io.IOException; * It should stream bytes from the storage exactly as they were written * into the #{@link EditLogOutputStream}. */ -abstract class EditLogInputStream implements JournalStream, Closeable { +@InterfaceAudience.Private +@InterfaceStability.Evolving +public abstract class EditLogInputStream implements JournalStream, Closeable { /** * @return the first transaction which will be found in this stream */ @@ -75,4 +80,9 @@ abstract class EditLogInputStream implements JournalStream, Closeable { * Return the size of the current edits log. */ abstract long length() throws IOException; + + /** + * Return true if this stream is in progress, false if it is finalized. + */ + abstract boolean isInProgress(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index cb0f88e85ad..bfe971b5eb4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -60,22 +60,36 @@ public class FSEditLog { /** * State machine for edit log. + * + * In a non-HA setup: + * * The log starts in UNITIALIZED state upon construction. Once it's - * initialized, it is usually in IN_SEGMENT state, indicating that edits - * may be written. 
In the middle of a roll, or while saving the namespace, - * it briefly enters the BETWEEN_LOG_SEGMENTS state, indicating that the - * previous segment has been closed, but the new one has not yet been opened. + * initialized, it is usually in IN_SEGMENT state, indicating that edits may + * be written. In the middle of a roll, or while saving the namespace, it + * briefly enters the BETWEEN_LOG_SEGMENTS state, indicating that the previous + * segment has been closed, but the new one has not yet been opened. + * + * In an HA setup: + * + * The log starts in UNINITIALIZED state upon construction. Once it's + * initialized, it sits in the OPEN_FOR_READING state the entire time that the + * NN is in standby. Upon the NN transition to active, the log will be CLOSED, + * and then move to being BETWEEN_LOG_SEGMENTS, much as if the NN had just + * started up, and then will move to IN_SEGMENT so it can begin writing to the + * log. The log states will then revert to behaving as they do in a non-HA + * setup. */ private enum State { UNINITIALIZED, BETWEEN_LOG_SEGMENTS, IN_SEGMENT, + OPEN_FOR_READING, CLOSED; } private State state = State.UNINITIALIZED; //initialize - final private JournalSet journalSet; + private JournalSet journalSet = null; private EditLogOutputStream editLogStream = null; // a monotonically increasing counter that represents transactionIds. @@ -125,6 +139,11 @@ public class FSEditLog { }; final private Collection editsDirs; + + /** + * The edit directories that are shared between primary and secondary. + */ + final private Collection sharedEditsDirs; /** * Construct FSEditLog with default configuration, taking editDirs from NNStorage @@ -163,9 +182,34 @@ public class FSEditLog { } else { this.editsDirs = Lists.newArrayList(editsDirs); } - + + this.sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf); + } + + public void initJournalsForWrite() { + Preconditions.checkState(state == State.UNINITIALIZED || + state == State.CLOSED, "Unexpected state: %s", state); + + initJournals(this.editsDirs); + state = State.BETWEEN_LOG_SEGMENTS; + } + + public void initSharedJournalsForRead() { + if (state == State.OPEN_FOR_READING) { + LOG.warn("Initializing shared journals for READ, already open for READ", + new Exception()); + return; + } + Preconditions.checkState(state == State.UNINITIALIZED || + state == State.CLOSED); + + initJournals(this.sharedEditsDirs); + state = State.OPEN_FOR_READING; + } + + private void initJournals(Collection dirs) { this.journalSet = new JournalSet(); - for (URI u : this.editsDirs) { + for (URI u : dirs) { StorageDirectory sd = storage.getStorageDirectory(u); if (sd != null) { journalSet.add(new FileJournalManager(sd)); @@ -175,7 +219,6 @@ public class FSEditLog { if (journalSet.isEmpty()) { LOG.error("No edits directories configured!"); } - state = State.BETWEEN_LOG_SEGMENTS; } /** @@ -190,17 +233,22 @@ public class FSEditLog { * Initialize the output stream for logging, opening the first * log segment. 
*/ - synchronized void open() throws IOException { - Preconditions.checkState(state == State.BETWEEN_LOG_SEGMENTS); + synchronized void openForWrite() throws IOException { + Preconditions.checkState(state == State.BETWEEN_LOG_SEGMENTS, + "Bad state: %s", state); startLogSegment(getLastWrittenTxId() + 1, true); assert state == State.IN_SEGMENT : "Bad state: " + state; } - synchronized boolean isOpen() { + synchronized boolean isOpenForWrite() { return state == State.IN_SEGMENT; } + synchronized boolean isOpenForRead() { + return state == State.OPEN_FOR_READING; + } + /** * Shutdown the file store. */ @@ -230,7 +278,8 @@ public class FSEditLog { */ void logEdit(final FSEditLogOp op) { synchronized (this) { - assert state != State.CLOSED; + assert state != State.CLOSED && state != State.OPEN_FOR_READING : + "bad state: " + state; // wait if an automatic sync is scheduled waitIfAutoSyncScheduled(); @@ -317,7 +366,7 @@ public class FSEditLog { /** * Return the transaction ID of the last transaction written to the log. */ - synchronized long getLastWrittenTxId() { + public synchronized long getLastWrittenTxId() { return txid; } @@ -962,19 +1011,29 @@ public class FSEditLog { // All journals have failed, it is handled in logSync. } } + + Collection selectInputStreams(long fromTxId, + long toAtLeastTxId) throws IOException { + return selectInputStreams(fromTxId, toAtLeastTxId, true); + } /** * Select a list of input streams to load. + * * @param fromTxId first transaction in the selected streams * @param toAtLeast the selected streams must contain this transaction + * @param inProgessOk set to true if in-progress streams are OK */ - Collection selectInputStreams(long fromTxId, - long toAtLeastTxId) throws IOException { + public Collection selectInputStreams(long fromTxId, + long toAtLeastTxId, boolean inProgressOk) throws IOException { List streams = new ArrayList(); EditLogInputStream stream = journalSet.getInputStream(fromTxId); while (stream != null) { + if (inProgressOk || !stream.isInProgress()) { + streams.add(stream); + } + // We're now looking for a higher range, so reset the fromTxId fromTxId = stream.getLastTxId() + 1; - streams.add(stream); stream = journalSet.getInputStream(fromTxId); } if (fromTxId <= toAtLeastTxId) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 4cfb014dd53..a4a772cd771 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -56,6 +56,8 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.hdfs.util.MD5FileUtils; import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.HAUtil; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; @@ -68,7 +70,7 @@ import com.google.common.collect.Lists; @InterfaceAudience.Private @InterfaceStability.Evolving public class FSImage implements Closeable { - protected static final Log LOG = LogFactory.getLog(FSImage.class.getName()); + public static final Log LOG = LogFactory.getLog(FSImage.class.getName()); protected FSEditLog editLog = null; private boolean isUpgradeFinalized = false; @@ -112,7 +114,8 @@ public class FSImage implements Closeable 
{ * @throws IOException if directories are invalid. */ protected FSImage(Configuration conf, - Collection imageDirs, Collection editsDirs) + Collection imageDirs, + Collection editsDirs) throws IOException { this.conf = conf; @@ -123,6 +126,12 @@ public class FSImage implements Closeable { } this.editLog = new FSEditLog(conf, storage, editsDirs); + String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); + if (!HAUtil.isHAEnabled(conf, nameserviceId)) { + editLog.initJournalsForWrite(); + } else { + editLog.initSharedJournalsForRead(); + } archivalManager = new NNStorageRetentionManager(conf, storage, editLog); } @@ -217,6 +226,7 @@ public class FSImage implements Closeable { } } + // TODO(HA): Have to figure out a story for the first 3 of these. // 3. Do transitions switch(startOpt) { case UPGRADE: @@ -251,6 +261,12 @@ public class FSImage implements Closeable { StorageState curState; try { curState = sd.analyzeStorage(startOpt, storage); + // TODO(HA): Fix this. + String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); + if (curState != StorageState.NORMAL && HAUtil.isHAEnabled(conf, nameserviceId)) { + throw new IOException("Cannot start an HA namenode with name dirs " + + "that need recovery. Dir: " + sd + " state: " + curState); + } // sd is locked but not opened switch(curState) { case NON_EXISTENT: @@ -326,7 +342,7 @@ public class FSImage implements Closeable { assert curDir.exists() : "Current directory must exist."; assert !prevDir.exists() : "prvious directory must not exist."; assert !tmpDir.exists() : "prvious.tmp directory must not exist."; - assert !editLog.isOpen() : "Edits log must not be open."; + assert !editLog.isOpenForWrite() : "Edits log must not be open."; // rename current to tmp NNStorage.rename(curDir, tmpDir); @@ -519,11 +535,11 @@ public class FSImage implements Closeable { return editLog; } - void openEditLog() throws IOException { + void openEditLogForWrite() throws IOException { assert editLog != null : "editLog must be initialized"; - Preconditions.checkState(!editLog.isOpen(), + Preconditions.checkState(!editLog.isOpenForWrite(), "edit log should not yet be open"); - editLog.open(); + editLog.openForWrite(); storage.writeTransactionIdFileToStorage(editLog.getCurSegmentTxId()); }; @@ -564,6 +580,7 @@ public class FSImage implements Closeable { Iterable editStreams = null; + // TODO(HA): We shouldn't run this when coming up in standby state editLog.recoverUnclosedStreams(); if (LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, @@ -616,6 +633,8 @@ public class FSImage implements Closeable { // update the txid for the edit log editLog.setNextTxId(storage.getMostRecentCheckpointTxId() + numLoaded + 1); + // TODO(HA): This should probably always return false when HA is enabled and + // we're coming up in standby state. return needToSave; } @@ -644,7 +663,7 @@ public class FSImage implements Closeable { * Load the specified list of edit files into the image. * @return the number of transactions loaded */ - protected long loadEdits(Iterable editStreams, + public long loadEdits(Iterable editStreams, FSNamesystem target) throws IOException { LOG.debug("About to load edits:\n " + Joiner.on("\n ").join(editStreams)); @@ -663,10 +682,13 @@ public class FSImage implements Closeable { lastAppliedTxId += thisNumLoaded; } } finally { + // TODO(HA): Should this happen when called by the tailer? FSEditLog.closeAllStreams(editStreams); } // update the counts + // TODO(HA): this may be very slow -- we probably want to + // update them as we go for HA. 
target.dir.updateCountForINodeWithQuota(); return numLoaded; } @@ -688,8 +710,7 @@ public class FSImage implements Closeable { /** * Load in the filesystem image from file. It's a big list of - * filenames and blocks. Return whether we should - * "re-save" and consolidate the edit-logs + * filenames and blocks. */ private void loadFSImage(File curFile, MD5Hash expectedMd5, FSNamesystem target) throws IOException { @@ -790,7 +811,7 @@ public class FSImage implements Closeable { assert editLog != null : "editLog must be initialized"; storage.attemptRestoreRemovedStorage(); - boolean editLogWasOpen = editLog.isOpen(); + boolean editLogWasOpen = editLog.isOpenForWrite(); if (editLogWasOpen) { editLog.endCurrentLogSegment(true); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 3f933a8ffaf..9a499b65250 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -46,6 +46,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DAT import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_UPGRADE_PERMISSION_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT; @@ -108,6 +109,7 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; import org.apache.hadoop.hdfs.protocol.Block; @@ -144,6 +146,11 @@ import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport; import org.apache.hadoop.hdfs.server.common.Util; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; +import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.BlockReceivedDeleteMessage; +import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.BlockReportMessage; +import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.CommitBlockSynchronizationMessage; +import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.DataNodeMessage; +import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer; import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; @@ -170,6 +177,7 @@ import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.VersionInfo; import org.mortbay.util.ajax.JSON; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; 
/*************************************************** @@ -293,6 +301,16 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // lock to protect FSNamesystem. private ReentrantReadWriteLock fsLock; + private PendingDataNodeMessages pendingDatanodeMessages = new PendingDataNodeMessages(); + + /** + * Used when this NN is in standby state to read from the shared edit log. + */ + private EditLogTailer editLogTailer = null; + + PendingDataNodeMessages getPendingDataNodeMessages() { + return pendingDatanodeMessages; + } /** * Instantiates an FSNamesystem loaded from the image and edits @@ -303,7 +321,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @return an FSNamesystem which contains the loaded namespace * @throws IOException if loading fails */ - public static FSNamesystem loadFromDisk(Configuration conf) throws IOException { + public static FSNamesystem loadFromDisk(Configuration conf) + throws IOException { Collection namespaceDirs = FSNamesystem.getNamespaceDirs(conf); Collection namespaceEditsDirs = FSNamesystem.getNamespaceEditsDirs(conf); @@ -322,7 +341,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, long loadStart = now(); StartupOption startOpt = NameNode.getStartupOption(conf); - namesystem.loadFSImage(startOpt, fsImage); + String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); + namesystem.loadFSImage(startOpt, fsImage, + HAUtil.isHAEnabled(conf, nameserviceId)); long timeTakenToLoadFSImage = now() - loadStart; LOG.info("Finished loading FSImage in " + timeTakenToLoadFSImage + " msecs"); NameNode.getNameNodeMetrics().setFsImageLoadTime( @@ -368,7 +389,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, this.safeMode = new SafeModeInfo(conf); } - void loadFSImage(StartupOption startOpt, FSImage fsImage) + void loadFSImage(StartupOption startOpt, FSImage fsImage, boolean haEnabled) throws IOException { // format before starting up if requested if (startOpt == StartupOption.FORMAT) { @@ -379,10 +400,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } boolean success = false; try { - if (fsImage.recoverTransitionRead(startOpt, this)) { + // We shouldn't be calling saveNamespace if we've come up in standby state. + if (fsImage.recoverTransitionRead(startOpt, this) && !haEnabled) { fsImage.saveNamespace(this); } - fsImage.openEditLog(); + // This will start a new log segment and write to the seen_txid file, so + // we shouldn't do it when coming up in standby state + if (!haEnabled) { + fsImage.openEditLogForWrite(); + } success = true; } finally { @@ -449,6 +475,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LOG.info("Starting services required for active state"); writeLock(); try { + if (!dir.fsImage.editLog.isOpenForWrite()) { + // During startup, we're already open for write during initialization. + // TODO(HA): consider adding a startup state? 
+ dir.fsImage.editLog.initJournalsForWrite(); + // May need to recover + dir.fsImage.editLog.recoverUnclosedStreams(); + dir.fsImage.editLog.openForWrite(); + } if (UserGroupInformation.isSecurityEnabled()) { startSecretManager(); } @@ -459,7 +493,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } /** - * Start services required in active state + * Stop services required in active state * @throws InterruptedException */ void stopActiveServices() { @@ -470,6 +504,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (leaseManager != null) { leaseManager.stopMonitor(); } + dir.fsImage.editLog.close(); } finally { writeUnlock(); } @@ -478,11 +513,21 @@ public class FSNamesystem implements Namesystem, FSClusterStats, /** Start services required in standby state */ void startStandbyServices() { LOG.info("Starting services required for standby state"); + if (!dir.fsImage.editLog.isOpenForRead()) { + // During startup, we're already open for read. + dir.fsImage.editLog.initSharedJournalsForRead(); + } + editLogTailer = new EditLogTailer(this); + editLogTailer.start(); } /** Stop services required in standby state */ - void stopStandbyServices() { + void stopStandbyServices() throws IOException { LOG.info("Stopping services started for standby state"); + if (editLogTailer != null) { + editLogTailer.stop(); + } + dir.fsImage.editLog.close(); } public static Collection getNamespaceDirs(Configuration conf) { @@ -520,7 +565,22 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } public static Collection getNamespaceEditsDirs(Configuration conf) { - return getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY); + Collection editsDirs = getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY); + editsDirs.addAll(getSharedEditsDirs(conf)); + return editsDirs; + } + + /** + * Returns edit directories that are shared between primary and secondary. + * @param conf + * @return Collection of edit directories. + */ + public static Collection getSharedEditsDirs(Configuration conf) { + // don't use getStorageDirs here, because we want an empty default + // rather than the dir in /tmp + Collection dirNames = conf.getTrimmedStringCollection( + DFS_NAMENODE_SHARED_EDITS_DIR_KEY); + return Util.stringCollectionAsURIs(dirNames); } @Override @@ -634,6 +694,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } finally { // using finally to ensure we also wait for lease daemon try { + // TODO: these lines spew lots of warnings about "already stopped" logs, etc stopActiveServices(); stopStandbyServices(); if (dir != null) { @@ -1796,12 +1857,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @throws QuotaExceededException If addition of block exceeds space quota */ private Block allocateBlock(String src, INode[] inodes, - DatanodeDescriptor targets[]) throws QuotaExceededException { + DatanodeDescriptor targets[]) throws QuotaExceededException, + SafeModeException { assert hasWriteLock(); Block b = new Block(DFSUtil.getRandom().nextLong(), 0, 0); while(isValidBlock(b)) { b.setBlockId(DFSUtil.getRandom().nextLong()); } + // Increment the generation stamp for every new block. 
+ nextGenerationStamp(); b.setGenerationStamp(getGenerationStamp()); b = dir.addBlock(src, inodes, b, targets); NameNode.stateChangeLog.info("BLOCK* NameSystem.allocateBlock: " @@ -2703,11 +2767,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } - FSImage getFSImage() { + public FSImage getFSImage() { return dir.fsImage; } - FSEditLog getEditLog() { + public FSEditLog getEditLog() { return getFSImage().getEditLog(); } @@ -3726,6 +3790,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } private ObjectName mbeanName; + /** * Register the FSNamesystem MBean using the name * "hadoop:service=NameNode,name=FSNamesystemState" @@ -3766,6 +3831,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void setGenerationStamp(long stamp) { generationStamp.setStamp(stamp); + notifyGenStampUpdate(stamp); } /** @@ -4523,4 +4589,55 @@ public class FSNamesystem implements Namesystem, FSClusterStats, byte[] password) throws InvalidToken { getDelegationTokenSecretManager().verifyToken(identifier, password); } + + public boolean isGenStampInFuture(long genStamp) { + return (genStamp > getGenerationStamp()); + } + + public void notifyGenStampUpdate(long gs) { + LOG.info("=> notified of genstamp update for: " + gs); + DataNodeMessage msg = pendingDatanodeMessages.take(gs); + while (msg != null) { + LOG.info("processing message: " + msg); + try { + switch (msg.getType()) { + case BLOCK_RECEIVED_DELETE: + BlockReceivedDeleteMessage m = (BlockReceivedDeleteMessage) msg; + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog + .debug("*BLOCK* NameNode.blockReceivedAndDeleted: " + "from " + + m.getNodeReg().getName() + " " + + m.getReceivedAndDeletedBlocks().length + " blocks."); + } + this.getBlockManager().blockReceivedAndDeleted(m.getNodeReg(), + m.getPoolId(), m.getReceivedAndDeletedBlocks()); + break; + case BLOCK_REPORT: + BlockReportMessage mbr = (BlockReportMessage) msg; + if (NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("*BLOCK* NameNode.blockReport: " + + "from " + mbr.getNodeReg().getName() + " " + + mbr.getBlockList().getNumberOfBlocks() + " blocks"); + } + this.getBlockManager().processReport(mbr.getNodeReg(), + mbr.getPoolId(), mbr.getBlockList()); + break; + case COMMIT_BLOCK_SYNCHRONIZATION: + CommitBlockSynchronizationMessage mcbm = (CommitBlockSynchronizationMessage) msg; + this.commitBlockSynchronization(mcbm.getBlock(), + mcbm.getNewgenerationstamp(), mcbm.getNewlength(), + mcbm.isCloseFile(), mcbm.isDeleteblock(), mcbm.getNewtargets()); + break; + } + } catch (IOException ex) { + LOG.warn("Could not process the message " + msg.getType(), ex); + } + msg = pendingDatanodeMessages.take(gs); + } + } + + @VisibleForTesting + public EditLogTailer getEditLogTailer() { + return editLogTailer; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index eeb40c2f572..bf7bfde2da3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -90,7 +90,7 @@ class FileJournalManager implements JournalManager { File dstFile = NNStorage.getFinalizedEditsFile( sd, firstTxId, lastTxId); - LOG.debug("Finalizing edits file " + inprogressFile + " -> 
" + dstFile); + LOG.info("Finalizing edits file " + inprogressFile + " -> " + dstFile); Preconditions.checkState(!dstFile.exists(), "Can't finalize edits file " + inprogressFile + " since finalized file " + @@ -116,6 +116,7 @@ class FileJournalManager implements JournalManager { @Override public void purgeLogsOlderThan(long minTxIdToKeep) throws IOException { + LOG.info("Purging logs older than " + minTxIdToKeep); File[] files = FileUtil.listFiles(sd.getCurrentDir()); List editLogs = FileJournalManager.matchEditLogs(files); @@ -169,7 +170,7 @@ class FileJournalManager implements JournalManager { LOG.error("Edits file " + f + " has improperly formatted " + "transaction ID"); // skip - } + } } // Check for in-progress edits @@ -190,7 +191,7 @@ class FileJournalManager implements JournalManager { } @Override - synchronized public EditLogInputStream getInputStream(long fromTxId) + synchronized public EditLogInputStream getInputStream(long fromTxId) throws IOException { for (EditLogFile elf : getLogFiles(fromTxId)) { if (elf.getFirstTxId() == fromTxId) { @@ -201,7 +202,7 @@ class FileJournalManager implements JournalManager { LOG.trace("Returning edit stream reading from " + elf); } return new EditLogFileInputStream(elf.getFile(), - elf.getFirstTxId(), elf.getLastTxId()); + elf.getFirstTxId(), elf.getLastTxId(), elf.isInProgress()); } } @@ -245,6 +246,7 @@ class FileJournalManager implements JournalManager { } long max = findMaxTransaction(); + // fromTxId should be greater than max, as it points to the next // transaction we should expect to find. If it is less than or equal // to max, it means that a transaction with txid == max has not been found diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java index a7fa7fb4252..7bddaeb5d1f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java @@ -30,6 +30,7 @@ import java.security.NoSuchAlgorithmException; import java.security.SecureRandom; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -157,7 +158,8 @@ public class NNStorage extends Storage implements Closeable { // this may modify the editsDirs, so copy before passing in setStorageDirectories(imageDirs, - Lists.newArrayList(editsDirs)); + Lists.newArrayList(editsDirs), + FSNamesystem.getSharedEditsDirs(conf)); } @Override // Storage @@ -245,6 +247,16 @@ public class NNStorage extends Storage implements Closeable { List getRemovedStorageDirs() { return this.removedStorageDirs; } + + /** + * See {@link NNStorage#setStorageDirectories(Collection, Collection, Collection)} + */ + @VisibleForTesting + synchronized void setStorageDirectories(Collection fsNameDirs, + Collection fsEditsDirs) + throws IOException { + setStorageDirectories(fsNameDirs, fsEditsDirs, new ArrayList()); + } /** * Set the storage directories which will be used. 
This should only ever be @@ -261,7 +273,8 @@ public class NNStorage extends Storage implements Closeable { */ @VisibleForTesting synchronized void setStorageDirectories(Collection fsNameDirs, - Collection fsEditsDirs) + Collection fsEditsDirs, + Collection sharedEditsDirs) throws IOException { this.storageDirs.clear(); this.removedStorageDirs.clear(); @@ -285,7 +298,8 @@ public class NNStorage extends Storage implements Closeable { if(dirName.getScheme().compareTo(JournalType.FILE.name().toLowerCase()) == 0){ this.addStorageDir(new StorageDirectory(new File(dirName.getPath()), - dirType)); + dirType, + !sharedEditsDirs.contains(dirName))); // Don't lock the dir if it's shared. } } @@ -297,7 +311,7 @@ public class NNStorage extends Storage implements Closeable { if(dirName.getScheme().compareTo(JournalType.FILE.name().toLowerCase()) == 0) this.addStorageDir(new StorageDirectory(new File(dirName.getPath()), - NameNodeDirType.EDITS)); + NameNodeDirType.EDITS, !sharedEditsDirs.contains(dirName))); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index f411a4adbb1..b05b9f10bdd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -947,12 +947,17 @@ public class NameNode { @Override public void startStandbyServices() throws IOException { - // TODO:HA Start reading editlog from active + namesystem.startStandbyServices(); } @Override public void stopStandbyServices() throws IOException { - // TODO:HA Stop reading editlog from active + // TODO(HA): Are we guaranteed to be the only active here? 
+ namesystem.stopStandbyServices(); } } + + public boolean isStandbyState() { + return (state.equals(STANDBY_STATE)); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index ff057443433..69b3f972c1f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -179,6 +179,7 @@ class NameNodeRpcServer implements NamenodeProtocols { RefreshAuthorizationPolicyProtocol.class, this); this.clientRpcServer.addProtocol(RefreshUserMappingsProtocol.class, this); this.clientRpcServer.addProtocol(GetUserMappingsProtocol.class, this); + this.clientRpcServer.addProtocol(HAServiceProtocol.class, this); // set service-level authorization security policy @@ -538,6 +539,17 @@ class NameNodeRpcServer implements NamenodeProtocols { boolean closeFile, boolean deleteblock, DatanodeID[] newtargets) throws IOException { nn.checkOperation(OperationCategory.WRITE); + if (nn.isStandbyState()) { + if (namesystem.isGenStampInFuture(newgenerationstamp)) { + LOG.info("Required GS=" + newgenerationstamp + + ", Queuing commitBlockSynchronization message"); + namesystem.getPendingDataNodeMessages().queueMessage( + new PendingDataNodeMessages.CommitBlockSynchronizationMessage( + block, newgenerationstamp, newlength, closeFile, deleteblock, + newtargets, newgenerationstamp)); + return; + } + } namesystem.commitBlockSynchronization(block, newgenerationstamp, newlength, closeFile, deleteblock, newtargets); } @@ -670,7 +682,7 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public DatanodeInfo[] getDatanodeReport(DatanodeReportType type) throws IOException { - nn.checkOperation(OperationCategory.READ); + // TODO(HA): decide on OperationCategory for this DatanodeInfo results[] = namesystem.datanodeReport(type); if (results == null ) { throw new IOException("Cannot find datanode report"); @@ -859,6 +871,16 @@ class NameNodeRpcServer implements NamenodeProtocols { String poolId, long[] blocks) throws IOException { verifyRequest(nodeReg); BlockListAsLongs blist = new BlockListAsLongs(blocks); + if (nn.isStandbyState()) { + long maxGs = blist.getMaxGsInBlockList(); + if (namesystem.isGenStampInFuture(maxGs)) { + LOG.info("Required GS="+maxGs+", Queuing blockReport message"); + namesystem.getPendingDataNodeMessages().queueMessage( + new PendingDataNodeMessages.BlockReportMessage(nodeReg, poolId, + blist, maxGs)); + return null; + } + } if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*BLOCK* NameNode.blockReport: " + "from " + nodeReg.getName() + " " + blist.getNumberOfBlocks() @@ -866,7 +888,7 @@ class NameNodeRpcServer implements NamenodeProtocols { } namesystem.getBlockManager().processReport(nodeReg, poolId, blist); - if (nn.getFSImage().isUpgradeFinalized()) + if (nn.getFSImage().isUpgradeFinalized() && !nn.isStandbyState()) return new FinalizeCommand(poolId); return null; } @@ -875,6 +897,25 @@ class NameNodeRpcServer implements NamenodeProtocols { public void blockReceivedAndDeleted(DatanodeRegistration nodeReg, String poolId, ReceivedDeletedBlockInfo[] receivedAndDeletedBlocks) throws IOException { verifyRequest(nodeReg); + if (nn.isStandbyState()) { + if (receivedAndDeletedBlocks.length > 0) { + long maxGs = 
receivedAndDeletedBlocks[0].getBlock() + .getGenerationStamp(); + for (ReceivedDeletedBlockInfo binfo : receivedAndDeletedBlocks) { + if (binfo.getBlock().getGenerationStamp() > maxGs) { + maxGs = binfo.getBlock().getGenerationStamp(); + } + } + if (namesystem.isGenStampInFuture(maxGs)) { + LOG.info("Required GS=" + maxGs + + ", Queuing blockReceivedAndDeleted message"); + namesystem.getPendingDataNodeMessages().queueMessage( + new PendingDataNodeMessages.BlockReceivedDeleteMessage(nodeReg, + poolId, receivedAndDeletedBlocks, maxGs)); + return; + } + } + } if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*BLOCK* NameNode.blockReceivedAndDeleted: " +"from "+nodeReg.getName()+" "+receivedAndDeletedBlocks.length diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java new file mode 100644 index 00000000000..aafa022136e --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java @@ -0,0 +1,201 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.util.PriorityQueue; + +import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; +import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; + +public class PendingDataNodeMessages { + + PriorityQueue queue = new PriorityQueue(); + + enum MessageType { + BLOCK_RECEIVED_DELETE, + BLOCK_REPORT, + COMMIT_BLOCK_SYNCHRONIZATION + } + + static abstract class DataNodeMessage + implements Comparable { + + final MessageType type; + private final long targetGs; + + DataNodeMessage(MessageType type, long targetGenStamp) { + this.type = type; + this.targetGs = targetGenStamp; + } + + protected MessageType getType() { + return type; + } + + protected long getTargetGs() { + return targetGs; + } + + public int compareTo(DataNodeMessage other) { + if (targetGs == other.targetGs) { + return 0; + } else if (targetGs < other.targetGs) { + return -1; + } + return 1; + } + } + + static class BlockReceivedDeleteMessage extends DataNodeMessage { + final DatanodeRegistration nodeReg; + final String poolId; + final ReceivedDeletedBlockInfo[] receivedAndDeletedBlocks; + + BlockReceivedDeleteMessage(DatanodeRegistration nodeReg, String poolId, + ReceivedDeletedBlockInfo[] receivedAndDeletedBlocks, long targetGs) { + super(MessageType.BLOCK_RECEIVED_DELETE, targetGs); + this.nodeReg = nodeReg; + this.poolId = poolId; + this.receivedAndDeletedBlocks = receivedAndDeletedBlocks; + } + + DatanodeRegistration getNodeReg() { + return nodeReg; + } + + String getPoolId() { + return poolId; + } + + ReceivedDeletedBlockInfo[] getReceivedAndDeletedBlocks() { + return receivedAndDeletedBlocks; + } + + public String toString() { + return "BlockReceivedDeletedMessage with " + + receivedAndDeletedBlocks.length + " blocks"; + } + } + + static class CommitBlockSynchronizationMessage extends DataNodeMessage { + + private final ExtendedBlock block; + private final long newgenerationstamp; + private final long newlength; + private final boolean closeFile; + private final boolean deleteblock; + private final DatanodeID[] newtargets; + + CommitBlockSynchronizationMessage(ExtendedBlock block, + long newgenerationstamp, long newlength, boolean closeFile, + boolean deleteblock, DatanodeID[] newtargets, long targetGenStamp) { + super(MessageType.COMMIT_BLOCK_SYNCHRONIZATION, targetGenStamp); + this.block = block; + this.newgenerationstamp = newgenerationstamp; + this.newlength = newlength; + this.closeFile = closeFile; + this.deleteblock = deleteblock; + this.newtargets = newtargets; + } + + ExtendedBlock getBlock() { + return block; + } + + long getNewgenerationstamp() { + return newgenerationstamp; + } + + long getNewlength() { + return newlength; + } + + boolean isCloseFile() { + return closeFile; + } + + boolean isDeleteblock() { + return deleteblock; + } + + DatanodeID[] getNewtargets() { + return newtargets; + } + + public String toString() { + return "CommitBlockSynchronizationMessage for " + block; + } + } + + static class BlockReportMessage extends DataNodeMessage { + + private final DatanodeRegistration nodeReg; + private final String poolId; + private final BlockListAsLongs blockList; + + BlockReportMessage(DatanodeRegistration nodeReg, String poolId, + BlockListAsLongs blist, long targetGenStamp) { + super(MessageType.BLOCK_REPORT, targetGenStamp); + this.nodeReg = nodeReg; + 
this.poolId = poolId; + this.blockList = blist; + } + + DatanodeRegistration getNodeReg() { + return nodeReg; + } + + String getPoolId() { + return poolId; + } + + BlockListAsLongs getBlockList() { + return blockList; + } + + public String toString() { + return "BlockReport from " + nodeReg + " with " + blockList.getNumberOfBlocks() + " blocks"; + } + } + + synchronized void queueMessage(DataNodeMessage msg) { + queue.add(msg); + } + + /** + * Returns a message if contains a message less or equal to the given gs, + * otherwise returns null. + * + * @param gs + */ + synchronized DataNodeMessage take(long gs) { + DataNodeMessage m = queue.peek(); + if (m != null && m.getTargetGs() < gs) { + return queue.remove(); + } else { + return null; + } + } + + synchronized boolean isEmpty() { + return queue.isEmpty(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java new file mode 100644 index 00000000000..c15629f38b8 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -0,0 +1,142 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.IOException; +import java.util.Collection; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream; +import org.apache.hadoop.hdfs.server.namenode.FSEditLog; +import org.apache.hadoop.hdfs.server.namenode.FSImage; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; + +import com.google.common.annotations.VisibleForTesting; + +/** + * EditLogTailer represents a thread which periodically reads from edits + * journals and applies the transactions contained within to a given + * FSNamesystem. 
+ */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class EditLogTailer { + public static final Log LOG = LogFactory.getLog(EditLogTailer.class); + + private final EditLogTailerThread tailerThread; + + public EditLogTailer(FSNamesystem namesystem) { + this.tailerThread = new EditLogTailerThread(namesystem); + } + + public void start() { + tailerThread.start(); + } + + public void stop() throws IOException { + tailerThread.setShouldRun(false); + tailerThread.interrupt(); + try { + tailerThread.join(); + } catch (InterruptedException e) { + LOG.warn("Edit log tailer thread exited with an exception"); + throw new IOException(e); + } + } + + @VisibleForTesting + public void setSleepTime(long sleepTime) { + tailerThread.setSleepTime(sleepTime); + } + + @VisibleForTesting + public void interrupt() { + tailerThread.interrupt(); + } + + /** + * The thread which does the actual work of tailing edits journals and + * applying the transactions to the FSNS. + */ + private static class EditLogTailerThread extends Thread { + + private FSNamesystem namesystem; + private FSImage image; + private FSEditLog editLog; + + private volatile boolean shouldRun = true; + private long sleepTime = 60 * 1000; + + private EditLogTailerThread(FSNamesystem namesystem) { + super("Edit log tailer"); + this.namesystem = namesystem; + image = namesystem.getFSImage(); + editLog = namesystem.getEditLog(); + } + + private void setShouldRun(boolean shouldRun) { + this.shouldRun = shouldRun; + } + + private void setSleepTime(long sleepTime) { + this.sleepTime = sleepTime; + } + + @Override + public void run() { + while (shouldRun) { + try { + long lastTxnId = image.getLastAppliedTxId(); + + if (LOG.isDebugEnabled()) { + LOG.debug("lastTxnId: " + lastTxnId); + } + try { + // At least one record should be available. + Collection streams = editLog + .selectInputStreams(lastTxnId + 1, lastTxnId + 1, false); + if (LOG.isDebugEnabled()) { + LOG.debug("edit streams to load from: " + streams.size()); + } + + long editsLoaded = image.loadEdits(streams, namesystem); + if (LOG.isDebugEnabled()) { + LOG.debug("editsLoaded: " + editsLoaded); + } + } catch (IOException e) { + // Will try again + LOG.info("Got error, will try again.", e); + } + } catch (Throwable t) { + // TODO(HA): What should we do in this case? Shutdown the standby NN? + LOG.error("Edit log tailer received throwable", t); + } + + try { + Thread.sleep(sleepTime); + } catch (InterruptedException e) { + LOG.warn("Edit log tailer interrupted", e); + } + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java index 28f54e86eee..5a4cae8a5e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java @@ -123,7 +123,8 @@ public interface DatanodeProtocol extends VersionedProtocol { * @param registration * @param poolId - the block pool ID for the blocks * @param blocks - the block list as an array of longs. - * Each block is represented as 2 longs. + * Each finalized block is represented as 3 longs. Each under- + * construction replica is represented as 4 longs. * This is done instead of Block[] to reduce memory used by block reports. * * @return - the next command for DN to process. 
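The standby-side RPC changes above and the new PendingDataNodeMessages class implement one recurring pattern: a datanode message whose generation stamp is ahead of what the standby has replayed from the shared edits gets queued, and FSNamesystem.notifyGenStampUpdate() drains the queue once the tailed edits advance the local generation stamp. The sketch below is illustrative only and is not code from this patch; the names StandbyMessageGate, Pending, submit and advance are invented for the example, and the real PendingDataNodeMessages carries typed messages (block received/deleted, block report, commit-block-synchronization) rather than generic Runnables.

import java.util.PriorityQueue;

/** Illustrative sketch: defer work until a monotonically increasing stamp catches up. */
class StandbyMessageGate {

  /** One deferred work item, ordered by the generation stamp it requires. */
  static class Pending implements Comparable<Pending> {
    final long requiredGs;
    final Runnable work;

    Pending(long requiredGs, Runnable work) {
      this.requiredGs = requiredGs;
      this.work = work;
    }

    @Override
    public int compareTo(Pending other) {
      if (requiredGs == other.requiredGs) {
        return 0;
      }
      return requiredGs < other.requiredGs ? -1 : 1;
    }
  }

  private final PriorityQueue<Pending> queue = new PriorityQueue<Pending>();
  private long currentGs = 0;

  /** Roughly analogous to the isGenStampInFuture() check plus queueMessage() in the RPC server. */
  synchronized void submit(long requiredGs, Runnable work) {
    if (requiredGs > currentGs) {
      queue.add(new Pending(requiredGs, work)); // park it until the edits catch up
    } else {
      work.run();                               // safe to process immediately
    }
  }

  /** Roughly analogous to notifyGenStampUpdate(): drain everything the new stamp satisfies. */
  synchronized void advance(long newGs) {
    currentGs = newGs;
    while (!queue.isEmpty() && queue.peek().requiredGs <= currentGs) {
      queue.remove().work.run();
    }
  }
}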
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 11ca83a1fe3..7789c239f7f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -226,6 +226,18 @@ creations/deletions), or "all". directories, for redundancy. Default value is same as dfs.name.dir + + + dfs.namenode.shared.edits.dir + + A directory on shared storage between the multiple namenodes + in an HA cluster. This directory will be written by the active and read + by the standby in order to keep the namespaces synchronized. This directory + does not need to be listed in dfs.namenode.edits.dir above. It should be + left empty in a non-HA cluster. + + + dfs.web.ugi webuser,webgroup diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index cde68487563..2a2699048d1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -606,7 +606,7 @@ public class MiniDFSCluster { if (manageNameDfsDirs) { URI sharedEditsUri = fileAsURI(new File(base_dir, "shared-edits-" + nnCounter + "-through-" + (nnCounter+nnIds.size()-1))); - // TODO in HDFS-1971: conf.set(DFS_NAMENODE_SHARED_EDITS_DIR_KEY, sharedEditsUri.toString()); + conf.set(DFS_NAMENODE_SHARED_EDITS_DIR_KEY, sharedEditsUri.toString()); } } @@ -667,7 +667,10 @@ public class MiniDFSCluster { FileSystem dstFS = FileSystem.getLocal(dstConf).getRaw(); for (URI dstDir : dstDirs) { Preconditions.checkArgument(!dstDir.equals(srcDir)); - Files.deleteRecursively(new File(dstDir)); + File dstDirF = new File(dstDir); + if (dstDirF.exists()) { + Files.deleteRecursively(dstDirF); + } LOG.info("Copying namedir from primary node dir " + srcDir + " to " + dstDir); FileUtil.copy( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java index b8f50842eea..407ec8f5d10 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java @@ -51,6 +51,17 @@ public class MiniDFSNNTopology { .setIpcPort(nameNodePort))); } + + /** + * Set up an HA topology with a single HA nameservice. + */ + public static MiniDFSNNTopology simpleHATopology() { + return new MiniDFSNNTopology() + .addNameservice(new MiniDFSNNTopology.NSConf(null) + .addNN(new MiniDFSNNTopology.NNConf("nn1")) + .addNN(new MiniDFSNNTopology.NNConf("nn2"))); + } + /** * Set up federated cluster with the given number of nameservices, each * of which has only a single NameNode. 
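The dfs.namenode.shared.edits.dir property and MiniDFSNNTopology.simpleHATopology() introduced above are exercised together by the TestEditLogTailer test added later in this patch. The following is a rough, self-contained usage sketch, not code from the patch: the shared-edits path is made up, and when MiniDFSCluster manages the name directories it sets the shared-edits key itself (see the MiniDFSCluster change above), so setting it explicitly here is only for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;

public class SharedEditsClusterSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();
    // Both NameNodes of the HA pair use this directory for the shared log
    // (illustrative local path; any shared storage URI works the same way).
    conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
        "file:///srv/shared-edits");

    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology()) // one nameservice, nn1 + nn2
        .numDataNodes(0)
        .build();
    try {
      cluster.waitActive();
      // NN 0 becomes active and writes the shared edit log; NN 1 stays in
      // standby and tails it via the EditLogTailer added in this patch.
      cluster.transitionToActive(0);
    } finally {
      cluster.shutdown();
    }
  }
}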
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java index 818b2aacc15..211a2070af2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/CreateEditsLog.java @@ -193,7 +193,7 @@ public class CreateEditsLog { FileNameGenerator nameGenerator = new FileNameGenerator(BASE_PATH, 100); FSEditLog editLog = FSImageTestUtil.createStandaloneEditLog(editsLogDir); - editLog.open(); + editLog.openForWrite(); addFiles(editLog, numFiles, replication, numBlocksPerFile, startingBlockId, nameGenerator); editLog.logSync(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java index 032802a3cc0..0269166b57d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java @@ -182,9 +182,11 @@ public abstract class FSImageTestUtil { Mockito.doReturn(sd).when(storage) .getStorageDirectory(Matchers.anyObject()); - return new FSEditLog(new Configuration(), + FSEditLog editLog = new FSEditLog(new Configuration(), storage, ImmutableList.of(logDir.toURI())); + editLog.initJournalsForWrite(); + return editLog; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index 2f14331c9d9..d0aa51f2b19 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -19,13 +19,17 @@ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; +import org.apache.hadoop.fs.UnresolvedLinkException; +import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.security.AccessControlException; /** * This is a utility class to expose NameNode functionality for unit tests. 
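The call-site updates just above (CreateEditsLog and FSImageTestUtil now go through initJournalsForWrite() and openForWrite()) follow the split edit-log lifecycle documented in the FSEditLog state-machine comment earlier in this patch. The toy class below merely restates those documented transitions for readability; it is not the FSEditLog implementation, and the checkState helper is invented for the example (the real code also recovers unclosed streams before reopening for write).

/** Illustrative restatement of the edit-log states documented in FSEditLog. */
class EditLogStateSketch {
  enum State { UNINITIALIZED, BETWEEN_LOG_SEGMENTS, IN_SEGMENT, OPEN_FOR_READING, CLOSED }

  private State state = State.UNINITIALIZED;

  /** Writer path: non-HA startup, or an HA NameNode becoming active. */
  void initJournalsForWrite() {
    checkState(state == State.UNINITIALIZED || state == State.CLOSED);
    state = State.BETWEEN_LOG_SEGMENTS; // previous segment closed, next not yet open
  }

  void openForWrite() {
    checkState(state == State.BETWEEN_LOG_SEGMENTS);
    state = State.IN_SEGMENT;           // edits may now be written
  }

  /** Reader path: a standby NameNode sits here while the tailer applies shared edits. */
  void initSharedJournalsForRead() {
    checkState(state == State.UNINITIALIZED || state == State.CLOSED);
    state = State.OPEN_FOR_READING;
  }

  /** Standby-to-active, as in stopStandbyServices() followed by startActiveServices(). */
  void becomeActive() {
    close();
    initJournalsForWrite();
    openForWrite();
  }

  void close() { state = State.CLOSED; }

  private void checkState(boolean legal) {
    if (!legal) {
      throw new IllegalStateException("Bad state: " + state);
    }
  }
}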
@@ -47,6 +51,32 @@ public class NameNodeAdapter { src, offset, length, false, true); } + public static HdfsFileStatus getFileInfo(NameNode namenode, String src, + boolean resolveLink) throws AccessControlException, UnresolvedLinkException { + return namenode.getNamesystem().getFileInfo(src, resolveLink); + } + + public static boolean mkdirs(NameNode namenode, String src, + PermissionStatus permissions, boolean createParent) + throws UnresolvedLinkException, IOException { + return namenode.getNamesystem().mkdirs(src, permissions, createParent); + } + + public static void saveNamespace(NameNode namenode) + throws AccessControlException, IOException { + namenode.getNamesystem().saveNamespace(); + } + + public static void enterSafeMode(NameNode namenode, boolean resourcesLow) + throws IOException { + namenode.getNamesystem().enterSafeMode(resourcesLow); + } + + public static void leaveSafeMode(NameNode namenode, boolean checkForUpgrades) + throws SafeModeException { + namenode.getNamesystem().leaveSafeMode(checkForUpgrades); + } + /** * Get the internal RPC server instance. * @return rpc server diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index 0d2479319e5..a8a3ac4cb61 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -580,7 +580,6 @@ public class TestEditLog extends TestCase { currentDir.getAbsolutePath()); assertNotNull("No image found in " + nameDir, imageFile); assertEquals(NNStorage.getImageFileName(0), imageFile.getName()); - // Try to start a new cluster LOG.info("\n===========================================\n" + "Starting same cluster after simulated crash"); @@ -772,6 +771,11 @@ public class TestEditLog extends TestCase { public JournalType getType() { return JournalType.FILE; } + + @Override + boolean isInProgress() { + return true; + } } public void testFailedOpen() throws Exception { @@ -780,7 +784,7 @@ public class TestEditLog extends TestCase { FSEditLog log = FSImageTestUtil.createStandaloneEditLog(logDir); try { logDir.setWritable(false); - log.open(); + log.openForWrite(); fail("Did no throw exception on only having a bad dir"); } catch (IOException ioe) { GenericTestUtils.assertExceptionContains( @@ -805,6 +809,7 @@ public class TestEditLog extends TestCase { "[1,100]|[101,200]|[201,]", "[1,100]|[101,200]|[201,]"); log = new FSEditLog(storage); + log.initJournalsForWrite(); assertEquals("[[1,100], [101,200]]", log.getEditLogManifest(1).toString()); assertEquals("[[101,200]]", @@ -816,6 +821,7 @@ public class TestEditLog extends TestCase { "[1,100]|[101,200]", "[1,100]|[201,300]|[301,400]"); // nothing starting at 101 log = new FSEditLog(storage); + log.initJournalsForWrite(); assertEquals("[[1,100], [101,200], [201,300], [301,400]]", log.getEditLogManifest(1).toString()); @@ -825,6 +831,7 @@ public class TestEditLog extends TestCase { "[1,100]|[301,400]", // gap from 101 to 300 "[301,400]|[401,500]"); log = new FSEditLog(storage); + log.initJournalsForWrite(); assertEquals("[[301,400], [401,500]]", log.getEditLogManifest(1).toString()); @@ -834,6 +841,7 @@ public class TestEditLog extends TestCase { "[1,100]|[101,150]", // short log at 101 "[1,50]|[101,200]"); // short log at 1 log = new FSEditLog(storage); + 
log.initJournalsForWrite(); assertEquals("[[1,100], [101,200]]", log.getEditLogManifest(1).toString()); assertEquals("[[101,200]]", @@ -846,6 +854,7 @@ public class TestEditLog extends TestCase { "[1,100]|[101,]", "[1,100]|[101,200]"); log = new FSEditLog(storage); + log.initJournalsForWrite(); assertEquals("[[1,100], [101,200]]", log.getEditLogManifest(1).toString()); assertEquals("[[101,200]]", @@ -938,7 +947,8 @@ public class TestEditLog extends TestCase { // open the edit log and add two transactions // logGenerationStamp is used, simply because it doesn't // require complex arguments. - editlog.open(); + editlog.initJournalsForWrite(); + editlog.openForWrite(); for (int i = 2; i < TXNS_PER_ROLL; i++) { editlog.logGenerationStamp((long)0); } @@ -998,6 +1008,7 @@ public class TestEditLog extends TestCase { new AbortSpec(10, 1)); long totaltxnread = 0; FSEditLog editlog = new FSEditLog(storage); + editlog.initJournalsForWrite(); long startTxId = 1; Iterable editStreams = editlog.selectInputStreams(startTxId, TXNS_PER_ROLL*11); @@ -1047,6 +1058,7 @@ public class TestEditLog extends TestCase { assertTrue(files[0].delete()); FSEditLog editlog = new FSEditLog(storage); + editlog.initJournalsForWrite(); long startTxId = 1; try { Iterable editStreams diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java index e8207053971..e100b7013ae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java @@ -165,7 +165,7 @@ public class TestFSEditLogLoader { SortedMap offsetToTxId = Maps.newTreeMap(); try { fsel = FSImageTestUtil.createStandaloneEditLog(testDir); - fsel.open(); + fsel.openForWrite(); assertTrue("should exist: " + logFile, logFile.exists()); for (int i = 0; i < NUM_TXNS; i++) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java new file mode 100644 index 00000000000..4174a9e5618 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; + +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.FSImage; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.log4j.Level; +import org.junit.Test; + +public class TestEditLogTailer { + + private static final String DIR_PREFIX = "/dir"; + private static final int DIRS_TO_MAKE = 20; + private static final long SLEEP_TIME = 1000; + private static final long NN_LAG_TIMEOUT = 10 * 1000; + + static { + ((Log4JLogger)FSImage.LOG).getLogger().setLevel(Level.ALL); + ((Log4JLogger)EditLogTailer.LOG).getLogger().setLevel(Level.ALL); + } + + @Test + public void testTailer() throws IOException, InterruptedException, + ServiceFailedException { + Configuration conf = new HdfsConfiguration(); + + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + cluster.waitActive(); + + cluster.transitionToActive(0); + + NameNode nn1 = cluster.getNameNode(0); + NameNode nn2 = cluster.getNameNode(1); + nn2.getNamesystem().getEditLogTailer().setSleepTime(250); + nn2.getNamesystem().getEditLogTailer().interrupt(); + try { + for (int i = 0; i < DIRS_TO_MAKE / 2; i++) { + NameNodeAdapter.mkdirs(nn1, getDirPath(i), + new PermissionStatus("test","test", new FsPermission((short)00755)), + true); + } + + waitForStandbyToCatchUp(nn1, nn2); + + for (int i = 0; i < DIRS_TO_MAKE / 2; i++) { + assertTrue(NameNodeAdapter.getFileInfo(nn2, + getDirPath(i), false).isDir()); + } + + for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) { + NameNodeAdapter.mkdirs(nn1, getDirPath(i), + new PermissionStatus("test","test", new FsPermission((short)00755)), + true); + } + + waitForStandbyToCatchUp(nn1, nn2); + + for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) { + assertTrue(NameNodeAdapter.getFileInfo(nn2, + getDirPath(i), false).isDir()); + } + } finally { + cluster.shutdown(); + } + } + + private static String getDirPath(int suffix) { + return DIR_PREFIX + suffix; + } + + private static void waitForStandbyToCatchUp(NameNode active, + NameNode standby) throws InterruptedException, IOException { + + long activeTxId = active.getNamesystem().getFSImage().getEditLog() + .getLastWrittenTxId(); + + doSaveNamespace(active); + + long start = System.currentTimeMillis(); + while (System.currentTimeMillis() - start < NN_LAG_TIMEOUT) { + long nn2HighestTxId = standby.getNamesystem().getFSImage() + .getLastAppliedTxId(); + if (nn2HighestTxId >= activeTxId) { + break; + } + Thread.sleep(SLEEP_TIME); + } + } + + private static void doSaveNamespace(NameNode nn) + throws IOException { + NameNodeAdapter.enterSafeMode(nn, false); + NameNodeAdapter.saveNamespace(nn); + NameNodeAdapter.leaveSafeMode(nn, false); + } + +} From 1e346aa829519f8a2aa830e76d9856f914861805 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 1 Dec 2011 01:10:28 +0000 Subject: [PATCH 027/177] HDFS-1971. 
Send block report from datanode to both active and standby namenodes. (sanjay, todd via suresh) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1208925 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/datanode/BPOfferService.java | 829 ++++++------------ .../hdfs/server/datanode/BPServiceActor.java | 633 +++++++++++++ .../hdfs/server/datanode/BlockReceiver.java | 5 +- .../hadoop/hdfs/server/datanode/DataNode.java | 139 ++- .../server/datanode/TestBPOfferService.java | 282 ++++++ .../TestDataNodeMultipleRegistrations.java | 16 +- .../server/datanode/TestDatanodeRegister.java | 10 +- .../server/datanode/TestRefreshNamenodes.java | 2 +- 9 files changed, 1310 insertions(+), 608 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index e2975e85cff..414b28e908c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -29,3 +29,5 @@ HDFS-2582. Scope dfs.ha.namenodes config by nameservice (todd) HDFS-2591. MiniDFSCluster support to mix and match federation with HA (todd) HDFS-1975. Support for sharing the namenode state from active to standby. (jitendra, atm, todd) + +HDFS-1971. Send block report from datanode to both active and standby namenodes. (sanjay, todd via suresh) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index 4d098ebec2f..85807f6d5ae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -17,62 +17,43 @@ */ package org.apache.hadoop.hdfs.server.datanode; -import static org.apache.hadoop.hdfs.server.common.Util.now; - import java.io.IOException; import java.net.InetSocketAddress; -import java.net.SocketTimeoutException; -import java.net.URI; -import java.util.Collection; -import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.CopyOnWriteArrayList; import org.apache.commons.logging.Log; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.Block; -import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.hdfs.protocol.UnregisteredNodeException; -import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; -import org.apache.hadoop.hdfs.server.common.Storage; -import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.protocol.BalancerBandwidthCommand; import org.apache.hadoop.hdfs.server.protocol.BlockCommand; import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import 
org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; -import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException; import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand; import org.apache.hadoop.hdfs.server.protocol.KeyUpdateCommand; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand; import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.ipc.RemoteException; -import org.apache.hadoop.util.StringUtils; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; /** - * A thread per namenode to perform: - *

 <ul>
- * <li> Pre-registration handshake with namenode</li>
- * <li> Registration with namenode</li>
- * <li> Send periodic heartbeats to the namenode</li>
- * <li> Handle commands received from the namenode</li>
- * </ul>
      + * One instance per block-pool/namespace on the DN, which handles the + * heartbeats to the active and standby NNs for that namespace. + * This class manages an instance of {@link BPServiceActor} for each NN, + * and delegates calls to both NNs. + * It also maintains the state about which of the NNs is considered active. */ @InterfaceAudience.Private -class BPOfferService implements Runnable { +class BPOfferService { static final Log LOG = DataNode.LOG; - final InetSocketAddress nnAddr; - /** * Information about the namespace that this service * is registering with. This is assigned after @@ -87,27 +68,25 @@ class BPOfferService implements Runnable { */ DatanodeRegistration bpRegistration; - long lastBlockReport = 0; - long lastDeletedReport = 0; - - boolean resetBlockReportTime = true; - - Thread bpThread; - DatanodeProtocol bpNamenode; - private long lastHeartbeat = 0; - private volatile boolean initialized = false; - private final LinkedList receivedAndDeletedBlockList - = new LinkedList(); - private volatile int pendingReceivedRequests = 0; - private volatile boolean shouldServiceRun = true; UpgradeManagerDatanode upgradeManager = null; private final DataNode dn; - private final DNConf dnConf; - BPOfferService(InetSocketAddress nnAddr, DataNode dn) { + private BPServiceActor bpServiceToActive; + private List bpServices = + new CopyOnWriteArrayList(); + + BPOfferService(List nnAddrs, DataNode dn) { + Preconditions.checkArgument(!nnAddrs.isEmpty(), + "Must pass at least one NN."); this.dn = dn; - this.nnAddr = nnAddr; - this.dnConf = dn.getDnConf(); + + for (InetSocketAddress addr : nnAddrs) { + this.bpServices.add(new BPServiceActor(addr, this)); + } + // TODO(HA): currently we just make the first one the initial + // active. In reality it should start in an unknown state and then + // as we figure out which is active, designate one as such. + this.bpServiceToActive = this.bpServices.get(0); } /** @@ -115,15 +94,18 @@ class BPOfferService implements Runnable { * and has registered with the corresponding namenode * @return true if initialized */ - public boolean isInitialized() { - return initialized; + boolean isInitialized() { + // TODO(HA) is this right? + return bpServiceToActive != null && bpServiceToActive.isInitialized(); } - public boolean isAlive() { - return shouldServiceRun && bpThread.isAlive(); + boolean isAlive() { + // TODO: should || all the bp actors probably? + return bpServiceToActive != null && + bpServiceToActive.isAlive(); } - public String getBlockPoolId() { + String getBlockPoolId() { if (bpNSInfo != null) { return bpNSInfo.getBlockPoolID(); } else { @@ -133,10 +115,11 @@ class BPOfferService implements Runnable { } } - public NamespaceInfo getNamespaceInfo() { + NamespaceInfo getNamespaceInfo() { return bpNSInfo; } + @Override public String toString() { if (bpNSInfo == null) { // If we haven't yet connected to our NN, we don't yet know our @@ -148,519 +131,279 @@ class BPOfferService implements Runnable { storageId = "unknown"; } return "Block pool (storage id " + storageId + - ") connecting to " + nnAddr; + ")"; } else { return "Block pool " + getBlockPoolId() + " (storage id " + dn.getStorageId() + - ") registered with " + nnAddr; + ")"; } } - InetSocketAddress getNNSocketAddress() { - return nnAddr; - } - - /** - * Used to inject a spy NN in the unit tests. - */ - @VisibleForTesting - void setNameNode(DatanodeProtocol dnProtocol) { - bpNamenode = dnProtocol; - } - - /** - * Perform the first part of the handshake with the NameNode. 
- * This calls versionRequest to determine the NN's - * namespace and version info. It automatically retries until - * the NN responds or the DN is shutting down. - * - * @return the NamespaceInfo - * @throws IncorrectVersionException if the remote NN does not match - * this DN's version - */ - NamespaceInfo retrieveNamespaceInfo() throws IncorrectVersionException { - NamespaceInfo nsInfo = null; - while (shouldRun()) { - try { - nsInfo = bpNamenode.versionRequest(); - LOG.debug(this + " received versionRequest response: " + nsInfo); - break; - } catch(SocketTimeoutException e) { // namenode is busy - LOG.warn("Problem connecting to server: " + nnAddr); - } catch(IOException e ) { // namenode is not available - LOG.warn("Problem connecting to server: " + nnAddr); - } - - // try again in a second - sleepAndLogInterrupts(5000, "requesting version info from NN"); - } - - if (nsInfo != null) { - checkNNVersion(nsInfo); - } - return nsInfo; - } - - private void checkNNVersion(NamespaceInfo nsInfo) - throws IncorrectVersionException { - // build and layout versions should match - String nsBuildVer = nsInfo.getBuildVersion(); - String stBuildVer = Storage.getBuildVersion(); - if (!nsBuildVer.equals(stBuildVer)) { - LOG.warn("Data-node and name-node Build versions must be the same. " + - "Namenode build version: " + nsBuildVer + "Datanode " + - "build version: " + stBuildVer); - throw new IncorrectVersionException(nsBuildVer, "namenode", stBuildVer); - } - - if (HdfsConstants.LAYOUT_VERSION != nsInfo.getLayoutVersion()) { - LOG.warn("Data-node and name-node layout versions must be the same." + - " Expected: "+ HdfsConstants.LAYOUT_VERSION + - " actual "+ bpNSInfo.getLayoutVersion()); - throw new IncorrectVersionException( - bpNSInfo.getLayoutVersion(), "namenode"); - } - } - - private void connectToNNAndHandshake() throws IOException { - // get NN proxy - bpNamenode = (DatanodeProtocol)RPC.waitForProxy(DatanodeProtocol.class, - DatanodeProtocol.versionID, nnAddr, dn.getConf()); - - // First phase of the handshake with NN - get the namespace - // info. - bpNSInfo = retrieveNamespaceInfo(); - - // Now that we know the namespace ID, etc, we can pass this to the DN. - // The DN can now initialize its local storage if we are the - // first BP to handshake, etc. - dn.initBlockPool(this); - - // Second phase of the handshake with the NN. - register(); - } - - /** - * This methods arranges for the data node to send the block report at - * the next heartbeat. - */ - void scheduleBlockReport(long delay) { - if (delay > 0) { // send BR after random delay - lastBlockReport = System.currentTimeMillis() - - ( dnConf.blockReportInterval - DFSUtil.getRandom().nextInt((int)(delay))); - } else { // send at next heartbeat - lastBlockReport = lastHeartbeat - dnConf.blockReportInterval; - } - resetBlockReportTime = true; // reset future BRs for randomness - } - void reportBadBlocks(ExtendedBlock block) { - DatanodeInfo[] dnArr = { new DatanodeInfo(bpRegistration) }; - LocatedBlock[] blocks = { new LocatedBlock(block, dnArr) }; - - try { - bpNamenode.reportBadBlocks(blocks); - } catch (IOException e){ - /* One common reason is that NameNode could be in safe mode. - * Should we keep on retrying in that case? 
- */ - LOG.warn("Failed to report bad block " + block + " to namenode : " - + " Exception", e); + checkBlock(block); + for (BPServiceActor actor : bpServices) { + actor.reportBadBlocks(block); } - } - /** - * Report received blocks and delete hints to the Namenode - * - * @throws IOException - */ - private void reportReceivedDeletedBlocks() throws IOException { - - // check if there are newly received blocks - ReceivedDeletedBlockInfo[] receivedAndDeletedBlockArray = null; - int currentReceivedRequestsCounter; - synchronized (receivedAndDeletedBlockList) { - currentReceivedRequestsCounter = pendingReceivedRequests; - int numBlocks = receivedAndDeletedBlockList.size(); - if (numBlocks > 0) { - // - // Send newly-received and deleted blockids to namenode - // - receivedAndDeletedBlockArray = receivedAndDeletedBlockList - .toArray(new ReceivedDeletedBlockInfo[numBlocks]); - } - } - if (receivedAndDeletedBlockArray != null) { - bpNamenode.blockReceivedAndDeleted(bpRegistration, getBlockPoolId(), - receivedAndDeletedBlockArray); - synchronized (receivedAndDeletedBlockList) { - for (int i = 0; i < receivedAndDeletedBlockArray.length; i++) { - receivedAndDeletedBlockList.remove(receivedAndDeletedBlockArray[i]); - } - pendingReceivedRequests -= currentReceivedRequestsCounter; - } - } - } - /* * Informing the name node could take a long long time! Should we wait * till namenode is informed before responding with success to the * client? For now we don't. */ void notifyNamenodeReceivedBlock(ExtendedBlock block, String delHint) { - if (block == null || delHint == null) { - throw new IllegalArgumentException(block == null ? "Block is null" - : "delHint is null"); + checkBlock(block); + checkDelHint(delHint); + ReceivedDeletedBlockInfo bInfo = + new ReceivedDeletedBlockInfo(block.getLocalBlock(), delHint); + for (BPServiceActor actor : bpServices) { + actor.notifyNamenodeReceivedBlock(bInfo); } + } - if (!block.getBlockPoolId().equals(getBlockPoolId())) { - LOG.warn("BlockPool mismatch " + block.getBlockPoolId() + " vs. " - + getBlockPoolId()); - return; - } - - synchronized (receivedAndDeletedBlockList) { - receivedAndDeletedBlockList.add(new ReceivedDeletedBlockInfo(block - .getLocalBlock(), delHint)); - pendingReceivedRequests++; - receivedAndDeletedBlockList.notifyAll(); - } + private void checkBlock(ExtendedBlock block) { + Preconditions.checkArgument(block != null, + "block is null"); + Preconditions.checkArgument(block.getBlockPoolId().equals(getBlockPoolId()), + "block belongs to BP %s instead of BP %s", + block.getBlockPoolId(), getBlockPoolId()); + } + + private void checkDelHint(String delHint) { + Preconditions.checkArgument(delHint != null, + "delHint is null"); } void notifyNamenodeDeletedBlock(ExtendedBlock block) { - if (block == null) { - throw new IllegalArgumentException("Block is null"); - } - - if (!block.getBlockPoolId().equals(getBlockPoolId())) { - LOG.warn("BlockPool mismatch " + block.getBlockPoolId() + " vs. 
" - + getBlockPoolId()); - return; - } - - synchronized (receivedAndDeletedBlockList) { - receivedAndDeletedBlockList.add(new ReceivedDeletedBlockInfo(block - .getLocalBlock(), ReceivedDeletedBlockInfo.TODELETE_HINT)); + checkBlock(block); + ReceivedDeletedBlockInfo bInfo = new ReceivedDeletedBlockInfo(block + .getLocalBlock(), ReceivedDeletedBlockInfo.TODELETE_HINT); + + for (BPServiceActor actor : bpServices) { + actor.notifyNamenodeDeletedBlock(bInfo); } } - - /** - * Report the list blocks to the Namenode - * @throws IOException - */ - DatanodeCommand blockReport() throws IOException { - // send block report if timer has expired. - DatanodeCommand cmd = null; - long startTime = now(); - if (startTime - lastBlockReport > dnConf.blockReportInterval) { - - // Create block report - long brCreateStartTime = now(); - BlockListAsLongs bReport = dn.data.getBlockReport(getBlockPoolId()); - - // Send block report - long brSendStartTime = now(); - cmd = bpNamenode.blockReport(bpRegistration, getBlockPoolId(), bReport - .getBlockListAsLongs()); - - // Log the block report processing stats from Datanode perspective - long brSendCost = now() - brSendStartTime; - long brCreateCost = brSendStartTime - brCreateStartTime; - dn.metrics.addBlockReport(brSendCost); - LOG.info("BlockReport of " + bReport.getNumberOfBlocks() - + " blocks took " + brCreateCost + " msec to generate and " - + brSendCost + " msecs for RPC and NN processing"); - - // If we have sent the first block report, then wait a random - // time before we start the periodic block reports. - if (resetBlockReportTime) { - lastBlockReport = startTime - DFSUtil.getRandom().nextInt((int)(dnConf.blockReportInterval)); - resetBlockReportTime = false; - } else { - /* say the last block report was at 8:20:14. The current report - * should have started around 9:20:14 (default 1 hour interval). - * If current time is : - * 1) normal like 9:20:18, next report should be at 10:20:14 - * 2) unexpected like 11:35:43, next report should be at 12:20:14 - */ - lastBlockReport += (now() - lastBlockReport) / - dnConf.blockReportInterval * dnConf.blockReportInterval; - } - LOG.info("sent block report, processed command:" + cmd); - } - return cmd; - } - - - DatanodeCommand [] sendHeartBeat() throws IOException { - return bpNamenode.sendHeartbeat(bpRegistration, - dn.data.getCapacity(), - dn.data.getDfsUsed(), - dn.data.getRemaining(), - dn.data.getBlockPoolUsed(getBlockPoolId()), - dn.xmitsInProgress.get(), - dn.getXceiverCount(), dn.data.getNumFailedVolumes()); - } - //This must be called only by blockPoolManager void start() { - if ((bpThread != null) && (bpThread.isAlive())) { - //Thread is started already - return; + for (BPServiceActor actor : bpServices) { + actor.start(); } - bpThread = new Thread(this, formatThreadName()); - bpThread.setDaemon(true); // needed for JUnit testing - bpThread.start(); - } - - private String formatThreadName() { - Collection dataDirs = DataNode.getStorageDirs(dn.getConf()); - return "DataNode: [" + - StringUtils.uriToString(dataDirs.toArray(new URI[0])) + "] " + - " heartbeating to " + nnAddr; } //This must be called only by blockPoolManager. 
void stop() { - shouldServiceRun = false; - if (bpThread != null) { - bpThread.interrupt(); + for (BPServiceActor actor : bpServices) { + actor.stop(); } } //This must be called only by blockPoolManager void join() { - try { - if (bpThread != null) { - bpThread.join(); - } - } catch (InterruptedException ie) { } + for (BPServiceActor actor : bpServices) { + actor.join(); + } + } + + synchronized UpgradeManagerDatanode getUpgradeManager() { + if(upgradeManager == null) + upgradeManager = + new UpgradeManagerDatanode(dn, getBlockPoolId()); + + return upgradeManager; } - //Cleanup method to be called by current thread before exiting. - private synchronized void cleanUp() { - - if(upgradeManager != null) - upgradeManager.shutdownUpgrade(); - shouldServiceRun = false; - RPC.stopProxy(bpNamenode); - dn.shutdownBlockPool(this); + void processDistributedUpgradeCommand(UpgradeCommand comm) + throws IOException { + UpgradeManagerDatanode upgradeManager = getUpgradeManager(); + upgradeManager.processUpgradeCommand(comm); } /** - * Main loop for each BP thread. Run until shutdown, - * forever calling remote NameNode functions. + * Start distributed upgrade if it should be initiated by the data-node. */ - private void offerService() throws Exception { - LOG.info("For namenode " + nnAddr + " using DELETEREPORT_INTERVAL of " - + dnConf.deleteReportInterval + " msec " + " BLOCKREPORT_INTERVAL of " - + dnConf.blockReportInterval + "msec" + " Initial delay: " - + dnConf.initialBlockReportDelay + "msec" + "; heartBeatInterval=" - + dnConf.heartBeatInterval); - - // - // Now loop for a long time.... - // - while (shouldRun()) { - try { - long startTime = now(); - - // - // Every so often, send heartbeat or block-report - // - if (startTime - lastHeartbeat > dnConf.heartBeatInterval) { - // - // All heartbeat messages include following info: - // -- Datanode name - // -- data transfer port - // -- Total capacity - // -- Bytes remaining - // - lastHeartbeat = startTime; - if (!dn.areHeartbeatsDisabledForTests()) { - DatanodeCommand[] cmds = sendHeartBeat(); - dn.metrics.addHeartbeat(now() - startTime); - - long startProcessCommands = now(); - if (!processCommand(cmds)) - continue; - long endProcessCommands = now(); - if (endProcessCommands - startProcessCommands > 2000) { - LOG.info("Took " + (endProcessCommands - startProcessCommands) + - "ms to process " + cmds.length + " commands from NN"); - } - } - } - if (pendingReceivedRequests > 0 - || (startTime - lastDeletedReport > dnConf.deleteReportInterval)) { - reportReceivedDeletedBlocks(); - lastDeletedReport = startTime; - } - - DatanodeCommand cmd = blockReport(); - processCommand(cmd); - - // Now safe to start scanning the block pool - if (dn.blockScanner != null) { - dn.blockScanner.addBlockPool(this.getBlockPoolId()); - } - - // - // There is no work to do; sleep until hearbeat timer elapses, - // or work arrives, and then iterate again. 
- // - long waitTime = dnConf.heartBeatInterval - - (System.currentTimeMillis() - lastHeartbeat); - synchronized(receivedAndDeletedBlockList) { - if (waitTime > 0 && pendingReceivedRequests == 0) { - try { - receivedAndDeletedBlockList.wait(waitTime); - } catch (InterruptedException ie) { - LOG.warn("BPOfferService for " + this + " interrupted"); - } - } - } // synchronized - } catch(RemoteException re) { - String reClass = re.getClassName(); - if (UnregisteredNodeException.class.getName().equals(reClass) || - DisallowedDatanodeException.class.getName().equals(reClass) || - IncorrectVersionException.class.getName().equals(reClass)) { - LOG.warn(this + " is shutting down", re); - shouldServiceRun = false; - return; - } - LOG.warn("RemoteException in offerService", re); - try { - long sleepTime = Math.min(1000, dnConf.heartBeatInterval); - Thread.sleep(sleepTime); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - } - } catch (IOException e) { - LOG.warn("IOException in offerService", e); - } - } // while (shouldRun()) - } // offerService - - /** - * Register one bp with the corresponding NameNode - *

      - * The bpDatanode needs to register with the namenode on startup in order - * 1) to report which storage it is serving now and - * 2) to receive a registrationID - * - * issued by the namenode to recognize registered datanodes. - * - * @see FSNamesystem#registerDatanode(DatanodeRegistration) - * @throws IOException - */ - void register() throws IOException { - Preconditions.checkState(bpNSInfo != null, - "register() should be called after handshake()"); + synchronized void startDistributedUpgradeIfNeeded() throws IOException { + UpgradeManagerDatanode um = getUpgradeManager(); - // The handshake() phase loaded the block pool storage - // off disk - so update the bpRegistration object from that info - bpRegistration = dn.createBPRegistration(bpNSInfo); - - LOG.info(this + " beginning handshake with NN"); - - while (shouldRun()) { - try { - // Use returned registration from namenode with updated machine name. - bpRegistration = bpNamenode.registerDatanode(bpRegistration); - break; - } catch(SocketTimeoutException e) { // namenode is busy - LOG.info("Problem connecting to server: " + nnAddr); - sleepAndLogInterrupts(1000, "connecting to server"); - } - } - - LOG.info("Block pool " + this + " successfully registered with NN"); - dn.bpRegistrationSucceeded(bpRegistration, getBlockPoolId()); - - // random short delay - helps scatter the BR from all DNs - scheduleBlockReport(dnConf.initialBlockReportDelay); + if(!um.getUpgradeState()) + return; + um.setUpgradeState(false, um.getUpgradeVersion()); + um.startUpgrade(); + return; } - - - private void sleepAndLogInterrupts(int millis, - String stateString) { - try { - Thread.sleep(millis); - } catch (InterruptedException ie) { - LOG.info("BPOfferService " + this + - " interrupted while " + stateString); - } + + DataNode getDataNode() { + return dn; } /** - * No matter what kind of exception we get, keep retrying to offerService(). - * That's the loop that connects to the NameNode and provides basic DataNode - * functionality. - * - * Only stop when "shouldRun" or "shouldServiceRun" is turned off, which can - * happen either at shutdown or due to refreshNamenodes. + * Called by the BPServiceActors when they handshake to a NN. + * If this is the first NN connection, this sets the namespace info + * for this BPOfferService. If it's a connection to a new NN, it + * verifies that this namespace matches (eg to prevent a misconfiguration + * where a StandbyNode from a different cluster is specified) */ - @Override - public void run() { - LOG.info(this + " starting to offer service"); - - try { - // init stuff - try { - // setup storage - connectToNNAndHandshake(); - } catch (IOException ioe) { - // Initial handshake, storage recovery or registration failed - // End BPOfferService thread - LOG.fatal("Initialization failed for block pool " + this, ioe); - return; - } - - initialized = true; // bp is initialized; + void verifyAndSetNamespaceInfo(NamespaceInfo nsInfo) throws IOException { + if (this.bpNSInfo == null) { + this.bpNSInfo = nsInfo; - while (shouldRun()) { - try { - startDistributedUpgradeIfNeeded(); - offerService(); - } catch (Exception ex) { - LOG.error("Exception in BPOfferService for " + this, ex); - sleepAndLogInterrupts(5000, "offering service"); - } - } - } catch (Throwable ex) { - LOG.warn("Unexpected exception in block pool " + this, ex); - } finally { - LOG.warn("Ending block pool service for: " + this); - cleanUp(); + // Now that we know the namespace ID, etc, we can pass this to the DN. 
+ // The DN can now initialize its local storage if we are the + // first BP to handshake, etc. + dn.initBlockPool(this); + return; + } else { + checkNSEquality(bpNSInfo.getBlockPoolID(), nsInfo.getBlockPoolID(), + "Blockpool ID"); + checkNSEquality(bpNSInfo.getNamespaceID(), nsInfo.getNamespaceID(), + "Namespace ID"); + checkNSEquality(bpNSInfo.getClusterID(), nsInfo.getClusterID(), + "Cluster ID"); } } - private boolean shouldRun() { - return shouldServiceRun && dn.shouldRun(); - } - /** - * Process an array of datanode commands - * - * @param cmds an array of datanode commands - * @return true if further processing may be required or false otherwise. + * After one of the BPServiceActors registers successfully with the + * NN, it calls this function to verify that the NN it connected to + * is consistent with other NNs serving the block-pool. */ - private boolean processCommand(DatanodeCommand[] cmds) { - if (cmds != null) { - for (DatanodeCommand cmd : cmds) { - try { - if (processCommand(cmd) == false) { - return false; - } - } catch (IOException ioe) { - LOG.warn("Error processing datanode Command", ioe); - } + void registrationSucceeded(BPServiceActor bpServiceActor, + DatanodeRegistration reg) throws IOException { + if (bpRegistration != null) { + checkNSEquality(bpRegistration.storageInfo.getNamespaceID(), + reg.storageInfo.getNamespaceID(), "namespace ID"); + checkNSEquality(bpRegistration.storageInfo.getClusterID(), + reg.storageInfo.getClusterID(), "cluster ID"); + } else { + bpRegistration = reg; + } + } + + /** + * Verify equality of two namespace-related fields, throwing + * an exception if they are unequal. + */ + private static void checkNSEquality( + Object ourID, Object theirID, + String idHelpText) throws IOException { + if (!ourID.equals(theirID)) { + throw new IOException(idHelpText + " mismatch: " + + "previously connected to " + idHelpText + " " + ourID + + " but now connected to " + idHelpText + " " + theirID); + } + } + + DatanodeRegistration createRegistration() { + Preconditions.checkState(bpNSInfo != null, + "getRegistration() can only be called after initial handshake"); + return dn.createBPRegistration(bpNSInfo); + } + + /** + * Called when an actor shuts down. If this is the last actor + * to shut down, shuts down the whole blockpool in the DN. + */ + void shutdownActor(BPServiceActor actor) { + if (bpServiceToActive == actor) { + bpServiceToActive = null; + } + + bpServices.remove(actor); + + // TODO: synchronization should be a little better here + if (bpServices.isEmpty()) { + dn.shutdownBlockPool(this); + + if(upgradeManager != null) + upgradeManager.shutdownUpgrade(); + } + } + + @Deprecated + InetSocketAddress getNNSocketAddress() { + // TODO(HA) this doesn't make sense anymore + return bpServiceToActive.getNNSocketAddress(); + } + + /** + * Called by the DN to report an error to the NNs. + */ + void trySendErrorReport(int errCode, String errMsg) { + for (BPServiceActor actor : bpServices) { + actor.trySendErrorReport(errCode, errMsg); + } + } + + /** + * Ask each of the actors to schedule a block report after + * the specified delay. + */ + void scheduleBlockReport(long delay) { + for (BPServiceActor actor : bpServices) { + actor.scheduleBlockReport(delay); + } + } + + /** + * Ask each of the actors to report a bad block hosted on another DN. 
+ */ + void reportRemoteBadBlock(DatanodeInfo dnInfo, ExtendedBlock block) { + for (BPServiceActor actor : bpServices) { + try { + actor.reportRemoteBadBlock(dnInfo, block); + } catch (IOException e) { + LOG.warn("Couldn't report bad block " + block + " to " + actor, + e); } } - return true; + } + + /** + * TODO: this is still used in a few places where we need to sort out + * what to do in HA! + * @return a proxy to the active NN + */ + @Deprecated + DatanodeProtocol getActiveNN() { + return bpServiceToActive.bpNamenode; + } + + /** + * @return true if the given NN address is one of the NNs for this + * block pool + */ + boolean containsNN(InetSocketAddress addr) { + for (BPServiceActor actor : bpServices) { + if (actor.getNNSocketAddress().equals(addr)) { + return true; + } + } + return false; + } + + @VisibleForTesting + int countNameNodes() { + return bpServices.size(); + } + + /** + * Run an immediate block report on this thread. Used by tests. + */ + @VisibleForTesting + void triggerBlockReportForTests() throws IOException { + for (BPServiceActor actor : bpServices) { + actor.triggerBlockReportForTests(); + } + } + + boolean processCommandFromActor(DatanodeCommand cmd, + BPServiceActor actor) throws IOException { + assert bpServices.contains(actor); + if (actor == bpServiceToActive) { + return processCommandFromActive(cmd, actor); + } else { + return processCommandFromStandby(cmd, actor); + } } /** @@ -669,7 +412,8 @@ class BPOfferService implements Runnable { * @return true if further processing may be required or false otherwise. * @throws IOException */ - private boolean processCommand(DatanodeCommand cmd) throws IOException { + private boolean processCommandFromActive(DatanodeCommand cmd, + BPServiceActor actor) throws IOException { if (cmd == null) return true; final BlockCommand bcmd = @@ -700,19 +444,12 @@ class BPOfferService implements Runnable { dn.metrics.incrBlocksRemoved(toDelete.length); break; case DatanodeProtocol.DNA_SHUTDOWN: - // shut down the data node - shouldServiceRun = false; - return false; + // TODO: DNA_SHUTDOWN appears to be unused - the NN never sends this command + throw new UnsupportedOperationException("Received unimplemented DNA_SHUTDOWN"); case DatanodeProtocol.DNA_REGISTER: // namenode requested a registration - at start or if NN lost contact LOG.info("DatanodeCommand action: DNA_REGISTER"); - if (shouldRun()) { - // re-retrieve namespace info to make sure that, if the NN - // was restarted, we still match its version (HDFS-2120) - retrieveNamespaceInfo(); - // and re-register - register(); - } + actor.reRegister(); break; case DatanodeProtocol.DNA_FINALIZE: String bp = ((FinalizeCommand) cmd).getBlockPoolId(); @@ -732,7 +469,8 @@ class BPOfferService implements Runnable { case DatanodeProtocol.DNA_ACCESSKEYUPDATE: LOG.info("DatanodeCommand action: DNA_ACCESSKEYUPDATE"); if (dn.isBlockTokenEnabled) { - dn.blockPoolTokenSecretManager.setKeys(getBlockPoolId(), + dn.blockPoolTokenSecretManager.setKeys( + getBlockPoolId(), ((KeyUpdateCommand) cmd).getExportedKeys()); } break; @@ -751,32 +489,39 @@ class BPOfferService implements Runnable { } return true; } - - private void processDistributedUpgradeCommand(UpgradeCommand comm) - throws IOException { - UpgradeManagerDatanode upgradeManager = getUpgradeManager(); - upgradeManager.processUpgradeCommand(comm); + + private boolean processCommandFromStandby(DatanodeCommand cmd, + BPServiceActor actor) throws IOException { + if (cmd == null) + return true; + switch(cmd.getAction()) { + case 
DatanodeProtocol.DNA_REGISTER: + // namenode requested a registration - at start or if NN lost contact + LOG.info("DatanodeCommand action: DNA_REGISTER"); + actor.reRegister(); + return true; + case DatanodeProtocol.DNA_TRANSFER: + case DatanodeProtocol.DNA_INVALIDATE: + case DatanodeProtocol.DNA_SHUTDOWN: + case DatanodeProtocol.DNA_RECOVERBLOCK: + case DatanodeProtocol.DNA_ACCESSKEYUPDATE: + case DatanodeProtocol.DNA_BALANCERBANDWIDTHUPDATE: + LOG.warn("Got a command from standby NN - ignoring command:" + cmd.getAction()); + return true; + default: + LOG.warn("Unknown DatanodeCommand action: " + cmd.getAction()); + } + return true; } - synchronized UpgradeManagerDatanode getUpgradeManager() { - if(upgradeManager == null) - upgradeManager = - new UpgradeManagerDatanode(dn, getBlockPoolId()); - - return upgradeManager; - } - /** - * Start distributed upgrade if it should be initiated by the data-node. + * Connect to the NN at the given address. This is separated out for ease + * of testing. */ - private void startDistributedUpgradeIfNeeded() throws IOException { - UpgradeManagerDatanode um = getUpgradeManager(); - - if(!um.getUpgradeState()) - return; - um.setUpgradeState(false, um.getUpgradeVersion()); - um.startUpgrade(); - return; + DatanodeProtocol connectToNN(InetSocketAddress nnAddr) + throws IOException { + return (DatanodeProtocol)RPC.waitForProxy(DatanodeProtocol.class, + DatanodeProtocol.versionID, nnAddr, dn.getConf()); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java new file mode 100644 index 00000000000..2c4a15bf819 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -0,0 +1,633 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.datanode; + +import static org.apache.hadoop.hdfs.server.common.Util.now; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.SocketTimeoutException; +import java.net.URI; +import java.util.Collection; +import java.util.LinkedList; + +import org.apache.commons.logging.Log; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.UnregisteredNodeException; +import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; +import org.apache.hadoop.hdfs.server.common.Storage; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; +import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; +import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException; +import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; +import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.util.StringUtils; + +import com.google.common.annotations.VisibleForTesting; + +/** + * A thread per active or standby namenode to perform: + *

 <ul>
+ * <li> Pre-registration handshake with namenode</li>
+ * <li> Registration with namenode</li>
+ * <li> Send periodic heartbeats to the namenode</li>
+ * <li> Handle commands received from the namenode</li>
+ * </ul>
      + */ +@InterfaceAudience.Private +class BPServiceActor implements Runnable { + + static final Log LOG = DataNode.LOG; + final InetSocketAddress nnAddr; + + BPOfferService bpos; + + long lastBlockReport = 0; + long lastDeletedReport = 0; + + boolean resetBlockReportTime = true; + + Thread bpThread; + DatanodeProtocol bpNamenode; + private long lastHeartbeat = 0; + private volatile boolean initialized = false; + private final LinkedList receivedAndDeletedBlockList + = new LinkedList(); + private volatile int pendingReceivedRequests = 0; + private volatile boolean shouldServiceRun = true; + private final DataNode dn; + private final DNConf dnConf; + + private DatanodeRegistration bpRegistration; + + BPServiceActor(InetSocketAddress nnAddr, BPOfferService bpos) { + this.bpos = bpos; + this.dn = bpos.getDataNode(); + this.nnAddr = nnAddr; + this.dnConf = dn.getDnConf(); + } + + /** + * returns true if BP thread has completed initialization of storage + * and has registered with the corresponding namenode + * @return true if initialized + */ + boolean isInitialized() { + return initialized; + } + + boolean isAlive() { + return shouldServiceRun && bpThread.isAlive(); + } + + @Override + public String toString() { + return bpos.toString() + " service to " + nnAddr; + } + + InetSocketAddress getNNSocketAddress() { + return nnAddr; + } + + /** + * Used to inject a spy NN in the unit tests. + */ + @VisibleForTesting + void setNameNode(DatanodeProtocol dnProtocol) { + bpNamenode = dnProtocol; + } + + /** + * Perform the first part of the handshake with the NameNode. + * This calls versionRequest to determine the NN's + * namespace and version info. It automatically retries until + * the NN responds or the DN is shutting down. + * + * @return the NamespaceInfo + */ + @VisibleForTesting + NamespaceInfo retrieveNamespaceInfo() throws IOException { + NamespaceInfo nsInfo = null; + while (shouldRun()) { + try { + nsInfo = bpNamenode.versionRequest(); + LOG.debug(this + " received versionRequest response: " + nsInfo); + break; + } catch(SocketTimeoutException e) { // namenode is busy + LOG.warn("Problem connecting to server: " + nnAddr); + } catch(IOException e ) { // namenode is not available + LOG.warn("Problem connecting to server: " + nnAddr); + } + + // try again in a second + sleepAndLogInterrupts(5000, "requesting version info from NN"); + } + + if (nsInfo != null) { + checkNNVersion(nsInfo); + } else { + throw new IOException("DN shut down before block pool connected"); + } + return nsInfo; + } + + private void checkNNVersion(NamespaceInfo nsInfo) + throws IncorrectVersionException { + // build and layout versions should match + String nsBuildVer = nsInfo.getBuildVersion(); + String stBuildVer = Storage.getBuildVersion(); + if (!nsBuildVer.equals(stBuildVer)) { + LOG.warn("Data-node and name-node Build versions must be the same. " + + "Namenode build version: " + nsBuildVer + "Datanode " + + "build version: " + stBuildVer); + throw new IncorrectVersionException(nsBuildVer, "namenode", stBuildVer); + } + + if (HdfsConstants.LAYOUT_VERSION != nsInfo.getLayoutVersion()) { + LOG.warn("Data-node and name-node layout versions must be the same." 
+ + " Expected: "+ HdfsConstants.LAYOUT_VERSION + + " actual "+ nsInfo.getLayoutVersion()); + throw new IncorrectVersionException( + nsInfo.getLayoutVersion(), "namenode"); + } + } + + private void connectToNNAndHandshake() throws IOException { + // get NN proxy + bpNamenode = bpos.connectToNN(nnAddr); + + // First phase of the handshake with NN - get the namespace + // info. + NamespaceInfo nsInfo = retrieveNamespaceInfo(); + + // Verify that this matches the other NN in this HA pair. + // This also initializes our block pool in the DN if we are + // the first NN connection for this BP. + bpos.verifyAndSetNamespaceInfo(nsInfo); + + // Second phase of the handshake with the NN. + register(); + } + + /** + * This methods arranges for the data node to send the block report at + * the next heartbeat. + */ + void scheduleBlockReport(long delay) { + if (delay > 0) { // send BR after random delay + lastBlockReport = System.currentTimeMillis() + - ( dnConf.blockReportInterval - DFSUtil.getRandom().nextInt((int)(delay))); + } else { // send at next heartbeat + lastBlockReport = lastHeartbeat - dnConf.blockReportInterval; + } + resetBlockReportTime = true; // reset future BRs for randomness + } + + void reportBadBlocks(ExtendedBlock block) { + DatanodeInfo[] dnArr = { new DatanodeInfo(bpRegistration) }; + LocatedBlock[] blocks = { new LocatedBlock(block, dnArr) }; + + try { + bpNamenode.reportBadBlocks(blocks); + } catch (IOException e){ + /* One common reason is that NameNode could be in safe mode. + * Should we keep on retrying in that case? + */ + LOG.warn("Failed to report bad block " + block + " to namenode : " + + " Exception", e); + } + } + + /** + * Report received blocks and delete hints to the Namenode + * + * @throws IOException + */ + private void reportReceivedDeletedBlocks() throws IOException { + + // check if there are newly received blocks + ReceivedDeletedBlockInfo[] receivedAndDeletedBlockArray = null; + int currentReceivedRequestsCounter; + synchronized (receivedAndDeletedBlockList) { + currentReceivedRequestsCounter = pendingReceivedRequests; + int numBlocks = receivedAndDeletedBlockList.size(); + if (numBlocks > 0) { + // + // Send newly-received and deleted blockids to namenode + // + receivedAndDeletedBlockArray = receivedAndDeletedBlockList + .toArray(new ReceivedDeletedBlockInfo[numBlocks]); + } + } + if (receivedAndDeletedBlockArray != null) { + bpNamenode.blockReceivedAndDeleted(bpRegistration, bpos.getBlockPoolId(), + receivedAndDeletedBlockArray); + synchronized (receivedAndDeletedBlockList) { + for (int i = 0; i < receivedAndDeletedBlockArray.length; i++) { + receivedAndDeletedBlockList.remove(receivedAndDeletedBlockArray[i]); + } + pendingReceivedRequests -= currentReceivedRequestsCounter; + } + } + } + + /* + * Informing the name node could take a long long time! Should we wait + * till namenode is informed before responding with success to the + * client? For now we don't. + */ + void notifyNamenodeReceivedBlock(ReceivedDeletedBlockInfo bInfo) { + synchronized (receivedAndDeletedBlockList) { + receivedAndDeletedBlockList.add(bInfo); + pendingReceivedRequests++; + receivedAndDeletedBlockList.notifyAll(); + } + } + + void notifyNamenodeDeletedBlock(ReceivedDeletedBlockInfo bInfo) { + synchronized (receivedAndDeletedBlockList) { + receivedAndDeletedBlockList.add(bInfo); + } + } + + /** + * Run an immediate block report on this thread. Used by tests. 
+ */ + @VisibleForTesting + void triggerBlockReportForTests() throws IOException { + lastBlockReport = 0; + blockReport(); + } + + /** + * Report the list blocks to the Namenode + * @throws IOException + */ + DatanodeCommand blockReport() throws IOException { + // send block report if timer has expired. + DatanodeCommand cmd = null; + long startTime = now(); + if (startTime - lastBlockReport > dnConf.blockReportInterval) { + + // Create block report + long brCreateStartTime = now(); + BlockListAsLongs bReport = dn.getFSDataset().getBlockReport( + bpos.getBlockPoolId()); + + // Send block report + long brSendStartTime = now(); + cmd = bpNamenode.blockReport(bpRegistration, bpos.getBlockPoolId(), bReport + .getBlockListAsLongs()); + + // Log the block report processing stats from Datanode perspective + long brSendCost = now() - brSendStartTime; + long brCreateCost = brSendStartTime - brCreateStartTime; + dn.getMetrics().addBlockReport(brSendCost); + LOG.info("BlockReport of " + bReport.getNumberOfBlocks() + + " blocks took " + brCreateCost + " msec to generate and " + + brSendCost + " msecs for RPC and NN processing"); + + // If we have sent the first block report, then wait a random + // time before we start the periodic block reports. + if (resetBlockReportTime) { + lastBlockReport = startTime - DFSUtil.getRandom().nextInt((int)(dnConf.blockReportInterval)); + resetBlockReportTime = false; + } else { + /* say the last block report was at 8:20:14. The current report + * should have started around 9:20:14 (default 1 hour interval). + * If current time is : + * 1) normal like 9:20:18, next report should be at 10:20:14 + * 2) unexpected like 11:35:43, next report should be at 12:20:14 + */ + lastBlockReport += (now() - lastBlockReport) / + dnConf.blockReportInterval * dnConf.blockReportInterval; + } + LOG.info("sent block report, processed command:" + cmd); + } + return cmd; + } + + + DatanodeCommand [] sendHeartBeat() throws IOException { + LOG.info("heartbeat: " + this); + // TODO: saw an NPE here - maybe if the two BPOS register at + // same time, this one won't block on the other one? + return bpNamenode.sendHeartbeat(bpRegistration, + dn.getFSDataset().getCapacity(), + dn.getFSDataset().getDfsUsed(), + dn.getFSDataset().getRemaining(), + dn.getFSDataset().getBlockPoolUsed(bpos.getBlockPoolId()), + dn.getXmitsInProgress(), + dn.getXceiverCount(), dn.getFSDataset().getNumFailedVolumes()); + } + + //This must be called only by BPOfferService + void start() { + if ((bpThread != null) && (bpThread.isAlive())) { + //Thread is started already + return; + } + bpThread = new Thread(this, formatThreadName()); + bpThread.setDaemon(true); // needed for JUnit testing + bpThread.start(); + } + + private String formatThreadName() { + Collection dataDirs = DataNode.getStorageDirs(dn.getConf()); + return "DataNode: [" + + StringUtils.uriToString(dataDirs.toArray(new URI[0])) + "] " + + " heartbeating to " + nnAddr; + } + + //This must be called only by blockPoolManager. + void stop() { + shouldServiceRun = false; + if (bpThread != null) { + bpThread.interrupt(); + } + } + + //This must be called only by blockPoolManager + void join() { + try { + if (bpThread != null) { + bpThread.join(); + } + } catch (InterruptedException ie) { } + } + + //Cleanup method to be called by current thread before exiting. + private synchronized void cleanUp() { + + shouldServiceRun = false; + RPC.stopProxy(bpNamenode); + bpos.shutdownActor(this); + } + + /** + * Main loop for each BP thread. 
Run until shutdown, + * forever calling remote NameNode functions. + */ + private void offerService() throws Exception { + LOG.info("For namenode " + nnAddr + " using DELETEREPORT_INTERVAL of " + + dnConf.deleteReportInterval + " msec " + " BLOCKREPORT_INTERVAL of " + + dnConf.blockReportInterval + "msec" + " Initial delay: " + + dnConf.initialBlockReportDelay + "msec" + "; heartBeatInterval=" + + dnConf.heartBeatInterval); + + // + // Now loop for a long time.... + // + while (shouldRun()) { + try { + long startTime = now(); + + // + // Every so often, send heartbeat or block-report + // + if (startTime - lastHeartbeat > dnConf.heartBeatInterval) { + // + // All heartbeat messages include following info: + // -- Datanode name + // -- data transfer port + // -- Total capacity + // -- Bytes remaining + // + lastHeartbeat = startTime; + if (!dn.areHeartbeatsDisabledForTests()) { + DatanodeCommand[] cmds = sendHeartBeat(); + dn.getMetrics().addHeartbeat(now() - startTime); + + long startProcessCommands = now(); + if (!processCommand(cmds)) + continue; + long endProcessCommands = now(); + if (endProcessCommands - startProcessCommands > 2000) { + LOG.info("Took " + (endProcessCommands - startProcessCommands) + + "ms to process " + cmds.length + " commands from NN"); + } + } + } + if (pendingReceivedRequests > 0 + || (startTime - lastDeletedReport > dnConf.deleteReportInterval)) { + reportReceivedDeletedBlocks(); + lastDeletedReport = startTime; + } + + DatanodeCommand cmd = blockReport(); + processCommand(new DatanodeCommand[]{ cmd }); + + // Now safe to start scanning the block pool + // TODO(HA): this doesn't seem quite right + if (dn.blockScanner != null) { + dn.blockScanner.addBlockPool(bpos.getBlockPoolId()); + } + + // + // There is no work to do; sleep until hearbeat timer elapses, + // or work arrives, and then iterate again. + // + long waitTime = dnConf.heartBeatInterval - + (System.currentTimeMillis() - lastHeartbeat); + synchronized(receivedAndDeletedBlockList) { + if (waitTime > 0 && pendingReceivedRequests == 0) { + try { + receivedAndDeletedBlockList.wait(waitTime); + } catch (InterruptedException ie) { + LOG.warn("BPOfferService for " + this + " interrupted"); + } + } + } // synchronized + } catch(RemoteException re) { + String reClass = re.getClassName(); + if (UnregisteredNodeException.class.getName().equals(reClass) || + DisallowedDatanodeException.class.getName().equals(reClass) || + IncorrectVersionException.class.getName().equals(reClass)) { + LOG.warn(this + " is shutting down", re); + shouldServiceRun = false; + return; + } + LOG.warn("RemoteException in offerService", re); + try { + long sleepTime = Math.min(1000, dnConf.heartBeatInterval); + Thread.sleep(sleepTime); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + } + } catch (IOException e) { + LOG.warn("IOException in offerService", e); + } + } // while (shouldRun()) + } // offerService + + /** + * Register one bp with the corresponding NameNode + *

      + * The bpDatanode needs to register with the namenode on startup in order + * 1) to report which storage it is serving now and + * 2) to receive a registrationID + * + * issued by the namenode to recognize registered datanodes. + * + * @see FSNamesystem#registerDatanode(DatanodeRegistration) + * @throws IOException + */ + void register() throws IOException { + // The handshake() phase loaded the block pool storage + // off disk - so update the bpRegistration object from that info + bpRegistration = bpos.createRegistration(); + + LOG.info(this + " beginning handshake with NN"); + + while (shouldRun()) { + try { + // Use returned registration from namenode with updated machine name. + bpRegistration = bpNamenode.registerDatanode(bpRegistration); + break; + } catch(SocketTimeoutException e) { // namenode is busy + LOG.info("Problem connecting to server: " + nnAddr); + sleepAndLogInterrupts(1000, "connecting to server"); + } + } + + LOG.info("Block pool " + this + " successfully registered with NN"); + bpos.registrationSucceeded(this, bpRegistration); + + // random short delay - helps scatter the BR from all DNs + scheduleBlockReport(dnConf.initialBlockReportDelay); + } + + + private void sleepAndLogInterrupts(int millis, + String stateString) { + try { + Thread.sleep(millis); + } catch (InterruptedException ie) { + LOG.info("BPOfferService " + this + + " interrupted while " + stateString); + } + } + + /** + * No matter what kind of exception we get, keep retrying to offerService(). + * That's the loop that connects to the NameNode and provides basic DataNode + * functionality. + * + * Only stop when "shouldRun" or "shouldServiceRun" is turned off, which can + * happen either at shutdown or due to refreshNamenodes. + */ + @Override + public void run() { + LOG.info(this + " starting to offer service"); + + try { + // init stuff + try { + // setup storage + connectToNNAndHandshake(); + } catch (IOException ioe) { + // Initial handshake, storage recovery or registration failed + // End BPOfferService thread + LOG.fatal("Initialization failed for block pool " + this, ioe); + return; + } + + initialized = true; // bp is initialized; + + while (shouldRun()) { + try { + bpos.startDistributedUpgradeIfNeeded(); + offerService(); + } catch (Exception ex) { + LOG.error("Exception in BPOfferService for " + this, ex); + sleepAndLogInterrupts(5000, "offering service"); + } + } + } catch (Throwable ex) { + LOG.warn("Unexpected exception in block pool " + this, ex); + } finally { + LOG.warn("Ending block pool service for: " + this); + cleanUp(); + } + } + + private boolean shouldRun() { + return shouldServiceRun && dn.shouldRun(); + } + + /** + * Process an array of datanode commands + * + * @param cmds an array of datanode commands + * @return true if further processing may be required or false otherwise. + */ + boolean processCommand(DatanodeCommand[] cmds) { + if (cmds != null) { + for (DatanodeCommand cmd : cmds) { + try { + if (bpos.processCommandFromActor(cmd, this) == false) { + return false; + } + } catch (IOException ioe) { + LOG.warn("Error processing datanode Command", ioe); + } + } + } + return true; + } + + void trySendErrorReport(int errCode, String errMsg) { + try { + bpNamenode.errorReport(bpRegistration, errCode, errMsg); + } catch(IOException e) { + LOG.warn("Error reporting an error to NameNode " + nnAddr, + e); + } + } + + /** + * Report a bad block from another DN in this cluster. 
+ */ + void reportRemoteBadBlock(DatanodeInfo dnInfo, ExtendedBlock block) + throws IOException { + LocatedBlock lb = new LocatedBlock(block, + new DatanodeInfo[] {dnInfo}); + bpNamenode.reportBadBlocks(new LocatedBlock[] {lb}); + } + + void reRegister() throws IOException { + if (shouldRun()) { + // re-retrieve namespace info to make sure that, if the NN + // was restarted, we still match its version (HDFS-2120) + retrieveNamespaceInfo(); + // and re-register + register(); + } + } + +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java index 61bc29acf46..c8aac296a7a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java @@ -320,7 +320,6 @@ class BlockReceiver implements Closeable { private void verifyChunks( byte[] dataBuf, int dataOff, int len, byte[] checksumBuf, int checksumOff ) throws IOException { - DatanodeProtocol nn = datanode.getBPNamenode(block.getBlockPoolId()); while (len > 0) { int chunkLen = Math.min(len, bytesPerChecksum); @@ -331,9 +330,7 @@ class BlockReceiver implements Closeable { try { LOG.info("report corrupt block " + block + " from datanode " + srcDataNode + " to namenode"); - LocatedBlock lb = new LocatedBlock(block, - new DatanodeInfo[] {srcDataNode}); - nn.reportBadBlocks(new LocatedBlock[] {lb}); + datanode.reportRemoteBadBlock(srcDataNode, block); } catch (IOException e) { LOG.warn("Failed to report bad block " + block + " from datanode " + srcDataNode + " to namenode"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index aba55f8c6a4..dc3a18163b2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -71,6 +71,7 @@ import java.util.AbstractList; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; import java.util.List; @@ -243,7 +244,7 @@ public class DataNode extends Configured @InterfaceAudience.Private class BlockPoolManager { private final Map bpMapping; - private final Map nameNodeThreads; + private final List offerServices; //This lock is used only to ensure exclusion of refreshNamenodes private final Object refreshNamenodesLock = new Object(); @@ -251,31 +252,26 @@ public class DataNode extends Configured BlockPoolManager(Configuration conf) throws IOException { bpMapping = new HashMap(); - nameNodeThreads = new HashMap(); + offerServices = new ArrayList(); Map> map = DFSUtil.getNNServiceRpcAddresses(conf); for (Entry> entry : map.entrySet()) { List nnList = Lists.newArrayList(entry.getValue().values()); - // TODO(HA) when HDFS-1971 (dual BRs) is done, pass all of the NNs - // to BPOS - InetSocketAddress isa = nnList.get(0); - BPOfferService bpos = new BPOfferService(isa, DataNode.this); - nameNodeThreads.put(bpos.getNNSocketAddress(), bpos); + BPOfferService bpos = new BPOfferService(nnList, DataNode.this); + offerServices.add(bpos); } } - 
synchronized void addBlockPool(BPOfferService t) { - if (nameNodeThreads.get(t.getNNSocketAddress()) == null) { - throw new IllegalArgumentException( - "Unknown BPOfferService thread for namenode address:" - + t.getNNSocketAddress()); - } - if (t.getBlockPoolId() == null) { + synchronized void addBlockPool(BPOfferService bpos) { + Preconditions.checkArgument(offerServices.contains(bpos), + "Unknown BPOS: %s", bpos); + if (bpos.getBlockPoolId() == null) { throw new IllegalArgumentException("Null blockpool id"); } - bpMapping.put(t.getBlockPoolId(), t); + LOG.info("===> registering in bpmapping: " + bpos); + bpMapping.put(bpos.getBlockPoolId(), bpos); } /** @@ -283,21 +279,26 @@ public class DataNode extends Configured * Caution: The BPOfferService returned could be shutdown any time. */ synchronized BPOfferService[] getAllNamenodeThreads() { - BPOfferService[] bposArray = new BPOfferService[nameNodeThreads.values() - .size()]; - return nameNodeThreads.values().toArray(bposArray); + BPOfferService[] bposArray = new BPOfferService[offerServices.size()]; + return offerServices.toArray(bposArray); } - - synchronized BPOfferService get(InetSocketAddress addr) { - return nameNodeThreads.get(addr); - } - + synchronized BPOfferService get(String bpid) { return bpMapping.get(bpid); } + // TODO(HA) would be good to kill this + synchronized BPOfferService get(InetSocketAddress addr) { + for (BPOfferService bpos : offerServices) { + if (bpos.containsNN(addr)) { + return bpos; + } + } + return null; + } + synchronized void remove(BPOfferService t) { - nameNodeThreads.remove(t.getNNSocketAddress()); + offerServices.remove(t); bpMapping.remove(t.getBlockPoolId()); } @@ -318,7 +319,7 @@ public class DataNode extends Configured UserGroupInformation.getLoginUser().doAs( new PrivilegedExceptionAction() { public Object run() throws Exception { - for (BPOfferService bpos : nameNodeThreads.values()) { + for (BPOfferService bpos : offerServices) { bpos.start(); } return null; @@ -339,6 +340,10 @@ public class DataNode extends Configured void refreshNamenodes(Configuration conf) throws IOException { + throw new UnsupportedOperationException("TODO(HA)"); +/* + * TODO(HA) + LOG.info("Refresh request received for nameservices: " + conf.get(DFS_FEDERATION_NAMESERVICES)); @@ -355,20 +360,20 @@ public class DataNode extends Configured List toStart = new ArrayList(); synchronized (refreshNamenodesLock) { synchronized (this) { - for (InetSocketAddress nnaddr : nameNodeThreads.keySet()) { + for (InetSocketAddress nnaddr : offerServices.keySet()) { if (!(newAddresses.contains(nnaddr))) { - toShutdown.add(nameNodeThreads.get(nnaddr)); + toShutdown.add(offerServices.get(nnaddr)); } } for (InetSocketAddress nnaddr : newAddresses) { - if (!(nameNodeThreads.containsKey(nnaddr))) { + if (!(offerServices.containsKey(nnaddr))) { toStart.add(nnaddr); } } for (InetSocketAddress nnaddr : toStart) { BPOfferService bpos = new BPOfferService(nnaddr, DataNode.this); - nameNodeThreads.put(bpos.getNNSocketAddress(), bpos); + offerServices.put(bpos.getNNSocketAddress(), bpos); } } @@ -383,7 +388,9 @@ public class DataNode extends Configured // Now start the threads that are not already running. startAll(); } + */ } + } volatile boolean shouldRun = true; @@ -685,13 +692,44 @@ public class DataNode extends Configured } } + /** + * Report a bad block which is hosted on the local DN. 
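+   * (Editor's note: contrast with {@link #reportRemoteBadBlock(DatanodeInfo,
+   * ExtendedBlock)} below, which covers a corrupt replica received from a
+   * remote DataNode; both now locate the owning BPOfferService through
+   * getBPOSForBlock(block) rather than holding a single NN proxy.)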
+ */ public void reportBadBlocks(ExtendedBlock block) throws IOException{ - BPOfferService bpos = blockPoolManager.get(block.getBlockPoolId()); - if(bpos == null || bpos.bpNamenode == null) { - throw new IOException("cannot locate OfferService thread for bp="+block.getBlockPoolId()); - } + BPOfferService bpos = getBPOSForBlock(block); bpos.reportBadBlocks(block); } + + /** + * Report a bad block on another DN (eg if we received a corrupt replica + * from a remote host). + * @param srcDataNode the DN hosting the bad block + * @param block the block itself + */ + public void reportRemoteBadBlock(DatanodeInfo srcDataNode, ExtendedBlock block) + throws IOException { + BPOfferService bpos = getBPOSForBlock(block); + bpos.reportRemoteBadBlock(srcDataNode, block); + } + + /** + * Return the BPOfferService instance corresponding to the given block. + * @param block + * @return the BPOS + * @throws IOException if no such BPOS can be found + */ + private BPOfferService getBPOSForBlock(ExtendedBlock block) + throws IOException { + Preconditions.checkNotNull(block); + BPOfferService bpos = blockPoolManager.get(block.getBlockPoolId()); + if (bpos == null) { + throw new IOException("cannot locate OfferService thread for bp="+ + block.getBlockPoolId()); + } + return bpos; + } + + // used only for testing void setHeartbeatsDisabledForTests( @@ -1006,11 +1044,15 @@ public class DataNode extends Configured /** * get BP registration by machine and port name (host:port) - * @param mName + * @param mName - the name that the NN used * @return BP registration * @throws IOException */ DatanodeRegistration getDNRegistrationByMachineName(String mName) { + // TODO: all the BPs should have the same name as each other, they all come + // from getName() here! and the use cases only are in tests where they just + // call with getName(). So we could probably just make this method return + // the first BPOS's registration BPOfferService [] bposArray = blockPoolManager.getAllNamenodeThreads(); for (BPOfferService bpos : bposArray) { if(bpos.bpRegistration.getName().equals(mName)) @@ -1055,6 +1097,8 @@ public class DataNode extends Configured * @return namenode address corresponding to the bpid */ public InetSocketAddress getNameNodeAddr(String bpid) { + // TODO(HA) this function doesn't make sense! used by upgrade code + // Should it return just the active one or simply return the BPService. BPOfferService bp = blockPoolManager.get(bpid); if (bp != null) { return bp.getNNSocketAddress(); @@ -1288,12 +1332,7 @@ public class DataNode extends Configured //inform NameNodes for(BPOfferService bpos: blockPoolManager.getAllNamenodeThreads()) { - DatanodeProtocol nn = bpos.bpNamenode; - try { - nn.errorReport(bpos.bpRegistration, dpError, errMsgr); - } catch(IOException e) { - LOG.warn("Error reporting disk failure to NameNode", e); - } + bpos.trySendErrorReport(dpError, errMsgr); } if(hasEnoughResources) { @@ -1309,6 +1348,10 @@ public class DataNode extends Configured int getXceiverCount() { return threadGroup == null ? 
0 : threadGroup.activeCount(); } + + int getXmitsInProgress() { + return xmitsInProgress.get(); + } UpgradeManagerDatanode getUpgradeManagerDatanode(String bpid) { BPOfferService bpos = blockPoolManager.get(bpid); @@ -1321,14 +1364,15 @@ public class DataNode extends Configured private void transferBlock( ExtendedBlock block, DatanodeInfo xferTargets[] ) throws IOException { - DatanodeProtocol nn = getBPNamenode(block.getBlockPoolId()); + BPOfferService bpos = getBPOSForBlock(block); DatanodeRegistration bpReg = getDNRegistrationForBP(block.getBlockPoolId()); if (!data.isValidBlock(block)) { // block does not exist or is under-construction String errStr = "Can't send invalid block " + block; LOG.info(errStr); - nn.errorReport(bpReg, DatanodeProtocol.INVALID_BLOCK, errStr); + + bpos.trySendErrorReport(DatanodeProtocol.INVALID_BLOCK, errStr); return; } @@ -1336,9 +1380,7 @@ public class DataNode extends Configured long onDiskLength = data.getLength(block); if (block.getNumBytes() > onDiskLength) { // Shorter on-disk len indicates corruption so report NN the corrupt block - nn.reportBadBlocks(new LocatedBlock[]{ - new LocatedBlock(block, new DatanodeInfo[] { - new DatanodeInfo(bpReg)})}); + bpos.reportBadBlocks(block); LOG.warn("Can't replicate block " + block + " because on-disk length " + onDiskLength + " is shorter than NameNode recorded length " + block.getNumBytes()); @@ -1991,10 +2033,10 @@ public class DataNode extends Configured */ public DatanodeProtocol getBPNamenode(String bpid) throws IOException { BPOfferService bpos = blockPoolManager.get(bpid); - if(bpos == null || bpos.bpNamenode == null) { + if (bpos == null) { throw new IOException("cannot find a namnode proxy for bpid=" + bpid); } - return bpos.bpNamenode; + return bpos.getActiveNN(); } /** Block synchronization */ @@ -2013,6 +2055,7 @@ public class DataNode extends Configured // or their replicas have 0 length. // The block can be deleted. if (syncList.isEmpty()) { + // TODO: how does this work in HA?? nn.commitBlockSynchronization(block, recoveryId, 0, true, true, DatanodeID.EMPTY_ARRAY); return; @@ -2229,7 +2272,7 @@ public class DataNode extends Configured public String getNamenodeAddresses() { final Map info = new HashMap(); for (BPOfferService bpos : blockPoolManager.getAllNamenodeThreads()) { - if (bpos != null && bpos.bpThread != null) { + if (bpos != null) { info.put(bpos.getNNSocketAddress().getHostName(), bpos.getBlockPoolId()); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java new file mode 100644 index 00000000000..33b0e64aed1 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -0,0 +1,282 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.datanode; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; +import org.apache.hadoop.hdfs.server.protocol.BlockCommand; +import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; +import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; +import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.log4j.Level; +import org.junit.Before; +import org.junit.Test; +import org.mockito.ArgumentCaptor; +import org.mockito.Mockito; + +import com.google.common.base.Supplier; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +public class TestBPOfferService { + + private static final String FAKE_BPID = "fake bpid"; + private static final String FAKE_CLUSTERID = "fake cluster"; + protected static final Log LOG = LogFactory.getLog( + TestBPOfferService.class); + private static final ExtendedBlock FAKE_BLOCK = + new ExtendedBlock(FAKE_BPID, 12345L); + + static { + ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); + } + + private DatanodeProtocol mockNN1; + private DatanodeProtocol mockNN2; + private DataNode mockDn; + private FSDatasetInterface mockFSDataset; + + @Before + public void setupMocks() throws Exception { + mockNN1 = setupNNMock(); + mockNN2 = setupNNMock(); + + // Set up a mock DN with the bare-bones configuration + // objects, etc. + mockDn = Mockito.mock(DataNode.class); + Mockito.doReturn(true).when(mockDn).shouldRun(); + Configuration conf = new Configuration(); + Mockito.doReturn(conf).when(mockDn).getConf(); + Mockito.doReturn(new DNConf(conf)).when(mockDn).getDnConf(); + Mockito.doReturn(DataNodeMetrics.create(conf, "fake dn")) + .when(mockDn).getMetrics(); + + // Set up a simulated dataset with our fake BP + mockFSDataset = Mockito.spy(new SimulatedFSDataset(conf)); + mockFSDataset.addBlockPool(FAKE_BPID, conf); + + // Wire the dataset to the DN. + Mockito.doReturn(mockFSDataset).when(mockDn).getFSDataset(); + } + + /** + * Set up a mock NN with the bare minimum for a DN to register to it. + */ + private DatanodeProtocol setupNNMock() throws Exception { + DatanodeProtocol mock = Mockito.mock(DatanodeProtocol.class); + Mockito.doReturn( + new NamespaceInfo(1, FAKE_CLUSTERID, FAKE_BPID, + 0, HdfsConstants.LAYOUT_VERSION)) + .when(mock).versionRequest(); + return mock; + } + + /** + * Test that the BPOS can register to talk to two different NNs, + * sends block reports to both, etc. 
+ */ + @Test + public void testBasicFunctionality() throws Exception { + BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2); + bpos.start(); + try { + waitForInitialization(bpos); + + // The DN should have register to both NNs. + Mockito.verify(mockNN1).registerDatanode( + (DatanodeRegistration) Mockito.anyObject()); + Mockito.verify(mockNN2).registerDatanode( + (DatanodeRegistration) Mockito.anyObject()); + + // Should get block reports from both NNs + waitForBlockReport(mockNN1); + waitForBlockReport(mockNN2); + + // When we receive a block, it should report it to both NNs + bpos.notifyNamenodeReceivedBlock(FAKE_BLOCK, ""); + + ReceivedDeletedBlockInfo[] ret = waitForBlockReceived(FAKE_BLOCK, mockNN1); + assertEquals(1, ret.length); + assertEquals(FAKE_BLOCK.getLocalBlock(), ret[0].getBlock()); + + ret = waitForBlockReceived(FAKE_BLOCK, mockNN2); + assertEquals(1, ret.length); + assertEquals(FAKE_BLOCK.getLocalBlock(), ret[0].getBlock()); + + } finally { + bpos.stop(); + } + } + + /** + * Test that DNA_INVALIDATE commands from the standby are ignored. + */ + @Test + public void testIgnoreDeletionsFromNonActive() throws Exception { + BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2); + + // Ask to invalidate FAKE_BLOCK when block report hits the + // standby + Mockito.doReturn(new BlockCommand(DatanodeProtocol.DNA_INVALIDATE, + FAKE_BPID, new Block[] { FAKE_BLOCK.getLocalBlock() })) + .when(mockNN2).blockReport( + Mockito.anyObject(), + Mockito.eq(FAKE_BPID), + Mockito.anyObject()); + + bpos.start(); + try { + waitForInitialization(bpos); + + // Should get block reports from both NNs + waitForBlockReport(mockNN1); + waitForBlockReport(mockNN2); + + } finally { + bpos.stop(); + } + + // Should ignore the delete command from the standby + Mockito.verify(mockFSDataset, Mockito.never()) + .invalidate(Mockito.eq(FAKE_BPID), + (Block[]) Mockito.anyObject()); + } + + /** + * Ensure that, if the two NNs configured for a block pool + * have different block pool IDs, they will refuse to both + * register. + */ + @Test + public void testNNsFromDifferentClusters() throws Exception { + Mockito.doReturn( + new NamespaceInfo(1, "fake foreign cluster", FAKE_BPID, + 0, HdfsConstants.LAYOUT_VERSION)) + .when(mockNN1).versionRequest(); + + BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2); + bpos.start(); + try { + waitForOneToFail(bpos); + } finally { + bpos.stop(); + } + } + + private void waitForOneToFail(final BPOfferService bpos) + throws Exception { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return bpos.countNameNodes() == 1; + } + }, 100, 10000); + } + + /** + * Create a BPOfferService which registers with and heartbeats with the + * specified namenode proxy objects. + */ + private BPOfferService setupBPOSForNNs(DatanodeProtocol ... nns) { + // Set up some fake InetAddresses, then override the connectToNN + // function to return the corresponding proxies. 
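+    // (Editor's note: each mock NN is keyed by a dummy InetSocketAddress built
+    // from its index, and the anonymous BPOfferService subclass below overrides
+    // connectToNN() to return the mock registered under the requested address
+    // instead of opening a real RPC proxy, so no running NameNode is needed.)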
+ + final Map nnMap = Maps.newLinkedHashMap(); + for (int port = 0; port < nns.length; port++) { + nnMap.put(new InetSocketAddress(port), nns[port]); + } + + return new BPOfferService(Lists.newArrayList(nnMap.keySet()), mockDn) { + @Override + DatanodeProtocol connectToNN(InetSocketAddress nnAddr) throws IOException { + DatanodeProtocol nn = nnMap.get(nnAddr); + if (nn == null) { + throw new AssertionError("bad NN addr: " + nnAddr); + } + return nn; + } + }; + } + + private void waitForInitialization(final BPOfferService bpos) + throws Exception { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return bpos.isAlive() && bpos.isInitialized(); + } + }, 100, 10000); + } + + private void waitForBlockReport(final DatanodeProtocol mockNN) + throws Exception { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + try { + Mockito.verify(mockNN).blockReport( + Mockito.anyObject(), + Mockito.eq(FAKE_BPID), + Mockito.anyObject()); + return true; + } catch (Throwable t) { + LOG.info("waiting on block report: " + t.getMessage()); + return false; + } + } + }, 500, 10000); + } + + private ReceivedDeletedBlockInfo[] waitForBlockReceived( + ExtendedBlock fakeBlock, + DatanodeProtocol mockNN) throws Exception { + final ArgumentCaptor captor = + ArgumentCaptor.forClass(ReceivedDeletedBlockInfo[].class); + GenericTestUtils.waitFor(new Supplier() { + + @Override + public Boolean get() { + try { + Mockito.verify(mockNN1).blockReceivedAndDeleted( + Mockito.anyObject(), + Mockito.eq(FAKE_BPID), + captor.capture()); + return true; + } catch (Throwable t) { + return false; + } + } + }, 100, 10000); + return captor.getValue(); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java index cc82682ec4b..760eb08ba87 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java @@ -93,23 +93,22 @@ public class TestDataNodeMultipleRegistrations { assertEquals("number of volumes is wrong", 2, volInfos.size()); for (BPOfferService bpos : dn.getAllBpOs()) { - LOG.info("reg: bpid=" + "; name=" + bpos.bpRegistration.name + "; sid=" - + bpos.bpRegistration.storageID + "; nna=" + bpos.nnAddr); + LOG.info("BP: " + bpos); } BPOfferService bpos1 = dn.getAllBpOs()[0]; BPOfferService bpos2 = dn.getAllBpOs()[1]; // The order of bpos is not guaranteed, so fix the order - if (bpos1.nnAddr.equals(nn2.getNameNodeAddress())) { + if (bpos1.getNNSocketAddress().equals(nn2.getNameNodeAddress())) { BPOfferService tmp = bpos1; bpos1 = bpos2; bpos2 = tmp; } - assertEquals("wrong nn address", bpos1.nnAddr, + assertEquals("wrong nn address", bpos1.getNNSocketAddress(), nn1.getNameNodeAddress()); - assertEquals("wrong nn address", bpos2.nnAddr, + assertEquals("wrong nn address", bpos2.getNNSocketAddress(), nn2.getNameNodeAddress()); assertEquals("wrong bpid", bpos1.getBlockPoolId(), bpid1); assertEquals("wrong bpid", bpos2.getBlockPoolId(), bpid2); @@ -156,15 +155,14 @@ public class TestDataNodeMultipleRegistrations { for (BPOfferService bpos : dn.getAllBpOs()) { LOG.info("reg: bpid=" + "; name=" + bpos.bpRegistration.name + "; sid=" - + bpos.bpRegistration.storageID + "; 
nna=" + bpos.nnAddr); + + bpos.bpRegistration.storageID + "; nna=" + bpos.getNNSocketAddress()); } // try block report BPOfferService bpos1 = dn.getAllBpOs()[0]; - bpos1.lastBlockReport = 0; - bpos1.blockReport(); + bpos1.triggerBlockReportForTests(); - assertEquals("wrong nn address", bpos1.nnAddr, + assertEquals("wrong nn address", bpos1.getNNSocketAddress(), nn1.getNameNodeAddress()); assertEquals("wrong bpid", bpos1.getBlockPoolId(), bpid1); assertEquals("wrong cid", dn.getClusterId(), cid1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java index 97554e7a804..ba36b277647 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java @@ -42,17 +42,19 @@ public class TestDatanodeRegister { DataNode mockDN = mock(DataNode.class); Mockito.doReturn(true).when(mockDN).shouldRun(); - BPOfferService bpos = new BPOfferService(INVALID_ADDR, mockDN); + BPOfferService mockBPOS = Mockito.mock(BPOfferService.class); + Mockito.doReturn(mockDN).when(mockBPOS).getDataNode(); + + BPServiceActor actor = new BPServiceActor(INVALID_ADDR, mockBPOS); NamespaceInfo fakeNSInfo = mock(NamespaceInfo.class); when(fakeNSInfo.getBuildVersion()).thenReturn("NSBuildVersion"); DatanodeProtocol fakeDNProt = mock(DatanodeProtocol.class); when(fakeDNProt.versionRequest()).thenReturn(fakeNSInfo); - bpos.setNameNode( fakeDNProt ); - bpos.bpNSInfo = fakeNSInfo; + actor.setNameNode( fakeDNProt ); try { - bpos.retrieveNamespaceInfo(); + actor.retrieveNamespaceInfo(); fail("register() did not throw exception! " + "Expected: IncorrectVersionException"); } catch (IncorrectVersionException ie) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java index 1360cad5caf..cfa1d64c903 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java @@ -72,7 +72,7 @@ public class TestRefreshNamenodes { InetSocketAddress addr = cluster.getNameNode(i).getNameNodeAddress(); boolean found = false; for (int j = 0; j < bpoList.length; j++) { - if (bpoList[j] != null && addr.equals(bpoList[j].nnAddr)) { + if (bpoList[j] != null && addr.equals(bpoList[j].getNNSocketAddress())) { found = true; bpoList[j] = null; // Erase the address that matched break; From 8dbb5237684bb9de78430b5cef27be40c78a8474 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 1 Dec 2011 08:03:41 +0000 Subject: [PATCH 028/177] HDFS-2616. Change DatanodeProtocol#sendHeartbeat() to return HeartbeatResponse. 
(suresh) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1208987 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/datanode/BPServiceActor.java | 12 +-- .../hdfs/server/namenode/FSNamesystem.java | 18 ++--- .../server/namenode/NameNodeRpcServer.java | 3 +- .../server/protocol/DatanodeProtocol.java | 6 +- .../server/protocol/HeartbeatResponse.java | 73 ++++++++++++++++++ ...tanodeProtocolServerSideTranslatorR23.java | 4 +- .../DatanodeProtocolTranslatorR23.java | 11 +-- .../DatanodeWireProtocol.java | 2 +- .../HeartbeatResponseWritable.java | 76 +++++++++++++++++++ .../TestHeartbeatHandling.java | 17 +++-- .../namenode/NNThroughputBenchmark.java | 16 ++-- .../hdfs/server/namenode/NameNodeAdapter.java | 4 +- .../server/namenode/TestDeadDatanode.java | 3 +- 14 files changed, 203 insertions(+), 44 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/HeartbeatResponse.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/HeartbeatResponseWritable.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 414b28e908c..728582a4f82 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -31,3 +31,5 @@ HDFS-2591. MiniDFSCluster support to mix and match federation with HA (todd) HDFS-1975. Support for sharing the namenode state from active to standby. (jitendra, atm, todd) HDFS-1971. Send block report from datanode to both active and standby namenodes. (sanjay, todd via suresh) + +HDFS-2616. Change DatanodeProtocol#sendHeartbeat() to return HeartbeatResponse. (suresh) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index 2c4a15bf819..e83ec99c1ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException; +import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; import org.apache.hadoop.ipc.RPC; @@ -333,7 +334,7 @@ class BPServiceActor implements Runnable { } - DatanodeCommand [] sendHeartBeat() throws IOException { + HeartbeatResponse sendHeartBeat() throws IOException { LOG.info("heartbeat: " + this); // TODO: saw an NPE here - maybe if the two BPOS register at // same time, this one won't block on the other one? 
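[Editor's sketch, not part of the diff: with the new signature a BPServiceActor
consumes the heartbeat reply roughly as below. The names come from the hunks in
this patch; the argument values are placeholders for the actor's own state.]

    HeartbeatResponse resp = bpNamenode.sendHeartbeat(bpRegistration,
        capacity, dfsUsed, remaining, blockPoolUsed,
        xmitsInProgress, xceiverCount, failedVolumes);
    DatanodeCommand[] cmds = resp.getCommands(); // may be null if the NN has nothing to send
    processCommand(cmds);                        // processCommand() already tolerates null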
@@ -420,16 +421,17 @@ class BPServiceActor implements Runnable { // lastHeartbeat = startTime; if (!dn.areHeartbeatsDisabledForTests()) { - DatanodeCommand[] cmds = sendHeartBeat(); + HeartbeatResponse resp = sendHeartBeat(); dn.getMetrics().addHeartbeat(now() - startTime); long startProcessCommands = now(); - if (!processCommand(cmds)) + if (!processCommand(resp.getCommands())) continue; long endProcessCommands = now(); if (endProcessCommands - startProcessCommands > 2000) { - LOG.info("Took " + (endProcessCommands - startProcessCommands) + - "ms to process " + cmds.length + " commands from NN"); + LOG.info("Took " + (endProcessCommands - startProcessCommands) + + "ms to process " + resp.getCommands().length + + " commands from NN"); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 9a499b65250..09b6634dab0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -154,6 +154,7 @@ import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer; import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; @@ -2688,7 +2689,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @return an array of datanode commands * @throws IOException */ - DatanodeCommand[] handleHeartbeat(DatanodeRegistration nodeReg, + HeartbeatResponse handleHeartbeat(DatanodeRegistration nodeReg, long capacity, long dfsUsed, long remaining, long blockPoolUsed, int xceiverCount, int xmitsInProgress, int failedVolumes) throws IOException { @@ -2699,16 +2700,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat( nodeReg, blockPoolId, capacity, dfsUsed, remaining, blockPoolUsed, xceiverCount, maxTransfer, failedVolumes); - if (cmds != null) { - return cmds; + if (cmds == null) { + DatanodeCommand cmd = upgradeManager.getBroadcastCommand(); + if (cmd != null) { + cmds = new DatanodeCommand[] {cmd}; + } } - - //check distributed upgrade - DatanodeCommand cmd = upgradeManager.getBroadcastCommand(); - if (cmd != null) { - return new DatanodeCommand[] {cmd}; - } - return null; + return new HeartbeatResponse(cmds); } finally { readUnlock(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 69b3f972c1f..d6ba4175c12 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -75,6 +75,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import 
org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand; +import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; @@ -857,7 +858,7 @@ class NameNodeRpcServer implements NamenodeProtocols { } @Override // DatanodeProtocol - public DatanodeCommand[] sendHeartbeat(DatanodeRegistration nodeReg, + public HeartbeatResponse sendHeartbeat(DatanodeRegistration nodeReg, long capacity, long dfsUsed, long remaining, long blockPoolUsed, int xmitsInProgress, int xceiverCount, int failedVolumes) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java index 5a4cae8a5e7..7b99f371239 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java @@ -22,8 +22,8 @@ import java.io.*; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocolR23Compatible.ClientNamenodeWireProtocol; import org.apache.hadoop.hdfs.server.protocolR23Compatible.DatanodeWireProtocol; @@ -92,7 +92,7 @@ public interface DatanodeProtocol extends VersionedProtocol { * sendHeartbeat() tells the NameNode that the DataNode is still * alive and well. Includes some status info, too. * It also gives the NameNode a chance to return - * an array of "DatanodeCommand" objects. + * an array of "DatanodeCommand" objects in HeartbeatResponse. * A DatanodeCommand tells the DataNode to invalidate local block(s), * or to copy them to other DataNodes, etc. * @param registration datanode registration information @@ -106,7 +106,7 @@ public interface DatanodeProtocol extends VersionedProtocol { * @throws IOException on error */ @Nullable - public DatanodeCommand[] sendHeartbeat(DatanodeRegistration registration, + public HeartbeatResponse sendHeartbeat(DatanodeRegistration registration, long capacity, long dfsUsed, long remaining, long blockPoolUsed, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/HeartbeatResponse.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/HeartbeatResponse.java new file mode 100644 index 00000000000..fb1a533afc0 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/HeartbeatResponse.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.protocol; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.io.ObjectWritable; +import org.apache.hadoop.io.Writable; + +@InterfaceAudience.Private +@InterfaceStability.Evolving +/** + * Response to {@link DatanodeProtocol#sendHeartbeat} + */ +public class HeartbeatResponse implements Writable { + /** Commands returned from the namenode to the datanode */ + private DatanodeCommand[] commands; + + public HeartbeatResponse() { + // Empty constructor required for Writable + } + + public HeartbeatResponse(DatanodeCommand[] cmds) { + commands = cmds; + } + + public DatanodeCommand[] getCommands() { + return commands; + } + + /////////////////////////////////////////// + // Writable + /////////////////////////////////////////// + @Override + public void write(DataOutput out) throws IOException { + int length = commands == null ? 0 : commands.length; + out.writeInt(length); + for (int i = 0; i < length; i++) { + ObjectWritable.writeObject(out, commands[i], commands[i].getClass(), + null, true); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + int length = in.readInt(); + commands = new DatanodeCommand[length]; + ObjectWritable objectWritable = new ObjectWritable(); + for (int i = 0; i < length; i++) { + commands[i] = (DatanodeCommand) ObjectWritable.readObject(in, + objectWritable, null); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeProtocolServerSideTranslatorR23.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeProtocolServerSideTranslatorR23.java index 2c806afd449..11b833fa0de 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeProtocolServerSideTranslatorR23.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeProtocolServerSideTranslatorR23.java @@ -110,11 +110,11 @@ public class DatanodeProtocolServerSideTranslatorR23 implements } @Override - public DatanodeCommandWritable[] sendHeartbeat( + public HeartbeatResponseWritable sendHeartbeat( DatanodeRegistrationWritable registration, long capacity, long dfsUsed, long remaining, long blockPoolUsed, int xmitsInProgress, int xceiverCount, int failedVolumes) throws IOException { - return DatanodeCommandWritable.convert(server.sendHeartbeat( + return HeartbeatResponseWritable.convert(server.sendHeartbeat( registration.convert(), capacity, dfsUsed, remaining, blockPoolUsed, xmitsInProgress, xceiverCount, failedVolumes)); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeProtocolTranslatorR23.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeProtocolTranslatorR23.java index 
1664940474b..fb29fffac33 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeProtocolTranslatorR23.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeProtocolTranslatorR23.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand; @@ -130,14 +131,14 @@ public class DatanodeProtocolTranslatorR23 implements } @Override - public DatanodeCommand[] sendHeartbeat(DatanodeRegistration registration, + public HeartbeatResponse sendHeartbeat(DatanodeRegistration registration, long capacity, long dfsUsed, long remaining, long blockPoolUsed, int xmitsInProgress, int xceiverCount, int failedVolumes) throws IOException { - return DatanodeCommandWritable.convert(rpcProxy.sendHeartbeat( - DatanodeRegistrationWritable.convert(registration), capacity, - dfsUsed, remaining, blockPoolUsed, xmitsInProgress, xceiverCount, - failedVolumes)); + return rpcProxy.sendHeartbeat( + DatanodeRegistrationWritable.convert(registration), capacity, dfsUsed, + remaining, blockPoolUsed, xmitsInProgress, xceiverCount, failedVolumes) + .convert(); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeWireProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeWireProtocol.java index f630053bf9a..8625c22a535 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeWireProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeWireProtocol.java @@ -99,7 +99,7 @@ public interface DatanodeWireProtocol extends VersionedProtocol { * @throws IOException on error */ @Nullable - public DatanodeCommandWritable[] sendHeartbeat( + public HeartbeatResponseWritable sendHeartbeat( DatanodeRegistrationWritable registration, long capacity, long dfsUsed, long remaining, long blockPoolUsed, int xmitsInProgress, int xceiverCount, int failedVolumes) throws IOException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/HeartbeatResponseWritable.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/HeartbeatResponseWritable.java new file mode 100644 index 00000000000..f7fe3db7b77 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/HeartbeatResponseWritable.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.protocolR23Compatible; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; +import org.apache.hadoop.io.ObjectWritable; +import org.apache.hadoop.io.Writable; + +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class HeartbeatResponseWritable implements Writable { + private DatanodeCommandWritable[] commands; + + public HeartbeatResponseWritable() { + // Empty constructor for Writable + } + + public HeartbeatResponseWritable(DatanodeCommandWritable[] cmds) { + commands = cmds; + } + + public HeartbeatResponse convert() { + return new HeartbeatResponse(DatanodeCommandWritable.convert(commands)); + } + + /////////////////////////////////////////// + // Writable + /////////////////////////////////////////// + @Override + public void write(DataOutput out) throws IOException { + int length = commands == null ? 0 : commands.length; + out.writeInt(length); + for (int i = 0; i < length; i++) { + ObjectWritable.writeObject(out, commands[i], commands[i].getClass(), + null, true); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + int length = in.readInt(); + commands = new DatanodeCommandWritable[length]; + ObjectWritable objectWritable = new ObjectWritable(); + for (int i = 0; i < length; i++) { + commands[i] = (DatanodeCommandWritable) ObjectWritable.readObject(in, + objectWritable, null); + } + } + + public static HeartbeatResponseWritable convert( + HeartbeatResponse resp) { + return new HeartbeatResponseWritable(DatanodeCommandWritable.convert(resp + .getCommands())); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java index c18a5c04fe0..45741ceae2a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java @@ -41,7 +41,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; public class TestHeartbeatHandling extends TestCase { /** * Test if - * {@link FSNamesystem#handleHeartbeat(DatanodeRegistration, long, long, long, long, int, int)} + * {@link FSNamesystem#handleHeartbeat} * can pick up replication and/or invalidate requests and observes the max * limit */ @@ -75,7 +75,8 @@ public class TestHeartbeatHandling extends TestCase { dd.addBlockToBeReplicated( new Block(i, 0, GenerationStamp.FIRST_VALID_STAMP), ONE_TARGET); } - DatanodeCommand[]cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem); + DatanodeCommand[] cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, + namesystem).getCommands(); assertEquals(1, cmds.length); 
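      // (Editor's note: the surrounding asserts check that this first heartbeat
      // returns exactly one DNA_TRANSFER command, batched up to
      // MAX_REPLICATE_LIMIT blocks.)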
assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction()); assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand)cmds[0]).getBlocks().length); @@ -85,26 +86,30 @@ public class TestHeartbeatHandling extends TestCase { blockList.add(new Block(i, 0, GenerationStamp.FIRST_VALID_STAMP)); } dd.addBlocksToBeInvalidated(blockList); - cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem); + cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem) + .getCommands(); assertEquals(2, cmds.length); assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction()); assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand)cmds[0]).getBlocks().length); assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction()); assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand)cmds[1]).getBlocks().length); - cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem); + cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem) + .getCommands(); assertEquals(2, cmds.length); assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction()); assertEquals(REMAINING_BLOCKS, ((BlockCommand)cmds[0]).getBlocks().length); assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction()); assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand)cmds[1]).getBlocks().length); - cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem); + cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem) + .getCommands(); assertEquals(1, cmds.length); assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction()); assertEquals(REMAINING_BLOCKS, ((BlockCommand)cmds[0]).getBlocks().length); - cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem); + cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem) + .getCommands(); assertEquals(null, cmds); } } finally { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java index afc003f9381..7d15900756a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java @@ -25,8 +25,6 @@ import java.util.Arrays; import java.util.EnumSet; import java.util.List; -import javax.security.auth.login.LoginException; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; @@ -78,7 +76,7 @@ import org.apache.log4j.LogManager; *
    • -logLevel L specifies the logging level when the benchmark runs. * The default logging level is {@link Level#ERROR}.
    • *
    • -UGCacheRefreshCount G will cause the benchmark to call - * {@link NameNode#refreshUserToGroupsMappings()} after + * {@link NameNodeRpcServer#refreshUserToGroupsMappings} after * every G operations, which purges the name-node's user group cache. * By default the refresh is never called.
    • *
    • -keepResults do not clean up the name-space after execution.
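    [Editor's illustration: a hypothetical command line combining the options
    documented above. The class name and the -op selector reflect the
    benchmark's existing CLI and are assumptions, not part of this patch.]

        hadoop org.apache.hadoop.hdfs.server.namenode.NNThroughputBenchmark \
            -op blockReport -logLevel INFO -UGCacheRefreshCount 100 -keepResults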
    • @@ -104,7 +102,7 @@ public class NNThroughputBenchmark { static NameNode nameNode; static NamenodeProtocols nameNodeProto; - NNThroughputBenchmark(Configuration conf) throws IOException, LoginException { + NNThroughputBenchmark(Configuration conf) throws IOException { config = conf; // We do not need many handlers, since each thread simulates a handler // by calling name-node methods directly @@ -125,7 +123,7 @@ public class NNThroughputBenchmark { nameNodeProto = nameNode.getRpcServer(); } - void close() throws IOException { + void close() { nameNode.stop(); } @@ -806,7 +804,8 @@ public class NNThroughputBenchmark { // register datanode // TODO:FEDERATION currently a single block pool is supported DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration, - DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED, 0, 0, 0); + DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED, 0, 0, 0) + .getCommands(); if(cmds != null) { for (DatanodeCommand cmd : cmds ) { if(LOG.isDebugEnabled()) { @@ -851,7 +850,8 @@ public class NNThroughputBenchmark { // register datanode // TODO:FEDERATION currently a single block pool is supported DatanodeCommand[] cmds = nameNodeProto.sendHeartbeat(dnRegistration, - DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED, 0, 0, 0); + DF_CAPACITY, DF_USED, DF_CAPACITY - DF_USED, DF_USED, 0, 0, 0) + .getCommands(); if (cmds != null) { for (DatanodeCommand cmd : cmds) { if (cmd.getAction() == DatanodeProtocol.DNA_TRANSFER) { @@ -916,7 +916,7 @@ public class NNThroughputBenchmark { config.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 3 * 60); parseArguments(args); // adjust replication to the number of data-nodes - this.replication = (short)Math.min((int)replication, getNumDatanodes()); + this.replication = (short)Math.min(replication, getNumDatanodes()); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index d0aa51f2b19..c7cc61dc137 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -26,8 +26,8 @@ import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; -import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.security.AccessControlException; @@ -90,7 +90,7 @@ public class NameNodeAdapter { return ns.getDelegationTokenSecretManager(); } - public static DatanodeCommand[] sendHeartBeat(DatanodeRegistration nodeReg, + public static HeartbeatResponse sendHeartBeat(DatanodeRegistration nodeReg, DatanodeDescriptor dd, FSNamesystem namesystem) throws IOException { return namesystem.handleHeartbeat(nodeReg, dd.getCapacity(), dd.getDfsUsed(), dd.getRemaining(), dd.getBlockPoolUsed(), 0, 0, 0); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java index 2e73ec556a2..33a71294571 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java @@ -128,7 +128,8 @@ public class TestDeadDatanode { // Ensure heartbeat from dead datanode is rejected with a command // that asks datanode to register again - DatanodeCommand[] cmd = dnp.sendHeartbeat(reg, 0, 0, 0, 0, 0, 0, 0); + DatanodeCommand[] cmd = dnp.sendHeartbeat(reg, 0, 0, 0, 0, 0, 0, 0) + .getCommands(); Assert.assertEquals(1, cmd.length); Assert.assertEquals(cmd[0].getAction(), RegisterCommand.REGISTER .getAction()); From 4cbead84846f4f65c843fd5101ecc0bd7595c9c5 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 1 Dec 2011 20:38:46 +0000 Subject: [PATCH 029/177] HDFS-2622. Fix TestDFSUpgrade in HA branch. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1209230 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../apache/hadoop/hdfs/TestDFSUpgrade.java | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 728582a4f82..9188a55204c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -33,3 +33,5 @@ HDFS-1975. Support for sharing the namenode state from active to standby. (jiten HDFS-1971. Send block report from datanode to both active and standby namenodes. (sanjay, todd via suresh) HDFS-2616. Change DatanodeProtocol#sendHeartbeat() to return HeartbeatResponse. (suresh) + +HDFS-2622. Fix TestDFSUpgrade in HA branch. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java index a308c230cb0..9246e6e42f6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hdfs.server.namenode.TestParallelImageWrite; import static org.apache.hadoop.hdfs.server.namenode.NNStorage.getInProgressEditsFileName; import static org.apache.hadoop.hdfs.server.namenode.NNStorage.getImageFileName; +import static org.apache.hadoop.test.GenericTestUtils.assertExists; import org.apache.hadoop.util.StringUtils; import org.junit.BeforeClass; import org.junit.Ignore; @@ -51,7 +52,7 @@ import static org.junit.Assert.*; */ public class TestDFSUpgrade { - private static final int EXPECTED_TXID = 17; + private static final int EXPECTED_TXID = 33; private static final Log LOG = LogFactory.getLog(TestDFSUpgrade.class.getName()); private Configuration conf; private int testCounter = 0; @@ -80,16 +81,16 @@ public class TestDFSUpgrade { Joiner.on(" \n").join(new File(baseDir, "current").list())); LOG.info("=================="); - assertTrue(new File(baseDir,"current").isDirectory()); - assertTrue(new File(baseDir,"current/VERSION").isFile()); - assertTrue(new File(baseDir,"current/" - + getInProgressEditsFileName(imageTxId + 1)).isFile()); - assertTrue(new File(baseDir,"current/" - + getImageFileName(imageTxId)).isFile()); - assertTrue(new File(baseDir,"current/seen_txid").isFile()); + assertExists(new File(baseDir,"current")); + assertExists(new File(baseDir,"current/VERSION")); + assertExists(new File(baseDir,"current/" + + getInProgressEditsFileName(imageTxId + 1))); + assertExists(new File(baseDir,"current/" + + getImageFileName(imageTxId))); + assertExists(new File(baseDir,"current/seen_txid")); File previous = new File(baseDir, "previous"); - assertTrue(previous.isDirectory()); + assertExists(previous); assertEquals(UpgradeUtilities.checksumContents(NAME_NODE, previous), UpgradeUtilities.checksumMasterNameNodeContents()); } From f4fa76719e622a4ef883c51ec0abc6e6e6ddf09e Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 1 Dec 2011 21:26:08 +0000 Subject: [PATCH 030/177] HDFS-2612. Handle refreshNameNodes in federated HA clusters. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1209249 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/datanode/BPOfferService.java | 19 ++ .../server/datanode/BlockPoolManager.java | 251 ++++++++++++++++++ .../hadoop/hdfs/server/datanode/DataNode.java | 165 +----------- .../server/datanode/TestBlockPoolManager.java | 138 ++++++++++ 5 files changed, 412 insertions(+), 163 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 9188a55204c..9fbc7746730 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -35,3 +35,5 @@ HDFS-1971. 
Send block report from datanode to both active and standby namenodes. HDFS-2616. Change DatanodeProtocol#sendHeartbeat() to return HeartbeatResponse. (suresh) HDFS-2622. Fix TestDFSUpgrade in HA branch. (todd) + +HDFS-2612. Handle refreshNameNodes in federated HA clusters (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index 85807f6d5ae..62b825be56f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -19,7 +19,9 @@ package org.apache.hadoop.hdfs.server.datanode; import java.io.IOException; import java.net.InetSocketAddress; +import java.util.ArrayList; import java.util.List; +import java.util.Set; import java.util.concurrent.CopyOnWriteArrayList; import org.apache.commons.logging.Log; @@ -42,6 +44,8 @@ import org.apache.hadoop.ipc.RPC; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; /** * One instance per block-pool/namespace on the DN, which handles the @@ -89,6 +93,21 @@ class BPOfferService { this.bpServiceToActive = this.bpServices.get(0); } + void refreshNNList(ArrayList addrs) throws IOException { + Set oldAddrs = Sets.newHashSet(); + for (BPServiceActor actor : bpServices) { + oldAddrs.add(actor.getNNSocketAddress()); + } + Set newAddrs = Sets.newHashSet(addrs); + + if (!Sets.symmetricDifference(oldAddrs, newAddrs).isEmpty()) { + // Keep things simple for now -- we can implement this at a later date. + throw new IOException( + "HA does not currently support adding a new standby to a running DN. " + + "Please do a rolling restart of DNs to reconfigure the list of NNs."); + } + } + /** * returns true if BP thread has completed initialization of storage * and has registered with the corresponding namenode diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java new file mode 100644 index 00000000000..3176be20784 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java @@ -0,0 +1,251 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.datanode; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.security.UserGroupInformation; + +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; + +/** + * Manages the BPOfferService objects for the data node. + * Creation, removal, starting, stopping, shutdown on BPOfferService + * objects must be done via APIs in this class. + */ +@InterfaceAudience.Private +class BlockPoolManager { + private static final Log LOG = DataNode.LOG; + + private final Map bpByNameserviceId = + Maps.newHashMap(); + private final Map bpByBlockPoolId = + Maps.newHashMap(); + private final List offerServices = + Lists.newArrayList(); + + private final DataNode dn; + + //This lock is used only to ensure exclusion of refreshNamenodes + private final Object refreshNamenodesLock = new Object(); + + BlockPoolManager(DataNode dn) { + this.dn = dn; + } + + synchronized void addBlockPool(BPOfferService bpos) { + Preconditions.checkArgument(offerServices.contains(bpos), + "Unknown BPOS: %s", bpos); + if (bpos.getBlockPoolId() == null) { + throw new IllegalArgumentException("Null blockpool id"); + } + bpByBlockPoolId.put(bpos.getBlockPoolId(), bpos); + } + + /** + * Returns the array of BPOfferService objects. + * Caution: The BPOfferService returned could be shutdown any time. 
+ */ + synchronized BPOfferService[] getAllNamenodeThreads() { + BPOfferService[] bposArray = new BPOfferService[offerServices.size()]; + return offerServices.toArray(bposArray); + } + + synchronized BPOfferService get(String bpid) { + return bpByBlockPoolId.get(bpid); + } + + // TODO(HA) would be good to kill this + synchronized BPOfferService get(InetSocketAddress addr) { + for (BPOfferService bpos : offerServices) { + if (bpos.containsNN(addr)) { + return bpos; + } + } + return null; + } + + synchronized void remove(BPOfferService t) { + offerServices.remove(t); + bpByBlockPoolId.remove(t.getBlockPoolId()); + + boolean removed = false; + for (Iterator it = bpByNameserviceId.values().iterator(); + it.hasNext() && !removed;) { + BPOfferService bpos = it.next(); + if (bpos == t) { + it.remove(); + LOG.info("Removed " + bpos); + removed = true; + } + } + + if (!removed) { + LOG.warn("Couldn't remove BPOS " + t + " from bpByNameserviceId map"); + } + } + + void shutDownAll() throws InterruptedException { + BPOfferService[] bposArray = this.getAllNamenodeThreads(); + + for (BPOfferService bpos : bposArray) { + bpos.stop(); //interrupts the threads + } + //now join + for (BPOfferService bpos : bposArray) { + bpos.join(); + } + } + + synchronized void startAll() throws IOException { + try { + UserGroupInformation.getLoginUser().doAs( + new PrivilegedExceptionAction() { + public Object run() throws Exception { + for (BPOfferService bpos : offerServices) { + bpos.start(); + } + return null; + } + }); + } catch (InterruptedException ex) { + IOException ioe = new IOException(); + ioe.initCause(ex.getCause()); + throw ioe; + } + } + + void joinAll() { + for (BPOfferService bpos: this.getAllNamenodeThreads()) { + bpos.join(); + } + } + + void refreshNamenodes(Configuration conf) + throws IOException { + LOG.info("Refresh request received for nameservices: " + + conf.get(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES)); + + Map> newAddressMap = + DFSUtil.getNNServiceRpcAddresses(conf); + + synchronized (refreshNamenodesLock) { + doRefreshNamenodes(newAddressMap); + } + } + + private void doRefreshNamenodes( + Map> addrMap) throws IOException { + assert Thread.holdsLock(refreshNamenodesLock); + + Set toRefresh = Sets.newHashSet(); + Set toAdd = Sets.newHashSet(); + Set toRemove; + + synchronized (this) { + // Step 1. For each of the new nameservices, figure out whether + // it's an update of the set of NNs for an existing NS, + // or an entirely new nameservice. + for (String nameserviceId : addrMap.keySet()) { + if (bpByNameserviceId.containsKey(nameserviceId)) { + toRefresh.add(nameserviceId); + } else { + toAdd.add(nameserviceId); + } + } + + // Step 2. Any nameservices we currently have but are no longer present + // need to be removed. + toRemove = Sets.newHashSet(Sets.difference( + bpByNameserviceId.keySet(), addrMap.keySet())); + + assert toRefresh.size() + toAdd.size() == + addrMap.size() : + "toAdd: " + Joiner.on(",").useForNull("").join(toAdd) + + " toRemove: " + Joiner.on(",").useForNull("").join(toRemove) + + " toRefresh: " + Joiner.on(",").useForNull("").join(toRefresh); + + + // Step 3. Start new nameservices + if (!toAdd.isEmpty()) { + LOG.info("Starting BPOfferServices for nameservices: " + + Joiner.on(",").useForNull("").join(toAdd)); + + for (String nsToAdd : toAdd) { + ArrayList addrs = + Lists.newArrayList(addrMap.get(nsToAdd).values()); + BPOfferService bpos = createBPOS(addrs); + bpByNameserviceId.put(nsToAdd, bpos); + offerServices.add(bpos); + } + } + startAll(); + } + + // Step 4. 
Shut down old nameservices. This happens outside + // of the synchronized(this) lock since they need to call + // back to .remove() from another thread + if (!toRemove.isEmpty()) { + LOG.info("Stopping BPOfferServices for nameservices: " + + Joiner.on(",").useForNull("").join(toRemove)); + + for (String nsToRemove : toRemove) { + BPOfferService bpos = bpByNameserviceId.get(nsToRemove); + bpos.stop(); + bpos.join(); + // they will call remove on their own + } + } + + // Step 5. Update nameservices whose NN list has changed + if (!toRefresh.isEmpty()) { + LOG.info("Refreshing list of NNs for nameservices: " + + Joiner.on(",").useForNull("").join(toRefresh)); + + for (String nsToRefresh : toRefresh) { + BPOfferService bpos = bpByNameserviceId.get(nsToRefresh); + ArrayList addrs = + Lists.newArrayList(addrMap.get(nsToRefresh).values()); + bpos.refreshNNList(addrs); + } + } + } + + /** + * Extracted out for test purposes. + */ + protected BPOfferService createBPOS(List nnAddrs) { + return new BPOfferService(nnAddrs, dn); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index dc3a18163b2..b2c974c28b1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -48,7 +48,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_SIMULATEDDATASTO import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_STARTUP_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_STORAGEID_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_USER_NAME_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_FEDERATION_NAMESERVICES; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HTTPS_ENABLE_KEY; import java.io.BufferedOutputStream; @@ -71,12 +70,10 @@ import java.util.AbstractList; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; @@ -91,7 +88,6 @@ import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DFSUtil.ConfiguredNNAddress; import org.apache.hadoop.hdfs.HDFSPolicyProvider; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -169,7 +165,6 @@ import org.apache.hadoop.util.VersionInfo; import org.mortbay.util.ajax.JSON; import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -236,163 +231,6 @@ public class DataNode extends Configured return NetUtils.createSocketAddr(target); } - /** - * Manages he BPOfferService objects for the data node. - * Creation, removal, starting, stopping, shutdown on BPOfferService - * objects must be done via APIs in this class. 
- */ - @InterfaceAudience.Private - class BlockPoolManager { - private final Map bpMapping; - private final List offerServices; - - //This lock is used only to ensure exclusion of refreshNamenodes - private final Object refreshNamenodesLock = new Object(); - - BlockPoolManager(Configuration conf) - throws IOException { - bpMapping = new HashMap(); - offerServices = new ArrayList(); - - Map> map = - DFSUtil.getNNServiceRpcAddresses(conf); - for (Entry> entry : - map.entrySet()) { - List nnList = Lists.newArrayList(entry.getValue().values()); - BPOfferService bpos = new BPOfferService(nnList, DataNode.this); - offerServices.add(bpos); - } - } - - synchronized void addBlockPool(BPOfferService bpos) { - Preconditions.checkArgument(offerServices.contains(bpos), - "Unknown BPOS: %s", bpos); - if (bpos.getBlockPoolId() == null) { - throw new IllegalArgumentException("Null blockpool id"); - } - LOG.info("===> registering in bpmapping: " + bpos); - bpMapping.put(bpos.getBlockPoolId(), bpos); - } - - /** - * Returns the array of BPOfferService objects. - * Caution: The BPOfferService returned could be shutdown any time. - */ - synchronized BPOfferService[] getAllNamenodeThreads() { - BPOfferService[] bposArray = new BPOfferService[offerServices.size()]; - return offerServices.toArray(bposArray); - } - - synchronized BPOfferService get(String bpid) { - return bpMapping.get(bpid); - } - - // TODO(HA) would be good to kill this - synchronized BPOfferService get(InetSocketAddress addr) { - for (BPOfferService bpos : offerServices) { - if (bpos.containsNN(addr)) { - return bpos; - } - } - return null; - } - - synchronized void remove(BPOfferService t) { - offerServices.remove(t); - bpMapping.remove(t.getBlockPoolId()); - } - - void shutDownAll() throws InterruptedException { - BPOfferService[] bposArray = this.getAllNamenodeThreads(); - - for (BPOfferService bpos : bposArray) { - bpos.stop(); //interrupts the threads - } - //now join - for (BPOfferService bpos : bposArray) { - bpos.join(); - } - } - - synchronized void startAll() throws IOException { - try { - UserGroupInformation.getLoginUser().doAs( - new PrivilegedExceptionAction() { - public Object run() throws Exception { - for (BPOfferService bpos : offerServices) { - bpos.start(); - } - return null; - } - }); - } catch (InterruptedException ex) { - IOException ioe = new IOException(); - ioe.initCause(ex.getCause()); - throw ioe; - } - } - - void joinAll() { - for (BPOfferService bpos: this.getAllNamenodeThreads()) { - bpos.join(); - } - } - - void refreshNamenodes(Configuration conf) - throws IOException { - throw new UnsupportedOperationException("TODO(HA)"); -/* - * TODO(HA) - - LOG.info("Refresh request received for nameservices: " - + conf.get(DFS_FEDERATION_NAMESERVICES)); - - // TODO(HA): need to update this for multiple NNs per nameservice - // For now, just list all of the NNs into this set - Map> newAddressMap = - DFSUtil.getNNServiceRpcAddresses(conf); - Set newAddresses = Sets.newHashSet(); - for (ConfiguredNNAddress cnn : DFSUtil.flattenAddressMap(newAddressMap)) { - newAddresses.add(cnn.getAddress()); - } - - List toShutdown = new ArrayList(); - List toStart = new ArrayList(); - synchronized (refreshNamenodesLock) { - synchronized (this) { - for (InetSocketAddress nnaddr : offerServices.keySet()) { - if (!(newAddresses.contains(nnaddr))) { - toShutdown.add(offerServices.get(nnaddr)); - } - } - for (InetSocketAddress nnaddr : newAddresses) { - if (!(offerServices.containsKey(nnaddr))) { - toStart.add(nnaddr); - } - } - - for 
(InetSocketAddress nnaddr : toStart) { - BPOfferService bpos = new BPOfferService(nnaddr, DataNode.this); - offerServices.put(bpos.getNNSocketAddress(), bpos); - } - } - - for (BPOfferService bpos : toShutdown) { - bpos.stop(); - bpos.join(); - } - - // stoping the BPOSes causes them to call remove() on their own when they - // clean up. - - // Now start the threads that are not already running. - startAll(); - } - */ - } - - } - volatile boolean shouldRun = true; private BlockPoolManager blockPoolManager; public volatile FSDatasetInterface data = null; @@ -779,7 +617,8 @@ public class DataNode extends Configured metrics = DataNodeMetrics.create(conf, getMachineName()); - blockPoolManager = new BlockPoolManager(conf); + blockPoolManager = new BlockPoolManager(this); + blockPoolManager.refreshNamenodes(conf); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java new file mode 100644 index 00000000000..c0301ac8145 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockPoolManager.java @@ -0,0 +1,138 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.datanode; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + + +public class TestBlockPoolManager { + private Log LOG = LogFactory.getLog(TestBlockPoolManager.class); + private DataNode mockDN = Mockito.mock(DataNode.class); + private BlockPoolManager bpm; + private StringBuilder log = new StringBuilder(); + private int mockIdx = 1; + + @Before + public void setupBPM() { + bpm = new BlockPoolManager(mockDN){ + + @Override + protected BPOfferService createBPOS(List nnAddrs) { + final int idx = mockIdx++; + doLog("create #" + idx); + final BPOfferService bpos = Mockito.mock(BPOfferService.class); + Mockito.doReturn("Mock BPOS #" + idx).when(bpos).toString(); + // Log refreshes + try { + Mockito.doAnswer( + new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + doLog("refresh #" + idx); + return null; + } + }).when(bpos).refreshNNList( + Mockito.>any()); + } catch (IOException e) { + throw new RuntimeException(e); + } + // Log stops + Mockito.doAnswer( + new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + doLog("stop #" + idx); + bpm.remove(bpos); + return null; + } + }).when(bpos).stop(); + return bpos; + } + }; + } + + private void doLog(String string) { + synchronized(log) { + LOG.info(string); + log.append(string).append("\n"); + } + } + + @Test + public void testSimpleSingleNS() throws Exception { + Configuration conf = new Configuration(); + conf.set(DFSConfigKeys.FS_DEFAULT_NAME_KEY, + "hdfs://mock1:8020"); + bpm.refreshNamenodes(conf); + assertEquals("create #1\n", log.toString()); + } + + @Test + public void testFederationRefresh() throws Exception { + Configuration conf = new Configuration(); + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, + "ns1,ns2"); + addNN(conf, "ns1", "mock1:8020"); + addNN(conf, "ns2", "mock1:8020"); + bpm.refreshNamenodes(conf); + assertEquals( + "create #1\n" + + "create #2\n", log.toString()); + log.setLength(0); + + // Remove the first NS + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, + "ns1"); + bpm.refreshNamenodes(conf); + assertEquals( + "stop #1\n" + + "refresh #2\n", log.toString()); + log.setLength(0); + + // Add back an NS -- this creates a new BPOS since the old + // one for ns2 should have been previously retired + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, + "ns1,ns2"); + bpm.refreshNamenodes(conf); + assertEquals( + "create #3\n" + + "refresh #2\n", log.toString()); + } + + private static void addNN(Configuration conf, String ns, String addr) { + String key = DFSUtil.addKeySuffixes( + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, ns); + conf.set(key, addr); + } +} From 28dbd56de0456c3504ce2d2227a22027c5d46d52 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 1 Dec 2011 21:37:08 +0000 Subject: [PATCH 031/177] HDFS-2623. Add test case for hot standby capability. Contributed by Todd Lipcon. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1209256 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../apache/hadoop/hdfs/AppendTestUtil.java | 2 +- .../hadoop/hdfs/TestDFSClientFailover.java | 24 ++-- .../server/namenode/ha/TestEditLogTailer.java | 9 +- .../server/namenode/ha/TestStandbyIsHot.java | 110 ++++++++++++++++++ 5 files changed, 137 insertions(+), 10 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 9fbc7746730..6c932936712 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -37,3 +37,5 @@ HDFS-2616. Change DatanodeProtocol#sendHeartbeat() to return HeartbeatResponse. HDFS-2622. Fix TestDFSUpgrade in HA branch. (todd) HDFS-2612. Handle refreshNameNodes in federated HA clusters (todd) + +HDFS-2623. Add test case for hot standby capability (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java index 384cfe75b91..f8bc89afaec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java @@ -109,7 +109,7 @@ public class AppendTestUtil { out.write(bytes); } - static void check(FileSystem fs, Path p, long length) throws IOException { + public static void check(FileSystem fs, Path p, long length) throws IOException { int i = -1; try { final FileStatus status = fs.getFileStatus(p); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java index b144a8087c5..a1db640c2c9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java @@ -82,13 +82,28 @@ public class TestDFSClientFailover { AppendTestUtil.write(out2, 0, FILE_LENGTH_TO_VERIFY); out1.close(); out2.close(); + + FileSystem fs = configureFailoverFs(cluster, conf); + AppendTestUtil.check(fs, TEST_FILE, FILE_LENGTH_TO_VERIFY); + cluster.getNameNode(0).stop(); + AppendTestUtil.check(fs, TEST_FILE, FILE_LENGTH_TO_VERIFY); + + fs.close(); + } + + public static FileSystem configureFailoverFs(MiniDFSCluster cluster, Configuration conf) + throws IOException, URISyntaxException { + InetSocketAddress nnAddr1 = cluster.getNameNode(0).getNameNodeAddress(); + InetSocketAddress nnAddr2 = cluster.getNameNode(1).getNameNodeAddress(); + String nsId = "nameserviceId1"; final String logicalNameNodeId = "ha-nn-uri"; String nameNodeId1 = "nn1"; String nameNodeId2 = "nn2"; + conf = new Configuration(conf); String address1 = "hdfs://" + nnAddr1.getHostName() + ":" + nnAddr1.getPort(); String address2 = "hdfs://" + nnAddr2.getHostName() + ":" + nnAddr2.getPort(); conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, @@ -103,12 +118,7 @@ public class TestDFSClientFailover { ConfiguredFailoverProxyProvider.class.getName()); FileSystem fs = FileSystem.get(new URI("hdfs://" + logicalNameNodeId), conf); - - 
AppendTestUtil.check(fs, TEST_FILE, FILE_LENGTH_TO_VERIFY); - cluster.getNameNode(0).stop(); - AppendTestUtil.check(fs, TEST_FILE, FILE_LENGTH_TO_VERIFY); - - fs.close(); + return fs; } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java index 4174a9e5618..b22ef02b864 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.log4j.Level; +import org.junit.Assert; import org.junit.Test; public class TestEditLogTailer { @@ -99,12 +100,13 @@ public class TestEditLogTailer { return DIR_PREFIX + suffix; } - private static void waitForStandbyToCatchUp(NameNode active, + static void waitForStandbyToCatchUp(NameNode active, NameNode standby) throws InterruptedException, IOException { long activeTxId = active.getNamesystem().getFSImage().getEditLog() .getLastWrittenTxId(); + // TODO: we should really just ask for a log roll here doSaveNamespace(active); long start = System.currentTimeMillis(); @@ -112,10 +114,13 @@ public class TestEditLogTailer { long nn2HighestTxId = standby.getNamesystem().getFSImage() .getLastAppliedTxId(); if (nn2HighestTxId >= activeTxId) { - break; + return; } Thread.sleep(SLEEP_TIME); } + Assert.fail("Standby did not catch up to txid " + activeTxId + + " (currently at " + + standby.getNamesystem().getFSImage().getLastAppliedTxId() + ")"); } private static void doSaveNamespace(NameNode nn) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java new file mode 100644 index 00000000000..036e914cee9 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.util.concurrent.TimeoutException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.AppendTestUtil; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Test; + +import com.google.common.base.Supplier; + +/** + * The hotornot.com of unit tests: makes sure that the standby not only + * has namespace information, but also has the correct block reports, etc. + */ +public class TestStandbyIsHot { + protected static final Log LOG = LogFactory.getLog( + TestStandbyIsHot.class); + private static final String TEST_FILE_DATA = "hello highly available world"; + private static final String TEST_FILE = "/testStandbyIsHot"; + private static final Path TEST_FILE_PATH = new Path(TEST_FILE); + + @Test + public void testStandbyIsHot() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(3) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + + NameNode nn1 = cluster.getNameNode(0); + NameNode nn2 = cluster.getNameNode(1); + nn2.getNamesystem().getEditLogTailer().setSleepTime(250); + nn2.getNamesystem().getEditLogTailer().interrupt(); + + FileSystem fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + + Thread.sleep(1000); + System.err.println("=================================="); + DFSTestUtil.writeFile(fs, TEST_FILE_PATH, TEST_FILE_DATA); + // Have to force an edit log roll so that the standby catches up + nn1.getRpcServer().rollEditLog(); + System.err.println("=================================="); + + waitForBlockLocations(nn2, TEST_FILE, 3); + + nn1.stop(); + cluster.transitionToActive(1); + + assertEquals(TEST_FILE_DATA, DFSTestUtil.readFile(fs, TEST_FILE_PATH)); + + } finally { + cluster.shutdown(); + } + } + + private void waitForBlockLocations(final NameNode nn, + final String path, final int expectedReplicas) + throws Exception { + GenericTestUtils.waitFor(new Supplier() { + + @Override + public Boolean get() { + try { + LocatedBlocks locs = NameNodeAdapter.getBlockLocations(nn, path, 0, 1000); + LOG.info("Got locs: " + locs); + return locs.getLastLocatedBlock().getLocations().length == expectedReplicas; + } catch (IOException e) { + LOG.warn("No block locations yet: " + e.getMessage()); + return false; + } + } + }, 500, 10000); + + } +} From 74b1f069ccaf0cf0536d1e5e8389ba54c81515c4 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 2 Dec 2011 00:03:17 +0000 Subject: [PATCH 032/177] Amend HDFS-2616 to fix mocking in TestBPOfferService git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1209315 13f79535-47bb-0310-9956-ffa450edef68 --- .../hdfs/server/datanode/TestBPOfferService.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java index 33b0e64aed1..de26891f951 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; import org.apache.hadoop.hdfs.server.protocol.BlockCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; import org.apache.hadoop.test.GenericTestUtils; @@ -97,6 +98,18 @@ public class TestBPOfferService { new NamespaceInfo(1, FAKE_CLUSTERID, FAKE_BPID, 0, HdfsConstants.LAYOUT_VERSION)) .when(mock).versionRequest(); + + Mockito.doReturn(new HeartbeatResponse(null)) + .when(mock).sendHeartbeat( + Mockito.any(DatanodeRegistration.class), + Mockito.anyLong(), + Mockito.anyLong(), + Mockito.anyLong(), + Mockito.anyLong(), + Mockito.anyInt(), + Mockito.anyInt(), + Mockito.anyInt()); + return mock; } From b3f28dbb3d1ab6b2f686efdd7bdb064426177f21 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 5 Dec 2011 06:36:00 +0000 Subject: [PATCH 033/177] HDFS-2626. BPOfferService.verifyAndSetNamespaceInfo needs to be synchronized. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1210340 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../apache/hadoop/hdfs/server/datanode/BPOfferService.java | 2 +- .../apache/hadoop/hdfs/server/datanode/BPServiceActor.java | 4 +--- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 6c932936712..d5f4d0d7f79 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -39,3 +39,5 @@ HDFS-2622. Fix TestDFSUpgrade in HA branch. (todd) HDFS-2612. Handle refreshNameNodes in federated HA clusters (todd) HDFS-2623. Add test case for hot standby capability (todd) + +HDFS-2626. 
BPOfferService.verifyAndSetNamespaceInfo needs to be synchronized (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index 62b825be56f..eb611bffcb0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -262,7 +262,7 @@ class BPOfferService { * verifies that this namespace matches (eg to prevent a misconfiguration * where a StandbyNode from a different cluster is specified) */ - void verifyAndSetNamespaceInfo(NamespaceInfo nsInfo) throws IOException { + synchronized void verifyAndSetNamespaceInfo(NamespaceInfo nsInfo) throws IOException { if (this.bpNSInfo == null) { this.bpNSInfo = nsInfo; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index e83ec99c1ac..bf49cc0a6b9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -336,8 +336,6 @@ class BPServiceActor implements Runnable { HeartbeatResponse sendHeartBeat() throws IOException { LOG.info("heartbeat: " + this); - // TODO: saw an NPE here - maybe if the two BPOS register at - // same time, this one won't block on the other one? return bpNamenode.sendHeartbeat(bpRegistration, dn.getFSDataset().getCapacity(), dn.getFSDataset().getDfsUsed(), @@ -632,4 +630,4 @@ class BPServiceActor implements Runnable { } } -} \ No newline at end of file +} From c7f5167845ce52b060d4f1037ed2ddee5cfa3e4e Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 5 Dec 2011 06:37:46 +0000 Subject: [PATCH 034/177] HDFS-2624. ConfiguredFailoverProxyProvider doesn't correctly stop ProtocolTranslators. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1210341 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../src/main/java/org/apache/hadoop/hdfs/DFSClient.java | 3 ++- .../namenode/ha/ConfiguredFailoverProxyProvider.java | 7 ++++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index d5f4d0d7f79..5de1611b329 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -41,3 +41,5 @@ HDFS-2612. Handle refreshNameNodes in federated HA clusters (todd) HDFS-2623. Add test case for hot standby capability (todd) HDFS-2626. BPOfferService.verifyAndSetNamespaceInfo needs to be synchronized (todd) + +HDFS-2624. 
ConfiguredFailoverProxyProvider doesn't correctly stop ProtocolTranslators (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 9f444155262..c0533eed1e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -439,8 +439,9 @@ public class DFSClient implements java.io.Closeable { // fall through - lets try the stopProxy LOG.warn("Exception closing namenode, stopping the proxy"); } + } else { + RPC.stopProxy(namenode); } - RPC.stopProxy(namenode); } /** Abort and release resources held. Ignore all errors. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index 483d9eb6230..8239c5e03b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; +import java.io.Closeable; import java.io.IOException; import java.net.InetSocketAddress; import java.util.ArrayList; @@ -126,7 +127,11 @@ public class ConfiguredFailoverProxyProvider implements FailoverProxyProvider, public synchronized void close() throws IOException { for (AddressRpcProxyPair proxy : proxies) { if (proxy.namenode != null) { - RPC.stopProxy(proxy.namenode); + if (proxy.namenode instanceof Closeable) { + ((Closeable)proxy.namenode).close(); + } else { + RPC.stopProxy(proxy.namenode); + } } } } From ad7fe4e21eb778f5a9700212751b6a0180402346 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 5 Dec 2011 06:38:52 +0000 Subject: [PATCH 035/177] HDFS-2625. TestDfsOverAvroRpc failing after introduction of HeartbeatResponse type. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1210342 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hadoop/hdfs/server/blockmanagement/DatanodeManager.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 5de1611b329..6b67be47f0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -43,3 +43,5 @@ HDFS-2623. Add test case for hot standby capability (todd) HDFS-2626. BPOfferService.verifyAndSetNamespaceInfo needs to be synchronized (todd) HDFS-2624. ConfiguredFailoverProxyProvider doesn't correctly stop ProtocolTranslators (todd) + +HDFS-2625. 
TestDfsOverAvroRpc failing after introduction of HeartbeatResponse type (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 33b648c5bad..0996fb71203 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -921,7 +921,7 @@ public class DatanodeManager { } } - return null; + return new DatanodeCommand[0]; } /** From 649144435718c7f446e9a056fc2988dc75fae14c Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 5 Dec 2011 21:13:07 +0000 Subject: [PATCH 036/177] Fix to previous trunk merge - RPC's addProtocol method now needs an RpcKind git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1210638 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop/hdfs/server/namenode/NameNodeRpcServer.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index f005f8b9d4f..f0ac86ae8c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -163,7 +163,8 @@ class NameNodeRpcServer implements NamenodeProtocols { RefreshUserMappingsProtocol.class, this); this.serviceRpcServer.addProtocol(RpcKind.RPC_WRITABLE, GetUserMappingsProtocol.class, this); - this.serviceRpcServer.addProtocol(HAServiceProtocol.class, this); + this.serviceRpcServer.addProtocol(RpcKind.RPC_WRITABLE, + HAServiceProtocol.class, this); this.serviceRPCAddress = this.serviceRpcServer.getListenerAddress(); nn.setRpcServiceServerAddress(conf, serviceRPCAddress); @@ -188,7 +189,8 @@ class NameNodeRpcServer implements NamenodeProtocols { RefreshUserMappingsProtocol.class, this); this.clientRpcServer.addProtocol(RpcKind.RPC_WRITABLE, GetUserMappingsProtocol.class, this); - this.clientRpcServer.addProtocol(HAServiceProtocol.class, this); + this.clientRpcServer.addProtocol(RpcKind.RPC_WRITABLE, + HAServiceProtocol.class, this); // set service-level authorization security policy From f39aac60e0b0c2cd4c82607c02501dde43afe02c Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 5 Dec 2011 22:10:35 +0000 Subject: [PATCH 037/177] Merge trunk into branch. 
Resolved conflicts generated by commit of HDFS-1580 in trunk: - made EditLogInputStream.isInProgress public - fixed trivial conflict in DFSConfigKeys git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1210666 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 7 +- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 1 + .../hdfs/protocolPB/JournalProtocolPB.java | 53 ++++++++ ...JournalProtocolServerSideTranslatorPB.java | 121 ++++++++++++++++++ .../JournalProtocolTranslatorPB.java | 106 +++++++++++++++ .../hadoop/hdfs/protocolPB/PBHelper.java | 92 +++++++++++++ .../hadoop/hdfs/protocolPB/overview.html | 62 +++++++++ .../protocolProtocolBuffers/overview.html | 29 ----- .../namenode/EditLogBackupInputStream.java | 4 +- .../namenode/EditLogBackupOutputStream.java | 8 +- .../namenode/EditLogFileInputStream.java | 4 +- .../namenode/EditLogFileOutputStream.java | 8 +- .../server/namenode/EditLogInputStream.java | 4 +- .../server/namenode/EditLogOutputStream.java | 16 ++- .../hdfs/server/namenode/FSEditLog.java | 63 ++++++++- .../hdfs/server/namenode/FSEditLogLoader.java | 6 +- .../hdfs/server/namenode/FSEditLogOp.java | 4 + .../hdfs/server/namenode/JournalManager.java | 7 +- .../hdfs/server/namenode/JournalSet.java | 8 +- .../hdfs/server/namenode/NNStorage.java | 15 +-- .../namenode/NameNodeResourceChecker.java | 15 ++- .../hadoop/hdfs/protocolPB/TestPBHelper.java | 79 ++++++++++++ .../hdfs/server/namenode/TestEditLog.java | 2 +- 23 files changed, 642 insertions(+), 72 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolPB.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolServerSideTranslatorPB.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolTranslatorPB.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/overview.html create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocolPB/TestPBHelper.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 7bafb1f44d1..ffeff8119d1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -3,7 +3,7 @@ Hadoop HDFS Change Log Trunk (unreleased changes) NEW FEATURES HDFS-395. DFS Scalability: Incremental block reports. (Tomasz Nykiel - via hairong) + via hairong) HDFS-2517. Add protobuf service for JounralProtocol. (suresh) @@ -13,6 +13,8 @@ Trunk (unreleased changes) HDFS-2519. Add protobuf service for DatanodeProtocol. (suresh) + HDFS-2581. Implement protobuf service for JournalProtocol. (suresh) + IMPROVEMENTS HADOOP-7524 Change RPC to allow multiple protocols including multuple @@ -72,6 +74,9 @@ Trunk (unreleased changes) Move the support for multiple protocols to lower layer so that Writable, PB and Avro can all use it (Sanjay) + HDFS-1580. Add interface for generic Write Ahead Logging mechanisms. + (Ivan Kelly via jitendra) + OPTIMIZATIONS HDFS-2477. Optimize computing the diff between a block report and the namenode state. 
(Tomasz Nykiel via hairong) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 511adcfb170..1c26c4bc57a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -163,6 +163,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_NAME_DIR_KEY = "dfs.namenode.name.dir"; public static final String DFS_NAMENODE_EDITS_DIR_KEY = "dfs.namenode.edits.dir"; public static final String DFS_NAMENODE_SHARED_EDITS_DIR_KEY = "dfs.namenode.shared.edits.dir"; + public static final String DFS_NAMENODE_EDITS_PLUGIN_PREFIX = "dfs.namenode.edits.journal-plugin"; public static final String DFS_CLIENT_READ_PREFETCH_SIZE_KEY = "dfs.client.read.prefetch.size"; public static final String DFS_CLIENT_RETRY_WINDOW_BASE= "dfs.client.retry.window.base"; public static final String DFS_METRICS_SESSION_ID_KEY = "dfs.metrics.session-id"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolPB.java new file mode 100644 index 00000000000..ebbdcb3d5c4 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolPB.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.protocolPB; + +import java.io.IOException; + +import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.JournalProtocolService; +import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; +import org.apache.hadoop.security.KerberosInfo; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.ipc.ProtocolInfo; +import org.apache.hadoop.ipc.VersionedProtocol; + +/** + * Protocol used to journal edits to a remote node. Currently, + * this is used to publish edits from the NameNode to a BackupNode. + * + * Note: This extends the protocolbuffer service based interface to + * add annotations required for security. 
+ */ +@KerberosInfo( + serverPrincipal = DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY, + clientPrincipal = DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY) +@ProtocolInfo(protocolName = + "org.apache.hadoop.hdfs.server.protocol.JournalProtocol", + protocolVersion = 1) +@InterfaceAudience.Private +public interface JournalProtocolPB extends + JournalProtocolService.BlockingInterface, VersionedProtocol { + /** + * This method is defined to get the protocol signature using + * the R23 protocol - hence we have added the suffix of 2 the method name + * to avoid conflict. + */ + public ProtocolSignatureWritable getProtocolSignature2(String protocol, + long clientVersion, int clientMethodsHash) throws IOException; +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolServerSideTranslatorPB.java new file mode 100644 index 00000000000..389bf154d32 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolServerSideTranslatorPB.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.protocolPB; + +import java.io.IOException; + +import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.JournalRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.JournalResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.StartLogSegmentRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.StartLogSegmentResponseProto; +import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; +import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; +import org.apache.hadoop.ipc.ProtocolSignature; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.VersionedProtocol; + +import com.google.protobuf.RpcController; +import com.google.protobuf.ServiceException; + +/** + * Implementation for protobuf service that forwards requests + * received on {@link JournalProtocolPB} to the + * {@link JournalProtocol} server implementation. 
+ */ +public class JournalProtocolServerSideTranslatorPB implements JournalProtocolPB { + /** Server side implementation to delegate the requests to */ + private final JournalProtocol impl; + + public JournalProtocolServerSideTranslatorPB(JournalProtocol impl) { + this.impl = impl; + } + + /** @see JournalProtocol#journal */ + @Override + public JournalResponseProto journal(RpcController unused, + JournalRequestProto req) throws ServiceException { + try { + impl.journal(PBHelper.convert(req.getRegistration()), + req.getFirstTxnId(), req.getNumTxns(), req.getRecords() + .toByteArray()); + } catch (IOException e) { + throw new ServiceException(e); + } + return JournalResponseProto.newBuilder().build(); + } + + /** @see JournalProtocol#startLogSegment */ + @Override + public StartLogSegmentResponseProto startLogSegment(RpcController controller, + StartLogSegmentRequestProto req) throws ServiceException { + try { + impl.startLogSegment(PBHelper.convert(req.getRegistration()), + req.getTxid()); + } catch (IOException e) { + throw new ServiceException(e); + } + return StartLogSegmentResponseProto.newBuilder().build(); + } + + /** @see VersionedProtocol#getProtocolVersion */ + @Override + public long getProtocolVersion(String protocol, long clientVersion) + throws IOException { + return RPC.getProtocolVersion(JournalProtocolPB.class); + } + + /** + * The client side will redirect getProtocolSignature to + * getProtocolSignature2. + * + * However the RPC layer below on the Server side will call getProtocolVersion + * and possibly in the future getProtocolSignature. Hence we still implement + * it even though the end client will never call this method. + * + * @see VersionedProtocol#getProtocolSignature(String, long, int) + */ + @Override + public ProtocolSignature getProtocolSignature(String protocol, + long clientVersion, int clientMethodsHash) throws IOException { + /** + * Don't forward this to the server. The protocol version and signature is + * that of {@link JournalProtocol} + */ + if (!protocol.equals(RPC.getProtocolName(JournalProtocolPB.class))) { + throw new IOException("Namenode Serverside implements " + + RPC.getProtocolName(JournalProtocolPB.class) + + ". The following requested protocol is unknown: " + protocol); + } + + return ProtocolSignature.getProtocolSignature(clientMethodsHash, + RPC.getProtocolVersion(JournalProtocolPB.class), + JournalProtocolPB.class); + } + + + @Override + public ProtocolSignatureWritable getProtocolSignature2(String protocol, + long clientVersion, int clientMethodsHash) throws IOException { + /** + * Don't forward this to the server. The protocol version and signature is + * that of {@link JournalPBProtocol} + */ + return ProtocolSignatureWritable.convert( + this.getProtocolSignature(protocol, clientVersion, clientMethodsHash)); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolTranslatorPB.java new file mode 100644 index 00000000000..adddf9a2f4b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolTranslatorPB.java @@ -0,0 +1,106 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.protocolPB; + +import java.io.Closeable; +import java.io.IOException; +import java.net.InetSocketAddress; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.JournalRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.StartLogSegmentRequestProto; +import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; +import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; +import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; +import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtobufHelper; +import org.apache.hadoop.ipc.ProtocolSignature; +import org.apache.hadoop.ipc.RPC; + +import com.google.protobuf.RpcController; +import com.google.protobuf.ServiceException; + +/** + * This class is the client side translator to translate the requests made on + * {@link JournalProtocol} interfaces to the RPC server implementing + * {@link JournalProtocolPB}. + */ +@InterfaceAudience.Private +@InterfaceStability.Stable +public class JournalProtocolTranslatorPB implements JournalProtocol, Closeable { + /** RpcController is not used and hence is set to null */ + private final static RpcController NULL_CONTROLLER = null; + private final JournalProtocolPB rpcProxy; + + public JournalProtocolTranslatorPB(InetSocketAddress nameNodeAddr, + Configuration conf) throws IOException { + RPC.setProtocolEngine(conf, JournalProtocolPB.class, ProtobufRpcEngine.class); + rpcProxy = RPC.getProxy(JournalProtocolPB.class, + JournalProtocol.versionID, nameNodeAddr, conf); + } + + @Override + public void close() { + RPC.stopProxy(rpcProxy); + } + + @Override + public long getProtocolVersion(String protocolName, long clientVersion) + throws IOException { + return 0; + } + + @Override + public ProtocolSignature getProtocolSignature(String protocol, + long clientVersion, int clientMethodsHash) throws IOException { + return ProtocolSignatureWritable.convert(rpcProxy.getProtocolSignature2( + protocol, clientVersion, clientMethodsHash)); + } + + @Override + public void journal(NamenodeRegistration reg, long firstTxnId, + int numTxns, byte[] records) throws IOException { + JournalRequestProto req = JournalRequestProto.newBuilder() + .setRegistration(PBHelper.convert(reg)) + .setFirstTxnId(firstTxnId) + .setNumTxns(numTxns) + .setRecords(PBHelper.getByteString(records)) + .build(); + try { + rpcProxy.journal(NULL_CONTROLLER, req); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } + + @Override + public void startLogSegment(NamenodeRegistration registration, long txid) + throws IOException { + StartLogSegmentRequestProto req = StartLogSegmentRequestProto.newBuilder() + .setRegistration(PBHelper.convert(registration)) 
+ .setTxid(txid) + .build(); + try { + rpcProxy.startLogSegment(NULL_CONTROLLER, req); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java new file mode 100644 index 00000000000..598c7fb4163 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.protocolPB; + +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeRegistrationProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeRegistrationProto.NamenodeRoleProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.StorageInfoProto; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; +import org.apache.hadoop.hdfs.server.common.StorageInfo; +import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; + +import com.google.protobuf.ByteString; + +/** + * Utilities for converting protobuf classes to and from + * implementation classes. 
+ */ +class PBHelper { + private PBHelper() { + /** Hidden constructor */ + } + + public static ByteString getByteString(byte[] bytes) { + return ByteString.copyFrom(bytes); + } + + public static NamenodeRole convert(NamenodeRoleProto role) { + switch (role) { + case NAMENODE: + return NamenodeRole.NAMENODE; + case BACKUP: + return NamenodeRole.BACKUP; + case CHECKPOINT: + return NamenodeRole.CHECKPOINT; + } + return null; + } + + public static NamenodeRoleProto convert(NamenodeRole role) { + switch (role) { + case NAMENODE: + return NamenodeRoleProto.NAMENODE; + case BACKUP: + return NamenodeRoleProto.BACKUP; + case CHECKPOINT: + return NamenodeRoleProto.CHECKPOINT; + } + return null; + } + + public static StorageInfoProto convert(StorageInfo info) { + return StorageInfoProto.newBuilder().setClusterID(info.getClusterID()) + .setCTime(info.getCTime()) + .setLayoutVersion(info.getLayoutVersion()) + .setNamespceID(info.getNamespaceID()) + .build(); + } + + public static StorageInfo convert(StorageInfoProto info) { + return new StorageInfo(info.getLayoutVersion(), info.getNamespceID(), + info.getClusterID(), info.getCTime()); + } + + + public static NamenodeRegistrationProto convert(NamenodeRegistration reg) { + return NamenodeRegistrationProto.newBuilder() + .setHttpAddress(reg.getHttpAddress()) + .setRole(convert(reg.getRole())) + .setRpcAddress(reg.getAddress()) + .setStorageInfo(convert((StorageInfo) reg)).build(); + } + + public static NamenodeRegistration convert(NamenodeRegistrationProto reg) { + return new NamenodeRegistration(reg.getRpcAddress(), reg.getHttpAddress(), + convert(reg.getStorageInfo()), convert(reg.getRole())); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/overview.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/overview.html new file mode 100644 index 00000000000..cf620f379b4 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/overview.html @@ -0,0 +1,62 @@ + + + + + Protocol Buffers based data types for NN protocols + + +

The Protocol Buffers data types for NN protocols that use PB go in this package.

      Steps to add a new protocol

1. Define the protobuf service for the protocol in a <ProtocolName>.proto file.
   • This file should include both the protobuf service definition and the types
     used for request and response. For example, see NamenodeProtocol.proto.
   • The naming convention for the protobuf service is <ProtocolName>Service.
     Example: NamenodeProtocolService.
   • Every RPC method takes a request and returns a response. The request naming
     convention is <MethodName>RequestProto; the response naming convention is
     <MethodName>ResponseProto.
2. Generate Java files from the proto file using the protoc tool.
3. Define a server-side interface that extends BlockingInterface from the
   generated files (example: NamenodeProtocolService.BlockingInterface) and
   VersionedProtocol. See NamenodePBProtocol.java for an example.
4. Define a client-side translator that translates the client protocol to
   protobuf. See NamenodeProtocolTranslator.
5. Define a server-side implementation that implements the server-side interface.
   This implementation receives the protobuf requests and delegates them to the
   underlying server implementation. See NamenodePBProtocolImpl for an example.
   (A sketch of this pattern follows the list.)
6. Register this protocol at the server. See the other protocols for how this
   is done.
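For illustration, here is a minimal sketch of the server-side half described in
steps 3-5, modeled on the JournalProtocol classes added by this patch. The class
name JournalProtocolServerSideTranslatorPB, the generated JournalProtocolService
nested class, and the JournalResponseProto/StartLogSegmentResponseProto types are
assumed from the naming conventions above; they are not code contained in this
patch. PBHelper, JournalRequestProto, StartLogSegmentRequestProto, and
JournalProtocol are the classes shown earlier.

    package org.apache.hadoop.hdfs.protocolPB;

    import java.io.IOException;

    import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.JournalProtocolService;
    import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.JournalRequestProto;
    import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.JournalResponseProto;
    import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.StartLogSegmentRequestProto;
    import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.StartLogSegmentResponseProto;
    import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;

    import com.google.protobuf.RpcController;
    import com.google.protobuf.ServiceException;

    /**
     * Sketch of a server-side translator (steps 3 and 5): receives the
     * protobuf request, converts it with PBHelper, and delegates to the
     * underlying JournalProtocol implementation.
     */
    public class JournalProtocolServerSideTranslatorPB
        implements JournalProtocolService.BlockingInterface {

      private final JournalProtocol impl;

      public JournalProtocolServerSideTranslatorPB(JournalProtocol impl) {
        this.impl = impl;
      }

      @Override
      public JournalResponseProto journal(RpcController unused,
          JournalRequestProto req) throws ServiceException {
        try {
          // Unwrap the protobuf request and call the real implementation.
          impl.journal(PBHelper.convert(req.getRegistration()),
              req.getFirstTxnId(), req.getNumTxns(),
              req.getRecords().toByteArray());
        } catch (IOException e) {
          throw new ServiceException(e);
        }
        return JournalResponseProto.getDefaultInstance();
      }

      @Override
      public StartLogSegmentResponseProto startLogSegment(RpcController unused,
          StartLogSegmentRequestProto req) throws ServiceException {
        try {
          impl.startLogSegment(PBHelper.convert(req.getRegistration()),
              req.getTxid());
        } catch (IOException e) {
          throw new ServiceException(e);
        }
        return StartLogSegmentResponseProto.getDefaultInstance();
      }
    }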

      Steps to make changes to the existing protocol in a compatible way

1. Adding new methods is a compatible change.
2. When modifying an existing method, do not change required parameters to
   optional or optional parameters to required. Only add optional parameters
   to the request and response.
3. When modifying an existing type, do not change required parameters to
   optional or optional parameters to required. Only add optional parameters
   to the request and response.
      + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolProtocolBuffers/overview.html b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolProtocolBuffers/overview.html index 6d41cfdf5a4..e69de29bb2d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolProtocolBuffers/overview.html +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolProtocolBuffers/overview.html @@ -1,29 +0,0 @@ - - - - - Protocol Buffers based data types for NN protocols - - -

-The Protocol Buffers data types for NN protocols that use PB go in this package. -

      - - - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java index 68bcdba6edf..a0fb8fe6291 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupInputStream.java @@ -103,7 +103,7 @@ class EditLogBackupInputStream extends EditLogInputStream { } @Override - long length() throws IOException { + public long length() throws IOException { // file size + size of both buffers return inner.length(); } @@ -135,7 +135,7 @@ class EditLogBackupInputStream extends EditLogInputStream { } @Override - boolean isInProgress() { + public boolean isInProgress() { return true; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java index 067990d01b9..711fcce48e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java @@ -67,12 +67,12 @@ class EditLogBackupOutputStream extends EditLogOutputStream { } @Override // EditLogOutputStream - void write(FSEditLogOp op) throws IOException { + public void write(FSEditLogOp op) throws IOException { doubleBuf.writeOp(op); } @Override - void writeRaw(byte[] bytes, int offset, int length) throws IOException { + public void writeRaw(byte[] bytes, int offset, int length) throws IOException { throw new IOException("Not supported"); } @@ -80,7 +80,7 @@ class EditLogBackupOutputStream extends EditLogOutputStream { * There is no persistent storage. Just clear the buffers. 
*/ @Override // EditLogOutputStream - void create() throws IOException { + public void create() throws IOException { assert doubleBuf.isFlushed() : "previous data is not flushed yet"; this.doubleBuf = new EditsDoubleBuffer(DEFAULT_BUFFER_SIZE); } @@ -106,7 +106,7 @@ class EditLogBackupOutputStream extends EditLogOutputStream { } @Override // EditLogOutputStream - void setReadyToFlush() throws IOException { + public void setReadyToFlush() throws IOException { doubleBuf.setReadyToFlush(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java index 719ef781006..3857db236c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java @@ -129,13 +129,13 @@ class EditLogFileInputStream extends EditLogInputStream { } @Override - long length() throws IOException { + public long length() throws IOException { // file size + size of both buffers return file.length(); } @Override - boolean isInProgress() { + public boolean isInProgress() { return isInProgress; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java index 4780d04b002..13c76ae1e6c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java @@ -73,7 +73,7 @@ class EditLogFileOutputStream extends EditLogOutputStream { /** {@inheritDoc} */ @Override - void write(FSEditLogOp op) throws IOException { + public void write(FSEditLogOp op) throws IOException { doubleBuf.writeOp(op); } @@ -86,7 +86,7 @@ class EditLogFileOutputStream extends EditLogOutputStream { * * */ @Override - void writeRaw(byte[] bytes, int offset, int length) throws IOException { + public void writeRaw(byte[] bytes, int offset, int length) throws IOException { doubleBuf.writeRaw(bytes, offset, length); } @@ -94,7 +94,7 @@ class EditLogFileOutputStream extends EditLogOutputStream { * Create empty edits logs file. */ @Override - void create() throws IOException { + public void create() throws IOException { fc.truncate(0); fc.position(0); doubleBuf.getCurrentBuf().writeInt(HdfsConstants.LAYOUT_VERSION); @@ -150,7 +150,7 @@ class EditLogFileOutputStream extends EditLogOutputStream { * data can be still written to the stream while flushing is performed. 
*/ @Override - void setReadyToFlush() throws IOException { + public void setReadyToFlush() throws IOException { doubleBuf.getCurrentBuf().write(FSEditLogOpCodes.OP_INVALID.getOpCode()); // insert eof marker doubleBuf.setReadyToFlush(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java index c66977c0717..2c4bdd53d00 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import java.io.Closeable; import java.io.IOException; @@ -79,7 +81,7 @@ public abstract class EditLogInputStream implements JournalStream, Closeable { /** * Return the size of the current edits log. */ - abstract long length() throws IOException; + public abstract long length() throws IOException; /** * Return true if this stream is in progress, false if it is finalized. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java index 8681837de56..d0fc1568015 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogOutputStream.java @@ -21,17 +21,21 @@ import java.io.IOException; import static org.apache.hadoop.hdfs.server.common.Util.now; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; /** * A generic abstract class to support journaling of edits logs into * a persistent storage. */ -abstract class EditLogOutputStream { +@InterfaceAudience.Private +@InterfaceStability.Evolving +public abstract class EditLogOutputStream { // these are statistics counters private long numSync; // number of sync(s) to disk private long totalTimeSync; // total time to sync - EditLogOutputStream() { + public EditLogOutputStream() throws IOException { numSync = totalTimeSync = 0; } @@ -41,7 +45,7 @@ abstract class EditLogOutputStream { * @param op operation * @throws IOException */ - abstract void write(FSEditLogOp op) throws IOException; + abstract public void write(FSEditLogOp op) throws IOException; /** * Write raw data to an edit log. This data should already have @@ -54,7 +58,7 @@ abstract class EditLogOutputStream { * @param length number of bytes to write * @throws IOException */ - abstract void writeRaw(byte[] bytes, int offset, int length) + abstract public void writeRaw(byte[] bytes, int offset, int length) throws IOException; /** @@ -62,7 +66,7 @@ abstract class EditLogOutputStream { * * @throws IOException */ - abstract void create() throws IOException; + abstract public void create() throws IOException; /** * Close the journal. @@ -81,7 +85,7 @@ abstract class EditLogOutputStream { * All data that has been written to the stream so far will be flushed. * New data can be still written to the stream while flushing is performed. 
*/ - abstract void setReadyToFlush() throws IOException; + abstract public void setReadyToFlush() throws IOException; /** * Flush and sync all data that is ready to be flush diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index bfe971b5eb4..aa16069ed15 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; +import java.lang.reflect.Constructor; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -31,6 +32,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; @@ -122,6 +124,7 @@ public class FSEditLog { private NameNodeMetrics metrics; private NNStorage storage; + private Configuration conf; private static class TransactionId { public long txid; @@ -163,6 +166,7 @@ public class FSEditLog { * @param editsDirs List of journals to use */ FSEditLog(Configuration conf, NNStorage storage, Collection editsDirs) { + this.conf = conf; isSyncRunning = false; this.storage = storage; metrics = NameNode.getNameNodeMetrics(); @@ -210,9 +214,13 @@ public class FSEditLog { private void initJournals(Collection dirs) { this.journalSet = new JournalSet(); for (URI u : dirs) { - StorageDirectory sd = storage.getStorageDirectory(u); - if (sd != null) { - journalSet.add(new FileJournalManager(sd)); + if (u.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) { + StorageDirectory sd = storage.getStorageDirectory(u); + if (sd != null) { + journalSet.add(new FileJournalManager(sd)); + } + } else { + journalSet.add(createJournal(u)); } } @@ -1053,4 +1061,53 @@ public class FSEditLog { IOUtils.closeStream(s); } } + + /** + * Retrieve the implementation class for a Journal scheme. + * @param conf The configuration to retrieve the information from + * @param uriScheme The uri scheme to look up. + * @return the class of the journal implementation + * @throws IllegalArgumentException if no class is configured for uri + */ + static Class getJournalClass(Configuration conf, + String uriScheme) { + String key + = DFSConfigKeys.DFS_NAMENODE_EDITS_PLUGIN_PREFIX + "." + uriScheme; + Class clazz = null; + try { + clazz = conf.getClass(key, null, JournalManager.class); + } catch (RuntimeException re) { + throw new IllegalArgumentException( + "Invalid class specified for " + uriScheme, re); + } + + if (clazz == null) { + LOG.warn("No class configured for " +uriScheme + + ", " + key + " is empty"); + throw new IllegalArgumentException( + "No class configured for " + uriScheme); + } + return clazz; + } + + /** + * Construct a custom journal manager. + * The class to construct is taken from the configuration. 
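+ * The class is looked up under the key DFS_NAMENODE_EDITS_PLUGIN_PREFIX
+ * followed by "." and the URI scheme (see getJournalClass above), and it
+ * must provide a constructor that takes (Configuration, URI).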
+ * @param uri Uri to construct + * @return The constructed journal manager + * @throws IllegalArgumentException if no class is configured for uri + */ + private JournalManager createJournal(URI uri) { + Class clazz + = getJournalClass(conf, uri.getScheme()); + + try { + Constructor cons + = clazz.getConstructor(Configuration.class, URI.class); + return cons.newInstance(conf, uri); + } catch (Exception e) { + throw new IllegalArgumentException("Unable to construct journal, " + + uri, e); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 30b0b8c1515..80aa115df1f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -25,6 +25,8 @@ import java.io.InputStream; import java.util.Arrays; import java.util.EnumMap; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; @@ -57,6 +59,8 @@ import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.util.Holder; import com.google.common.base.Joiner; +@InterfaceAudience.Private +@InterfaceStability.Evolving public class FSEditLogLoader { private final FSNamesystem fsNamesys; @@ -514,7 +518,7 @@ public class FSEditLogLoader { /** * Stream wrapper that keeps track of the current stream position. 
*/ - static class PositionTrackingInputStream extends FilterInputStream { + public static class PositionTrackingInputStream extends FilterInputStream { private long curPos = 0; private long markPos = -1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index 3adb4393296..61b4ef8a411 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -113,6 +113,10 @@ public abstract class FSEditLogOp { this.txid = 0; } + public long getTransactionId() { + return txid; + } + public void setTransactionId(long txid) { this.txid = txid; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java index 348e3ef9819..d45de18e92d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java @@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.server.namenode; import java.io.Closeable; import java.io.IOException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; /** * A JournalManager is responsible for managing a single place of storing @@ -28,7 +30,9 @@ import java.io.IOException; * each conceptual place of storage corresponds to exactly one instance of * this class, which is created when the EditLog is first opened. */ -interface JournalManager extends Closeable { +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface JournalManager extends Closeable { /** * Begin writing to a new segment of the log stream, which starts at * the given transaction ID. 
@@ -71,7 +75,6 @@ interface JournalManager extends Closeable { * * @param minTxIdToKeep the earliest txid that must be retained after purging * old logs - * @param purger the purging implementation to use * @throws IOException if purging fails */ void purgeLogsOlderThan(long minTxIdToKeep) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java index 45b5714082d..8607364a56b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java @@ -309,7 +309,7 @@ public class JournalSet implements JournalManager { } @Override - void write(final FSEditLogOp op) + public void write(final FSEditLogOp op) throws IOException { mapJournalsAndReportErrors(new JournalClosure() { @Override @@ -322,7 +322,7 @@ public class JournalSet implements JournalManager { } @Override - void writeRaw(final byte[] data, final int offset, final int length) + public void writeRaw(final byte[] data, final int offset, final int length) throws IOException { mapJournalsAndReportErrors(new JournalClosure() { @Override @@ -335,7 +335,7 @@ public class JournalSet implements JournalManager { } @Override - void create() throws IOException { + public void create() throws IOException { mapJournalsAndReportErrors(new JournalClosure() { @Override public void apply(JournalAndStream jas) throws IOException { @@ -367,7 +367,7 @@ public class JournalSet implements JournalManager { } @Override - void setReadyToFlush() throws IOException { + public void setReadyToFlush() throws IOException { mapJournalsAndReportErrors(new JournalClosure() { @Override public void apply(JournalAndStream jas) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java index 7bddaeb5d1f..118e4d26de6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java @@ -71,7 +71,8 @@ public class NNStorage extends Storage implements Closeable { private static final Log LOG = LogFactory.getLog(NNStorage.class.getName()); static final String DEPRECATED_MESSAGE_DIGEST_PROPERTY = "imageMD5Digest"; - + static final String LOCAL_URI_SCHEME = "file"; + // // The filenames used for storing the images // @@ -338,22 +339,14 @@ public class NNStorage extends Storage implements Closeable { /** * Checks the consistency of a URI, in particular if the scheme - * is specified and is supported by a concrete implementation + * is specified * @param u URI whose consistency is being checked. */ private static void checkSchemeConsistency(URI u) throws IOException { String scheme = u.getScheme(); // the URI should have a proper scheme - if(scheme == null) + if(scheme == null) { throw new IOException("Undefined scheme for " + u); - else { - try { - // the scheme should be enumerated as JournalType - JournalType.valueOf(scheme.toUpperCase()); - } catch (IllegalArgumentException iae){ - throw new IOException("Unknown scheme " + scheme + - ". 
It should correspond to a JournalType enumeration value"); - } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java index 4d7cfd8fa92..9283f92cade 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java @@ -33,6 +33,8 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.server.common.Util; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Collections2; +import com.google.common.base.Predicate; /** * @@ -69,7 +71,18 @@ public class NameNodeResourceChecker { .getTrimmedStringCollection(DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_KEY)); addDirsToCheck(FSNamesystem.getNamespaceDirs(conf)); - addDirsToCheck(FSNamesystem.getNamespaceEditsDirs(conf)); + + Collection localEditDirs = Collections2.filter( + FSNamesystem.getNamespaceEditsDirs(conf), + new Predicate() { + public boolean apply(URI input) { + if (input.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) { + return true; + } + return false; + } + }); + addDirsToCheck(localEditDirs); addDirsToCheck(extraCheckedVolumes); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocolPB/TestPBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocolPB/TestPBHelper.java new file mode 100644 index 00000000000..85aa91b9140 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocolPB/TestPBHelper.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.protocolPB; + +import static junit.framework.Assert.*; + +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeRegistrationProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeRegistrationProto.NamenodeRoleProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.StorageInfoProto; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; +import org.apache.hadoop.hdfs.server.common.StorageInfo; +import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; +import org.junit.Test; + +/** + * Tests for {@link PBHelper} + */ +public class TestPBHelper { + @Test + public void testConvertNamenodeRole() { + assertEquals(NamenodeRoleProto.BACKUP, + PBHelper.convert(NamenodeRole.BACKUP)); + assertEquals(NamenodeRoleProto.CHECKPOINT, + PBHelper.convert(NamenodeRole.CHECKPOINT)); + assertEquals(NamenodeRoleProto.NAMENODE, + PBHelper.convert(NamenodeRole.NAMENODE)); + assertEquals(NamenodeRole.BACKUP, + PBHelper.convert(NamenodeRoleProto.BACKUP)); + assertEquals(NamenodeRole.CHECKPOINT, + PBHelper.convert(NamenodeRoleProto.CHECKPOINT)); + assertEquals(NamenodeRole.NAMENODE, + PBHelper.convert(NamenodeRoleProto.NAMENODE)); + } + + @Test + public void testConvertStoragInfo() { + StorageInfo info = new StorageInfo(1, 2, "cid", 3); + StorageInfoProto infoProto = PBHelper.convert(info); + StorageInfo info2 = PBHelper.convert(infoProto); + assertEquals(info.getClusterID(), info2.getClusterID()); + assertEquals(info.getCTime(), info2.getCTime()); + assertEquals(info.getLayoutVersion(), info2.getLayoutVersion()); + assertEquals(info.getNamespaceID(), info2.getNamespaceID()); + } + + @Test + public void testConvertNamenodeRegistration() { + StorageInfo info = new StorageInfo(1, 2, "cid", 3); + NamenodeRegistration reg = new NamenodeRegistration("address:999", + "http:1000", info, NamenodeRole.NAMENODE); + NamenodeRegistrationProto regProto = PBHelper.convert(reg); + NamenodeRegistration reg2 = PBHelper.convert(regProto); + assertEquals(reg.getAddress(), reg2.getAddress()); + assertEquals(reg.getClusterID(), reg2.getClusterID()); + assertEquals(reg.getCTime(), reg2.getCTime()); + assertEquals(reg.getHttpAddress(), reg2.getHttpAddress()); + assertEquals(reg.getLayoutVersion(), reg2.getLayoutVersion()); + assertEquals(reg.getNamespaceID(), reg2.getNamespaceID()); + assertEquals(reg.getRegistrationID(), reg2.getRegistrationID()); + assertEquals(reg.getRole(), reg2.getRole()); + assertEquals(reg.getVersion(), reg2.getVersion()); + + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index a8a3ac4cb61..104d6527881 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -773,7 +773,7 @@ public class TestEditLog extends TestCase { } @Override - boolean isInProgress() { + public boolean isInProgress() { return true; } } From 6016e95feec93f0e17a8a1370c0ede735ca13f55 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 8 Dec 2011 02:00:20 +0000 Subject: [PATCH 038/177] HDFS-2627. Determine DN's view of which NN is active based on heartbeat responses. Contributed by Todd Lipcon. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1211735 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/datanode/BPOfferService.java | 123 +++++++++++++++--- .../hdfs/server/datanode/BPServiceActor.java | 18 +++ .../hdfs/server/namenode/FSNamesystem.java | 33 ++++- .../hadoop/hdfs/server/namenode/NameNode.java | 2 +- .../server/protocol/HeartbeatResponse.java | 14 +- .../server/protocol/NNHAStatusHeartbeat.java | 73 +++++++++++ .../HeartbeatResponseWritable.java | 9 +- .../NNHAStatusHeartbeatWritable.java | 77 +++++++++++ .../server/datanode/TestBPOfferService.java | 113 +++++++++++++++- 10 files changed, 434 insertions(+), 30 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/NNHAStatusHeartbeatWritable.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 6b67be47f0f..763245d69a5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -45,3 +45,5 @@ HDFS-2626. BPOfferService.verifyAndSetNamespaceInfo needs to be synchronized (to HDFS-2624. ConfiguredFailoverProxyProvider doesn't correctly stop ProtocolTranslators (todd) HDFS-2625. TestDfsOverAvroRpc failing after introduction of HeartbeatResponse type (todd) + +HDFS-2627. Determine DN's view of which NN is active based on heartbeat responses (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index eb611bffcb0..d750d8587c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -37,14 +37,15 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.FinalizeCommand; import org.apache.hadoop.hdfs.server.protocol.KeyUpdateCommand; +import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand; import org.apache.hadoop.ipc.RPC; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; import com.google.common.collect.Sets; /** @@ -75,10 +76,31 @@ class BPOfferService { UpgradeManagerDatanode upgradeManager = null; private final DataNode dn; - private BPServiceActor bpServiceToActive; + /** + * A reference to the BPServiceActor associated with the currently + * ACTIVE NN. In the case that all NameNodes are in STANDBY mode, + * this can be null. If non-null, this must always refer to a member + * of the {@link #bpServices} list. + */ + private BPServiceActor bpServiceToActive = null; + + /** + * The list of all actors for namenodes in this nameservice, regardless + * of their active or standby states. 
+ */ private List bpServices = new CopyOnWriteArrayList(); + /** + * Each time we receive a heartbeat from a NN claiming to be ACTIVE, + * we record that NN's most recent transaction ID here, so long as it + * is more recent than the previous value. This allows us to detect + * split-brain scenarios in which a prior NN is still asserting its + * ACTIVE state but with a too-low transaction ID. See HDFS-2627 + * for details. + */ + private long lastActiveClaimTxId = -1; + BPOfferService(List nnAddrs, DataNode dn) { Preconditions.checkArgument(!nnAddrs.isEmpty(), "Must pass at least one NN."); @@ -87,10 +109,6 @@ class BPOfferService { for (InetSocketAddress addr : nnAddrs) { this.bpServices.add(new BPServiceActor(addr, this)); } - // TODO(HA): currently we just make the first one the initial - // active. In reality it should start in an unknown state and then - // as we figure out which is active, designate one as such. - this.bpServiceToActive = this.bpServices.get(0); } void refreshNNList(ArrayList addrs) throws IOException { @@ -109,19 +127,23 @@ class BPOfferService { } /** - * returns true if BP thread has completed initialization of storage - * and has registered with the corresponding namenode - * @return true if initialized + * @return true if the service has registered with at least one NameNode. */ boolean isInitialized() { - // TODO(HA) is this right? - return bpServiceToActive != null && bpServiceToActive.isInitialized(); + return bpRegistration != null; } + /** + * @return true if there is at least one actor thread running which is + * talking to a NameNode. + */ boolean isAlive() { - // TODO: should || all the bp actors probably? - return bpServiceToActive != null && - bpServiceToActive.isAlive(); + for (BPServiceActor actor : bpServices) { + if (actor.isAlive()) { + return true; + } + } + return false; } String getBlockPoolId() { @@ -322,7 +344,7 @@ class BPOfferService { * Called when an actor shuts down. If this is the last actor * to shut down, shuts down the whole blockpool in the DN. */ - void shutdownActor(BPServiceActor actor) { + synchronized void shutdownActor(BPServiceActor actor) { if (bpServiceToActive == actor) { bpServiceToActive = null; } @@ -339,7 +361,7 @@ class BPOfferService { } @Deprecated - InetSocketAddress getNNSocketAddress() { + synchronized InetSocketAddress getNNSocketAddress() { // TODO(HA) this doesn't make sense anymore return bpServiceToActive.getNNSocketAddress(); } @@ -383,8 +405,61 @@ class BPOfferService { * @return a proxy to the active NN */ @Deprecated - DatanodeProtocol getActiveNN() { - return bpServiceToActive.bpNamenode; + synchronized DatanodeProtocol getActiveNN() { + if (bpServiceToActive != null) { + return bpServiceToActive.bpNamenode; + } else { + return null; + } + } + + /** + * Update the BPOS's view of which NN is active, based on a heartbeat + * response from one of the actors. 
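+ * A new claim of ACTIVE state is honored only if it carries a higher
+ * transaction ID than the most recently accepted claim; otherwise the
+ * stale claim is logged and ignored to guard against split-brain
+ * (see HDFS-2627).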
+ * + * @param actor the actor which received the heartbeat + * @param nnHaState the HA-related heartbeat contents + */ + synchronized void updateActorStatesFromHeartbeat( + BPServiceActor actor, + NNHAStatusHeartbeat nnHaState) { + final long txid = nnHaState.getTxId(); + + final boolean nnClaimsActive = + nnHaState.getState() == NNHAStatusHeartbeat.State.ACTIVE; + final boolean bposThinksActive = bpServiceToActive == actor; + final boolean isMoreRecentClaim = txid > lastActiveClaimTxId; + + if (nnClaimsActive && !bposThinksActive) { + LOG.info("Namenode " + actor + " trying to claim ACTIVE state with " + + "txid=" + txid); + if (!isMoreRecentClaim) { + // Split-brain scenario - an NN is trying to claim active + // state when a different NN has already claimed it with a higher + // txid. + LOG.warn("NN " + actor + " tried to claim ACTIVE state at txid=" + + txid + " but there was already a more recent claim at txid=" + + lastActiveClaimTxId); + return; + } else { + if (bpServiceToActive == null) { + LOG.info("Acknowledging ACTIVE Namenode " + actor); + } else { + LOG.info("Namenode " + actor + " taking over ACTIVE state from " + + bpServiceToActive + " at higher txid=" + txid); + } + bpServiceToActive = actor; + } + } else if (!nnClaimsActive && bposThinksActive) { + LOG.info("Namenode " + actor + " relinquishing ACTIVE state with " + + "txid=" + nnHaState.getTxId()); + bpServiceToActive = null; + } + + if (bpServiceToActive == actor) { + assert txid >= lastActiveClaimTxId; + lastActiveClaimTxId = txid; + } } /** @@ -415,7 +490,17 @@ class BPOfferService { } } - boolean processCommandFromActor(DatanodeCommand cmd, + /** + * Run an immediate heartbeat from all actors. Used by tests. + */ + @VisibleForTesting + void triggerHeartbeatForTests() throws IOException { + for (BPServiceActor actor : bpServices) { + actor.triggerHeartbeatForTests(); + } + } + + synchronized boolean processCommandFromActor(DatanodeCommand cmd, BPServiceActor actor) throws IOException { assert bpServices.contains(actor); if (actor == bpServiceToActive) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index bf49cc0a6b9..f6537fa4531 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -284,6 +284,14 @@ class BPServiceActor implements Runnable { lastBlockReport = 0; blockReport(); } + + @VisibleForTesting + void triggerHeartbeatForTests() throws IOException { + synchronized (receivedAndDeletedBlockList) { + lastHeartbeat = 0; + receivedAndDeletedBlockList.notifyAll(); + } + } /** * Report the list blocks to the Namenode @@ -420,8 +428,18 @@ class BPServiceActor implements Runnable { lastHeartbeat = startTime; if (!dn.areHeartbeatsDisabledForTests()) { HeartbeatResponse resp = sendHeartBeat(); + assert resp != null; dn.getMetrics().addHeartbeat(now() - startTime); + // If the state of this NN has changed (eg STANDBY->ACTIVE) + // then let the BPOfferService update itself. + // + // Important that this happens before processCommand below, + // since the first heartbeat to a new active might have commands + // that we should actually process. 
+ bpos.updateActorStatesFromHeartbeat( + this, resp.getNameNodeHaState()); + long startProcessCommands = now(); if (!processCommand(resp.getCommands())) continue; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 09b6634dab0..88fa9964fd0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -150,11 +150,16 @@ import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.BlockRecei import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.BlockReportMessage; import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.CommitBlockSynchronizationMessage; import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.DataNodeMessage; +import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer; +import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; +import org.apache.hadoop.hdfs.server.namenode.ha.HAState; +import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; +import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; @@ -308,6 +313,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Used when this NN is in standby state to read from the shared edit log. */ private EditLogTailer editLogTailer = null; + + /** + * Reference to the NN's HAContext object. This is only set once + * {@link #startCommonServices(Configuration, HAContext)} is called. 
+ */ + private HAContext haContext; PendingDataNodeMessages getPendingDataNodeMessages() { return pendingDatanodeMessages; @@ -434,11 +445,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, /** * Start services common to both active and standby states + * @param haContext * @throws IOException */ - void startCommonServices(Configuration conf) throws IOException { + void startCommonServices(Configuration conf, HAContext haContext) throws IOException { this.registerMBean(); // register the MBean for the FSNamesystemState writeLock(); + this.haContext = haContext; try { nnResourceChecker = new NameNodeResourceChecker(conf); checkAvailableResources(); @@ -2706,12 +2719,28 @@ public class FSNamesystem implements Namesystem, FSClusterStats, cmds = new DatanodeCommand[] {cmd}; } } - return new HeartbeatResponse(cmds); + + return new HeartbeatResponse(cmds, createHaStatusHeartbeat()); } finally { readUnlock(); } } + private NNHAStatusHeartbeat createHaStatusHeartbeat() { + HAState state = haContext.getState(); + NNHAStatusHeartbeat.State hbState; + if (state instanceof ActiveState) { + hbState = NNHAStatusHeartbeat.State.ACTIVE; + } else if (state instanceof StandbyState) { + hbState = NNHAStatusHeartbeat.State.STANDBY; + } else { + throw new AssertionError("Invalid state: " + state.getClass()); + } + return new NNHAStatusHeartbeat(hbState, + Math.max(getFSImage().getLastAppliedTxId(), + getFSImage().getEditLog().getLastWrittenTxId())); + } + /** * Returns whether or not there were available resources at the last check of * resources. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index b05b9f10bdd..fca815fdead 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -426,7 +426,7 @@ public class NameNode { /** Start the services common to active and standby states */ private void startCommonServices(Configuration conf) throws IOException { - namesystem.startCommonServices(conf); + namesystem.startCommonServices(conf, haContext); startHttpServer(conf); rpcServer.start(); plugins = conf.getInstances(DFS_NAMENODE_PLUGINS_KEY, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/HeartbeatResponse.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/HeartbeatResponse.java index fb1a533afc0..96f74a0c79c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/HeartbeatResponse.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/HeartbeatResponse.java @@ -35,17 +35,26 @@ public class HeartbeatResponse implements Writable { /** Commands returned from the namenode to the datanode */ private DatanodeCommand[] commands; + /** Information about the current HA-related state of the NN */ + private NNHAStatusHeartbeat haStatus; + public HeartbeatResponse() { // Empty constructor required for Writable } - public HeartbeatResponse(DatanodeCommand[] cmds) { + public HeartbeatResponse(DatanodeCommand[] cmds, + NNHAStatusHeartbeat haStatus) { commands = cmds; + this.haStatus = haStatus; } public DatanodeCommand[] getCommands() { return commands; } + + public NNHAStatusHeartbeat getNameNodeHaState() { + 
return haStatus; + } /////////////////////////////////////////// // Writable @@ -58,6 +67,7 @@ public class HeartbeatResponse implements Writable { ObjectWritable.writeObject(out, commands[i], commands[i].getClass(), null, true); } + haStatus.write(out); } @Override @@ -69,5 +79,7 @@ public class HeartbeatResponse implements Writable { commands[i] = (DatanodeCommand) ObjectWritable.readObject(in, objectWritable, null); } + haStatus = new NNHAStatusHeartbeat(); + haStatus.readFields(in); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java new file mode 100644 index 00000000000..633aa850df5 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.protocol; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; + +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class NNHAStatusHeartbeat implements Writable { + + private State state; + private long txid = HdfsConstants.INVALID_TXID; + + public NNHAStatusHeartbeat() { + } + + public NNHAStatusHeartbeat(State state, long txid) { + this.state = state; + this.txid = txid; + } + + public State getState() { + return state; + } + + public long getTxId() { + return txid; + } + + /////////////////////////////////////////// + // Writable + /////////////////////////////////////////// + @Override + public void write(DataOutput out) throws IOException { + WritableUtils.writeEnum(out, state); + out.writeLong(txid); + } + + @Override + public void readFields(DataInput in) throws IOException { + state = WritableUtils.readEnum(in, State.class); + txid = in.readLong(); + } + + @InterfaceAudience.Private + public enum State { + ACTIVE, + STANDBY; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/HeartbeatResponseWritable.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/HeartbeatResponseWritable.java index f7fe3db7b77..e32c3b126ca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/HeartbeatResponseWritable.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/HeartbeatResponseWritable.java @@ -31,6 +31,7 @@ import org.apache.hadoop.io.Writable; @InterfaceStability.Evolving public class HeartbeatResponseWritable implements Writable { private DatanodeCommandWritable[] commands; + private NNHAStatusHeartbeatWritable haStatus; public HeartbeatResponseWritable() { // Empty constructor for Writable @@ -41,7 +42,8 @@ public class HeartbeatResponseWritable implements Writable { } public HeartbeatResponse convert() { - return new HeartbeatResponse(DatanodeCommandWritable.convert(commands)); + return new HeartbeatResponse(DatanodeCommandWritable.convert(commands), + NNHAStatusHeartbeatWritable.convert(haStatus)); } /////////////////////////////////////////// @@ -55,6 +57,7 @@ public class HeartbeatResponseWritable implements Writable { ObjectWritable.writeObject(out, commands[i], commands[i].getClass(), null, true); } + haStatus.write(out); } @Override @@ -66,6 +69,8 @@ public class HeartbeatResponseWritable implements Writable { commands[i] = (DatanodeCommandWritable) ObjectWritable.readObject(in, objectWritable, null); } + haStatus = new NNHAStatusHeartbeatWritable(); + haStatus.readFields(in); } public static HeartbeatResponseWritable convert( @@ -73,4 +78,4 @@ public class HeartbeatResponseWritable implements Writable { return new HeartbeatResponseWritable(DatanodeCommandWritable.convert(resp .getCommands())); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/NNHAStatusHeartbeatWritable.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/NNHAStatusHeartbeatWritable.java new file mode 100644 index 00000000000..44ba33f54d7 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/NNHAStatusHeartbeatWritable.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.protocolR23Compatible; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; +import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat.State; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; + +@InterfaceAudience.Private +@InterfaceStability.Evolving +/** + * Response to {@link DatanodeProtocol#sendHeartbeat} + */ +public class NNHAStatusHeartbeatWritable implements Writable { + + private State state; + private long txid = HdfsConstants.INVALID_TXID; + + public NNHAStatusHeartbeatWritable() { + } + + public NNHAStatusHeartbeatWritable(State state, long txid) { + this.state = state; + this.txid = txid; + } + + public State getState() { + return state; + } + + public long getTxId() { + return txid; + } + + /////////////////////////////////////////// + // Writable + /////////////////////////////////////////// + @Override + public void write(DataOutput out) throws IOException { + WritableUtils.writeEnum(out, state); + out.writeLong(txid); + } + + @Override + public void readFields(DataInput in) throws IOException { + state = WritableUtils.readEnum(in, State.class); + txid = in.readLong(); + } + + public static NNHAStatusHeartbeat convert( + NNHAStatusHeartbeatWritable haStatus) { + return new NNHAStatusHeartbeat(haStatus.getState(), haStatus.getTxId()); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java index de26891f951..144b5c2aa62 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -21,6 +21,7 @@ import static org.junit.Assert.*; import java.io.IOException; import java.net.InetSocketAddress; +import java.util.Arrays; import java.util.Map; import org.apache.commons.logging.Log; @@ -32,9 +33,12 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; import org.apache.hadoop.hdfs.server.protocol.BlockCommand; +import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; +import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; +import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat.State; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; import org.apache.hadoop.test.GenericTestUtils; @@ -43,6 +47,8 @@ import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; import com.google.common.base.Supplier; import com.google.common.collect.Lists; @@ -63,13 +69,15 @@ public class 
TestBPOfferService { private DatanodeProtocol mockNN1; private DatanodeProtocol mockNN2; + private NNHAStatusHeartbeat[] mockHaStatuses = new NNHAStatusHeartbeat[2]; + private int heartbeatCounts[] = new int[2]; private DataNode mockDn; private FSDatasetInterface mockFSDataset; @Before public void setupMocks() throws Exception { - mockNN1 = setupNNMock(); - mockNN2 = setupNNMock(); + mockNN1 = setupNNMock(0); + mockNN2 = setupNNMock(1); // Set up a mock DN with the bare-bones configuration // objects, etc. @@ -92,14 +100,17 @@ public class TestBPOfferService { /** * Set up a mock NN with the bare minimum for a DN to register to it. */ - private DatanodeProtocol setupNNMock() throws Exception { + private DatanodeProtocol setupNNMock(int nnIdx) throws Exception { DatanodeProtocol mock = Mockito.mock(DatanodeProtocol.class); Mockito.doReturn( new NamespaceInfo(1, FAKE_CLUSTERID, FAKE_BPID, 0, HdfsConstants.LAYOUT_VERSION)) .when(mock).versionRequest(); - Mockito.doReturn(new HeartbeatResponse(null)) + Mockito.doReturn(new DatanodeRegistration("fake-node")) + .when(mock).registerDatanode(Mockito.any(DatanodeRegistration.class)); + + Mockito.doAnswer(new HeartbeatAnswer(nnIdx)) .when(mock).sendHeartbeat( Mockito.any(DatanodeRegistration.class), Mockito.anyLong(), @@ -109,10 +120,31 @@ public class TestBPOfferService { Mockito.anyInt(), Mockito.anyInt(), Mockito.anyInt()); - + mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(State.STANDBY, 0); return mock; } + /** + * Mock answer for heartbeats which returns an empty set of commands + * and the HA status for the chosen NN from the + * {@link TestBPOfferService#mockHaStatuses} array. + */ + private class HeartbeatAnswer implements Answer { + private final int nnIdx; + + public HeartbeatAnswer(int nnIdx) { + this.nnIdx = nnIdx; + } + + @Override + public HeartbeatResponse answer(InvocationOnMock invocation) throws Throwable { + heartbeatCounts[nnIdx]++; + return new HeartbeatResponse(new DatanodeCommand[0], + mockHaStatuses[nnIdx]); + } + } + + /** * Test that the BPOS can register to talk to two different NNs, * sends block reports to both, etc. @@ -204,6 +236,53 @@ public class TestBPOfferService { bpos.stop(); } } + + /** + * Test that the DataNode determines the active NameNode correctly + * based on the HA-related information in heartbeat responses. + * See HDFS-2627. + */ + @Test + public void testPickActiveNameNode() throws Exception { + BPOfferService bpos = setupBPOSForNNs(mockNN1, mockNN2); + bpos.start(); + try { + waitForInitialization(bpos); + + // Should start with neither NN as active. + assertNull(bpos.getActiveNN()); + + // Have NN1 claim active at txid 1 + mockHaStatuses[0] = new NNHAStatusHeartbeat(State.ACTIVE, 1); + waitForHeartbeats(bpos); + assertSame(mockNN1, bpos.getActiveNN()); + + // NN2 claims active at a higher txid + mockHaStatuses[1] = new NNHAStatusHeartbeat(State.ACTIVE, 2); + waitForHeartbeats(bpos); + assertSame(mockNN2, bpos.getActiveNN()); + + // Even after another heartbeat from the first NN, it should + // think NN2 is active, since it claimed a higher txid + waitForHeartbeats(bpos); + assertSame(mockNN2, bpos.getActiveNN()); + + // Even if NN2 goes to standby, DN shouldn't reset to talking to NN1, + // because NN1's txid is lower than the last active txid. Instead, + // it should consider neither active. 
+ mockHaStatuses[1] = new NNHAStatusHeartbeat(State.STANDBY, 2); + waitForHeartbeats(bpos); + assertNull(bpos.getActiveNN()); + + // Now if NN1 goes back to a higher txid, it should be considered active + mockHaStatuses[0] = new NNHAStatusHeartbeat(State.ACTIVE, 3); + waitForHeartbeats(bpos); + assertSame(mockNN1, bpos.getActiveNN()); + + } finally { + bpos.stop(); + } + } private void waitForOneToFail(final BPOfferService bpos) throws Exception { @@ -269,6 +348,30 @@ public class TestBPOfferService { }, 500, 10000); } + private void waitForHeartbeats(BPOfferService bpos) + throws Exception { + final int countAtStart[]; + synchronized (heartbeatCounts) { + countAtStart = Arrays.copyOf( + heartbeatCounts, heartbeatCounts.length); + } + bpos.triggerHeartbeatForTests(); + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + synchronized (heartbeatCounts) { + for (int i = 0; i < countAtStart.length; i++) { + if (heartbeatCounts[i] <= countAtStart[i]) { + return false; + } + } + return true; + } + } + }, 200, 10000); + } + + private ReceivedDeletedBlockInfo[] waitForBlockReceived( ExtendedBlock fakeBlock, DatanodeProtocol mockNN) throws Exception { From 2481474bd9c50a23e4fd2eea67ac2dea11ca1f58 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 8 Dec 2011 23:55:40 +0000 Subject: [PATCH 039/177] HDFS-2634. Standby needs to ingest latest edit logs before transitioning to active. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1212187 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/BackupImage.java | 2 +- .../hdfs/server/namenode/FSEditLog.java | 39 +++- .../hadoop/hdfs/server/namenode/FSImage.java | 15 +- .../hdfs/server/namenode/FSNamesystem.java | 18 +- .../server/namenode/FileJournalManager.java | 17 +- .../hdfs/server/namenode/JournalSet.java | 9 +- .../server/namenode/ha/EditLogTailer.java | 67 ++++--- .../apache/hadoop/hdfs/MiniDFSCluster.java | 8 +- .../hdfs/server/namenode/FSImageTestUtil.java | 22 +++ .../ha/TestEditLogsDuringFailover.java | 180 ++++++++++++++++++ 11 files changed, 331 insertions(+), 48 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 763245d69a5..925af24d937 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -47,3 +47,5 @@ HDFS-2624. ConfiguredFailoverProxyProvider doesn't correctly stop ProtocolTransl HDFS-2625. TestDfsOverAvroRpc failing after introduction of HeartbeatResponse type (todd) HDFS-2627. Determine DN's view of which NN is active based on heartbeat responses (todd) + +HDFS-2634. 
Standby needs to ingest latest edit logs before transitioning to active (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java index 4de70367a5a..4e28d83a528 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java @@ -345,7 +345,7 @@ public class BackupImage extends FSImage { synchronized void namenodeStartedLogSegment(long txid) throws IOException { LOG.info("NameNode started a new log segment at txid " + txid); - if (editLog.isOpenForWrite()) { + if (editLog.isSegmentOpen()) { if (editLog.getLastWrittenTxId() == txid - 1) { // We are in sync with the NN, so end and finalize the current segment editLog.endCurrentLogSegment(false); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 97961b26af2..92ef2b5ee3b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -249,14 +249,42 @@ public class FSEditLog { Preconditions.checkState(state == State.BETWEEN_LOG_SEGMENTS, "Bad state: %s", state); - startLogSegment(getLastWrittenTxId() + 1, true); + long segmentTxId = getLastWrittenTxId() + 1; + // Safety check: we should never start a segment if there are + // newer txids readable. + EditLogInputStream s = journalSet.getInputStream(segmentTxId); + try { + Preconditions.checkState(s == null, + "Cannot start writing at txid %s when there is a stream " + + "available for read: %s", segmentTxId, s); + } finally { + IOUtils.closeStream(s); + } + + startLogSegment(segmentTxId, true); assert state == State.IN_SEGMENT : "Bad state: " + state; } + /** + * @return true if the log is currently open in write mode, regardless + * of whether it actually has an open segment. + */ synchronized boolean isOpenForWrite() { + return state == State.IN_SEGMENT || + state == State.BETWEEN_LOG_SEGMENTS; + } + + /** + * @return true if the log is open in write mode and has a segment open + * ready to take edits. + */ + synchronized boolean isSegmentOpen() { return state == State.IN_SEGMENT; } + /** + * @return true if the log is open in read mode. 
+ */ synchronized boolean isOpenForRead() { return state == State.OPEN_FOR_READING; } @@ -290,7 +318,7 @@ public class FSEditLog { */ void logEdit(final FSEditLogOp op) { synchronized (this) { - assert state != State.CLOSED && state != State.OPEN_FOR_READING : + assert isOpenForWrite() : "bad state: " + state; // wait if an automatic sync is scheduled @@ -386,7 +414,7 @@ public class FSEditLog { * @return the first transaction ID in the current log segment */ synchronized long getCurSegmentTxId() { - Preconditions.checkState(state == State.IN_SEGMENT, + Preconditions.checkState(isSegmentOpen(), "Bad state: %s", state); return curSegmentTxId; } @@ -856,7 +884,7 @@ public class FSEditLog { */ synchronized void endCurrentLogSegment(boolean writeEndTxn) { LOG.info("Ending log segment " + curSegmentTxId); - Preconditions.checkState(state == State.IN_SEGMENT, + Preconditions.checkState(isSegmentOpen(), "Bad state: %s", state); if (writeEndTxn) { @@ -1017,6 +1045,9 @@ public class FSEditLog { * Run recovery on all journals to recover any unclosed segments */ void recoverUnclosedStreams() { + Preconditions.checkState( + state == State.BETWEEN_LOG_SEGMENTS, + "May not recover segments - wrong state: %s", state); try { journalSet.recoverUnfinalizedSegments(); } catch (IOException ex) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 6f5533cdfd7..8ce90eb0e2f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -342,7 +342,7 @@ public class FSImage implements Closeable { assert curDir.exists() : "Current directory must exist."; assert !prevDir.exists() : "prvious directory must not exist."; assert !tmpDir.exists() : "prvious.tmp directory must not exist."; - assert !editLog.isOpenForWrite() : "Edits log must not be open."; + assert !editLog.isSegmentOpen() : "Edits log must not be open."; // rename current to tmp NNStorage.rename(curDir, tmpDir); @@ -537,8 +537,6 @@ public class FSImage implements Closeable { void openEditLogForWrite() throws IOException { assert editLog != null : "editLog must be initialized"; - Preconditions.checkState(!editLog.isOpenForWrite(), - "edit log should not yet be open"); editLog.openForWrite(); storage.writeTransactionIdFileToStorage(editLog.getCurSegmentTxId()); }; @@ -580,13 +578,16 @@ public class FSImage implements Closeable { Iterable editStreams = null; - // TODO(HA): We shouldn't run this when coming up in standby state - editLog.recoverUnclosedStreams(); + if (editLog.isOpenForWrite()) { + // We only want to recover streams if we're going into Active mode. 
+ editLog.recoverUnclosedStreams(); + } if (LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, getLayoutVersion())) { editStreams = editLog.selectInputStreams(imageFile.getCheckpointTxId() + 1, - inspector.getMaxSeenTxId()); + inspector.getMaxSeenTxId(), + false); } else { editStreams = FSImagePreTransactionalStorageInspector .getEditLogStreams(storage); @@ -811,7 +812,7 @@ public class FSImage implements Closeable { assert editLog != null : "editLog must be initialized"; storage.attemptRestoreRemovedStorage(); - boolean editLogWasOpen = editLog.isOpenForWrite(); + boolean editLogWasOpen = editLog.isSegmentOpen(); if (editLogWasOpen) { editLog.endCurrentLogSegment(true); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index de7817987ee..9229926cae2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -490,12 +490,24 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LOG.info("Starting services required for active state"); writeLock(); try { - if (!dir.fsImage.editLog.isOpenForWrite()) { + FSEditLog editLog = dir.fsImage.getEditLog(); + + if (!editLog.isSegmentOpen()) { // During startup, we're already open for write during initialization. // TODO(HA): consider adding a startup state? - dir.fsImage.editLog.initJournalsForWrite(); + editLog.initJournalsForWrite(); // May need to recover - dir.fsImage.editLog.recoverUnclosedStreams(); + editLog.recoverUnclosedStreams(); + + LOG.info("Catching up to latest edits from old active before " + + "taking over writer role in edits logs."); + editLogTailer.catchupDuringFailover(); + + long nextTxId = dir.fsImage.getLastAppliedTxId() + 1; + LOG.info("Will take over writing edit logs at txnid " + + nextTxId); + editLog.setNextTxId(nextTxId); + dir.fsImage.editLog.openForWrite(); } if (UserGroupInformation.isSecurityEnabled()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index bf7bfde2da3..bbab3e58f54 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -96,7 +96,7 @@ class FileJournalManager implements JournalManager { "Can't finalize edits file " + inprogressFile + " since finalized file " + "already exists"); if (!inprogressFile.renameTo(dstFile)) { - throw new IOException("Unable to finalize edits file " + inprogressFile); + throw new IllegalStateException("Unable to finalize edits file " + inprogressFile); } if (inprogressFile.equals(currentInProgress)) { currentInProgress = null; @@ -147,7 +147,7 @@ class FileJournalManager implements JournalManager { ret.add(new RemoteEditLog(elf.firstTxId, elf.lastTxId)); } else if ((firstTxId > elf.getFirstTxId()) && (firstTxId <= elf.getLastTxId())) { - throw new IOException("Asked for firstTxId " + firstTxId + throw new IllegalStateException("Asked for firstTxId " + firstTxId + " which is in the middle of file " + elf.file); } } @@ -237,7 +237,17 @@ class FileJournalManager 
implements JournalManager { if (elf.isInProgress()) { break; } - } // else skip + } else if (elf.getFirstTxId() < fromTxId && + elf.getLastTxId() >= fromTxId) { + // Middle of a log segment - this should never happen + // since getLogFiles checks for it. But we should be + // paranoid about this case since it might result in + // overlapping txid ranges, etc, if we had a bug. + IOException ioe = new IOException("txid " + fromTxId + + " falls in the middle of file " + elf); + LOG.error("Broken invariant in edit log file management", ioe); + throw ioe; + } } if (LOG.isDebugEnabled()) { @@ -263,6 +273,7 @@ class FileJournalManager implements JournalManager { @Override synchronized public void recoverUnfinalizedSegments() throws IOException { File currentDir = sd.getCurrentDir(); + LOG.info("Recovering unfinalized segments in " + currentDir); List allLogFiles = matchEditLogs(currentDir.listFiles()); // make sure journal is aware of max seen transaction before moving corrupt diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java index 58b1ca09e64..7af0b51b909 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java @@ -204,6 +204,8 @@ public class JournalSet implements JournalManager { CorruptionException corruption = null; for (JournalAndStream jas : journals) { + if (jas.isDisabled()) continue; + JournalManager candidate = jas.getManager(); long candidateNumTxns = 0; try { @@ -211,6 +213,8 @@ public class JournalSet implements JournalManager { } catch (CorruptionException ce) { corruption = ce; } catch (IOException ioe) { + LOG.warn("Unable to read input streams from JournalManager " + candidate, + ioe); continue; // error reading disk, just skip } @@ -235,7 +239,10 @@ public class JournalSet implements JournalManager { public long getNumberOfTransactions(long fromTxnId) throws IOException { long num = 0; for (JournalAndStream jas: journals) { - if (jas.isActive()) { + if (jas.isDisabled()) { + LOG.info("Skipping jas " + jas + " since it's disabled"); + continue; + } else { long newNum = jas.getManager().getNumberOfTransactions(fromTxnId); if (newNum > num) { num = newNum; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index c15629f38b8..e1ce570c093 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; /** * EditLogTailer represents a thread which periodically reads from edits @@ -44,8 +45,15 @@ public class EditLogTailer { private final EditLogTailerThread tailerThread; + private final FSNamesystem namesystem; + private final FSImage image; + private final FSEditLog editLog; + public EditLogTailer(FSNamesystem namesystem) { - this.tailerThread = new EditLogTailerThread(namesystem); + 
this.tailerThread = new EditLogTailerThread(); + this.namesystem = namesystem; + this.image = namesystem.getFSImage(); + this.editLog = namesystem.getEditLog(); } public void start() { @@ -72,25 +80,45 @@ public class EditLogTailer { public void interrupt() { tailerThread.interrupt(); } + + public void catchupDuringFailover() throws IOException { + Preconditions.checkState(tailerThread == null || + !tailerThread.isAlive(), + "Tailer thread should not be running once failover starts"); + doTailEdits(); + } + + private void doTailEdits() throws IOException { + // TODO(HA) in a transition from active to standby, + // the following is wrong and ends up causing all of the + // last log segment to get re-read + long lastTxnId = image.getLastAppliedTxId(); + + if (LOG.isDebugEnabled()) { + LOG.debug("lastTxnId: " + lastTxnId); + } + Collection streams = editLog + .selectInputStreams(lastTxnId + 1, 0, false); + if (LOG.isDebugEnabled()) { + LOG.debug("edit streams to load from: " + streams.size()); + } + + long editsLoaded = image.loadEdits(streams, namesystem); + if (LOG.isDebugEnabled()) { + LOG.debug("editsLoaded: " + editsLoaded); + } + } /** * The thread which does the actual work of tailing edits journals and * applying the transactions to the FSNS. */ - private static class EditLogTailerThread extends Thread { - - private FSNamesystem namesystem; - private FSImage image; - private FSEditLog editLog; - + private class EditLogTailerThread extends Thread { private volatile boolean shouldRun = true; private long sleepTime = 60 * 1000; - private EditLogTailerThread(FSNamesystem namesystem) { + private EditLogTailerThread() { super("Edit log tailer"); - this.namesystem = namesystem; - image = namesystem.getFSImage(); - editLog = namesystem.getEditLog(); } private void setShouldRun(boolean shouldRun) { @@ -105,23 +133,8 @@ public class EditLogTailer { public void run() { while (shouldRun) { try { - long lastTxnId = image.getLastAppliedTxId(); - - if (LOG.isDebugEnabled()) { - LOG.debug("lastTxnId: " + lastTxnId); - } try { - // At least one record should be available. 
- Collection streams = editLog - .selectInputStreams(lastTxnId + 1, lastTxnId + 1, false); - if (LOG.isDebugEnabled()) { - LOG.debug("edit streams to load from: " + streams.size()); - } - - long editsLoaded = image.loadEdits(streams, namesystem); - if (LOG.isDebugEnabled()) { - LOG.debug("editsLoaded: " + editsLoaded); - } + doTailEdits(); } catch (IOException e) { // Will try again LOG.info("Got error, will try again.", e); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 2a2699048d1..13352ab82aa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -604,8 +604,7 @@ public class MiniDFSCluster { conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, nameservice.getId()), Joiner.on(",").join(nnIds)); if (manageNameDfsDirs) { - URI sharedEditsUri = fileAsURI(new File(base_dir, "shared-edits-" + - nnCounter + "-through-" + (nnCounter+nnIds.size()-1))); + URI sharedEditsUri = getSharedEditsDir(nnCounter, nnCounter+nnIds.size()-1); conf.set(DFS_NAMENODE_SHARED_EDITS_DIR_KEY, sharedEditsUri.toString()); } } @@ -638,6 +637,11 @@ public class MiniDFSCluster { } + public URI getSharedEditsDir(int minNN, int maxNN) throws IOException { + return fileAsURI(new File(base_dir, "shared-edits-" + + minNN + "-through-" + maxNN)); + } + private void initNameNodeConf(Configuration conf, String nameserviceId, String nnId, boolean manageNameDfsDirs, int nnIndex) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java index 0269166b57d..9e9af7af617 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java @@ -34,6 +34,8 @@ import java.util.Properties; import java.util.Set; import org.apache.commons.logging.Log; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirType; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; @@ -189,6 +191,26 @@ public abstract class FSImageTestUtil { return editLog; } + /** + * Create an aborted in-progress log in the given directory, containing + * only a specified number of "mkdirs" operations. + */ + public static void createAbortedLogWithMkdirs(File editsLogDir, int numDirs) + throws IOException { + FSEditLog editLog = FSImageTestUtil.createStandaloneEditLog(editsLogDir); + editLog.openForWrite(); + + PermissionStatus perms = PermissionStatus.createImmutable("fakeuser", "fakegroup", + FsPermission.createImmutable((short)0755)); + for (int i = 1; i <= numDirs; i++) { + String dirName = "dir" + i; + INodeDirectory dir = new INodeDirectory(dirName, perms); + editLog.logMkDir("/" + dirName, dir); + } + editLog.logSync(); + editLog.abortCurrentLogSegment(); + } + /** * Assert that all of the given directories have the same newest filename * for fsimage that they hold the same data. 
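The new TestEditLogsDuringFailover test below exercises the failover path added in this patch. As orientation only, here is a condensed sketch of the transition to active as implemented by the FSNamesystem and EditLogTailer hunks above; every method name is taken from those hunks, but the sketch itself is not part of the patch (locking, the security check, and error handling are omitted):

    // Illustrative sketch only, condensed from the hunks above.
    FSEditLog editLog = dir.fsImage.getEditLog();
    if (!editLog.isSegmentOpen()) {
      editLog.initJournalsForWrite();          // open journals in write mode
      editLog.recoverUnclosedStreams();        // finalize any segment the old active left in-progress
      editLogTailer.catchupDuringFailover();   // apply all remaining edits before taking over as writer
      long nextTxId = dir.fsImage.getLastAppliedTxId() + 1;
      editLog.setNextTxId(nextTxId);           // take over writing at the next txid
      editLog.openForWrite();                  // start a new in-progress segment
    }

catchupDuringFailover() checks that the tailer thread is no longer running and then invokes the same doTailEdits() logic the standby uses periodically, so the latest edits are ingested before the node begins writing (HDFS-2634).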
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java new file mode 100644 index 00000000000..1bbe33b72d8 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java @@ -0,0 +1,180 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; +import org.apache.hadoop.hdfs.server.namenode.NNStorage; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Test; + +import com.google.common.base.Joiner; +import com.google.common.collect.Lists; + +/** + * Test cases for the handling of edit logs during failover + * and startup of the standby node. + */ +public class TestEditLogsDuringFailover { + private static final Log LOG = + LogFactory.getLog(TestEditLogsDuringFailover.class); + private static final int NUM_DIRS_IN_LOG = 5; + + @Test + public void testStartup() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + try { + // During HA startup, both nodes should be in + // standby and we shouldn't have any edits files + // in any edits directory! + List allDirs = Lists.newArrayList(); + allDirs.addAll(cluster.getNameDirs(0)); + allDirs.addAll(cluster.getNameDirs(1)); + allDirs.add(cluster.getSharedEditsDir(0, 1)); + assertNoEditFiles(allDirs); + + // Set the first NN to active, make sure it creates edits + // in its own dirs and the shared dir. The standby + // should still have no edits! 
+ cluster.getNameNode(0).getRpcServer().transitionToActive(); + + assertEditFiles(cluster.getNameDirs(0), + NNStorage.getInProgressEditsFileName(1)); + assertEditFiles( + Collections.singletonList(cluster.getSharedEditsDir(0, 1)), + NNStorage.getInProgressEditsFileName(1)); + assertNoEditFiles(cluster.getNameDirs(1)); + + cluster.getNameNode(0).getRpcServer().mkdirs("/test", + FsPermission.createImmutable((short)0755), true); + + // Restarting the standby should not finalize any edits files + // in the shared directory when it starts up! + cluster.restartNameNode(1); + + assertEditFiles(cluster.getNameDirs(0), + NNStorage.getInProgressEditsFileName(1)); + assertEditFiles( + Collections.singletonList(cluster.getSharedEditsDir(0, 1)), + NNStorage.getInProgressEditsFileName(1)); + assertNoEditFiles(cluster.getNameDirs(1)); + + // Additionally it should not have applied any in-progress logs + // at start-up -- otherwise, it would have read half-way into + // the current log segment, and on the next roll, it would have to + // either replay starting in the middle of the segment (not allowed) + // or double-replay the edits (incorrect). + assertNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test", true)); + + cluster.getNameNode(0).getRpcServer().mkdirs("/test2", + FsPermission.createImmutable((short)0755), true); + + // If we restart NN0, it'll come back as standby, and we can + // transition NN1 to active and make sure it reads edits correctly at this point. + cluster.restartNameNode(0); + cluster.getNameNode(1).getRpcServer().transitionToActive(); + + // NN1 should have both the edits that came before its restart, and the edits that + // came after its restart. + assertNotNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test", true)); + assertNotNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test2", true)); + } finally { + cluster.shutdown(); + } + } + + @Test + public void testFailoverFinalizesAndReadsInProgress() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + try { + // Create a fake in-progress edit-log in the shared directory + URI sharedUri = cluster.getSharedEditsDir(0, 1); + File sharedDir = new File(sharedUri.getPath(), "current"); + FSImageTestUtil.createAbortedLogWithMkdirs(sharedDir, NUM_DIRS_IN_LOG); + assertEditFiles(Collections.singletonList(sharedUri), + NNStorage.getInProgressEditsFileName(1)); + + // Transition one of the NNs to active + cluster.getNameNode(0).getRpcServer().transitionToActive(); + + // In the transition to active, it should have read the log -- and + // hence see one of the dirs we made in the fake log. + String testPath = "/dir" + NUM_DIRS_IN_LOG; + assertNotNull(cluster.getNameNode(0).getRpcServer().getFileInfo(testPath)); + + // It also should have finalized that log in the shared directory and started + // writing to a new one at the next txid. + assertEditFiles(Collections.singletonList(sharedUri), + NNStorage.getFinalizedEditsFileName(1, NUM_DIRS_IN_LOG + 1), + NNStorage.getInProgressEditsFileName(NUM_DIRS_IN_LOG + 2)); + } finally { + cluster.shutdown(); + } + + } + + /** + * Check that no edits files are present in the given storage dirs. + */ + private void assertNoEditFiles(Iterable dirs) throws IOException { + assertEditFiles(dirs, new String[]{}); + } + + /** + * Check that the given list of edits files are present in the given storage + * dirs. 
+ */ + private void assertEditFiles(Iterable dirs, String ... files) + throws IOException { + for (URI u : dirs) { + File editDirRoot = new File(u.getPath()); + File editDir = new File(editDirRoot, "current"); + GenericTestUtils.assertExists(editDir); + if (files.length == 0) { + LOG.info("Checking no edit files exist in " + editDir); + } else { + LOG.info("Checking for following edit files in " + editDir + + ": " + Joiner.on(",").join(files)); + } + + GenericTestUtils.assertGlobEquals(editDir, "edits_.*", files); + } + } +} From 9818091a66b9c8e337de9d4e18655cf4b157056b Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 14 Dec 2011 07:24:36 +0000 Subject: [PATCH 040/177] HADOOP-7896. HA: if both NNs are in Standby mode, client needs to try failing back and forth several times with sleeps. Contributed by Aaron T. Myers git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1214076 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 4 + .../io/retry/RetryInvocationHandler.java | 45 ++++++----- .../apache/hadoop/io/retry/RetryPolicies.java | 78 ++++++++++++++++--- .../apache/hadoop/io/retry/RetryPolicy.java | 32 ++++++-- .../org/apache/hadoop/util/ThreadUtil.java | 49 ++++++++++++ .../hadoop/io/retry/TestFailoverProxy.java | 37 +++++++++ .../io/retry/UnreliableImplementation.java | 14 +++- 7 files changed, 219 insertions(+), 40 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 3207e70c384..216b5622108 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -5,4 +5,8 @@ branch is merged. ------------------------------ HADOOP-7455. HA: Introduce HA Service Protocol Interface. (suresh) + HADOOP-7774. HA: Administrative CLI to control HA daemons. (todd) + +HADOOP-7896. HA: if both NNs are in Standby mode, client needs to try failing + back and forth several times with sleeps. 
(atm) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java index f928760253f..d1655778251 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java @@ -24,11 +24,11 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.Collections; import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.retry.RetryPolicy.RetryAction; +import org.apache.hadoop.util.ThreadUtil; class RetryInvocationHandler implements InvocationHandler, Closeable { public static final Log LOG = LogFactory.getLog(RetryInvocationHandler.class); @@ -85,31 +85,38 @@ class RetryInvocationHandler implements InvocationHandler, Closeable { .isAnnotationPresent(Idempotent.class); RetryAction action = policy.shouldRetry(e, retries++, invocationFailoverCount, isMethodIdempotent); - if (action == RetryAction.FAIL) { + if (action.action == RetryAction.RetryDecision.FAIL) { LOG.warn("Exception while invoking " + method.getName() + " of " + currentProxy.getClass() + ". Not retrying.", e); if (!method.getReturnType().equals(Void.TYPE)) { throw e; // non-void methods can't fail without an exception } return null; - } else if (action == RetryAction.FAILOVER_AND_RETRY) { - LOG.warn("Exception while invoking " + method.getName() - + " of " + currentProxy.getClass() - + " after " + invocationFailoverCount + " fail over attempts." - + " Trying to fail over.", e); - // Make sure that concurrent failed method invocations only cause a - // single actual fail over. - synchronized (proxyProvider) { - if (invocationAttemptFailoverCount == proxyProviderFailoverCount) { - proxyProvider.performFailover(currentProxy); - proxyProviderFailoverCount++; - currentProxy = proxyProvider.getProxy(); - } else { - LOG.warn("A failover has occurred since the start of this method" - + " invocation attempt."); - } + } else { // retry or failover + + if (action.delayMillis > 0) { + ThreadUtil.sleepAtLeastIgnoreInterrupts(action.delayMillis); + } + + if (action.action == RetryAction.RetryDecision.FAILOVER_AND_RETRY) { + LOG.warn("Exception while invoking " + method.getName() + + " of " + currentProxy.getClass() + + " after " + invocationFailoverCount + " fail over attempts." + + " Trying to fail over.", e); + // Make sure that concurrent failed method invocations only cause a + // single actual fail over. 
+ synchronized (proxyProvider) { + if (invocationAttemptFailoverCount == proxyProviderFailoverCount) { + proxyProvider.performFailover(currentProxy); + proxyProviderFailoverCount++; + currentProxy = proxyProvider.getProxy(); + } else { + LOG.warn("A failover has occurred since the start of this method" + + " invocation attempt."); + } + } + invocationFailoverCount++; } - invocationFailoverCount++; } if(LOG.isDebugEnabled()) { LOG.debug("Exception while invoking " + method.getName() diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java index 3634e18673a..5afda594755 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java @@ -33,6 +33,8 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.StandbyException; +import com.google.common.annotations.VisibleForTesting; + /** *
      * A collection of useful implementations of {@link RetryPolicy}. @@ -42,6 +44,8 @@ public class RetryPolicies { public static final Log LOG = LogFactory.getLog(RetryPolicies.class); + private static final Random RAND = new Random(); + /** *
      * Try once, and fail by re-throwing the exception. @@ -137,7 +141,14 @@ public class RetryPolicies { public static final RetryPolicy failoverOnNetworkException( RetryPolicy fallbackPolicy, int maxFailovers) { - return new FailoverOnNetworkExceptionRetry(fallbackPolicy, maxFailovers); + return failoverOnNetworkException(fallbackPolicy, maxFailovers, 0, 0); + } + + public static final RetryPolicy failoverOnNetworkException( + RetryPolicy fallbackPolicy, int maxFailovers, long delayMillis, + long maxDelayBase) { + return new FailoverOnNetworkExceptionRetry(fallbackPolicy, maxFailovers, + delayMillis, maxDelayBase); } static class TryOnceThenFail implements RetryPolicy { @@ -176,12 +187,8 @@ public class RetryPolicies { if (retries >= maxRetries) { throw e; } - try { - timeUnit.sleep(calculateSleepTime(retries)); - } catch (InterruptedException ie) { - // retry - } - return RetryAction.RETRY; + return new RetryAction(RetryAction.RetryDecision.RETRY, + timeUnit.toMillis(calculateSleepTime(retries))); } protected abstract long calculateSleepTime(int retries); @@ -268,7 +275,7 @@ public class RetryPolicies { } static class ExponentialBackoffRetry extends RetryLimited { - private Random r = new Random(); + public ExponentialBackoffRetry( int maxRetries, long sleepTime, TimeUnit timeUnit) { super(maxRetries, sleepTime, timeUnit); @@ -276,16 +283,19 @@ public class RetryPolicies { @Override protected long calculateSleepTime(int retries) { - return sleepTime*r.nextInt(1<<(retries+1)); + return calculateExponentialTime(sleepTime, retries + 1); } } - /* + /** * Fail over and retry in the case of: * Remote StandbyException (server is up, but is not the active server) * Immediate socket exceptions (e.g. no route to host, econnrefused) * Socket exceptions after initial connection when operation is idempotent * + * The first failover is immediate, while all subsequent failovers wait an + * exponentially-increasing random amount of time. + * * Fail immediately in the case of: * Socket exceptions after initial connection when operation is not idempotent * @@ -295,11 +305,20 @@ public class RetryPolicies { private RetryPolicy fallbackPolicy; private int maxFailovers; + private long delayMillis; + private long maxDelayBase; public FailoverOnNetworkExceptionRetry(RetryPolicy fallbackPolicy, int maxFailovers) { + this(fallbackPolicy, maxFailovers, 0, 0); + } + + public FailoverOnNetworkExceptionRetry(RetryPolicy fallbackPolicy, + int maxFailovers, long delayMillis, long maxDelayBase) { this.fallbackPolicy = fallbackPolicy; this.maxFailovers = maxFailovers; + this.delayMillis = delayMillis; + this.maxDelayBase = maxDelayBase; } @Override @@ -314,8 +333,13 @@ public class RetryPolicies { if (e instanceof ConnectException || e instanceof NoRouteToHostException || e instanceof UnknownHostException || - e instanceof StandbyException) { - return RetryAction.FAILOVER_AND_RETRY; + e instanceof StandbyException || + isWrappedStandbyException(e)) { + return new RetryAction( + RetryAction.RetryDecision.FAILOVER_AND_RETRY, + // retry immediately if this is our first failover, sleep otherwise + failovers == 0 ? 0 : + calculateExponentialTime(delayMillis, failovers, maxDelayBase)); } else if (e instanceof SocketException || e instanceof IOException) { if (isMethodIdempotent) { @@ -330,4 +354,34 @@ public class RetryPolicies { } } + + /** + * Return a value which is time increasing exponentially as a + * function of retries, +/- 0%-50% of that value, chosen + * randomly. 
+ * + * @param time the base amount of time to work with + * @param retries the number of retries that have so occurred so far + * @param cap value at which to cap the base sleep time + * @return an amount of time to sleep + */ + @VisibleForTesting + public static long calculateExponentialTime(long time, int retries, + long cap) { + long baseTime = Math.min(time * ((long)1 << retries), cap); + return (long) (baseTime * (RAND.nextFloat() + 0.5)); + } + + private static long calculateExponentialTime(long time, int retries) { + return calculateExponentialTime(time, retries, Long.MAX_VALUE); + } + + private static boolean isWrappedStandbyException(Exception e) { + if (!(e instanceof RemoteException)) { + return false; + } + Exception unwrapped = ((RemoteException)e).unwrapRemoteException( + StandbyException.class); + return unwrapped instanceof StandbyException; + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicy.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicy.java index 4c4534ffb7e..90e5eaea671 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicy.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicy.java @@ -19,7 +19,6 @@ package org.apache.hadoop.io.retry; import org.apache.hadoop.classification.InterfaceStability; - /** *
      * Specifies a policy for retrying method failures. @@ -33,10 +32,33 @@ public interface RetryPolicy { * Returned by {@link RetryPolicy#shouldRetry(Exception, int, int, boolean)}. */ @InterfaceStability.Evolving - public enum RetryAction { - FAIL, - RETRY, - FAILOVER_AND_RETRY + public static class RetryAction { + + // A few common retry policies, with no delays. + public static final RetryAction FAIL = + new RetryAction(RetryDecision.FAIL); + public static final RetryAction RETRY = + new RetryAction(RetryDecision.RETRY); + public static final RetryAction FAILOVER_AND_RETRY = + new RetryAction(RetryDecision.FAILOVER_AND_RETRY); + + public final RetryDecision action; + public final long delayMillis; + + public RetryAction(RetryDecision action) { + this(action, 0); + } + + public RetryAction(RetryDecision action, long delayTime) { + this.action = action; + this.delayMillis = delayTime; + } + + public enum RetryDecision { + FAIL, + RETRY, + FAILOVER_AND_RETRY + } } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java new file mode 100644 index 00000000000..535ac341223 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.classification.InterfaceStability; + +@InterfaceStability.Evolving +public class ThreadUtil { + + private static final Log LOG = LogFactory.getLog(ThreadUtil.class); + + /** + * Cause the current thread to sleep as close as possible to the provided + * number of milliseconds. This method will log and ignore any + * {@link InterrupedException} encountered. 
+ * + * @param millis the number of milliseconds for the current thread to sleep + */ + public static void sleepAtLeastIgnoreInterrupts(long millis) { + long start = System.currentTimeMillis(); + while (System.currentTimeMillis() - start < millis) { + long timeToSleep = millis - + (System.currentTimeMillis() - start); + try { + Thread.sleep(timeToSleep); + } catch (InterruptedException ie) { + LOG.warn("interrupted while sleeping", ie); + } + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java index eec4797ab30..b52814cfc11 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java @@ -25,6 +25,7 @@ import java.util.concurrent.CountDownLatch; import org.apache.hadoop.io.retry.UnreliableImplementation.TypeOfExceptionToFailWith; import org.apache.hadoop.io.retry.UnreliableInterface.UnreliableException; import org.apache.hadoop.ipc.StandbyException; +import org.apache.hadoop.util.ThreadUtil; import org.junit.Test; public class TestFailoverProxy { @@ -267,4 +268,40 @@ public class TestFailoverProxy { assertEquals("impl2", t2.result); assertEquals(1, proxyProvider.getFailoversOccurred()); } + + /** + * Ensure that when all configured services are throwing StandbyException + * that we fail over back and forth between them until one is no longer + * throwing StandbyException. + */ + @Test + public void testFailoverBetweenMultipleStandbys() + throws UnreliableException, StandbyException, IOException { + + final long millisToSleep = 10000; + + final UnreliableImplementation impl1 = new UnreliableImplementation("impl1", + TypeOfExceptionToFailWith.STANDBY_EXCEPTION); + FlipFlopProxyProvider proxyProvider = new FlipFlopProxyProvider( + UnreliableInterface.class, + impl1, + new UnreliableImplementation("impl2", + TypeOfExceptionToFailWith.STANDBY_EXCEPTION)); + + final UnreliableInterface unreliable = (UnreliableInterface)RetryProxy + .create(UnreliableInterface.class, proxyProvider, + RetryPolicies.failoverOnNetworkException( + RetryPolicies.TRY_ONCE_THEN_FAIL, 10, 1000, 10000)); + + new Thread() { + @Override + public void run() { + ThreadUtil.sleepAtLeastIgnoreInterrupts(millisToSleep); + impl1.setIdentifier("renamed-impl1"); + } + }.start(); + + String result = unreliable.failsIfIdentifierDoesntMatch("renamed-impl1"); + assertEquals("renamed-impl1", result); + } } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java index 7fa88b3b08e..74a63894d80 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java @@ -48,6 +48,10 @@ public class UnreliableImplementation implements UnreliableInterface { this(identifier, TypeOfExceptionToFailWith.UNRELIABLE_EXCEPTION); } + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + public UnreliableImplementation(String identifier, TypeOfExceptionToFailWith exceptionToFailWith) { this.identifier = identifier; @@ -147,15 +151,17 @@ public 
class UnreliableImplementation implements UnreliableInterface { if (this.identifier.equals(identifier)) { return identifier; } else { + String message = "expected '" + this.identifier + "' but received '" + + identifier + "'"; switch (exceptionToFailWith) { case STANDBY_EXCEPTION: - throw new StandbyException(identifier); + throw new StandbyException(message); case UNRELIABLE_EXCEPTION: - throw new UnreliableException(identifier); + throw new UnreliableException(message); case IO_EXCEPTION: - throw new IOException(identifier); + throw new IOException(message); default: - throw new RuntimeException(identifier); + throw new RuntimeException(message); } } } From 9cf3e0805f5967d1ed792c32728ab826fb7c927b Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 14 Dec 2011 07:51:30 +0000 Subject: [PATCH 041/177] HADOOP-7922. Improve some logging for client IPC failovers and StandbyExceptions. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1214082 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 3 ++ .../io/retry/RetryInvocationHandler.java | 36 ++++++++++++++----- .../java/org/apache/hadoop/ipc/Server.java | 4 +++ 3 files changed, 34 insertions(+), 9 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 216b5622108..04db5dae381 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -10,3 +10,6 @@ HADOOP-7774. HA: Administrative CLI to control HA daemons. (todd) HADOOP-7896. HA: if both NNs are in Standby mode, client needs to try failing back and forth several times with sleeps. (atm) + +HADOOP-7922. Improve some logging for client IPC failovers and + StandbyExceptions (todd) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java index d1655778251..f422960dc7e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java @@ -93,16 +93,30 @@ class RetryInvocationHandler implements InvocationHandler, Closeable { } return null; } else { // retry or failover + + if (action.action == RetryAction.RetryDecision.FAILOVER_AND_RETRY) { + String msg = "Exception while invoking " + method.getName() + + " of " + currentProxy.getClass() + + " after " + invocationFailoverCount + " fail over attempts." + + " Trying to fail over " + formatSleepMessage(action.delayMillis); + if (LOG.isDebugEnabled()) { + LOG.debug(msg, e); + } else { + LOG.warn(msg); + } + } else { + if(LOG.isDebugEnabled()) { + LOG.debug("Exception while invoking " + method.getName() + + " of " + currentProxy.getClass() + ". Retrying " + + formatSleepMessage(action.delayMillis), e); + } + } if (action.delayMillis > 0) { ThreadUtil.sleepAtLeastIgnoreInterrupts(action.delayMillis); } if (action.action == RetryAction.RetryDecision.FAILOVER_AND_RETRY) { - LOG.warn("Exception while invoking " + method.getName() - + " of " + currentProxy.getClass() - + " after " + invocationFailoverCount + " fail over attempts." - + " Trying to fail over.", e); // Make sure that concurrent failed method invocations only cause a // single actual fail over. 
synchronized (proxyProvider) { @@ -118,14 +132,18 @@ class RetryInvocationHandler implements InvocationHandler, Closeable { invocationFailoverCount++; } } - if(LOG.isDebugEnabled()) { - LOG.debug("Exception while invoking " + method.getName() - + " of " + currentProxy.getClass() + ". Retrying.", e); - } } } } - + + private static String formatSleepMessage(long millis) { + if (millis > 0) { + return "after sleeping for " + millis + "ms."; + } else { + return "immediately."; + } + } + private Object invokeMethod(Method method, Object[] args) throws Throwable { try { if (!method.isAccessible()) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 8fdb55221b8..52ea35c5227 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -1616,6 +1616,10 @@ public abstract class Server { // on the server side, as opposed to just a normal exceptional // result. LOG.warn(logMsg, e); + } else if (e instanceof StandbyException) { + // Don't log the whole stack trace of these exceptions. + // Way too noisy! + LOG.info(logMsg); } else { LOG.info(logMsg, e); } From d5a658e7a449ecdcdd13370859cc295eb987679f Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 14 Dec 2011 08:10:34 +0000 Subject: [PATCH 042/177] HADOOP-7921. StandbyException should extend IOException. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1214093 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt | 2 ++ .../src/main/java/org/apache/hadoop/ipc/StandbyException.java | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 04db5dae381..56e11457c5f 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -13,3 +13,5 @@ HADOOP-7896. HA: if both NNs are in Standby mode, client needs to try failing HADOOP-7922. Improve some logging for client IPC failovers and StandbyExceptions (todd) + +HADOOP-7921. StandbyException should extend IOException (todd) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/StandbyException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/StandbyException.java index 49f4fadfd55..7a168619af1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/StandbyException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/StandbyException.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.ipc; +import java.io.IOException; + import org.apache.hadoop.classification.InterfaceStability; /** @@ -24,7 +26,7 @@ import org.apache.hadoop.classification.InterfaceStability; * set of servers in which only a subset may be active. */ @InterfaceStability.Evolving -public class StandbyException extends Exception { +public class StandbyException extends IOException { static final long serialVersionUID = 0x12308AD010L; public StandbyException(String msg) { super(msg); From 7e8accd68ebfe67455e1b8d223691a6c0242c18b Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 14 Dec 2011 08:56:21 +0000 Subject: [PATCH 043/177] HDFS-2671. 
NN should throw StandbyException in response to RPCs in STANDBY state. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1214117 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../apache/hadoop/hdfs/server/namenode/BackupNode.java | 5 +++-- .../apache/hadoop/hdfs/server/namenode/NameNode.java | 3 ++- .../hadoop/hdfs/server/namenode/ha/ActiveState.java | 3 +-- .../apache/hadoop/hdfs/server/namenode/ha/HAState.java | 9 +++------ .../hadoop/hdfs/server/namenode/ha/StandbyState.java | 10 ++++++++++ 6 files changed, 21 insertions(+), 11 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 925af24d937..fecb9c8c82f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -49,3 +49,5 @@ HDFS-2625. TestDfsOverAvroRpc failing after introduction of HeartbeatResponse ty HDFS-2627. Determine DN's view of which NN is active based on heartbeat responses (todd) HDFS-2634. Standby needs to ingest latest edit logs before transitioning to active (todd) + +HDFS-2671. NN should throw StandbyException in response to RPCs in STANDBY state (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index 381bf7b5bed..b84d4eb34a3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.net.NetUtils; import com.google.protobuf.BlockingService; @@ -397,11 +398,11 @@ public class BackupNode extends NameNode { @Override // NameNode protected void checkOperation(OperationCategory op) - throws UnsupportedActionException { + throws StandbyException { if (OperationCategory.JOURNAL != op) { String msg = "Operation category " + op + " is not supported at the BackupNode"; - throw new UnsupportedActionException(msg); + throw new StandbyException(msg); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index fca815fdead..25cbb2e08b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -54,6 +54,7 @@ import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; +import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.RefreshUserMappingsProtocol; @@ -910,7 +911,7 @@ public class 
NameNode { /** Check if an operation of given category is allowed */ protected synchronized void checkOperation(final OperationCategory op) - throws UnsupportedActionException { + throws StandbyException { state.checkOperation(haContext, op); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java index e00df208d7b..f893cc28833 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java @@ -37,8 +37,7 @@ public class ActiveState extends HAState { } @Override - public void checkOperation(HAContext context, OperationCategory op) - throws UnsupportedActionException { + public void checkOperation(HAContext context, OperationCategory op) { return; // Other than journal all operations are allowed in active state } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java index 6ee516c4cab..d3ff252cbc2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java @@ -21,6 +21,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException; +import org.apache.hadoop.ipc.StandbyException; /** * Namenode base state to implement state machine pattern. @@ -89,12 +90,8 @@ abstract public class HAState { * @throws UnsupportedActionException if a given type of operation is not * supported in this state. */ - public void checkOperation(final HAContext context, final OperationCategory op) - throws UnsupportedActionException { - String msg = "Operation category " + op + " is not supported in state " - + context.getState(); - throw new UnsupportedActionException(msg); - } + public abstract void checkOperation(final HAContext context, final OperationCategory op) + throws StandbyException; @Override public String toString() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java index a329934f582..aec86eae911 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java @@ -22,6 +22,8 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; +import org.apache.hadoop.ipc.StandbyException; /** * Namenode standby state. 
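A minimal, self-contained sketch of why HADOOP-7921 matters for the checkOperation() change above: because StandbyException now extends IOException, existing IOException handling on the client side (including the failover retry policy) catches it without any special casing. The DemoStandby class, its checkOperation helper and the message text are illustrative assumptions; only StandbyException itself comes from the patches.

    import java.io.IOException;
    import org.apache.hadoop.ipc.StandbyException;

    public class DemoStandby {
      // Stand-in for NameNode.checkOperation(); the real method delegates to the HA state.
      static void checkOperation(boolean isStandby) throws StandbyException {
        if (isStandby) {
          throw new StandbyException(
              "Operation category READ is not supported in state standby");
        }
      }

      public static void main(String[] args) {
        try {
          checkOperation(true);
        } catch (IOException e) {  // compiles only because StandbyException extends IOException
          System.out.println("Client would fail over to the other NN: " + e.getMessage());
        }
      }
    }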
In this state the namenode acts as warm standby and @@ -66,5 +68,13 @@ public class StandbyState extends HAState { throw new ServiceFailedException("Failed to stop standby services", e); } } + + @Override + public void checkOperation(HAContext context, OperationCategory op) + throws StandbyException { + String msg = "Operation category " + op + " is not supported in state " + + context.getState(); + throw new StandbyException(msg); + } } From 6c2da4bc0f8fc949fa2b9bebd4b4eeddfde544fc Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 14 Dec 2011 21:41:31 +0000 Subject: [PATCH 044/177] HDFS-2680. DFSClient should construct failover proxy with exponential backoff. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1214487 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../org/apache/hadoop/hdfs/DFSClient.java | 20 ++++++++++++++++++- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 6 ++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index fecb9c8c82f..11a2b6b00d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -51,3 +51,5 @@ HDFS-2627. Determine DN's view of which NN is active based on heartbeat response HDFS-2634. Standby needs to ingest latest edit logs before transitioning to active (todd) HDFS-2671. NN should throw StandbyException in response to RPCs in STANDBY state (todd) + +HDFS-2680. DFSClient should construct failover proxy with exponential backoff (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 43af62c46cc..756899945d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -147,6 +147,9 @@ public class DFSClient implements java.io.Closeable { * DFSClient configuration */ static class Conf { + final int maxFailoverAttempts; + final int failoverSleepBaseMillis; + final int failoverSleepMaxMillis; final int maxBlockAcquireFailures; final int confTime; final int ioBufferSize; @@ -168,6 +171,16 @@ public class DFSClient implements java.io.Closeable { final boolean useLegacyBlockReader; Conf(Configuration conf) { + maxFailoverAttempts = conf.getInt( + DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, + DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT); + failoverSleepBaseMillis = conf.getInt( + DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY, + DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT); + failoverSleepMaxMillis = conf.getInt( + DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY, + DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT); + maxBlockAcquireFailures = conf.getInt( DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY, DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT); @@ -306,7 +319,12 @@ public class DFSClient implements java.io.Closeable { FailoverProxyProvider failoverProxyProvider = (FailoverProxyProvider) ReflectionUtils.newInstance(failoverProxyProviderClass, conf); this.namenode = (ClientProtocol)RetryProxy.create(ClientProtocol.class, - failoverProxyProvider, RetryPolicies.failoverOnNetworkException(1)); + failoverProxyProvider, + RetryPolicies.failoverOnNetworkException( + RetryPolicies.TRY_ONCE_THEN_FAIL, + dfsClientConf.maxFailoverAttempts, + 
dfsClientConf.failoverSleepBaseMillis, + dfsClientConf.failoverSleepMaxMillis)); nnAddress = null; } else if (nameNodeUri != null && rpcNamenode == null) { this.namenode = DFSUtil.createNamenode(NameNode.getAddress(nameNodeUri), conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 1c9ed581927..b0a57863552 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -49,6 +49,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY = "dfs.client.socketcache.capacity"; public static final int DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT = 16; public static final String DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX = "dfs.client.failover.proxy.provider"; + public static final String DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY = "dfs.client.failover.max.attempts"; + public static final int DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT = 15; + public static final String DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY = "dfs.client.failover.sleep.base.millis"; + public static final int DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT = 500; + public static final String DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY = "dfs.client.failover.sleep.max.millis"; + public static final int DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT = 15000; public static final String DFS_NAMENODE_BACKUP_ADDRESS_KEY = "dfs.namenode.backup.address"; public static final String DFS_NAMENODE_BACKUP_ADDRESS_DEFAULT = "localhost:50100"; From 8134b1c8702d7d6b3994c73b34afc7f8ee33ac6e Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 14 Dec 2011 22:47:41 +0000 Subject: [PATCH 045/177] Merge trunk into HA branch. Several conflicts around introduction of protobuf translator for DatanodeProtocol - mostly trivial resolutions. NB: this does not successfully pass any tests since the HAStatus field needs to be integrated into the HeartbeatResponse Protobuf implementation. That will be a separate commit for clearer history. 
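A brief note on the HDFS-2680 change above: the new keys let a deployment tune the client's failover backoff. Below is a minimal sketch of such tuning; the key names, their defaults (15 attempts, 500 ms base sleep, 15 s cap) and the four-argument RetryPolicies.failoverOnNetworkException overload are taken from the patch, while the class name and the values chosen here are illustrative only.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class FailoverBackoffTuning {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Allow up to 20 failover attempts, backing off exponentially from a
        // 1 second base sleep up to a 30 second cap between attempts.
        conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, 20);
        conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY, 1000);
        conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY, 30000);
        // A DFSClient created with this conf passes these values straight into
        // RetryPolicies.failoverOnNetworkException(TRY_ONCE_THEN_FAIL, attempts, base, max).
      }
    }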
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1214518 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 26 +- .../src/contrib/bkjournal/README.txt | 60 +++ .../hadoop-hdfs/src/contrib/bkjournal/pom.xml | 67 +++ .../BookKeeperEditLogInputStream.java | 221 ++++++++ .../BookKeeperEditLogOutputStream.java | 177 ++++++ .../bkjournal/BookKeeperJournalManager.java | 508 ++++++++++++++++++ .../bkjournal/EditLogLedgerMetadata.java | 200 +++++++ .../hadoop/contrib/bkjournal/MaxTxId.java | 81 +++ .../hadoop/contrib/bkjournal/WriteLock.java | 186 +++++++ .../TestBookKeeperJournalManager.java | 395 ++++++++++++++ .../server/namenode/FSEditLogTestUtil.java} | 34 +- .../src/test/resources/log4j.properties | 62 +++ .../java/org/apache/hadoop/hdfs/DFSUtil.java | 6 +- .../hadoop/hdfs/protocol/ClientProtocol.java | 7 +- .../hadoop/hdfs/protocol/DatanodeInfo.java | 5 - .../hadoop/hdfs/protocol/HdfsFileStatus.java | 3 - .../hadoop/hdfs/protocol/LocatedBlocks.java | 3 - ...amenodeProtocolServerSideTranslatorPB.java | 190 ++++--- .../ClientNamenodeProtocolTranslatorPB.java | 65 ++- ...atanodeProtocolClientSideTranslatorPB.java | 8 +- ...atanodeProtocolServerSideTranslatorPB.java | 2 +- .../hadoop/hdfs/protocolPB/PBHelper.java | 59 +- .../ClientNamenodeWireProtocol.java | 7 +- .../DatanodeInfoWritable.java | 4 - .../HdfsFileStatusWritable.java | 3 - .../LocatedBlocksWritable.java | 3 - .../hdfs/server/datanode/BPOfferService.java | 15 +- .../hdfs/server/datanode/BPServiceActor.java | 6 +- .../hadoop/hdfs/server/datanode/DataNode.java | 7 +- .../web/resources/DatanodeWebHdfsMethods.java | 50 +- .../hdfs/server/namenode/FSEditLog.java | 2 +- .../server/namenode/NameNodeRpcServer.java | 51 +- .../web/resources/NamenodeWebHdfsMethods.java | 2 + .../hdfs/server/protocol/DatanodeCommand.java | 8 - .../server/protocol/DatanodeProtocol.java | 5 +- .../hdfs/server/protocol/UpgradeCommand.java | 6 +- .../DatanodeWireProtocol.java | 2 - .../web/resources/InetSocketAddressParam.java | 83 +++ .../resources/NamenodeRpcAddressParam.java | 50 ++ .../main/proto/ClientNamenodeProtocol.proto | 14 +- .../src/main/proto/DatanodeProtocol.proto | 4 +- .../apache/hadoop/hdfs/MiniDFSCluster.java | 32 +- .../server/datanode/TestBPOfferService.java | 25 +- .../server/datanode/TestDatanodeRegister.java | 5 +- .../web/TestWebHdfsWithMultipleNameNodes.java | 177 ++++++ 45 files changed, 2634 insertions(+), 292 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/README.txt create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogOutputStream.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/EditLogLedgerMetadata.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/MaxTxId.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/WriteLock.java create mode 
100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java rename hadoop-hdfs-project/hadoop-hdfs/src/{test/java/org/apache/hadoop/hdfs/TestDfsOverAvroRpc.java => contrib/bkjournal/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogTestUtil.java} (54%) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/resources/log4j.properties create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/InetSocketAddressParam.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/NamenodeRpcAddressParam.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index e19af25500c..86d1b32efec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1,6 +1,9 @@ Hadoop HDFS Change Log Trunk (unreleased changes) + INCOMPATIBLE CHANGES + HDFS-2676. Remove Avro RPC. (suresh) + NEW FEATURES HDFS-395. DFS Scalability: Incremental block reports. (Tomasz Nykiel via hairong) @@ -32,10 +35,16 @@ Trunk (unreleased changes) HDFS-2647. Used protobuf based RPC for InterDatanodeProtocol, ClientDatanodeProtocol, JournalProtocol, NamenodeProtocol. (suresh) - HDFS-2663. Handle protobuf optional parameters correctly. (suresh) - HDFS-2666. Fix TestBackupNode failure. (suresh) + HDFS-234. Integration with BookKeeper logging system. (Ivan Kelly + via jitendra) + + HDFS-2663. Optional protobuf parameters are not handled correctly. + (suresh) + + HDFS-2661. Enable protobuf RPC for DatanodeProtocol. (jitendra) + IMPROVEMENTS HADOOP-7524 Change RPC to allow multiple protocols including multuple @@ -106,6 +115,8 @@ Trunk (unreleased changes) HDFS-2650. Replace @inheritDoc with @Override. (Hari Mankude via suresh). + HDFS-2669 Enable protobuf rpc for ClientNamenodeProtocol + OPTIMIZATIONS HDFS-2477. Optimize computing the diff between a block report and the namenode state. (Tomasz Nykiel via hairong) @@ -171,6 +182,9 @@ Release 0.23.1 - UNRELEASED HDFS-2594. Support getDelegationTokens and createSymlink in WebHDFS. (szetszwo) + HDFS-2545. Change WebHDFS to support multiple namenodes in federation. + (szetszwo) + IMPROVEMENTS HDFS-2560. Refactor BPOfferService to be a static inner class (todd) @@ -209,6 +223,9 @@ Release 0.23.1 - UNRELEASED HDFS-2654. Make BlockReaderLocal not extend RemoteBlockReader2. (eli) + HDFS-2675. Reduce warning verbosity when double-closing edit logs + (todd) + OPTIMIZATIONS HDFS-2130. Switch default checksum to CRC32C. (todd) @@ -244,7 +261,10 @@ Release 0.23.1 - UNRELEASED HDFS-2653. DFSClient should cache whether addrs are non-local when short-circuiting is enabled. (eli) - + + HDFS-2649. eclipse:eclipse build fails for hadoop-hdfs-httpfs. + (Jason Lowe via eli) + Release 0.23.0 - 2011-11-01 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/README.txt b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/README.txt new file mode 100644 index 00000000000..0474c3f6e38 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/README.txt @@ -0,0 +1,60 @@ +This module provides a BookKeeper backend for HFDS Namenode write +ahead logging. 
+ +BookKeeper is a highly available distributed write ahead logging +system. For more details, see + + http://zookeeper.apache.org/bookkeeper + +------------------------------------------------------------------------------- +How do I build? + + To generate the distribution packages for BK journal, do the + following. + + $ mvn clean install -Pdist -Dtar + + This will generate a tarball, + target/hadoop-hdfs-bkjournal-.tar.gz + +------------------------------------------------------------------------------- +How do I use the BookKeeper Journal? + + To run a HDFS namenode using BookKeeper as a backend, extract the + distribution package on top of hdfs + + cd hadoop-hdfs-/ + tar --strip-components 1 -zxvf path/to/hadoop-hdfs-bkjournal-.tar.gz + + Then, in hdfs-site.xml, set the following properties. + + + dfs.namenode.edits.dir + bookkeeper://localhost:2181/bkjournal,file:///path/for/edits + + + + dfs.namenode.edits.journal-plugin.bookkeeper + org.apache.hadoop.contrib.bkjournal.BookKeeperJournalManager + + + In this example, the namenode is configured to use 2 write ahead + logging devices. One writes to BookKeeper and the other to a local + file system. At the moment is is not possible to only write to + BookKeeper, as the resource checker explicitly checked for local + disks currently. + + The given example, configures the namenode to look for the journal + metadata at the path /bkjournal on the a standalone zookeeper ensemble + at localhost:2181. To configure a multiple host zookeeper ensemble, + separate the hosts with semicolons. For example, if you have 3 + zookeeper servers, zk1, zk2 & zk3, each listening on port 2181, you + would specify this with + + bookkeeper://zk1:2181;zk2:2181;zk3:2181/bkjournal + + The final part /bkjournal specifies the znode in zookeeper where + ledger metadata will be store. Administrators can set this to anything + they wish. + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml new file mode 100644 index 00000000000..a0bafcffbb0 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml @@ -0,0 +1,67 @@ + + + + 4.0.0 + + org.apache.hadoop + hadoop-project-dist + 0.24.0-SNAPSHOT + ../../../../hadoop-project-dist + + + org.apache.hadoop.contrib + hadoop-hdfs-bkjournal + 0.24.0-SNAPSHOT + Apache Hadoop HDFS BookKeeper Journal + Apache Hadoop HDFS BookKeeper Journal + jar + + + hdfs + + + + + org.apache.hadoop + hadoop-annotations + provided + + + org.apache.hadoop + hadoop-common + 0.24.0-SNAPSHOT + provided + + + org.apache.hadoop + hadoop-hdfs + 0.24.0-SNAPSHOT + provided + + + org.apache.hadoop + hadoop-hdfs + 0.24.0-SNAPSHOT + test-jar + test + + + org.apache.bookkeeper + bookkeeper-server + 4.0.0 + compile + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java new file mode 100644 index 00000000000..707182ec5cc --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java @@ -0,0 +1,221 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.contrib.bkjournal; + +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Enumeration; + +import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader; +import org.apache.bookkeeper.client.LedgerHandle; +import org.apache.bookkeeper.client.LedgerEntry; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Input stream which reads from a BookKeeper ledger. + */ +class BookKeeperEditLogInputStream extends EditLogInputStream { + static final Log LOG = LogFactory.getLog(BookKeeperEditLogInputStream.class); + + private final long firstTxId; + private final long lastTxId; + private final int logVersion; + private final LedgerHandle lh; + + private final FSEditLogOp.Reader reader; + private final FSEditLogLoader.PositionTrackingInputStream tracker; + + /** + * Construct BookKeeper edit log input stream. + * Starts reading from the first entry of the ledger. + */ + BookKeeperEditLogInputStream(final LedgerHandle lh, + final EditLogLedgerMetadata metadata) + throws IOException { + this(lh, metadata, 0); + } + + /** + * Construct BookKeeper edit log input stream. + * Starts reading from firstBookKeeperEntry. This allows the stream + * to take a shortcut during recovery, as it doesn't have to read + * every edit log transaction to find out what the last one is. 
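A small sketch of how a caller might drain transactions from the stream defined above, mirroring the readOp()-returns-null-at-end loop used by recoverLastTxId() later in this patch. The helper is an assumption (it would have to live in org.apache.hadoop.contrib.bkjournal, since the stream class is package-private), and its lh/metadata arguments are assumed to be already opened.

    // Illustrative helper, not part of the patch.
    static long lastTxIdIn(LedgerHandle lh, EditLogLedgerMetadata metadata)
        throws IOException {
      BookKeeperEditLogInputStream in =
          new BookKeeperEditLogInputStream(lh, metadata);
      try {
        long last = HdfsConstants.INVALID_TXID;
        FSEditLogOp op;
        while ((op = in.readOp()) != null) {   // null marks the end of the ledger
          last = op.getTransactionId();
        }
        return last;
      } finally {
        in.close();
      }
    }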
+ */ + BookKeeperEditLogInputStream(LedgerHandle lh, EditLogLedgerMetadata metadata, + long firstBookKeeperEntry) + throws IOException { + this.lh = lh; + this.firstTxId = metadata.getFirstTxId(); + this.lastTxId = metadata.getLastTxId(); + this.logVersion = metadata.getVersion(); + + BufferedInputStream bin = new BufferedInputStream( + new LedgerInputStream(lh, firstBookKeeperEntry)); + tracker = new FSEditLogLoader.PositionTrackingInputStream(bin); + DataInputStream in = new DataInputStream(tracker); + + reader = new FSEditLogOp.Reader(in, logVersion); + } + + @Override + public long getFirstTxId() throws IOException { + return firstTxId; + } + + @Override + public long getLastTxId() throws IOException { + return lastTxId; + } + + @Override + public int getVersion() throws IOException { + return logVersion; + } + + @Override + public FSEditLogOp readOp() throws IOException { + return reader.readOp(); + } + + @Override + public void close() throws IOException { + try { + lh.close(); + } catch (Exception e) { + throw new IOException("Exception closing ledger", e); + } + } + + @Override + public long getPosition() { + return tracker.getPos(); + } + + @Override + public long length() throws IOException { + return lh.getLength(); + } + + @Override + public String getName() { + return String.format("BookKeeper[%s,first=%d,last=%d]", + lh.toString(), firstTxId, lastTxId); + } + + @Override + public JournalType getType() { + assert (false); + return null; + } + + /** + * Input stream implementation which can be used by + * FSEditLogOp.Reader + */ + private static class LedgerInputStream extends InputStream { + private long readEntries; + private InputStream entryStream = null; + private final LedgerHandle lh; + private final long maxEntry; + + /** + * Construct ledger input stream + * @param lh the ledger handle to read from + * @param firstBookKeeperEntry ledger entry to start reading from + */ + LedgerInputStream(LedgerHandle lh, long firstBookKeeperEntry) + throws IOException { + this.lh = lh; + readEntries = firstBookKeeperEntry; + try { + maxEntry = lh.getLastAddConfirmed(); + } catch (Exception e) { + throw new IOException("Error reading last entry id", e); + } + } + + /** + * Get input stream representing next entry in the + * ledger. 
+ * @return input stream, or null if no more entries + */ + private InputStream nextStream() throws IOException { + try { + if (readEntries > maxEntry) { + return null; + } + Enumeration entries + = lh.readEntries(readEntries, readEntries); + readEntries++; + if (entries.hasMoreElements()) { + LedgerEntry e = entries.nextElement(); + assert !entries.hasMoreElements(); + return e.getEntryInputStream(); + } + } catch (Exception e) { + throw new IOException("Error reading entries from bookkeeper", e); + } + return null; + } + + @Override + public int read() throws IOException { + byte[] b = new byte[1]; + if (read(b, 0, 1) != 1) { + return -1; + } else { + return b[0]; + } + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + try { + int read = 0; + if (entryStream == null) { + entryStream = nextStream(); + if (entryStream == null) { + return read; + } + } + + while (read < len) { + int thisread = entryStream.read(b, off+read, (len-read)); + if (thisread == -1) { + entryStream = nextStream(); + if (entryStream == null) { + return read; + } + } else { + read += thisread; + } + } + return read; + } catch (IOException e) { + throw e; + } + + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogOutputStream.java new file mode 100644 index 00000000000..ddbe0b62e0a --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogOutputStream.java @@ -0,0 +1,177 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.contrib.bkjournal; + +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.CountDownLatch; + +import java.util.Arrays; + +import org.apache.bookkeeper.client.LedgerHandle; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.AsyncCallback.AddCallback; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.Writer; + +import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; +import org.apache.hadoop.io.DataOutputBuffer; +import java.io.IOException; + +/** + * Output stream for BookKeeper Journal. + * Multiple complete edit log entries are packed into a single bookkeeper + * entry before sending it over the network. The fact that the edit log entries + * are complete in the bookkeeper entries means that each bookkeeper log entry + *can be read as a complete edit log. 
This is useful for recover, as we don't + * need to read through the entire edit log segment to get the last written + * entry. + */ +class BookKeeperEditLogOutputStream + extends EditLogOutputStream implements AddCallback { + private final DataOutputBuffer bufCurrent; + private final AtomicInteger outstandingRequests; + private final int transmissionThreshold; + private final LedgerHandle lh; + private CountDownLatch syncLatch; + private final WriteLock wl; + private final Writer writer; + + /** + * Construct an edit log output stream which writes to a ledger. + + */ + protected BookKeeperEditLogOutputStream(Configuration conf, + LedgerHandle lh, WriteLock wl) + throws IOException { + super(); + + bufCurrent = new DataOutputBuffer(); + outstandingRequests = new AtomicInteger(0); + syncLatch = null; + this.lh = lh; + this.wl = wl; + this.wl.acquire(); + this.writer = new Writer(bufCurrent); + this.transmissionThreshold + = conf.getInt(BookKeeperJournalManager.BKJM_OUTPUT_BUFFER_SIZE, + BookKeeperJournalManager.BKJM_OUTPUT_BUFFER_SIZE_DEFAULT); + } + + @Override + public void create() throws IOException { + // noop + } + + @Override + public void close() throws IOException { + setReadyToFlush(); + flushAndSync(); + try { + lh.close(); + } catch (InterruptedException ie) { + throw new IOException("Interrupted waiting on close", ie); + } catch (BKException bke) { + throw new IOException("BookKeeper error during close", bke); + } + } + + @Override + public void abort() throws IOException { + try { + lh.close(); + } catch (InterruptedException ie) { + throw new IOException("Interrupted waiting on close", ie); + } catch (BKException bke) { + throw new IOException("BookKeeper error during abort", bke); + } + + wl.release(); + } + + @Override + public void writeRaw(final byte[] data, int off, int len) throws IOException { + throw new IOException("Not supported for BK"); + } + + @Override + public void write(FSEditLogOp op) throws IOException { + wl.checkWriteLock(); + + writer.writeOp(op); + + if (bufCurrent.getLength() > transmissionThreshold) { + transmit(); + } + } + + @Override + public void setReadyToFlush() throws IOException { + wl.checkWriteLock(); + + transmit(); + + synchronized(this) { + syncLatch = new CountDownLatch(outstandingRequests.get()); + } + } + + @Override + public void flushAndSync() throws IOException { + wl.checkWriteLock(); + + assert(syncLatch != null); + try { + syncLatch.await(); + } catch (InterruptedException ie) { + throw new IOException("Interrupted waiting on latch", ie); + } + + syncLatch = null; + // wait for whatever we wait on + } + + /** + * Transmit the current buffer to bookkeeper. + * Synchronised at the FSEditLog level. #write() and #setReadyToFlush() + * are never called at the same time. 
+ */ + private void transmit() throws IOException { + wl.checkWriteLock(); + + if (bufCurrent.getLength() > 0) { + byte[] entry = Arrays.copyOf(bufCurrent.getData(), + bufCurrent.getLength()); + lh.asyncAddEntry(entry, this, null); + bufCurrent.reset(); + outstandingRequests.incrementAndGet(); + } + } + + @Override + public void addComplete(int rc, LedgerHandle handle, + long entryId, Object ctx) { + synchronized(this) { + outstandingRequests.decrementAndGet(); + CountDownLatch l = syncLatch; + if (l != null) { + l.countDown(); + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java new file mode 100644 index 00000000000..7fa90269ecd --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java @@ -0,0 +1,508 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.contrib.bkjournal; + +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.server.namenode.JournalManager; +import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream; +import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; +import org.apache.hadoop.conf.Configuration; + +import org.apache.bookkeeper.conf.ClientConfiguration; +import org.apache.bookkeeper.client.BKException; +import org.apache.bookkeeper.client.BookKeeper; +import org.apache.bookkeeper.client.LedgerHandle; + +import org.apache.zookeeper.data.Stat; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.ZooDefs.Ids; + +import java.util.Collections; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.io.IOException; + +import java.net.URI; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * BookKeeper Journal Manager + * + * To use, add the following to hdfs-site.xml. + *

+ * {@code
+ * <property>
+ *   <name>dfs.namenode.edits.dir</name>
+ *   <value>bookkeeper://zk1:2181;zk2:2181;zk3:2181/hdfsjournal</value>
+ * </property>
+ *
+ * <property>
+ *   <name>dfs.namenode.edits.journalPlugin.bookkeeper</name>
+ *   <value>org.apache.hadoop.contrib.bkjournal.BookKeeperJournalManager</value>
+ * </property>
+ * }
      + * 
      + * The URI format for bookkeeper is bookkeeper://[zkEnsemble]/[rootZnode] + * [zookkeeper ensemble] is a list of semi-colon separated, zookeeper host:port + * pairs. In the example above there are 3 servers, in the ensemble, + * zk1, zk2 & zk3, each one listening on port 2181. + * + * [root znode] is the path of the zookeeper znode, under which the editlog + * information will be stored. + * + * Other configuration options are: + *
+ * <ul>
+ *   <li>dfs.namenode.bookkeeperjournal.output-buffer-size
+ *       Number of bytes a bookkeeper journal stream will buffer before
+ *       forcing a flush. Default is 1024.</li>
+ *   <li>dfs.namenode.bookkeeperjournal.ensemble-size
+ *       Number of bookkeeper servers in edit log ledger ensembles. This
+ *       is the number of bookkeeper servers which need to be available
+ *       for the ledger to be writable. Default is 3.</li>
+ *   <li>dfs.namenode.bookkeeperjournal.quorum-size
+ *       Number of bookkeeper servers in the write quorum. This is the
+ *       number of bookkeeper servers which must have acknowledged the
+ *       write of an entry before it is considered written.
+ *       Default is 2.</li>
+ *   <li>dfs.namenode.bookkeeperjournal.digestPw
+ *       Password to use when creating ledgers.</li>
+ * </ul>
      + */ +public class BookKeeperJournalManager implements JournalManager { + static final Log LOG = LogFactory.getLog(BookKeeperJournalManager.class); + + public static final String BKJM_OUTPUT_BUFFER_SIZE + = "dfs.namenode.bookkeeperjournal.output-buffer-size"; + public static final int BKJM_OUTPUT_BUFFER_SIZE_DEFAULT = 1024; + + public static final String BKJM_BOOKKEEPER_ENSEMBLE_SIZE + = "dfs.namenode.bookkeeperjournal.ensemble-size"; + public static final int BKJM_BOOKKEEPER_ENSEMBLE_SIZE_DEFAULT = 3; + + public static final String BKJM_BOOKKEEPER_QUORUM_SIZE + = "dfs.namenode.bookkeeperjournal.quorum-size"; + public static final int BKJM_BOOKKEEPER_QUORUM_SIZE_DEFAULT = 2; + + public static final String BKJM_BOOKKEEPER_DIGEST_PW + = "dfs.namenode.bookkeeperjournal.digestPw"; + public static final String BKJM_BOOKKEEPER_DIGEST_PW_DEFAULT = ""; + + private static final int BKJM_LAYOUT_VERSION = -1; + + private final ZooKeeper zkc; + private final Configuration conf; + private final BookKeeper bkc; + private final WriteLock wl; + private final String ledgerPath; + private final MaxTxId maxTxId; + private final int ensembleSize; + private final int quorumSize; + private final String digestpw; + private final CountDownLatch zkConnectLatch; + + private LedgerHandle currentLedger = null; + + private int bytesToInt(byte[] b) { + assert b.length >= 4; + return b[0] << 24 | b[1] << 16 | b[2] << 8 | b[3]; + } + + private byte[] intToBytes(int i) { + return new byte[] { + (byte)(i >> 24), + (byte)(i >> 16), + (byte)(i >> 8), + (byte)(i) }; + } + + /** + * Construct a Bookkeeper journal manager. + */ + public BookKeeperJournalManager(Configuration conf, URI uri) + throws IOException { + this.conf = conf; + String zkConnect = uri.getAuthority().replace(";", ","); + String zkPath = uri.getPath(); + ensembleSize = conf.getInt(BKJM_BOOKKEEPER_ENSEMBLE_SIZE, + BKJM_BOOKKEEPER_ENSEMBLE_SIZE_DEFAULT); + quorumSize = conf.getInt(BKJM_BOOKKEEPER_QUORUM_SIZE, + BKJM_BOOKKEEPER_QUORUM_SIZE_DEFAULT); + + ledgerPath = zkPath + "/ledgers"; + String maxTxIdPath = zkPath + "/maxtxid"; + String lockPath = zkPath + "/lock"; + String versionPath = zkPath + "/version"; + digestpw = conf.get(BKJM_BOOKKEEPER_DIGEST_PW, + BKJM_BOOKKEEPER_DIGEST_PW_DEFAULT); + + try { + zkConnectLatch = new CountDownLatch(1); + zkc = new ZooKeeper(zkConnect, 3000, new ZkConnectionWatcher()); + if (!zkConnectLatch.await(6000, TimeUnit.MILLISECONDS)) { + throw new IOException("Error connecting to zookeeper"); + } + if (zkc.exists(zkPath, false) == null) { + zkc.create(zkPath, new byte[] {'0'}, + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + } + + Stat versionStat = zkc.exists(versionPath, false); + if (versionStat != null) { + byte[] d = zkc.getData(versionPath, false, versionStat); + // There's only one version at the moment + assert bytesToInt(d) == BKJM_LAYOUT_VERSION; + } else { + zkc.create(versionPath, intToBytes(BKJM_LAYOUT_VERSION), + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + } + + if (zkc.exists(ledgerPath, false) == null) { + zkc.create(ledgerPath, new byte[] {'0'}, + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + } + + bkc = new BookKeeper(new ClientConfiguration(), + zkc); + } catch (Exception e) { + throw new IOException("Error initializing zk", e); + } + + wl = new WriteLock(zkc, lockPath); + maxTxId = new MaxTxId(zkc, maxTxIdPath); + } + + /** + * Start a new log segment in a BookKeeper ledger. + * First ensure that we have the write lock for this journal. + * Then create a ledger and stream based on that ledger. 
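For orientation, the constructor above lays out the journal's state under the root znode of the journal URI roughly as follows (path names come from the code; /hdfsjournal is the example root used in the class javadoc):

    /hdfsjournal/version   - layout version marker (BKJM_LAYOUT_VERSION, currently -1)
    /hdfsjournal/ledgers   - one child per edit log segment: edits_<first>_<last> once
                             finalized, plus a single "inprogress" znode while writing
    /hdfsjournal/maxtxid   - highest transaction id recorded so far (MaxTxId)
    /hdfsjournal/lock      - parent of the ephemeral-sequential znodes used by WriteLock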
+ * The ledger id is written to the inprogress znode, so that in the + * case of a crash, a recovery process can find the ledger we were writing + * to when we crashed. + * @param txId First transaction id to be written to the stream + */ + @Override + public EditLogOutputStream startLogSegment(long txId) throws IOException { + wl.acquire(); + + if (txId <= maxTxId.get()) { + throw new IOException("We've already seen " + txId + + ". A new stream cannot be created with it"); + } + if (currentLedger != null) { + throw new IOException("Already writing to a ledger, id=" + + currentLedger.getId()); + } + try { + currentLedger = bkc.createLedger(ensembleSize, quorumSize, + BookKeeper.DigestType.MAC, + digestpw.getBytes()); + String znodePath = inprogressZNode(); + EditLogLedgerMetadata l = new EditLogLedgerMetadata(znodePath, + HdfsConstants.LAYOUT_VERSION, currentLedger.getId(), txId); + /* Write the ledger metadata out to the inprogress ledger znode + * This can fail if for some reason our write lock has + * expired (@see WriteLock) and another process has managed to + * create the inprogress znode. + * In this case, throw an exception. We don't want to continue + * as this would lead to a split brain situation. + */ + l.write(zkc, znodePath); + + return new BookKeeperEditLogOutputStream(conf, currentLedger, wl); + } catch (Exception e) { + if (currentLedger != null) { + try { + currentLedger.close(); + } catch (Exception e2) { + //log & ignore, an IOException will be thrown soon + LOG.error("Error closing ledger", e2); + } + } + throw new IOException("Error creating ledger", e); + } + } + + /** + * Finalize a log segment. If the journal manager is currently + * writing to a ledger, ensure that this is the ledger of the log segment + * being finalized. + * + * Otherwise this is the recovery case. In the recovery case, ensure that + * the firstTxId of the ledger matches firstTxId for the segment we are + * trying to finalize. + */ + @Override + public void finalizeLogSegment(long firstTxId, long lastTxId) + throws IOException { + String inprogressPath = inprogressZNode(); + try { + Stat inprogressStat = zkc.exists(inprogressPath, false); + if (inprogressStat == null) { + throw new IOException("Inprogress znode " + inprogressPath + + " doesn't exist"); + } + + wl.checkWriteLock(); + EditLogLedgerMetadata l + = EditLogLedgerMetadata.read(zkc, inprogressPath); + + if (currentLedger != null) { // normal, non-recovery case + if (l.getLedgerId() == currentLedger.getId()) { + try { + currentLedger.close(); + } catch (BKException bke) { + LOG.error("Error closing current ledger", bke); + } + currentLedger = null; + } else { + throw new IOException( + "Active ledger has different ID to inprogress. 
" + + l.getLedgerId() + " found, " + + currentLedger.getId() + " expected"); + } + } + + if (l.getFirstTxId() != firstTxId) { + throw new IOException("Transaction id not as expected, " + + l.getFirstTxId() + " found, " + firstTxId + " expected"); + } + + l.finalizeLedger(lastTxId); + String finalisedPath = finalizedLedgerZNode(firstTxId, lastTxId); + try { + l.write(zkc, finalisedPath); + } catch (KeeperException.NodeExistsException nee) { + if (!l.verify(zkc, finalisedPath)) { + throw new IOException("Node " + finalisedPath + " already exists" + + " but data doesn't match"); + } + } + maxTxId.store(lastTxId); + zkc.delete(inprogressPath, inprogressStat.getVersion()); + } catch (KeeperException e) { + throw new IOException("Error finalising ledger", e); + } catch (InterruptedException ie) { + throw new IOException("Error finalising ledger", ie); + } finally { + wl.release(); + } + } + + @Override + public EditLogInputStream getInputStream(long fromTxnId) throws IOException { + for (EditLogLedgerMetadata l : getLedgerList()) { + if (l.getFirstTxId() == fromTxnId) { + try { + LedgerHandle h = bkc.openLedger(l.getLedgerId(), + BookKeeper.DigestType.MAC, + digestpw.getBytes()); + return new BookKeeperEditLogInputStream(h, l); + } catch (Exception e) { + throw new IOException("Could not open ledger for " + fromTxnId, e); + } + } + } + throw new IOException("No ledger for fromTxnId " + fromTxnId + " found."); + } + + @Override + public long getNumberOfTransactions(long fromTxnId) throws IOException { + long count = 0; + long expectedStart = 0; + for (EditLogLedgerMetadata l : getLedgerList()) { + if (l.isInProgress()) { + long endTxId = recoverLastTxId(l); + if (endTxId == HdfsConstants.INVALID_TXID) { + break; + } + count += (endTxId - l.getFirstTxId()) + 1; + break; + } + + if (l.getFirstTxId() < fromTxnId) { + continue; + } else if (l.getFirstTxId() == fromTxnId) { + count = (l.getLastTxId() - l.getFirstTxId()) + 1; + expectedStart = l.getLastTxId() + 1; + } else { + if (expectedStart != l.getFirstTxId()) { + if (count == 0) { + throw new CorruptionException("StartTxId " + l.getFirstTxId() + + " is not as expected " + expectedStart + + ". Gap in transaction log?"); + } else { + break; + } + } + count += (l.getLastTxId() - l.getFirstTxId()) + 1; + expectedStart = l.getLastTxId() + 1; + } + } + return count; + } + + @Override + public void recoverUnfinalizedSegments() throws IOException { + wl.acquire(); + + synchronized (this) { + try { + EditLogLedgerMetadata l + = EditLogLedgerMetadata.read(zkc, inprogressZNode()); + long endTxId = recoverLastTxId(l); + if (endTxId == HdfsConstants.INVALID_TXID) { + LOG.error("Unrecoverable corruption has occurred in segment " + + l.toString() + " at path " + inprogressZNode() + + ". 
Unable to continue recovery."); + throw new IOException("Unrecoverable corruption, please check logs."); + } + finalizeLogSegment(l.getFirstTxId(), endTxId); + } catch (KeeperException.NoNodeException nne) { + // nothing to recover, ignore + } finally { + if (wl.haveLock()) { + wl.release(); + } + } + } + } + + @Override + public void purgeLogsOlderThan(long minTxIdToKeep) + throws IOException { + for (EditLogLedgerMetadata l : getLedgerList()) { + if (!l.isInProgress() + && l.getLastTxId() < minTxIdToKeep) { + try { + Stat stat = zkc.exists(l.getZkPath(), false); + zkc.delete(l.getZkPath(), stat.getVersion()); + bkc.deleteLedger(l.getLedgerId()); + } catch (InterruptedException ie) { + LOG.error("Interrupted while purging " + l, ie); + } catch (BKException bke) { + LOG.error("Couldn't delete ledger from bookkeeper", bke); + } catch (KeeperException ke) { + LOG.error("Error deleting ledger entry in zookeeper", ke); + } + } + } + } + + @Override + public void close() throws IOException { + try { + bkc.close(); + zkc.close(); + } catch (Exception e) { + throw new IOException("Couldn't close zookeeper client", e); + } + } + + /** + * Set the amount of memory that this stream should use to buffer edits. + * Setting this will only affect future output stream. Streams + * which have currently be created won't be affected. + */ + @Override + public void setOutputBufferCapacity(int size) { + conf.getInt(BKJM_OUTPUT_BUFFER_SIZE, size); + } + + /** + * Find the id of the last edit log transaction writen to a edit log + * ledger. + */ + private long recoverLastTxId(EditLogLedgerMetadata l) throws IOException { + try { + LedgerHandle lh = bkc.openLedger(l.getLedgerId(), + BookKeeper.DigestType.MAC, + digestpw.getBytes()); + long lastAddConfirmed = lh.getLastAddConfirmed(); + BookKeeperEditLogInputStream in + = new BookKeeperEditLogInputStream(lh, l, lastAddConfirmed); + + long endTxId = HdfsConstants.INVALID_TXID; + FSEditLogOp op = in.readOp(); + while (op != null) { + if (endTxId == HdfsConstants.INVALID_TXID + || op.getTransactionId() == endTxId+1) { + endTxId = op.getTransactionId(); + } + op = in.readOp(); + } + return endTxId; + } catch (Exception e) { + throw new IOException("Exception retreiving last tx id for ledger " + l, + e); + } + } + + /** + * Get a list of all segments in the journal. 
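To show how the pieces above fit together, here is a minimal, hypothetical driver for the write path, using only methods that appear in this patch; in a real deployment FSEditLog issues these calls, and the URI, transaction ids and class name here are made up.

    package org.apache.hadoop.contrib.bkjournal;  // assumed, to see package-private classes

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream;

    public class BkJournalWriteSketch {
      public static void main(String[] args) throws Exception {
        BookKeeperJournalManager jm = new BookKeeperJournalManager(
            new Configuration(), URI.create("bookkeeper://localhost:2181/bkjournal"));

        EditLogOutputStream out = jm.startLogSegment(1);  // takes the ZK write lock
        // out.write(op);  // one call per FSEditLogOp; ops are batched into BookKeeper entries
        out.setReadyToFlush();
        out.flushAndSync();      // waits for outstanding asynchronous ledger adds
        out.close();

        jm.finalizeLogSegment(1, 1);   // replaces the inprogress znode with edits_..._...
        jm.close();
      }
    }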
+ */ + private List getLedgerList() throws IOException { + List ledgers + = new ArrayList(); + try { + List ledgerNames = zkc.getChildren(ledgerPath, false); + for (String n : ledgerNames) { + ledgers.add(EditLogLedgerMetadata.read(zkc, ledgerPath + "/" + n)); + } + } catch (Exception e) { + throw new IOException("Exception reading ledger list from zk", e); + } + + Collections.sort(ledgers, EditLogLedgerMetadata.COMPARATOR); + return ledgers; + } + + /** + * Get the znode path for a finalize ledger + */ + String finalizedLedgerZNode(long startTxId, long endTxId) { + return String.format("%s/edits_%018d_%018d", + ledgerPath, startTxId, endTxId); + } + + /** + * Get the znode path for the inprogressZNode + */ + String inprogressZNode() { + return ledgerPath + "/inprogress"; + } + + /** + * Simple watcher to notify when zookeeper has connected + */ + private class ZkConnectionWatcher implements Watcher { + public void process(WatchedEvent event) { + if (Event.KeeperState.SyncConnected.equals(event.getState())) { + zkConnectLatch.countDown(); + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/EditLogLedgerMetadata.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/EditLogLedgerMetadata.java new file mode 100644 index 00000000000..9ae5cdd93f7 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/EditLogLedgerMetadata.java @@ -0,0 +1,200 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.contrib.bkjournal; + +import java.io.IOException; +import java.util.Comparator; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.KeeperException; + +import org.apache.hadoop.hdfs.protocol.HdfsConstants; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Utility class for storing the metadata associated + * with a single edit log segment, stored in a single ledger + */ +public class EditLogLedgerMetadata { + static final Log LOG = LogFactory.getLog(EditLogLedgerMetadata.class); + + private String zkPath; + private final long ledgerId; + private final int version; + private final long firstTxId; + private long lastTxId; + private boolean inprogress; + + public static final Comparator COMPARATOR + = new Comparator() { + public int compare(EditLogLedgerMetadata o1, + EditLogLedgerMetadata o2) { + if (o1.firstTxId < o2.firstTxId) { + return -1; + } else if (o1.firstTxId == o2.firstTxId) { + return 0; + } else { + return 1; + } + } + }; + + EditLogLedgerMetadata(String zkPath, int version, + long ledgerId, long firstTxId) { + this.zkPath = zkPath; + this.ledgerId = ledgerId; + this.version = version; + this.firstTxId = firstTxId; + this.lastTxId = HdfsConstants.INVALID_TXID; + this.inprogress = true; + } + + EditLogLedgerMetadata(String zkPath, int version, long ledgerId, + long firstTxId, long lastTxId) { + this.zkPath = zkPath; + this.ledgerId = ledgerId; + this.version = version; + this.firstTxId = firstTxId; + this.lastTxId = lastTxId; + this.inprogress = false; + } + + String getZkPath() { + return zkPath; + } + + long getFirstTxId() { + return firstTxId; + } + + long getLastTxId() { + return lastTxId; + } + + long getLedgerId() { + return ledgerId; + } + + int getVersion() { + return version; + } + + boolean isInProgress() { + return this.inprogress; + } + + void finalizeLedger(long newLastTxId) { + assert this.lastTxId == HdfsConstants.INVALID_TXID; + this.lastTxId = newLastTxId; + this.inprogress = false; + } + + static EditLogLedgerMetadata read(ZooKeeper zkc, String path) + throws IOException, KeeperException.NoNodeException { + try { + byte[] data = zkc.getData(path, false, null); + String[] parts = new String(data).split(";"); + if (parts.length == 3) { + int version = Integer.valueOf(parts[0]); + long ledgerId = Long.valueOf(parts[1]); + long txId = Long.valueOf(parts[2]); + return new EditLogLedgerMetadata(path, version, ledgerId, txId); + } else if (parts.length == 4) { + int version = Integer.valueOf(parts[0]); + long ledgerId = Long.valueOf(parts[1]); + long firstTxId = Long.valueOf(parts[2]); + long lastTxId = Long.valueOf(parts[3]); + return new EditLogLedgerMetadata(path, version, ledgerId, + firstTxId, lastTxId); + } else { + throw new IOException("Invalid ledger entry, " + + new String(data)); + } + } catch(KeeperException.NoNodeException nne) { + throw nne; + } catch(Exception e) { + throw new IOException("Error reading from zookeeper", e); + } + } + + void write(ZooKeeper zkc, String path) + throws IOException, KeeperException.NodeExistsException { + this.zkPath = path; + String finalisedData; + if (inprogress) { + finalisedData = String.format("%d;%d;%d", + version, ledgerId, firstTxId); + } else { + finalisedData = String.format("%d;%d;%d;%d", + version, ledgerId, firstTxId, lastTxId); + } + try { + zkc.create(path, finalisedData.getBytes(), + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + 
} catch (KeeperException.NodeExistsException nee) { + throw nee; + } catch (Exception e) { + throw new IOException("Error creating ledger znode"); + } + } + + boolean verify(ZooKeeper zkc, String path) { + try { + EditLogLedgerMetadata other = read(zkc, path); + if (LOG.isTraceEnabled()) { + LOG.trace("Verifying " + this.toString() + + " against " + other); + } + return other == this; + } catch (Exception e) { + LOG.error("Couldn't verify data in " + path, e); + return false; + } + } + + public boolean equals(Object o) { + if (!(o instanceof EditLogLedgerMetadata)) { + return false; + } + EditLogLedgerMetadata ol = (EditLogLedgerMetadata)o; + return ledgerId == ol.ledgerId + && firstTxId == ol.firstTxId + && lastTxId == ol.lastTxId + && version == ol.version; + } + + public int hashCode() { + int hash = 1; + hash = hash * 31 + (int)ledgerId; + hash = hash * 31 + (int)firstTxId; + hash = hash * 31 + (int)lastTxId; + hash = hash * 31 + (int)version; + return hash; + } + + public String toString() { + return "[LedgerId:"+ledgerId + + ", firstTxId:" + firstTxId + + ", lastTxId:" + lastTxId + + ", version:" + version + "]"; + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/MaxTxId.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/MaxTxId.java new file mode 100644 index 00000000000..f2724096832 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/MaxTxId.java @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.contrib.bkjournal; + +import java.io.IOException; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.data.Stat; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Utility class for storing and reading + * the max seen txid in zookeeper + */ +class MaxTxId { + static final Log LOG = LogFactory.getLog(MaxTxId.class); + + private final ZooKeeper zkc; + private final String path; + + private Stat currentStat; + + MaxTxId(ZooKeeper zkc, String path) { + this.zkc = zkc; + this.path = path; + } + + synchronized void store(long maxTxId) throws IOException { + long currentMax = get(); + if (currentMax < maxTxId) { + if (LOG.isTraceEnabled()) { + LOG.trace("Setting maxTxId to " + maxTxId); + } + String txidStr = Long.toString(maxTxId); + try { + if (currentStat != null) { + currentStat = zkc.setData(path, txidStr.getBytes("UTF-8"), + currentStat.getVersion()); + } else { + zkc.create(path, txidStr.getBytes("UTF-8"), + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + } + } catch (Exception e) { + throw new IOException("Error writing max tx id", e); + } + } + } + + synchronized long get() throws IOException { + try { + currentStat = zkc.exists(path, false); + if (currentStat == null) { + return 0; + } else { + byte[] bytes = zkc.getData(path, false, currentStat); + String txidString = new String(bytes, "UTF-8"); + return Long.valueOf(txidString); + } + } catch (Exception e) { + throw new IOException("Error reading the max tx id from zk", e); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/WriteLock.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/WriteLock.java new file mode 100644 index 00000000000..67743b2228c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/WriteLock.java @@ -0,0 +1,186 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.contrib.bkjournal; + +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.Watcher.Event.KeeperState; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.ZooDefs.Ids; + +import java.util.concurrent.atomic.AtomicInteger; +import java.util.List; +import java.util.Collections; +import java.util.Comparator; + +import java.net.InetAddress; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Distributed lock, using ZooKeeper. + * + * The lock is vulnerable to timing issues. For example, the process could + * encounter a really long GC cycle between acquiring the lock, and writing to + * a ledger. This could have timed out the lock, and another process could have + * acquired the lock and started writing to bookkeeper. Therefore other + * mechanisms are required to ensure correctness (i.e. Fencing). + */ +class WriteLock implements Watcher { + static final Log LOG = LogFactory.getLog(WriteLock.class); + + private final ZooKeeper zkc; + private final String lockpath; + + private AtomicInteger lockCount = new AtomicInteger(0); + private String myznode = null; + + WriteLock(ZooKeeper zkc, String lockpath) throws IOException { + this.lockpath = lockpath; + + this.zkc = zkc; + try { + if (zkc.exists(lockpath, false) == null) { + String localString = InetAddress.getLocalHost().toString(); + zkc.create(lockpath, localString.getBytes(), + Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT); + } + } catch (Exception e) { + throw new IOException("Exception accessing Zookeeper", e); + } + } + + void acquire() throws IOException { + while (true) { + if (lockCount.get() == 0) { + try { + synchronized(this) { + if (lockCount.get() > 0) { + lockCount.incrementAndGet(); + return; + } + myznode = zkc.create(lockpath + "/lock-", new byte[] {'0'}, + Ids.OPEN_ACL_UNSAFE, + CreateMode.EPHEMERAL_SEQUENTIAL); + if (LOG.isTraceEnabled()) { + LOG.trace("Acquiring lock, trying " + myznode); + } + + List nodes = zkc.getChildren(lockpath, false); + Collections.sort(nodes, new Comparator() { + public int compare(String o1, + String o2) { + Integer l1 = Integer.valueOf(o1.replace("lock-", "")); + Integer l2 = Integer.valueOf(o2.replace("lock-", "")); + return l1 - l2; + } + }); + if ((lockpath + "/" + nodes.get(0)).equals(myznode)) { + if (LOG.isTraceEnabled()) { + LOG.trace("Lock acquired - " + myznode); + } + lockCount.set(1); + zkc.exists(myznode, this); + return; + } else { + LOG.error("Failed to acquire lock with " + myznode + + ", " + nodes.get(0) + " already has it"); + throw new IOException("Could not acquire lock"); + } + } + } catch (KeeperException e) { + throw new IOException("Exception accessing Zookeeper", e); + } catch (InterruptedException ie) { + throw new IOException("Exception accessing Zookeeper", ie); + } + } else { + int ret = lockCount.getAndIncrement(); + if (ret == 0) { + lockCount.decrementAndGet(); + continue; // try again; + } else { + return; + } + } + } + } + + void release() throws IOException { + try { + if (lockCount.decrementAndGet() <= 0) { + if (lockCount.get() < 0) { + LOG.warn("Unbalanced lock handling somewhere, lockCount down to " + + lockCount.get()); + } + synchronized(this) { + if (lockCount.get() <= 0) { + if (LOG.isTraceEnabled()) { + LOG.trace("releasing lock " + myznode); + } + if (myznode != null) { + zkc.delete(myznode, 
-1); + myznode = null; + } + } + } + } + } catch (Exception e) { + throw new IOException("Exception accessing Zookeeper", e); + } + } + + public void checkWriteLock() throws IOException { + if (!haveLock()) { + throw new IOException("Lost writer lock"); + } + } + + boolean haveLock() throws IOException { + return lockCount.get() > 0; + } + + public void process(WatchedEvent event) { + if (event.getState() == KeeperState.Disconnected + || event.getState() == KeeperState.Expired) { + LOG.warn("Lost zookeeper session, lost lock "); + lockCount.set(0); + } else { + // reapply the watch + synchronized (this) { + LOG.info("Zookeeper event " + event + + " received, reapplying watch to " + myznode); + if (myznode != null) { + try { + zkc.exists(myznode, this); + } catch (Exception e) { + LOG.warn("Could not set watch on lock, releasing", e); + try { + release(); + } catch (IOException ioe) { + LOG.error("Could not release Zk lock", ioe); + } + } + } + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java new file mode 100644 index 00000000000..b949bc200ea --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java @@ -0,0 +1,395 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.contrib.bkjournal; + +import static org.junit.Assert.*; + +import java.net.URI; +import java.util.Collections; +import java.util.Arrays; +import java.util.List; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import org.apache.bookkeeper.util.LocalBookKeeper; + +import java.io.RandomAccessFile; +import java.io.File; +import java.io.FilenameFilter; +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.security.SecurityUtil; +import org.junit.Test; +import org.junit.Before; +import org.junit.After; +import org.junit.BeforeClass; +import org.junit.AfterClass; + +import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; +import static org.apache.hadoop.hdfs.server.namenode.TestEditLog.setupEdits; +import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream; +import org.apache.hadoop.hdfs.server.namenode.EditLogOutputStream; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogTestUtil; +import org.apache.hadoop.hdfs.server.namenode.JournalManager; + +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.KeeperException; + +import com.google.common.collect.ImmutableList; + +import java.util.zip.CheckedInputStream; +import java.util.zip.Checksum; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +public class TestBookKeeperJournalManager { + static final Log LOG = LogFactory.getLog(TestBookKeeperJournalManager.class); + + private static final long DEFAULT_SEGMENT_SIZE = 1000; + private static final String zkEnsemble = "localhost:2181"; + + private static Thread bkthread; + protected static Configuration conf = new Configuration(); + private ZooKeeper zkc; + + private static ZooKeeper connectZooKeeper(String ensemble) + throws IOException, KeeperException, InterruptedException { + final CountDownLatch latch = new CountDownLatch(1); + + ZooKeeper zkc = new ZooKeeper(zkEnsemble, 3600, new Watcher() { + public void process(WatchedEvent event) { + if (event.getState() == Watcher.Event.KeeperState.SyncConnected) { + latch.countDown(); + } + } + }); + if (!latch.await(3, TimeUnit.SECONDS)) { + throw new IOException("Zookeeper took too long to connect"); + } + return zkc; + } + + @BeforeClass + public static void setupBookkeeper() throws Exception { + final int numBookies = 5; + bkthread = new Thread() { + public void run() { + try { + String[] args = new String[1]; + args[0] = String.valueOf(numBookies); + LOG.info("Starting bk"); + LocalBookKeeper.main(args); + } catch (InterruptedException e) { + // go away quietly + } catch (Exception e) { + LOG.error("Error starting local bk", e); + } + } + }; + bkthread.start(); + + if (!LocalBookKeeper.waitForServerUp(zkEnsemble, 10000)) { + throw new Exception("Error starting zookeeper/bookkeeper"); + } + + ZooKeeper zkc = connectZooKeeper(zkEnsemble); + try { + boolean up = false; + for (int i = 0; i < 10; i++) { + try { + List children = zkc.getChildren("/ledgers/available", + false); + if (children.size() == numBookies) { + 
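// every bookie advertises itself as a child of /ledgers/available, so seeing numBookies children means the full test ensemble is up +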
up = true; + break; + } + } catch (KeeperException e) { + // ignore + } + Thread.sleep(1000); + } + if (!up) { + throw new IOException("Not enough bookies started"); + } + } finally { + zkc.close(); + } + } + + @Before + public void setup() throws Exception { + zkc = connectZooKeeper(zkEnsemble); + } + + @After + public void teardown() throws Exception { + zkc.close(); + } + + @AfterClass + public static void teardownBookkeeper() throws Exception { + if (bkthread != null) { + bkthread.interrupt(); + bkthread.join(); + } + } + + @Test + public void testSimpleWrite() throws Exception { + BookKeeperJournalManager bkjm = new BookKeeperJournalManager(conf, + URI.create("bookkeeper://" + zkEnsemble + "/hdfsjournal-simplewrite")); + long txid = 1; + EditLogOutputStream out = bkjm.startLogSegment(1); + for (long i = 1 ; i <= 100; i++) { + FSEditLogOp op = FSEditLogTestUtil.getNoOpInstance(); + op.setTransactionId(i); + out.write(op); + } + out.close(); + bkjm.finalizeLogSegment(1, 100); + + String zkpath = bkjm.finalizedLedgerZNode(1, 100); + + assertNotNull(zkc.exists(zkpath, false)); + assertNull(zkc.exists(bkjm.inprogressZNode(), false)); + } + + @Test + public void testNumberOfTransactions() throws Exception { + BookKeeperJournalManager bkjm = new BookKeeperJournalManager(conf, + URI.create("bookkeeper://" + zkEnsemble + "/hdfsjournal-txncount")); + long txid = 1; + EditLogOutputStream out = bkjm.startLogSegment(1); + for (long i = 1 ; i <= 100; i++) { + FSEditLogOp op = FSEditLogTestUtil.getNoOpInstance(); + op.setTransactionId(i); + out.write(op); + } + out.close(); + bkjm.finalizeLogSegment(1, 100); + + long numTrans = bkjm.getNumberOfTransactions(1); + assertEquals(100, numTrans); + } + + @Test + public void testNumberOfTransactionsWithGaps() throws Exception { + BookKeeperJournalManager bkjm = new BookKeeperJournalManager(conf, + URI.create("bookkeeper://" + zkEnsemble + "/hdfsjournal-gaps")); + long txid = 1; + for (long i = 0; i < 3; i++) { + long start = txid; + EditLogOutputStream out = bkjm.startLogSegment(start); + for (long j = 1 ; j <= DEFAULT_SEGMENT_SIZE; j++) { + FSEditLogOp op = FSEditLogTestUtil.getNoOpInstance(); + op.setTransactionId(txid++); + out.write(op); + } + out.close(); + bkjm.finalizeLogSegment(start, txid-1); + assertNotNull(zkc.exists(bkjm.finalizedLedgerZNode(start, txid-1), false)); + } + zkc.delete(bkjm.finalizedLedgerZNode(DEFAULT_SEGMENT_SIZE+1, DEFAULT_SEGMENT_SIZE*2), -1); + + long numTrans = bkjm.getNumberOfTransactions(1); + assertEquals(DEFAULT_SEGMENT_SIZE, numTrans); + + try { + numTrans = bkjm.getNumberOfTransactions(DEFAULT_SEGMENT_SIZE+1); + fail("Should have thrown corruption exception by this point"); + } catch (JournalManager.CorruptionException ce) { + // if we get here, everything is going good + } + + numTrans = bkjm.getNumberOfTransactions((DEFAULT_SEGMENT_SIZE*2)+1); + assertEquals(DEFAULT_SEGMENT_SIZE, numTrans); + } + + @Test + public void testNumberOfTransactionsWithInprogressAtEnd() throws Exception { + BookKeeperJournalManager bkjm = new BookKeeperJournalManager(conf, + URI.create("bookkeeper://" + zkEnsemble + "/hdfsjournal-inprogressAtEnd")); + long txid = 1; + for (long i = 0; i < 3; i++) { + long start = txid; + EditLogOutputStream out = bkjm.startLogSegment(start); + for (long j = 1 ; j <= DEFAULT_SEGMENT_SIZE; j++) { + FSEditLogOp op = FSEditLogTestUtil.getNoOpInstance(); + op.setTransactionId(txid++); + out.write(op); + } + + out.close(); + bkjm.finalizeLogSegment(start, (txid-1)); + 
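// the finalized segment should now be visible as a ledger znode covering txids [start, txid-1] +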
assertNotNull(zkc.exists(bkjm.finalizedLedgerZNode(start, (txid-1)), false)); + } + long start = txid; + EditLogOutputStream out = bkjm.startLogSegment(start); + for (long j = 1 ; j <= DEFAULT_SEGMENT_SIZE/2; j++) { + FSEditLogOp op = FSEditLogTestUtil.getNoOpInstance(); + op.setTransactionId(txid++); + out.write(op); + } + out.setReadyToFlush(); + out.flush(); + out.abort(); + out.close(); + + long numTrans = bkjm.getNumberOfTransactions(1); + assertEquals((txid-1), numTrans); + } + + /** + * Create a bkjm namespace, write a journal from txid 1, close stream. + * Try to create a new journal from txid 1. Should throw an exception. + */ + @Test + public void testWriteRestartFrom1() throws Exception { + BookKeeperJournalManager bkjm = new BookKeeperJournalManager(conf, + URI.create("bookkeeper://" + zkEnsemble + "/hdfsjournal-restartFrom1")); + long txid = 1; + long start = txid; + EditLogOutputStream out = bkjm.startLogSegment(txid); + for (long j = 1 ; j <= DEFAULT_SEGMENT_SIZE; j++) { + FSEditLogOp op = FSEditLogTestUtil.getNoOpInstance(); + op.setTransactionId(txid++); + out.write(op); + } + out.close(); + bkjm.finalizeLogSegment(start, (txid-1)); + + txid = 1; + try { + out = bkjm.startLogSegment(txid); + fail("Shouldn't be able to start another journal from " + txid + + " when one already exists"); + } catch (Exception ioe) { + LOG.info("Caught exception as expected", ioe); + } + + // test border case + txid = DEFAULT_SEGMENT_SIZE; + try { + out = bkjm.startLogSegment(txid); + fail("Shouldn't be able to start another journal from " + txid + + " when one already exists"); + } catch (IOException ioe) { + LOG.info("Caught exception as expected", ioe); + } + + // open journal continuing from before + txid = DEFAULT_SEGMENT_SIZE + 1; + start = txid; + out = bkjm.startLogSegment(start); + assertNotNull(out); + + for (long j = 1 ; j <= DEFAULT_SEGMENT_SIZE; j++) { + FSEditLogOp op = FSEditLogTestUtil.getNoOpInstance(); + op.setTransactionId(txid++); + out.write(op); + } + out.close(); + bkjm.finalizeLogSegment(start, (txid-1)); + + // open journal arbitarily far in the future + txid = DEFAULT_SEGMENT_SIZE * 4; + out = bkjm.startLogSegment(txid); + assertNotNull(out); + } + + @Test + public void testTwoWriters() throws Exception { + long start = 1; + BookKeeperJournalManager bkjm1 = new BookKeeperJournalManager(conf, + URI.create("bookkeeper://" + zkEnsemble + "/hdfsjournal-dualWriter")); + BookKeeperJournalManager bkjm2 = new BookKeeperJournalManager(conf, + URI.create("bookkeeper://" + zkEnsemble + "/hdfsjournal-dualWriter")); + + EditLogOutputStream out1 = bkjm1.startLogSegment(start); + try { + EditLogOutputStream out2 = bkjm2.startLogSegment(start); + fail("Shouldn't have been able to open the second writer"); + } catch (IOException ioe) { + LOG.info("Caught exception as expected", ioe); + } + } + + @Test + public void testSimpleRead() throws Exception { + BookKeeperJournalManager bkjm = new BookKeeperJournalManager(conf, + URI.create("bookkeeper://" + zkEnsemble + "/hdfsjournal-simpleread")); + long txid = 1; + final long numTransactions = 10000; + EditLogOutputStream out = bkjm.startLogSegment(1); + for (long i = 1 ; i <= numTransactions; i++) { + FSEditLogOp op = FSEditLogTestUtil.getNoOpInstance(); + op.setTransactionId(i); + out.write(op); + } + out.close(); + bkjm.finalizeLogSegment(1, numTransactions); + + + EditLogInputStream in = bkjm.getInputStream(1); + try { + assertEquals(numTransactions, + FSEditLogTestUtil.countTransactionsInStream(in)); + } finally { + in.close(); + } + } 
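The tests above exercise the complete write-then-read lifecycle of BookKeeperJournalManager. A condensed sketch of that lifecycle, using only calls that already appear in this test class (the journal URI and the count of 100 transactions are illustrative):

// Write a segment of 100 no-op edits, finalize it, then read it back and count the transactions.
BookKeeperJournalManager bkjm = new BookKeeperJournalManager(conf,
    URI.create("bookkeeper://" + zkEnsemble + "/hdfsjournal-example")); // hypothetical journal path
EditLogOutputStream out = bkjm.startLogSegment(1);
for (long txid = 1; txid <= 100; txid++) {
  FSEditLogOp op = FSEditLogTestUtil.getNoOpInstance();
  op.setTransactionId(txid);
  out.write(op);
}
out.close();
bkjm.finalizeLogSegment(1, 100); // the inprogress znode gives way to a finalized ledger znode

EditLogInputStream in = bkjm.getInputStream(1); // read the segment that starts at txid 1
try {
  assertEquals(100, FSEditLogTestUtil.countTransactionsInStream(in));
} finally {
  in.close();
}

The recovery test that follows covers the remaining piece of the lifecycle: an aborted in-progress segment is promoted to a finalized ledger by recoverUnfinalizedSegments().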
+ + @Test + public void testSimpleRecovery() throws Exception { + BookKeeperJournalManager bkjm = new BookKeeperJournalManager(conf, + URI.create("bookkeeper://" + zkEnsemble + "/hdfsjournal-simplerecovery")); + EditLogOutputStream out = bkjm.startLogSegment(1); + long txid = 1; + for (long i = 1 ; i <= 100; i++) { + FSEditLogOp op = FSEditLogTestUtil.getNoOpInstance(); + op.setTransactionId(i); + out.write(op); + } + out.setReadyToFlush(); + out.flush(); + + out.abort(); + out.close(); + + + assertNull(zkc.exists(bkjm.finalizedLedgerZNode(1, 100), false)); + assertNotNull(zkc.exists(bkjm.inprogressZNode(), false)); + + bkjm.recoverUnfinalizedSegments(); + + assertNotNull(zkc.exists(bkjm.finalizedLedgerZNode(1, 100), false)); + assertNull(zkc.exists(bkjm.inprogressZNode(), false)); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDfsOverAvroRpc.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogTestUtil.java similarity index 54% rename from hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDfsOverAvroRpc.java rename to hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogTestUtil.java index 7f4ad2f023e..6557b96e18a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDfsOverAvroRpc.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogTestUtil.java @@ -15,29 +15,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hdfs; +package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; -import org.junit.Test; - -/** Test for simple signs of life using Avro RPC. Not an exhaustive test - * yet, just enough to catch fundamental problems using Avro reflection to - * infer namenode RPC protocols. */ -public class TestDfsOverAvroRpc extends TestLocalDFS { - - @Test(timeout=20000) - public void testWorkingDirectory() throws IOException { - /* - Test turned off - see HDFS-2647 and HDFS-2660 for related comments. - This test can be turned on when Avro RPC is enabled using mechanism - similar to protobuf. - */ - /* - System.setProperty("hdfs.rpc.engine", - "org.apache.hadoop.ipc.AvroRpcEngine"); - super.testWorkingDirectory(); - */ +/** + * Utilities for testing edit logs + */ +public class FSEditLogTestUtil { + public static FSEditLogOp getNoOpInstance() { + return FSEditLogOp.LogSegmentOp.getInstance(FSEditLogOpCodes.OP_END_LOG_SEGMENT); } -} + public static long countTransactionsInStream(EditLogInputStream in) + throws IOException { + FSEditLogLoader.EditLogValidation validation = FSEditLogLoader.validateEditLog(in); + return validation.getNumTransactions(); + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/resources/log4j.properties b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/resources/log4j.properties new file mode 100644 index 00000000000..8a6b2174144 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/resources/log4j.properties @@ -0,0 +1,62 @@ +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# + +# +# Bookkeeper Journal Logging Configuration +# + +# Format is " (, )+ + +# DEFAULT: console appender only +log4j.rootLogger=OFF, CONSOLE + +# Example with rolling log file +#log4j.rootLogger=DEBUG, CONSOLE, ROLLINGFILE + +# Example with rolling log file and tracing +#log4j.rootLogger=TRACE, CONSOLE, ROLLINGFILE, TRACEFILE + +# +# Log INFO level and above messages to the console +# +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.Threshold=INFO +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n + +# +# Add ROLLINGFILE to rootLogger to get log file output +# Log DEBUG level and above messages to a log file +log4j.appender.ROLLINGFILE=org.apache.log4j.DailyRollingFileAppender +log4j.appender.ROLLINGFILE.Threshold=DEBUG +log4j.appender.ROLLINGFILE.File=hdfs-namenode.log +log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout +log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} - %-5p - [%t:%C{1}@%L] - %m%n + +# Max log file size of 10MB +log4j.appender.ROLLINGFILE.MaxFileSize=10MB +# uncomment the next line to limit number of backup files +#log4j.appender.ROLLINGFILE.MaxBackupIndex=10 + +log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout +log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} - %-5p [%t:%C{1}@%L] - %m%n + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 829de5133ce..db90a052715 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -761,12 +761,12 @@ public class DFSUtil { Configuration conf, UserGroupInformation ugi) throws IOException { /** * Currently we have simply burnt-in support for a SINGLE - * protocol - protocolR23Compatible. This will be replaced + * protocol - protocolPB. This will be replaced * by a way to pick the right protocol based on the * version of the target server. */ - return new org.apache.hadoop.hdfs.protocolR23Compatible. - ClientNamenodeProtocolTranslatorR23(nameNodeAddr, conf, ugi); + return new org.apache.hadoop.hdfs.protocolPB. 
+ ClientNamenodeProtocolTranslatorPB(nameNodeAddr, conf, ugi); } /** Create a {@link ClientDatanodeProtocol} proxy */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index 5a5a24b40c0..71b39f8426e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -20,8 +20,6 @@ package org.apache.hadoop.hdfs.protocol; import java.io.FileNotFoundException; import java.io.IOException; -import org.apache.avro.reflect.Nullable; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.ContentSummary; @@ -117,7 +115,6 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException If src contains a symlink * @throws IOException If an I/O error occurred */ - @Nullable @Idempotent public LocatedBlocks getBlockLocations(String src, long offset, @@ -317,7 +314,7 @@ public interface ClientProtocol extends VersionedProtocol { * @throws IOException If an I/O error occurred */ public LocatedBlock addBlock(String src, String clientName, - @Nullable ExtendedBlock previous, @Nullable DatanodeInfo[] excludeNodes) + ExtendedBlock previous, DatanodeInfo[] excludeNodes) throws AccessControlException, FileNotFoundException, NotReplicatedYetException, SafeModeException, UnresolvedLinkException, IOException; @@ -706,7 +703,6 @@ public interface ClientProtocol extends VersionedProtocol { * @return upgrade status information or null if no upgrades are in progress * @throws IOException */ - @Nullable //TODO(HA): Should this be @Idempotent? public UpgradeStatusReport distributedUpgradeProgress(UpgradeAction action) throws IOException; @@ -754,7 +750,6 @@ public interface ClientProtocol extends VersionedProtocol { * @throws UnresolvedLinkException if the path contains a symlink. * @throws IOException If an I/O error occurred */ - @Nullable @Idempotent public HdfsFileStatus getFileInfo(String src) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java index ae908480b9a..80b2d28d802 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/DatanodeInfo.java @@ -36,8 +36,6 @@ import org.apache.hadoop.net.Node; import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.util.StringUtils; -import org.apache.avro.reflect.Nullable; - /** * DatanodeInfo represents the status of a DataNode. * This object is used for communication in the @@ -57,7 +55,6 @@ public class DatanodeInfo extends DatanodeID implements Node { /** HostName as supplied by the datanode during registration as its * name. Namenode uses datanode IP address as the name. 
*/ - @Nullable protected String hostName = null; // administrative states of a datanode @@ -84,10 +81,8 @@ public class DatanodeInfo extends DatanodeID implements Node { } } - @Nullable protected AdminStates adminState; - public DatanodeInfo() { super(); adminState = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java index 4659dd3352f..cb6b5d0e688 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsFileStatus.java @@ -31,8 +31,6 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableFactories; import org.apache.hadoop.io.WritableFactory; -import org.apache.avro.reflect.Nullable; - /** Interface that represents the over the wire information for a file. */ @InterfaceAudience.Private @@ -47,7 +45,6 @@ public class HdfsFileStatus implements Writable { } private byte[] path; // local name of the inode that's encoded in java UTF8 - @Nullable private byte[] symlink; // symlink target encoded in java UTF8 or null private long length; private boolean isdir; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlocks.java index b8971d64bba..02dd547f20a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LocatedBlocks.java @@ -31,8 +31,6 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableFactories; import org.apache.hadoop.io.WritableFactory; -import org.apache.avro.reflect.Nullable; - /** * Collection of blocks with their locations and the file length. 
*/ @@ -42,7 +40,6 @@ public class LocatedBlocks implements Writable { private long fileLength; private List blocks; // array of blocks with prioritized locations private boolean underConstruction; - @Nullable private LocatedBlock lastLocatedBlock = null; private boolean isLastBlockComplete = false; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java index 1204a76c4e9..c75c3494220 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java @@ -19,11 +19,16 @@ package org.apache.hadoop.hdfs.protocolPB; import java.io.IOException; import java.util.Arrays; +import java.util.List; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.FsServerDefaults; import org.apache.hadoop.fs.Options.Rename; import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks; +import org.apache.hadoop.hdfs.protocol.DirectoryListing; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; @@ -124,9 +129,7 @@ import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.CorruptFileBlocksProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.DatanodeIDProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.DatanodeInfoProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.DirectoryListingProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.HdfsFileStatusProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.LocatedBlockProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.UpgradeStatusReportProto; import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport; import org.apache.hadoop.io.Text; @@ -218,9 +221,7 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements Builder builder = GetBlockLocationsResponseProto .newBuilder(); if (b != null) { - builder.setLocations( - PBHelper.convert(server.getBlockLocations(req.getSrc(), - req.getOffset(), req.getLength()))).build(); + builder.setLocations(PBHelper.convert(b)).build(); } return builder.build(); } catch (IOException e) { @@ -233,14 +234,19 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements RpcController controller, GetServerDefaultsRequestProto req) throws ServiceException { try { + FsServerDefaults result = server.getServerDefaults(); return GetServerDefaultsResponseProto.newBuilder() - .setServerDefaults(PBHelper.convert(server.getServerDefaults())) + .setServerDefaults(PBHelper.convert(result)) .build(); } catch (IOException e) { throw new ServiceException(e); } } + + static final CreateResponseProto VOID_CREATE_RESPONSE = + CreateResponseProto.newBuilder().build(); + @Override public CreateResponseProto create(RpcController controller, CreateRequestProto req) throws ServiceException { @@ -252,19 +258,22 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements } 
catch (IOException e) { throw new ServiceException(e); } - return CreateResponseProto.newBuilder().build(); - + return VOID_CREATE_RESPONSE; } + static final AppendResponseProto NULL_APPEND_RESPONSE = + AppendResponseProto.newBuilder().build(); + @Override public AppendResponseProto append(RpcController controller, AppendRequestProto req) throws ServiceException { try { - return AppendResponseProto - .newBuilder() - .setBlock( - PBHelper.convert(server.append(req.getSrc(), req.getClientName()))) - .build(); + LocatedBlock result = server.append(req.getSrc(), req.getClientName()); + if (result != null) { + return AppendResponseProto.newBuilder() + .setBlock(PBHelper.convert(result)).build(); + } + return NULL_APPEND_RESPONSE; } catch (IOException e) { throw new ServiceException(e); } @@ -274,18 +283,16 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements public SetReplicationResponseProto setReplication(RpcController controller, SetReplicationRequestProto req) throws ServiceException { try { - return SetReplicationResponseProto - .newBuilder() - .setResult( - server.setReplication(req.getSrc(), (short) req.getReplication())) - .build(); + boolean result = + server.setReplication(req.getSrc(), (short) req.getReplication()); + return SetReplicationResponseProto.newBuilder().setResult(result).build(); } catch (IOException e) { throw new ServiceException(e); } } - static final SetPermissionResponseProto SET_PERM_RESPONSE = + static final SetPermissionResponseProto VOID_SET_PERM_RESPONSE = SetPermissionResponseProto.newBuilder().build(); @Override @@ -296,24 +303,26 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements } catch (IOException e) { throw new ServiceException(e); } - return SET_PERM_RESPONSE; + return VOID_SET_PERM_RESPONSE; } - static final SetOwnerResponseProto SET_OWNER_RESPONSE = + static final SetOwnerResponseProto VOID_SET_OWNER_RESPONSE = SetOwnerResponseProto.newBuilder().build(); @Override public SetOwnerResponseProto setOwner(RpcController controller, SetOwnerRequestProto req) throws ServiceException { try { - server.setOwner(req.getSrc(), req.getUsername(), req.getGroupname()); + server.setOwner(req.getSrc(), + req.hasUsername() ? req.getUsername() : null, + req.hasGroupname() ? req.getGroupname() : null); } catch (IOException e) { throw new ServiceException(e); } - return SET_OWNER_RESPONSE; + return VOID_SET_OWNER_RESPONSE; } - static final AbandonBlockResponseProto ABD_BLOCK_RESPONSE = + static final AbandonBlockResponseProto VOID_ADD_BLOCK_RESPONSE = AbandonBlockResponseProto.newBuilder().build(); @Override @@ -325,20 +334,22 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements } catch (IOException e) { throw new ServiceException(e); } - return ABD_BLOCK_RESPONSE; + return VOID_ADD_BLOCK_RESPONSE; } @Override public AddBlockResponseProto addBlock(RpcController controller, AddBlockRequestProto req) throws ServiceException { + try { + List excl = req.getExcludeNodesList(); + LocatedBlock result = server.addBlock(req.getSrc(), req.getClientName(), + req.hasPrevious() ? PBHelper.convert(req.getPrevious()) : null, + (excl == null || + excl.size() == 0) ? null : + PBHelper.convert(excl.toArray(new DatanodeInfoProto[excl.size()]))); return AddBlockResponseProto.newBuilder().setBlock( - PBHelper.convert( - server.addBlock(req.getSrc(), req.getClientName(), - req.hasPrevious() ? 
PBHelper.convert(req.getPrevious()) : null, - PBHelper.convert( - (DatanodeInfoProto[]) req.getExcludeNodesList().toArray())))) - .build(); + PBHelper.convert(result)).build(); } catch (IOException e) { throw new ServiceException(e); } @@ -349,15 +360,17 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements RpcController controller, GetAdditionalDatanodeRequestProto req) throws ServiceException { try { + List existingList = req.getExistingsList(); + List excludesList = req.getExcludesList(); + LocatedBlock result = server.getAdditionalDatanode( + req.getSrc(), PBHelper.convert(req.getBlk()), + PBHelper.convert(existingList.toArray( + new DatanodeInfoProto[existingList.size()])), + PBHelper.convert(excludesList.toArray( + new DatanodeInfoProto[excludesList.size()])), + req.getNumAdditionalNodes(), req.getClientName()); return GetAdditionalDatanodeResponseProto.newBuilder().setBlock( - PBHelper.convert( - server.getAdditionalDatanode(req.getSrc(), - PBHelper.convert(req.getBlk()), - PBHelper.convert((DatanodeInfoProto[]) req.getExistingsList() - .toArray()), PBHelper - .convert((DatanodeInfoProto[]) req.getExcludesList() - .toArray()), req.getNumAdditionalNodes(), req - .getClientName()))) + PBHelper.convert(result)) .build(); } catch (IOException e) { throw new ServiceException(e); @@ -368,10 +381,10 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements public CompleteResponseProto complete(RpcController controller, CompleteRequestProto req) throws ServiceException { try { - return CompleteResponseProto.newBuilder().setResult( - server.complete(req.getSrc(), req.getClientName(), - PBHelper.convert(req.getLast()))) - .build(); + boolean result = + server.complete(req.getSrc(), req.getClientName(), + req.hasLast() ? 
PBHelper.convert(req.getLast()) : null); + return CompleteResponseProto.newBuilder().setResult(result).build(); } catch (IOException e) { throw new ServiceException(e); } @@ -384,8 +397,9 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements public ReportBadBlocksResponseProto reportBadBlocks(RpcController controller, ReportBadBlocksRequestProto req) throws ServiceException { try { + List bl = req.getBlocksList(); server.reportBadBlocks(PBHelper.convertLocatedBlock( - (LocatedBlockProto[]) req.getBlocksList().toArray())); + bl.toArray(new LocatedBlockProto[bl.size()]))); } catch (IOException e) { throw new ServiceException(e); } @@ -399,7 +413,8 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements public ConcatResponseProto concat(RpcController controller, ConcatRequestProto req) throws ServiceException { try { - server.concat(req.getTrg(), (String[])req.getSrcsList().toArray()); + List srcs = req.getSrcsList(); + server.concat(req.getTrg(), srcs.toArray(new String[srcs.size()])); } catch (IOException e) { throw new ServiceException(e); } @@ -456,14 +471,21 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements } } + static final GetListingResponseProto NULL_GETLISTING_RESPONSE = + GetListingResponseProto.newBuilder().build(); @Override public GetListingResponseProto getListing(RpcController controller, GetListingRequestProto req) throws ServiceException { try { - DirectoryListingProto result = PBHelper.convert(server.getListing( + DirectoryListing result = server.getListing( req.getSrc(), req.getStartAfter().toByteArray(), - req.getNeedLocation())); - return GetListingResponseProto.newBuilder().setDirList(result).build(); + req.getNeedLocation()); + if (result !=null) { + return GetListingResponseProto.newBuilder().setDirList( + PBHelper.convert(result)).build(); + } else { + return NULL_GETLISTING_RESPONSE; + } } catch (IOException e) { throw new ServiceException(e); } @@ -494,6 +516,19 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements } } + @Override + public RestoreFailedStorageResponseProto restoreFailedStorage( + RpcController controller, RestoreFailedStorageRequestProto req) + throws ServiceException { + try { + boolean result = server.restoreFailedStorage(req.getArg()); + return RestoreFailedStorageResponseProto.newBuilder().setResult(result) + .build(); + } catch (IOException e) { + throw new ServiceException(e); + } + } + @Override public GetFsStatsResponseProto getFsStats(RpcController controller, GetFsStatusRequestProto req) throws ServiceException { @@ -557,19 +592,6 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements } - @Override - public RestoreFailedStorageResponseProto restoreFailedStorage( - RpcController controller, RestoreFailedStorageRequestProto req) - throws ServiceException { - try { - boolean result = server.restoreFailedStorage(req.getArg()); - return RestoreFailedStorageResponseProto.newBuilder().setResult(result) - .build(); - } catch (IOException e) { - throw new ServiceException(e); - } - } - static final RefreshNodesResponseProto VOID_REFRESHNODES_RESPONSE = RefreshNodesResponseProto.newBuilder().build(); @@ -622,9 +644,10 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements RpcController controller, ListCorruptFileBlocksRequestProto req) throws ServiceException { try { - CorruptFileBlocksProto result = PBHelper.convert(server - .listCorruptFileBlocks(req.getPath(), req.getCookie())); - return 
ListCorruptFileBlocksResponseProto.newBuilder().setCorrupt(result) + CorruptFileBlocks result = server.listCorruptFileBlocks( + req.getPath(), req.hasCookie() ? req.getCookie(): null); + return ListCorruptFileBlocksResponseProto.newBuilder() + .setCorrupt(PBHelper.convert(result)) .build(); } catch (IOException e) { throw new ServiceException(e); @@ -646,29 +669,40 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements } + static final GetFileInfoResponseProto NULL_GETFILEINFO_RESPONSE = + GetFileInfoResponseProto.newBuilder().build(); @Override public GetFileInfoResponseProto getFileInfo(RpcController controller, GetFileInfoRequestProto req) throws ServiceException { try { - HdfsFileStatus res = server.getFileInfo(req.getSrc()); - GetFileInfoResponseProto.Builder builder = - GetFileInfoResponseProto.newBuilder(); - if (res != null) { - builder.setFs(PBHelper.convert(res)); + HdfsFileStatus result = server.getFileInfo(req.getSrc()); + + if (result != null) { + return GetFileInfoResponseProto.newBuilder().setFs( + PBHelper.convert(result)).build(); } - return builder.build(); + return NULL_GETFILEINFO_RESPONSE; } catch (IOException e) { throw new ServiceException(e); } } + static final GetFileLinkInfoResponseProto NULL_GETFILELINKINFO_RESPONSE = + GetFileLinkInfoResponseProto.newBuilder().build(); @Override public GetFileLinkInfoResponseProto getFileLinkInfo(RpcController controller, GetFileLinkInfoRequestProto req) throws ServiceException { try { - HdfsFileStatusProto result = - PBHelper.convert(server.getFileLinkInfo(req.getSrc())); - return GetFileLinkInfoResponseProto.newBuilder().setFs(result).build(); + HdfsFileStatus result = server.getFileLinkInfo(req.getSrc()); + if (result != null) { + System.out.println("got non null result for getFileLinkInfo for " + req.getSrc()); + return GetFileLinkInfoResponseProto.newBuilder().setFs( + PBHelper.convert(result)).build(); + } else { + System.out.println("got null result for getFileLinkInfo for " + req.getSrc()); + return NULL_GETFILELINKINFO_RESPONSE; + } + } catch (IOException e) { throw new ServiceException(e); } @@ -679,10 +713,9 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements RpcController controller, GetContentSummaryRequestProto req) throws ServiceException { try { - ContentSummaryProto result = - PBHelper.convert(server.getContentSummary(req.getPath())); - return - GetContentSummaryResponseProto.newBuilder().setSummary(result).build(); + ContentSummary result = server.getContentSummary(req.getPath()); + return GetContentSummaryResponseProto.newBuilder() + .setSummary(PBHelper.convert(result)).build(); } catch (IOException e) { throw new ServiceException(e); } @@ -780,10 +813,11 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements public UpdatePipelineResponseProto updatePipeline(RpcController controller, UpdatePipelineRequestProto req) throws ServiceException { try { + List newNodes = req.getNewNodesList(); server .updatePipeline(req.getClientName(), PBHelper.convert(req .getOldBlock()), PBHelper.convert(req.getNewBlock()), PBHelper - .convert((DatanodeIDProto[]) req.getNewNodesList().toArray())); + .convert(newNodes.toArray(new DatanodeIDProto[newNodes.size()]))); return VOID_UPDATEPIPELINE_RESPONSE; } catch (IOException e) { throw new ServiceException(e); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index 75fbc7bc8e4..5860d3a13af 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -76,6 +76,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.AbandonBlockRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.AddBlockRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.AppendRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.AppendResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CancelDelegationTokenRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CompleteRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ConcatRequestProto; @@ -95,9 +96,11 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetDel import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileInfoRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileInfoResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileLinkInfoRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFileLinkInfoResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetFsStatusRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetLinkTargetRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetListingRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetListingResponseProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetPreferredBlockSizeRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.GetServerDefaultsRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ListCorruptFileBlocksRequestProto; @@ -121,6 +124,8 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SetSaf import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.SetTimesRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpdateBlockForPipelineRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.UpdatePipelineRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.DirectoryListingProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.HdfsFileStatusProto; import com.google.protobuf.ByteString; import com.google.protobuf.ServiceException; @@ -263,7 +268,8 @@ public class ClientNamenodeProtocolTranslatorPB implements .setClientName(clientName) .build(); try { - return PBHelper.convert(rpcProxy.append(null, req).getBlock()); + AppendResponseProto res = rpcProxy.append(null, req); + return res.hasBlock() ? 
PBHelper.convert(res.getBlock()) : null; } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } @@ -304,13 +310,14 @@ public class ClientNamenodeProtocolTranslatorPB implements public void setOwner(String src, String username, String groupname) throws AccessControlException, FileNotFoundException, SafeModeException, UnresolvedLinkException, IOException { - SetOwnerRequestProto req = SetOwnerRequestProto.newBuilder() - .setSrc(src) - .setUsername(username) - .setGroupname(groupname) - .build(); + SetOwnerRequestProto.Builder req = SetOwnerRequestProto.newBuilder() + .setSrc(src); + if (username != null) + req.setUsername(username); + if (groupname != null) + req.setGroupname(groupname); try { - rpcProxy.setOwner(null, req); + rpcProxy.setOwner(null, req.build()); } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } @@ -335,15 +342,14 @@ public class ClientNamenodeProtocolTranslatorPB implements throws AccessControlException, FileNotFoundException, NotReplicatedYetException, SafeModeException, UnresolvedLinkException, IOException { - AddBlockRequestProto.Builder builder = AddBlockRequestProto.newBuilder(); - builder.setSrc(src) - .setClientName(clientName) - .addAllExcludeNodes(Arrays.asList(PBHelper.convert(excludeNodes))); - if (previous != null) { - builder.setPrevious(PBHelper.convert(previous)); - } + AddBlockRequestProto.Builder req = AddBlockRequestProto.newBuilder().setSrc(src) + .setClientName(clientName); + if (previous != null) + req.setPrevious(PBHelper.convert(previous)); + if (excludeNodes != null) + req.addAllExcludeNodes(Arrays.asList(PBHelper.convert(excludeNodes))); try { - return PBHelper.convert(rpcProxy.addBlock(null, builder.build()).getBlock()); + return PBHelper.convert(rpcProxy.addBlock(null, req.build()).getBlock()); } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } @@ -376,13 +382,13 @@ public class ClientNamenodeProtocolTranslatorPB implements public boolean complete(String src, String clientName, ExtendedBlock last) throws AccessControlException, FileNotFoundException, SafeModeException, UnresolvedLinkException, IOException { - CompleteRequestProto req = CompleteRequestProto.newBuilder() + CompleteRequestProto.Builder req = CompleteRequestProto.newBuilder() .setSrc(src) - .setClientName(clientName) - .setLast(PBHelper.convert(last)) - .build(); + .setClientName(clientName); + if (last != null) + req.setLast(PBHelper.convert(last)); try { - return rpcProxy.complete(null, req).getResult(); + return rpcProxy.complete(null, req.build()).getResult(); } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } @@ -493,7 +499,12 @@ public class ClientNamenodeProtocolTranslatorPB implements .setStartAfter(ByteString.copyFrom(startAfter)) .setNeedLocation(needLocation).build(); try { - return PBHelper.convert(rpcProxy.getListing(null, req).getDirList()); + GetListingResponseProto result = rpcProxy.getListing(null, req); + + if (result.hasDirList()) { + return PBHelper.convert(result.getDirList()); + } + return null; } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } @@ -635,11 +646,13 @@ public class ClientNamenodeProtocolTranslatorPB implements @Override public CorruptFileBlocks listCorruptFileBlocks(String path, String cookie) throws IOException { - ListCorruptFileBlocksRequestProto req = ListCorruptFileBlocksRequestProto - .newBuilder().setPath(path).setCookie(cookie).build(); + ListCorruptFileBlocksRequestProto.Builder req = + 
ListCorruptFileBlocksRequestProto.newBuilder().setPath(path); + if (cookie != null) + req.setCookie(cookie); try { return PBHelper.convert( - rpcProxy.listCorruptFileBlocks(null, req).getCorrupt()); + rpcProxy.listCorruptFileBlocks(null, req.build()).getCorrupt()); } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } @@ -676,7 +689,9 @@ public class ClientNamenodeProtocolTranslatorPB implements GetFileLinkInfoRequestProto req = GetFileLinkInfoRequestProto.newBuilder() .setSrc(src).build(); try { - return PBHelper.convert(rpcProxy.getFileLinkInfo(null, req).getFs()); + GetFileLinkInfoResponseProto result = rpcProxy.getFileLinkInfo(null, req); + return result.hasFs() ? + PBHelper.convert(rpcProxy.getFileLinkInfo(null, req).getFs()) : null; } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java index 0ba4387adb0..e6d6005f9b1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java @@ -170,7 +170,7 @@ public class DatanodeProtocolClientSideTranslatorPB implements DatanodeProtocol, throws IOException { HeartbeatRequestProto req = HeartbeatRequestProto.newBuilder() .setRegistration(PBHelper.convert(registration)).setCapacity(capacity) - .setCapacity(dfsUsed).setRemaining(remaining) + .setDfsUsed(dfsUsed).setRemaining(remaining) .setBlockPoolUsed(blockPoolUsed).setXmitsInProgress(xmitsInProgress) .setXceiverCount(xceiverCount).setFailedVolumes(failedVolumes).build(); HeartbeatResponseProto resp; @@ -196,7 +196,7 @@ public class DatanodeProtocolClientSideTranslatorPB implements DatanodeProtocol, .setBlockPoolId(poolId); if (blocks != null) { for (int i = 0; i < blocks.length; i++) { - builder.setBlocks(i, blocks[i]); + builder.addBlocks(blocks[i]); } } BlockReportRequestProto req = builder.build(); @@ -219,7 +219,7 @@ public class DatanodeProtocolClientSideTranslatorPB implements DatanodeProtocol, .setBlockPoolId(poolId); if (receivedAndDeletedBlocks != null) { for (int i = 0; i < receivedAndDeletedBlocks.length; i++) { - builder.setBlocks(i, PBHelper.convert(receivedAndDeletedBlocks[i])); + builder.addBlocks(PBHelper.convert(receivedAndDeletedBlocks[i])); } } BlockReceivedAndDeletedRequestProto req = builder.build(); @@ -292,7 +292,7 @@ public class DatanodeProtocolClientSideTranslatorPB implements DatanodeProtocol, .setNewLength(newlength).setCloseFile(closeFile) .setDeleteBlock(deleteblock); for (int i = 0; i < newtargets.length; i++) { - builder.setNewTaragets(i, PBHelper.convert(newtargets[i])); + builder.addNewTaragets(PBHelper.convert(newtargets[i])); } CommitBlockSynchronizationRequestProto req = builder.build(); try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java index 890b6a6b202..7413fd1d809 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java @@ -122,7 +122,7 @@ public class DatanodeProtocolServerSideTranslatorPB implements @Override public BlockReportResponseProto blockReport(RpcController controller, BlockReportRequestProto request) throws ServiceException { - DatanodeCommand cmd; + DatanodeCommand cmd = null; List blockIds = request.getBlocksList(); long[] blocks = new long[blockIds.size()]; for (int i = 0; i < blockIds.size(); i++) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java index a3788481352..78f2319fed9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -667,6 +667,9 @@ public class PBHelper { case DatanodeProtocol.DNA_INVALIDATE: builder.setAction(BlockCommandProto.Action.INVALIDATE); break; + case DatanodeProtocol.DNA_SHUTDOWN: + builder.setAction(BlockCommandProto.Action.SHUTDOWN); + break; } Block[] blocks = cmd.getBlocks(); for (int i = 0; i < blocks.length; i++) { @@ -687,6 +690,10 @@ public class PBHelper { public static DatanodeCommandProto convert(DatanodeCommand datanodeCommand) { DatanodeCommandProto.Builder builder = DatanodeCommandProto.newBuilder(); + if (datanodeCommand == null) { + return builder.setCmdType(DatanodeCommandProto.Type.NullDatanodeCommand) + .build(); + } switch (datanodeCommand.getAction()) { case DatanodeProtocol.DNA_BALANCERBANDWIDTHUPDATE: builder.setCmdType(DatanodeCommandProto.Type.BalancerBandwidthCommand) @@ -713,11 +720,18 @@ public class PBHelper { break; case DatanodeProtocol.DNA_TRANSFER: case DatanodeProtocol.DNA_INVALIDATE: + case DatanodeProtocol.DNA_SHUTDOWN: builder.setCmdType(DatanodeCommandProto.Type.BlockCommand).setBlkCmd( PBHelper.convert((BlockCommand) datanodeCommand)); break; - case DatanodeProtocol.DNA_SHUTDOWN: //Not expected + case DatanodeProtocol.DNA_UC_ACTION_REPORT_STATUS: + case DatanodeProtocol.DNA_UC_ACTION_START_UPGRADE: + builder.setCmdType(DatanodeCommandProto.Type.UpgradeCommand) + .setUpgradeCmd(PBHelper.convert((UpgradeCommand) datanodeCommand)); + break; case DatanodeProtocol.DNA_UNKNOWN: //Not expected + default: + builder.setCmdType(DatanodeCommandProto.Type.NullDatanodeCommand); } return builder.build(); } @@ -756,13 +770,15 @@ public class PBHelper { public static BlockCommand convert(BlockCommandProto blkCmd) { List blockProtoList = blkCmd.getBlocksList(); - List targetList = blkCmd.getTargetsList(); - DatanodeInfo[][] targets = new DatanodeInfo[blockProtoList.size()][]; Block[] blocks = new Block[blockProtoList.size()]; for (int i = 0; i < blockProtoList.size(); i++) { - targets[i] = PBHelper.convert(targetList.get(i)); blocks[i] = PBHelper.convert(blockProtoList.get(i)); } + List targetList = blkCmd.getTargetsList(); + DatanodeInfo[][] targets = new DatanodeInfo[targetList.size()][]; + for (int i = 0; i < targetList.size(); i++) { + targets[i] = PBHelper.convert(targetList.get(i)); + } int action = DatanodeProtocol.DNA_UNKNOWN; switch (blkCmd.getAction()) { case TRANSFER: @@ -771,6 +787,9 @@ public class PBHelper { case INVALIDATE: action = DatanodeProtocol.DNA_INVALIDATE; break; + case SHUTDOWN: + action = DatanodeProtocol.DNA_SHUTDOWN; + break; } return new BlockCommand(action, blkCmd.getBlockPoolId(), 
blocks, targets); } @@ -802,9 +821,13 @@ public class PBHelper { } public static UpgradeCommandProto convert(UpgradeCommand comm) { - UpgradeCommandProto.Builder builder = UpgradeCommandProto.newBuilder() - .setVersion(comm.getVersion()) - .setUpgradeStatus(comm.getCurrentStatus()); + UpgradeCommandProto.Builder builder = UpgradeCommandProto.newBuilder(); + if (comm == null) { + return builder.setAction(UpgradeCommandProto.Action.UNKNOWN) + .setVersion(0).setUpgradeStatus(0).build(); + } + builder.setVersion(comm.getVersion()).setUpgradeStatus( + comm.getCurrentStatus()); switch (comm.getAction()) { case UpgradeCommand.UC_ACTION_REPORT_STATUS: builder.setAction(UpgradeCommandProto.Action.REPORT_STATUS); @@ -953,6 +976,13 @@ public class PBHelper { if ((flag & CreateFlagProto.APPEND_VALUE) == CreateFlagProto.APPEND_VALUE) { result.add(CreateFlag.APPEND); } + if ((flag & CreateFlagProto.CREATE_VALUE) == CreateFlagProto.CREATE_VALUE) { + result.add(CreateFlag.CREATE); + } + if ((flag & CreateFlagProto.OVERWRITE_VALUE) + == CreateFlagProto.OVERWRITE_VALUE) { + result.add(CreateFlag.OVERWRITE); + } return new EnumSetWritable(result); } @@ -984,7 +1014,7 @@ public class PBHelper { public static HdfsFileStatusProto convert(HdfsFileStatus fs) { if (fs == null) return null; - FileType fType = FileType.IS_DIR;; + FileType fType = FileType.IS_FILE; if (fs.isDir()) { fType = FileType.IS_DIR; } else if (fs.isSymlink()) { @@ -1003,8 +1033,7 @@ public class PBHelper { setOwner(fs.getOwner()). setGroup(fs.getGroup()). setPath(ByteString.copyFrom(fs.getLocalNameInBytes())); - - if (fs.getSymlink() != null) { + if (fs.isSymlink()) { builder.setSymlink(ByteString.copyFrom(fs.getSymlinkInBytes())); } if (fs instanceof HdfsLocatedFileStatus) { @@ -1031,7 +1060,7 @@ public class PBHelper { final int len = fs.length; HdfsFileStatus[] result = new HdfsFileStatus[len]; for (int i = 0; i < len; ++i) { - PBHelper.convert(fs[i]); + result[i] = PBHelper.convert(fs[i]); } return result; } @@ -1039,9 +1068,11 @@ public class PBHelper { public static DirectoryListing convert(DirectoryListingProto dl) { if (dl == null) return null; - return new DirectoryListing( - PBHelper.convert((HdfsFileStatusProto[]) - dl.getPartialListingList().toArray()), + List partList = dl.getPartialListingList(); + return new DirectoryListing( + partList.isEmpty() ? 
new HdfsFileStatus[0] + : PBHelper.convert( + partList.toArray(new HdfsFileStatusProto[partList.size()])), dl.getRemainingEntries()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/ClientNamenodeWireProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/ClientNamenodeWireProtocol.java index 6224fe9d6b9..dbce17b1a1d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/ClientNamenodeWireProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/ClientNamenodeWireProtocol.java @@ -20,8 +20,6 @@ package org.apache.hadoop.hdfs.protocolR23Compatible; import java.io.FileNotFoundException; import java.io.IOException; -import org.apache.avro.reflect.Nullable; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.CreateFlag; @@ -97,7 +95,6 @@ public interface ClientNamenodeWireProtocol extends VersionedProtocol { * The specification of this method matches that of * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol#getBlockLocations} */ - @Nullable public LocatedBlocksWritable getBlockLocations(String src, long offset, long length) @@ -175,7 +172,7 @@ public interface ClientNamenodeWireProtocol extends VersionedProtocol { * org.apache.hadoop.hdfs.protocol.DatanodeInfo[])} */ public LocatedBlockWritable addBlock(String src, String clientName, - @Nullable ExtendedBlockWritable previous, @Nullable DatanodeInfoWritable[] excludeNodes) + ExtendedBlockWritable previous, DatanodeInfoWritable[] excludeNodes) throws AccessControlException, FileNotFoundException, NotReplicatedYetException, SafeModeException, UnresolvedLinkException, IOException; @@ -344,7 +341,6 @@ public interface ClientNamenodeWireProtocol extends VersionedProtocol { * The specification of this method matches that of * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol#distributedUpgradeProgress} */ - @Nullable public UpgradeStatusReportWritable distributedUpgradeProgress( UpgradeAction action) throws IOException; @@ -373,7 +369,6 @@ public interface ClientNamenodeWireProtocol extends VersionedProtocol { * The specification of this method matches that of * {@link org.apache.hadoop.hdfs.protocol.ClientProtocol#getFileInfo(String)} */ - @Nullable public HdfsFileStatusWritable getFileInfo(String src) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/DatanodeInfoWritable.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/DatanodeInfoWritable.java index 6c9e4b423f2..71603f7fa6c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/DatanodeInfoWritable.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/DatanodeInfoWritable.java @@ -34,8 +34,6 @@ import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.HadoopIllegalArgumentException; -import org.apache.avro.reflect.Nullable; - /** * DatanodeInfo represents the status of a DataNode. 
* This object is used for communication in the @@ -55,7 +53,6 @@ public class DatanodeInfoWritable extends DatanodeIDWritable { /** HostName as supplied by the datanode during registration as its * name. Namenode uses datanode IP address as the name. */ - @Nullable protected String hostName = null; // administrative states of a datanode @@ -82,7 +79,6 @@ public class DatanodeInfoWritable extends DatanodeIDWritable { } } - @Nullable protected AdminStates adminState; static public DatanodeInfo convertDatanodeInfo(DatanodeInfoWritable di) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/HdfsFileStatusWritable.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/HdfsFileStatusWritable.java index 5e362c1cb4b..c817266c150 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/HdfsFileStatusWritable.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/HdfsFileStatusWritable.java @@ -30,8 +30,6 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableFactories; import org.apache.hadoop.io.WritableFactory; -import org.apache.avro.reflect.Nullable; - /** Interface that represents the over the wire information for a file. */ @InterfaceAudience.Private @@ -46,7 +44,6 @@ public class HdfsFileStatusWritable implements Writable { } private byte[] path; // local name of the inode that's encoded in java UTF8 - @Nullable private byte[] symlink; // symlink target encoded in java UTF8 or null private long length; private boolean isdir; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/LocatedBlocksWritable.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/LocatedBlocksWritable.java index c38eb6ea4c6..7c9be79bd81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/LocatedBlocksWritable.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/LocatedBlocksWritable.java @@ -29,8 +29,6 @@ import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableFactories; import org.apache.hadoop.io.WritableFactory; -import org.apache.avro.reflect.Nullable; - /** * Collection of blocks with their locations and the file length. 
*/ @@ -40,7 +38,6 @@ public class LocatedBlocksWritable implements Writable { private long fileLength; private List blocks; // array of blocks with prioritized locations private boolean underConstruction; - @Nullable private LocatedBlockWritable lastLocatedBlock = null; private boolean isLastBlockComplete = false; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index d750d8587c2..14507960d0b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -29,6 +29,13 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.UnregisteredNodeException; +import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; +import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; +import org.apache.hadoop.hdfs.server.common.Storage; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.protocol.BalancerBandwidthCommand; import org.apache.hadoop.hdfs.server.protocol.BlockCommand; import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand; @@ -405,7 +412,7 @@ class BPOfferService { * @return a proxy to the active NN */ @Deprecated - synchronized DatanodeProtocol getActiveNN() { + synchronized DatanodeProtocolClientSideTranslatorPB getActiveNN() { if (bpServiceToActive != null) { return bpServiceToActive.bpNamenode; } else { @@ -622,10 +629,10 @@ class BPOfferService { * Connect to the NN at the given address. This is separated out for ease * of testing. 
*/ - DatanodeProtocol connectToNN(InetSocketAddress nnAddr) + DatanodeProtocolClientSideTranslatorPB connectToNN(InetSocketAddress nnAddr) throws IOException { - return (DatanodeProtocol)RPC.waitForProxy(DatanodeProtocol.class, - DatanodeProtocol.versionID, nnAddr, dn.getConf()); + return new DatanodeProtocolClientSideTranslatorPB(nnAddr, + dn.getConf()); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index f6537fa4531..203b135290c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -35,11 +35,11 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.UnregisteredNodeException; +import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; -import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DisallowedDatanodeException; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; @@ -74,7 +74,7 @@ class BPServiceActor implements Runnable { boolean resetBlockReportTime = true; Thread bpThread; - DatanodeProtocol bpNamenode; + DatanodeProtocolClientSideTranslatorPB bpNamenode; private long lastHeartbeat = 0; private volatile boolean initialized = false; private final LinkedList receivedAndDeletedBlockList @@ -119,7 +119,7 @@ class BPServiceActor implements Runnable { * Used to inject a spy NN in the unit tests. 
*/ @VisibleForTesting - void setNameNode(DatanodeProtocol dnProtocol) { + void setNameNode(DatanodeProtocolClientSideTranslatorPB dnProtocol) { bpNamenode = dnProtocol; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 8418151a54a..8878ae1879f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -111,6 +111,7 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status; import org.apache.hadoop.hdfs.protocol.proto.InterDatanodeProtocolProtos.InterDatanodeProtocolService; import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolPB; import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolServerSideTranslatorPB; +import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.InterDatanodeProtocolPB; import org.apache.hadoop.hdfs.protocolPB.InterDatanodeProtocolServerSideTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.InterDatanodeProtocolTranslatorPB; @@ -1878,7 +1879,8 @@ public class DataNode extends Configured * @return Namenode corresponding to the bpid * @throws IOException */ - public DatanodeProtocol getBPNamenode(String bpid) throws IOException { + public DatanodeProtocolClientSideTranslatorPB getBPNamenode(String bpid) + throws IOException { BPOfferService bpos = blockPoolManager.get(bpid); if (bpos == null) { throw new IOException("cannot find a namnode proxy for bpid=" + bpid); @@ -1890,7 +1892,8 @@ public class DataNode extends Configured void syncBlock(RecoveringBlock rBlock, List syncList) throws IOException { ExtendedBlock block = rBlock.getBlock(); - DatanodeProtocol nn = getBPNamenode(block.getBlockPoolId()); + DatanodeProtocolClientSideTranslatorPB nn = getBPNamenode(block + .getBlockPoolId()); long recoveryId = rBlock.getNewGenerationStamp(); if (LOG.isDebugEnabled()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java index 0ecf5fadbaf..093cd9c863e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/resources/DatanodeWebHdfsMethods.java @@ -63,6 +63,7 @@ import org.apache.hadoop.hdfs.web.resources.DelegationParam; import org.apache.hadoop.hdfs.web.resources.GetOpParam; import org.apache.hadoop.hdfs.web.resources.HttpOpParam; import org.apache.hadoop.hdfs.web.resources.LengthParam; +import org.apache.hadoop.hdfs.web.resources.NamenodeRpcAddressParam; import org.apache.hadoop.hdfs.web.resources.OffsetParam; import org.apache.hadoop.hdfs.web.resources.OverwriteParam; import org.apache.hadoop.hdfs.web.resources.Param; @@ -89,7 +90,8 @@ public class DatanodeWebHdfsMethods { private @Context ServletContext context; private @Context HttpServletResponse response; - private void init(final UserGroupInformation ugi, final DelegationParam delegation, + private void init(final UserGroupInformation ugi, + final DelegationParam delegation, final InetSocketAddress 
nnRpcAddr, final UriFsPathParam path, final HttpOpParam op, final Param... parameters) throws IOException { if (LOG.isTraceEnabled()) { @@ -102,9 +104,8 @@ public class DatanodeWebHdfsMethods { if (UserGroupInformation.isSecurityEnabled()) { //add a token for RPC. - final DataNode datanode = (DataNode)context.getAttribute("datanode"); - final InetSocketAddress nnRpcAddr = NameNode.getAddress(datanode.getConf()); - final Token token = new Token(); + final Token token = + new Token(); token.decodeFromUrlString(delegation.getValue()); SecurityUtil.setTokenService(token, nnRpcAddr); token.setKind(DelegationTokenIdentifier.HDFS_DELEGATION_KIND); @@ -122,6 +123,9 @@ public class DatanodeWebHdfsMethods { @Context final UserGroupInformation ugi, @QueryParam(DelegationParam.NAME) @DefaultValue(DelegationParam.DEFAULT) final DelegationParam delegation, + @QueryParam(NamenodeRpcAddressParam.NAME) + @DefaultValue(NamenodeRpcAddressParam.DEFAULT) + final NamenodeRpcAddressParam namenodeRpcAddress, @QueryParam(PutOpParam.NAME) @DefaultValue(PutOpParam.DEFAULT) final PutOpParam op, @QueryParam(PermissionParam.NAME) @DefaultValue(PermissionParam.DEFAULT) @@ -135,8 +139,8 @@ public class DatanodeWebHdfsMethods { @QueryParam(BlockSizeParam.NAME) @DefaultValue(BlockSizeParam.DEFAULT) final BlockSizeParam blockSize ) throws IOException, InterruptedException { - return put(in, ugi, delegation, ROOT, op, permission, overwrite, bufferSize, - replication, blockSize); + return put(in, ugi, delegation, namenodeRpcAddress, ROOT, op, permission, + overwrite, bufferSize, replication, blockSize); } /** Handle HTTP PUT request. */ @@ -149,6 +153,9 @@ public class DatanodeWebHdfsMethods { @Context final UserGroupInformation ugi, @QueryParam(DelegationParam.NAME) @DefaultValue(DelegationParam.DEFAULT) final DelegationParam delegation, + @QueryParam(NamenodeRpcAddressParam.NAME) + @DefaultValue(NamenodeRpcAddressParam.DEFAULT) + final NamenodeRpcAddressParam namenodeRpcAddress, @PathParam(UriFsPathParam.NAME) final UriFsPathParam path, @QueryParam(PutOpParam.NAME) @DefaultValue(PutOpParam.DEFAULT) final PutOpParam op, @@ -164,8 +171,9 @@ public class DatanodeWebHdfsMethods { final BlockSizeParam blockSize ) throws IOException, InterruptedException { - init(ugi, delegation, path, op, permission, overwrite, bufferSize, - replication, blockSize); + final InetSocketAddress nnRpcAddr = namenodeRpcAddress.getValue(); + init(ugi, delegation, nnRpcAddr, path, op, permission, + overwrite, bufferSize, replication, blockSize); return ugi.doAs(new PrivilegedExceptionAction() { @Override @@ -178,7 +186,6 @@ public class DatanodeWebHdfsMethods { case CREATE: { final Configuration conf = new Configuration(datanode.getConf()); - final InetSocketAddress nnRpcAddr = NameNode.getAddress(conf); conf.set(FsPermission.UMASK_LABEL, "000"); final int b = bufferSize.getValue(conf); @@ -221,12 +228,15 @@ public class DatanodeWebHdfsMethods { @Context final UserGroupInformation ugi, @QueryParam(DelegationParam.NAME) @DefaultValue(DelegationParam.DEFAULT) final DelegationParam delegation, + @QueryParam(NamenodeRpcAddressParam.NAME) + @DefaultValue(NamenodeRpcAddressParam.DEFAULT) + final NamenodeRpcAddressParam namenodeRpcAddress, @QueryParam(PostOpParam.NAME) @DefaultValue(PostOpParam.DEFAULT) final PostOpParam op, @QueryParam(BufferSizeParam.NAME) @DefaultValue(BufferSizeParam.DEFAULT) final BufferSizeParam bufferSize ) throws IOException, InterruptedException { - return post(in, ugi, delegation, ROOT, op, bufferSize); + return post(in, ugi, 
delegation, namenodeRpcAddress, ROOT, op, bufferSize); } /** Handle HTTP POST request. */ @@ -239,6 +249,9 @@ public class DatanodeWebHdfsMethods { @Context final UserGroupInformation ugi, @QueryParam(DelegationParam.NAME) @DefaultValue(DelegationParam.DEFAULT) final DelegationParam delegation, + @QueryParam(NamenodeRpcAddressParam.NAME) + @DefaultValue(NamenodeRpcAddressParam.DEFAULT) + final NamenodeRpcAddressParam namenodeRpcAddress, @PathParam(UriFsPathParam.NAME) final UriFsPathParam path, @QueryParam(PostOpParam.NAME) @DefaultValue(PostOpParam.DEFAULT) final PostOpParam op, @@ -246,7 +259,8 @@ public class DatanodeWebHdfsMethods { final BufferSizeParam bufferSize ) throws IOException, InterruptedException { - init(ugi, delegation, path, op, bufferSize); + final InetSocketAddress nnRpcAddr = namenodeRpcAddress.getValue(); + init(ugi, delegation, nnRpcAddr, path, op, bufferSize); return ugi.doAs(new PrivilegedExceptionAction() { @Override @@ -259,7 +273,6 @@ public class DatanodeWebHdfsMethods { case APPEND: { final Configuration conf = new Configuration(datanode.getConf()); - final InetSocketAddress nnRpcAddr = NameNode.getAddress(conf); final int b = bufferSize.getValue(conf); DFSClient dfsclient = new DFSClient(nnRpcAddr, conf); FSDataOutputStream out = null; @@ -291,6 +304,9 @@ public class DatanodeWebHdfsMethods { @Context final UserGroupInformation ugi, @QueryParam(DelegationParam.NAME) @DefaultValue(DelegationParam.DEFAULT) final DelegationParam delegation, + @QueryParam(NamenodeRpcAddressParam.NAME) + @DefaultValue(NamenodeRpcAddressParam.DEFAULT) + final NamenodeRpcAddressParam namenodeRpcAddress, @QueryParam(GetOpParam.NAME) @DefaultValue(GetOpParam.DEFAULT) final GetOpParam op, @QueryParam(OffsetParam.NAME) @DefaultValue(OffsetParam.DEFAULT) @@ -300,7 +316,8 @@ public class DatanodeWebHdfsMethods { @QueryParam(BufferSizeParam.NAME) @DefaultValue(BufferSizeParam.DEFAULT) final BufferSizeParam bufferSize ) throws IOException, InterruptedException { - return get(ugi, delegation, ROOT, op, offset, length, bufferSize); + return get(ugi, delegation, namenodeRpcAddress, ROOT, op, offset, length, + bufferSize); } /** Handle HTTP GET request. 
*/ @@ -311,6 +328,9 @@ public class DatanodeWebHdfsMethods { @Context final UserGroupInformation ugi, @QueryParam(DelegationParam.NAME) @DefaultValue(DelegationParam.DEFAULT) final DelegationParam delegation, + @QueryParam(NamenodeRpcAddressParam.NAME) + @DefaultValue(NamenodeRpcAddressParam.DEFAULT) + final NamenodeRpcAddressParam namenodeRpcAddress, @PathParam(UriFsPathParam.NAME) final UriFsPathParam path, @QueryParam(GetOpParam.NAME) @DefaultValue(GetOpParam.DEFAULT) final GetOpParam op, @@ -322,7 +342,8 @@ public class DatanodeWebHdfsMethods { final BufferSizeParam bufferSize ) throws IOException, InterruptedException { - init(ugi, delegation, path, op, offset, length, bufferSize); + final InetSocketAddress nnRpcAddr = namenodeRpcAddress.getValue(); + init(ugi, delegation, nnRpcAddr, path, op, offset, length, bufferSize); return ugi.doAs(new PrivilegedExceptionAction() { @Override @@ -331,7 +352,6 @@ public class DatanodeWebHdfsMethods { final String fullpath = path.getAbsolutePath(); final DataNode datanode = (DataNode)context.getAttribute("datanode"); final Configuration conf = new Configuration(datanode.getConf()); - final InetSocketAddress nnRpcAddr = NameNode.getAddress(conf); switch(op.getValue()) { case OPEN: diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 92ef2b5ee3b..0f66fe644ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -294,7 +294,7 @@ public class FSEditLog { */ synchronized void close() { if (state == State.CLOSED) { - LOG.warn("Closing log when already closed", new Exception()); + LOG.debug("Closing log when already closed"); return; } if (state == State.IN_SEGMENT) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index d5edb10d13b..874cb5febf9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -62,11 +62,15 @@ import org.apache.hadoop.hdfs.protocol.UnresolvedPathException; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.protocol.HdfsConstants.UpgradeAction; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.NamenodeProtocolService; +import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.DatanodeProtocolService; +import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolServerSideTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolServerSideTranslatorPB; -import org.apache.hadoop.hdfs.protocolR23Compatible.ClientNamenodeWireProtocol; -import org.apache.hadoop.hdfs.protocolR23Compatible.ClientNamenodeProtocolServerSideTranslatorR23; +import 
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; @@ -92,6 +96,7 @@ import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.Server; @@ -144,14 +149,23 @@ class NameNodeRpcServer implements NamenodeProtocols { conf.getInt(DFS_DATANODE_HANDLER_COUNT_KEY, DFS_DATANODE_HANDLER_COUNT_DEFAULT); InetSocketAddress socAddr = nn.getRpcServerAddress(conf); - ClientNamenodeProtocolServerSideTranslatorR23 - clientProtocolServerTranslator = - new ClientNamenodeProtocolServerSideTranslatorR23(this); + RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class, + ProtobufRpcEngine.class); + ClientNamenodeProtocolServerSideTranslatorPB + clientProtocolServerTranslator = + new ClientNamenodeProtocolServerSideTranslatorPB(this); + BlockingService clientNNPbService = ClientNamenodeProtocol. + newReflectiveBlockingService(clientProtocolServerTranslator); + DatanodeProtocolServerSideTranslatorPB dnProtoPbTranslator = + new DatanodeProtocolServerSideTranslatorPB(this); + BlockingService dnProtoPbService = DatanodeProtocolService + .newReflectiveBlockingService(dnProtoPbTranslator); + NamenodeProtocolServerSideTranslatorPB namenodeProtocolXlator = new NamenodeProtocolServerSideTranslatorPB(this); - BlockingService service = NamenodeProtocolService - .newReflectiveBlockingService(namenodeProtocolXlator); + BlockingService NNPbService = NamenodeProtocolService + .newReflectiveBlockingService(namenodeProtocolXlator); InetSocketAddress dnSocketAddr = nn.getServiceRpcServerAddress(conf); if (dnSocketAddr != null) { @@ -160,13 +174,11 @@ class NameNodeRpcServer implements NamenodeProtocols { DFS_NAMENODE_SERVICE_HANDLER_COUNT_DEFAULT); // Add all the RPC protocols that the namenode implements this.serviceRpcServer = - RPC.getServer(org.apache.hadoop.hdfs.protocolR23Compatible. - ClientNamenodeWireProtocol.class, clientProtocolServerTranslator, + RPC.getServer(org.apache.hadoop.hdfs.protocolPB. 
+ ClientNamenodeProtocolPB.class, clientNNPbService, dnSocketAddr.getHostName(), dnSocketAddr.getPort(), serviceHandlerCount, false, conf, namesystem.getDelegationTokenSecretManager()); - this.serviceRpcServer.addProtocol(RpcKind.RPC_WRITABLE, - DatanodeProtocol.class, this); this.serviceRpcServer.addProtocol(RpcKind.RPC_WRITABLE, RefreshAuthorizationPolicyProtocol.class, this); this.serviceRpcServer.addProtocol(RpcKind.RPC_WRITABLE, @@ -175,7 +187,9 @@ class NameNodeRpcServer implements NamenodeProtocols { GetUserMappingsProtocol.class, this); this.serviceRpcServer.addProtocol(RpcKind.RPC_WRITABLE, HAServiceProtocol.class, this); - DFSUtil.addPBProtocol(conf, NamenodeProtocolPB.class, service, + DFSUtil.addPBProtocol(conf, NamenodeProtocolPB.class, NNPbService, + serviceRpcServer); + DFSUtil.addPBProtocol(conf, DatanodeProtocolPB.class, dnProtoPbService, serviceRpcServer); this.serviceRPCAddress = this.serviceRpcServer.getListenerAddress(); @@ -186,13 +200,10 @@ class NameNodeRpcServer implements NamenodeProtocols { } // Add all the RPC protocols that the namenode implements this.clientRpcServer = RPC.getServer( - org.apache.hadoop.hdfs.protocolR23Compatible. - ClientNamenodeWireProtocol.class, - clientProtocolServerTranslator, socAddr.getHostName(), + org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB.class, + clientNNPbService, socAddr.getHostName(), socAddr.getPort(), handlerCount, false, conf, namesystem.getDelegationTokenSecretManager()); - this.clientRpcServer.addProtocol(RpcKind.RPC_WRITABLE, - DatanodeProtocol.class, this); this.clientRpcServer.addProtocol(RpcKind.RPC_WRITABLE, RefreshAuthorizationPolicyProtocol.class, this); this.clientRpcServer.addProtocol(RpcKind.RPC_WRITABLE, @@ -201,7 +212,9 @@ class NameNodeRpcServer implements NamenodeProtocols { GetUserMappingsProtocol.class, this); this.clientRpcServer.addProtocol(RpcKind.RPC_WRITABLE, HAServiceProtocol.class, this); - DFSUtil.addPBProtocol(conf, NamenodeProtocolPB.class, service, + DFSUtil.addPBProtocol(conf, NamenodeProtocolPB.class, NNPbService, + clientRpcServer); + DFSUtil.addPBProtocol(conf, DatanodeProtocolPB.class, dnProtoPbService, clientRpcServer); // set service-level authorization security policy @@ -261,7 +274,7 @@ class NameNodeRpcServer implements NamenodeProtocols { long clientVersion) throws IOException { if (protocol.equals(ClientProtocol.class.getName())) { throw new IOException("Old Namenode Client protocol is not supported:" + - protocol + "Switch your clientside to " + ClientNamenodeWireProtocol.class); + protocol + "Switch your clientside to " + ClientNamenodeProtocol.class); } else if (protocol.equals(DatanodeProtocol.class.getName())){ return DatanodeProtocol.versionID; } else if (protocol.equals(NamenodeProtocol.class.getName())){ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java index 9529ecf766b..44e5e13bc8a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/web/resources/NamenodeWebHdfsMethods.java @@ -76,6 +76,7 @@ import org.apache.hadoop.hdfs.web.resources.GroupParam; import org.apache.hadoop.hdfs.web.resources.HttpOpParam; import org.apache.hadoop.hdfs.web.resources.LengthParam; import 
org.apache.hadoop.hdfs.web.resources.ModificationTimeParam; +import org.apache.hadoop.hdfs.web.resources.NamenodeRpcAddressParam; import org.apache.hadoop.hdfs.web.resources.OffsetParam; import org.apache.hadoop.hdfs.web.resources.OverwriteParam; import org.apache.hadoop.hdfs.web.resources.OwnerParam; @@ -198,6 +199,7 @@ public class NamenodeWebHdfsMethods { delegationQuery = "&" + new DelegationParam(t.encodeToUrlString()); } final String query = op.toQueryString() + delegationQuery + + "&" + new NamenodeRpcAddressParam(namenode) + Param.toSortedString("&", parameters); final String uripath = WebHdfsFileSystem.PATH_PREFIX + path; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeCommand.java index 9c6950f2174..6d59274efd2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeCommand.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hdfs.server.protocol; -import org.apache.avro.reflect.Union; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -25,13 +24,6 @@ import org.apache.hadoop.classification.InterfaceStability; * Base class for data-node command. * Issued by the name-node to notify data-nodes what should be done. */ - -// Declare subclasses for Avro's denormalized representation -@Union({Void.class, - RegisterCommand.class, FinalizeCommand.class, - BlockCommand.class, UpgradeCommand.class, - BlockRecoveryCommand.class, KeyUpdateCommand.class}) - @InterfaceAudience.Private @InterfaceStability.Evolving public abstract class DatanodeCommand extends ServerCommand { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java index 7b99f371239..f9a88e18891 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/DatanodeProtocol.java @@ -30,8 +30,6 @@ import org.apache.hadoop.hdfs.server.protocolR23Compatible.DatanodeWireProtocol; import org.apache.hadoop.ipc.VersionedProtocol; import org.apache.hadoop.security.KerberosInfo; -import org.apache.avro.reflect.Nullable; - /********************************************************************** * Protocol that a DFS datanode uses to communicate with the NameNode. * It's used to upload current load information and block reports. @@ -76,6 +74,8 @@ public interface DatanodeProtocol extends VersionedProtocol { final static int DNA_RECOVERBLOCK = 6; // request a block recovery final static int DNA_ACCESSKEYUPDATE = 7; // update access key final static int DNA_BALANCERBANDWIDTHUPDATE = 8; // update balancer bandwidth + final static int DNA_UC_ACTION_REPORT_STATUS = 100; // Report upgrade status + final static int DNA_UC_ACTION_START_UPGRADE = 101; // start upgrade /** * Register Datanode. 
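
As a minimal illustration of the dispatch these new DNA_UC_ACTION_* codes enable (not part of the patch itself; the class and enum below are simplified stand-ins rather than the real HDFS/protobuf types, and only the 100/101 values are taken from the hunk above — the small action values are assumed):

// Illustrative sketch only: simplified stand-ins, not the real HDFS classes.
public class UpgradeActionDispatchSketch {

  // Hypothetical mirror of the wire-level command categories (DatanodeCommandProto.Type).
  enum ProtoCmdType { BlockCommand, UpgradeCommand, NullDatanodeCommand }

  // Action codes: 100/101 are the values added above; the others are assumed here.
  static final int DNA_TRANSFER = 1;
  static final int DNA_INVALIDATE = 2;
  static final int DNA_SHUTDOWN = 3;
  static final int DNA_UC_ACTION_REPORT_STATUS = 100;
  static final int DNA_UC_ACTION_START_UPGRADE = 101;

  /** Mirrors the switch in PBHelper.convert(DatanodeCommand): block-related
   *  actions map to a BlockCommand message, upgrade actions to an
   *  UpgradeCommand, and anything unexpected degrades to NullDatanodeCommand. */
  static ProtoCmdType toProtoType(int action) {
    switch (action) {
      case DNA_TRANSFER:
      case DNA_INVALIDATE:
      case DNA_SHUTDOWN:
        return ProtoCmdType.BlockCommand;
      case DNA_UC_ACTION_REPORT_STATUS:
      case DNA_UC_ACTION_START_UPGRADE:
        return ProtoCmdType.UpgradeCommand;
      default:
        return ProtoCmdType.NullDatanodeCommand;
    }
  }

  public static void main(String[] args) {
    System.out.println(toProtoType(DNA_UC_ACTION_START_UPGRADE)); // UpgradeCommand
    System.out.println(toProtoType(DNA_SHUTDOWN));                // BlockCommand
    System.out.println(toProtoType(0));                           // NullDatanodeCommand
  }
}

Before this patch the 100/101 codes lived only inside UpgradeCommand, so promoting them to protocol-level constants is what makes a single action-based switch like the one above possible in the translator.
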
@@ -105,7 +105,6 @@ public interface DatanodeProtocol extends VersionedProtocol { * @param failedVolumes number of failed volumes * @throws IOException on error */ - @Nullable public HeartbeatResponse sendHeartbeat(DatanodeRegistration registration, long capacity, long dfsUsed, long remaining, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/UpgradeCommand.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/UpgradeCommand.java index fd9263fbc6a..a6de55d843c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/UpgradeCommand.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/UpgradeCommand.java @@ -41,8 +41,10 @@ import org.apache.hadoop.io.WritableFactory; @InterfaceStability.Evolving public class UpgradeCommand extends DatanodeCommand { public final static int UC_ACTION_UNKNOWN = DatanodeProtocol.DNA_UNKNOWN; - public final static int UC_ACTION_REPORT_STATUS = 100; // report upgrade status - public final static int UC_ACTION_START_UPGRADE = 101; // start upgrade + public final static int UC_ACTION_REPORT_STATUS = + DatanodeProtocol.DNA_UC_ACTION_REPORT_STATUS; + public final static int UC_ACTION_START_UPGRADE = + DatanodeProtocol.DNA_UC_ACTION_START_UPGRADE; private int version; private short upgradeStatus; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeWireProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeWireProtocol.java index 8625c22a535..a07b485a679 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeWireProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/DatanodeWireProtocol.java @@ -20,7 +20,6 @@ package org.apache.hadoop.hdfs.server.protocolR23Compatible; import java.io.IOException; -import org.apache.avro.reflect.Nullable; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.protocol.ClientProtocol; @@ -98,7 +97,6 @@ public interface DatanodeWireProtocol extends VersionedProtocol { * @param failedVolumes number of failed volumes * @throws IOException on error */ - @Nullable public HeartbeatResponseWritable sendHeartbeat( DatanodeRegistrationWritable registration, long capacity, long dfsUsed, long remaining, long blockPoolUsed, int xmitsInProgress, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/InetSocketAddressParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/InetSocketAddressParam.java new file mode 100644 index 00000000000..dc21f684e7e --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/InetSocketAddressParam.java @@ -0,0 +1,83 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.web.resources; + +import java.net.InetSocketAddress; + +/** InetSocketAddressParam parameter. */ +abstract class InetSocketAddressParam + extends Param { + InetSocketAddressParam(final Domain domain, final InetSocketAddress value) { + super(domain, value); + } + + @Override + public String toString() { + return getName() + "=" + Domain.toString(getValue()); + } + + /** The domain of the parameter. */ + static final class Domain extends Param.Domain { + Domain(final String paramName) { + super(paramName); + } + + @Override + public String getDomain() { + return ""; + } + + @Override + InetSocketAddress parse(final String str) { + final int i = str.indexOf(':'); + if (i < 0) { + throw new IllegalArgumentException("Failed to parse \"" + str + + "\" as " + getDomain() + ": the ':' character not found."); + } else if (i == 0) { + throw new IllegalArgumentException("Failed to parse \"" + str + + "\" as " + getDomain() + ": HOST is empty."); + } else if (i == str.length() - 1) { + throw new IllegalArgumentException("Failed to parse \"" + str + + "\" as " + getDomain() + ": PORT is empty."); + } + + final String host = str.substring(0, i); + final int port; + try { + port = Integer.parseInt(str.substring(i + 1)); + } catch(NumberFormatException e) { + throw new IllegalArgumentException("Failed to parse \"" + str + + "\" as " + getDomain() + ": the ':' position is " + i + + " but failed to parse PORT.", e); + } + + try { + return new InetSocketAddress(host, port); + } catch(Exception e) { + throw new IllegalArgumentException("Failed to parse \"" + str + + "\": cannot create InetSocketAddress(host=" + host + + ", port=" + port + ")", e); + } + } + + /** Convert an InetSocketAddress to a HOST:PORT String. */ + static String toString(final InetSocketAddress addr) { + return addr.getHostName() + ":" + addr.getPort(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/NamenodeRpcAddressParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/NamenodeRpcAddressParam.java new file mode 100644 index 00000000000..431454c93d4 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/NamenodeRpcAddressParam.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.web.resources; + +import org.apache.hadoop.hdfs.server.namenode.NameNode; + +/** Namenode RPC address parameter. */ +public class NamenodeRpcAddressParam extends InetSocketAddressParam { + /** Parameter name. */ + public static final String NAME = "namenoderpcaddress"; + /** Default parameter value. */ + public static final String DEFAULT = ""; + + private static final Domain DOMAIN = new Domain(NAME); + + /** + * Constructor. + * @param str a string representation of the parameter value. + */ + public NamenodeRpcAddressParam(final String str) { + super(DOMAIN, str == null || str.equals(DEFAULT)? null: DOMAIN.parse(str)); + } + + /** + * Construct an object using the RPC address of the given namenode. + */ + public NamenodeRpcAddressParam(final NameNode namenode) { + super(DOMAIN, namenode.getNameNodeAddress()); + } + + @Override + public String getName() { + return NAME; + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto index 7a52460ef08..10f39eaa136 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto @@ -74,7 +74,7 @@ message AppendRequestProto { } message AppendResponseProto { - required LocatedBlockProto block = 1; + optional LocatedBlockProto block = 1; } message SetReplicationRequestProto { @@ -96,8 +96,8 @@ message SetPermissionResponseProto { // void response message SetOwnerRequestProto { required string src = 1; - required string username = 2; - required string groupname = 3; + optional string username = 2; + optional string groupname = 3; } message SetOwnerResponseProto { // void response @@ -139,7 +139,7 @@ message GetAdditionalDatanodeResponseProto { message CompleteRequestProto { required string src = 1; required string clientName = 2; - required ExtendedBlockProto last = 3; + optional ExtendedBlockProto last = 3; } message CompleteResponseProto { @@ -204,7 +204,7 @@ message GetListingRequestProto { required bool needLocation = 3; } message GetListingResponseProto { - required DirectoryListingProto dirList = 1; + optional DirectoryListingProto dirList = 1; } message RenewLeaseRequestProto { @@ -311,7 +311,7 @@ message DistributedUpgradeProgressResponseProto { message ListCorruptFileBlocksRequestProto { required string path = 1; - required string cookie = 2; + optional string cookie = 2; } message ListCorruptFileBlocksResponseProto { @@ -338,7 +338,7 @@ message GetFileLinkInfoRequestProto { } message GetFileLinkInfoResponseProto { - required HdfsFileStatusProto fs = 1; + optional HdfsFileStatusProto fs = 1; } message GetContentSummaryRequestProto { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto index d837ccc43a7..6426de95ba7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto @@ -47,6 +47,7 @@ message DatanodeCommandProto { KeyUpdateCommand = 4; RegisterCommand = 5; UpgradeCommand = 6; + NullDatanodeCommand = 7; } required Type cmdType = 1; // Type of the command @@ -80,6 +81,7 @@ message BlockCommandProto { enum Action { TRANSFER = 1; // Transfer blocks to 
another datanode INVALIDATE = 2; // Invalidate blocks + SHUTDOWN = 3; // Shutdown the datanode } required Action action = 1; required string blockPoolId = 2; @@ -205,7 +207,7 @@ message BlockReportRequestProto { * cmd - Command from namenode to the datanode */ message BlockReportResponseProto { - required DatanodeCommandProto cmd = 1; + optional DatanodeCommandProto cmd = 1; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index e9f023ee724..5840fbaf9dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -56,6 +56,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolPB; import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; import org.apache.hadoop.hdfs.protocolR23Compatible.ClientNamenodeWireProtocol; import org.apache.hadoop.hdfs.server.common.Storage; @@ -514,37 +515,6 @@ public class MiniDFSCluster { data_dir = new File(base_dir, "data"); this.waitSafeMode = waitSafeMode; - // use alternate RPC engine if spec'd - /* - Turned off - see HDFS-2647 and HDFS-2660 for related comments. - This test can be turned on when Avro RPC is enabled using mechanism - similar to protobuf. - - String rpcEngineName = System.getProperty("hdfs.rpc.engine"); - if (rpcEngineName != null && !"".equals(rpcEngineName)) { - - LOG.info("HDFS using RPCEngine: " + rpcEngineName); - try { - Class rpcEngine = conf.getClassByName(rpcEngineName); - setRpcEngine(conf, NamenodeProtocols.class, rpcEngine); - setRpcEngine(conf, ClientNamenodeWireProtocol.class, rpcEngine); - setRpcEngine(conf, ClientDatanodeProtocolPB.class, rpcEngine); - setRpcEngine(conf, NamenodeProtocolPB.class, rpcEngine); - setRpcEngine(conf, ClientProtocol.class, rpcEngine); - setRpcEngine(conf, DatanodeProtocol.class, rpcEngine); - setRpcEngine(conf, RefreshAuthorizationPolicyProtocol.class, rpcEngine); - setRpcEngine(conf, RefreshUserMappingsProtocol.class, rpcEngine); - setRpcEngine(conf, GetUserMappingsProtocol.class, rpcEngine); - } catch (ClassNotFoundException e) { - throw new RuntimeException(e); - } - - // disable service authorization, as it does not work with tunnelled RPC - conf.setBoolean(HADOOP_SECURITY_AUTHORIZATION, - false); - } - */ - int replication = conf.getInt(DFS_REPLICATION_KEY, 3); conf.setInt(DFS_REPLICATION_KEY, Math.min(replication, numDataNodes)); conf.setInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java index 144b5c2aa62..3fc1a12cefb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import 
org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.datanode.metrics.DataNodeMetrics; import org.apache.hadoop.hdfs.server.protocol.BlockCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; @@ -67,8 +68,8 @@ public class TestBPOfferService { ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); } - private DatanodeProtocol mockNN1; - private DatanodeProtocol mockNN2; + private DatanodeProtocolClientSideTranslatorPB mockNN1; + private DatanodeProtocolClientSideTranslatorPB mockNN2; private NNHAStatusHeartbeat[] mockHaStatuses = new NNHAStatusHeartbeat[2]; private int heartbeatCounts[] = new int[2]; private DataNode mockDn; @@ -100,8 +101,10 @@ public class TestBPOfferService { /** * Set up a mock NN with the bare minimum for a DN to register to it. */ - private DatanodeProtocol setupNNMock(int nnIdx) throws Exception { - DatanodeProtocol mock = Mockito.mock(DatanodeProtocol.class); + private DatanodeProtocolClientSideTranslatorPB setupNNMock(int nnIdx) + throws Exception { + DatanodeProtocolClientSideTranslatorPB mock = + Mockito.mock(DatanodeProtocolClientSideTranslatorPB.class); Mockito.doReturn( new NamespaceInfo(1, FAKE_CLUSTERID, FAKE_BPID, 0, HdfsConstants.LAYOUT_VERSION)) @@ -298,19 +301,21 @@ public class TestBPOfferService { * Create a BPOfferService which registers with and heartbeats with the * specified namenode proxy objects. */ - private BPOfferService setupBPOSForNNs(DatanodeProtocol ... nns) { + private BPOfferService setupBPOSForNNs( + DatanodeProtocolClientSideTranslatorPB ... nns) { // Set up some fake InetAddresses, then override the connectToNN // function to return the corresponding proxies. 
- final Map nnMap = Maps.newLinkedHashMap(); + final Map nnMap = Maps.newLinkedHashMap(); for (int port = 0; port < nns.length; port++) { nnMap.put(new InetSocketAddress(port), nns[port]); } return new BPOfferService(Lists.newArrayList(nnMap.keySet()), mockDn) { @Override - DatanodeProtocol connectToNN(InetSocketAddress nnAddr) throws IOException { - DatanodeProtocol nn = nnMap.get(nnAddr); + DatanodeProtocolClientSideTranslatorPB connectToNN(InetSocketAddress nnAddr) + throws IOException { + DatanodeProtocolClientSideTranslatorPB nn = nnMap.get(nnAddr); if (nn == null) { throw new AssertionError("bad NN addr: " + nnAddr); } @@ -329,7 +334,7 @@ public class TestBPOfferService { }, 100, 10000); } - private void waitForBlockReport(final DatanodeProtocol mockNN) + private void waitForBlockReport(final DatanodeProtocolClientSideTranslatorPB mockNN) throws Exception { GenericTestUtils.waitFor(new Supplier() { @Override @@ -374,7 +379,7 @@ public class TestBPOfferService { private ReceivedDeletedBlockInfo[] waitForBlockReceived( ExtendedBlock fakeBlock, - DatanodeProtocol mockNN) throws Exception { + DatanodeProtocolClientSideTranslatorPB mockNN) throws Exception { final ArgumentCaptor captor = ArgumentCaptor.forClass(ReceivedDeletedBlockInfo[].class); GenericTestUtils.waitFor(new Supplier() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java index ba36b277647..dbbaedd6f86 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeRegister.java @@ -23,8 +23,8 @@ import static org.junit.Assert.fail; import static org.mockito.Mockito.*; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.common.IncorrectVersionException; -import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.junit.Test; import org.mockito.Mockito; @@ -49,7 +49,8 @@ public class TestDatanodeRegister { NamespaceInfo fakeNSInfo = mock(NamespaceInfo.class); when(fakeNSInfo.getBuildVersion()).thenReturn("NSBuildVersion"); - DatanodeProtocol fakeDNProt = mock(DatanodeProtocol.class); + DatanodeProtocolClientSideTranslatorPB fakeDNProt = + mock(DatanodeProtocolClientSideTranslatorPB.class); when(fakeDNProt.versionRequest()).thenReturn(fakeNSInfo); actor.setNameNode( fakeDNProt ); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java new file mode 100644 index 00000000000..5d3272af7ac --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java @@ -0,0 +1,177 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.web; + +import java.net.InetSocketAddress; +import java.net.URI; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.LeaseManager; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; +import org.apache.log4j.Level; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Test WebHDFS with multiple NameNodes + */ +public class TestWebHdfsWithMultipleNameNodes { + static final Log LOG = WebHdfsTestUtil.LOG; + + static private void setLogLevel() { + ((Log4JLogger)LOG).getLogger().setLevel(Level.ALL); + ((Log4JLogger)NamenodeWebHdfsMethods.LOG).getLogger().setLevel(Level.ALL); + + ((Log4JLogger)NameNode.stateChangeLog).getLogger().setLevel(Level.OFF); + ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.OFF); + ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.OFF); + ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.OFF); + } + + private static final Configuration conf = new HdfsConfiguration(); + private static MiniDFSCluster cluster; + private static WebHdfsFileSystem[] webhdfs; + + @BeforeClass + public static void setupTest() { + setLogLevel(); + try { + setupCluster(4, 3); + } catch(Exception e) { + throw new RuntimeException(e); + } + } + + private static void setupCluster(final int nNameNodes, final int nDataNodes) + throws Exception { + LOG.info("nNameNodes=" + nNameNodes + ", nDataNodes=" + nDataNodes); + + conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true); + + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(nNameNodes)) + .numDataNodes(nDataNodes) + .build(); + cluster.waitActive(); + + webhdfs = new WebHdfsFileSystem[nNameNodes]; + for(int i = 0; i < webhdfs.length; i++) { + final InetSocketAddress addr = cluster.getNameNode(i).getHttpAddress(); + final String uri = WebHdfsFileSystem.SCHEME + "://" + + addr.getHostName() + ":" + addr.getPort() + "/"; + webhdfs[i] = (WebHdfsFileSystem)FileSystem.get(new URI(uri), conf); + } + } + + @AfterClass + public static void shutdownCluster() { + if (cluster != null) { + cluster.shutdown(); + cluster = null; + } + } + + 
private static String createString(String prefix, int i) { + //The suffix is to make sure the strings have different lengths. + final String suffix = "*********************".substring(0, i+1); + return prefix + i + suffix + "\n"; + } + + private static String[] createStrings(String prefix, String name) { + final String[] strings = new String[webhdfs.length]; + for(int i = 0; i < webhdfs.length; i++) { + strings[i] = createString(prefix, i); + LOG.info(name + "[" + i + "] = " + strings[i]); + } + return strings; + } + + @Test + public void testRedirect() throws Exception { + final String dir = "/testRedirect/"; + final String filename = "file"; + final Path p = new Path(dir, filename); + + final String[] writeStrings = createStrings("write to webhdfs ", "write"); + final String[] appendStrings = createStrings("append to webhdfs ", "append"); + + //test create: create a file for each namenode + for(int i = 0; i < webhdfs.length; i++) { + final FSDataOutputStream out = webhdfs[i].create(p); + out.write(writeStrings[i].getBytes()); + out.close(); + } + + for(int i = 0; i < webhdfs.length; i++) { + //check file length + final long expected = writeStrings[i].length(); + Assert.assertEquals(expected, webhdfs[i].getFileStatus(p).getLen()); + } + + //test read: check file content for each namenode + for(int i = 0; i < webhdfs.length; i++) { + final FSDataInputStream in = webhdfs[i].open(p); + for(int c, j = 0; (c = in.read()) != -1; j++) { + Assert.assertEquals(writeStrings[i].charAt(j), c); + } + in.close(); + } + + //test append: append to the file for each namenode + for(int i = 0; i < webhdfs.length; i++) { + final FSDataOutputStream out = webhdfs[i].append(p); + out.write(appendStrings[i].getBytes()); + out.close(); + } + + for(int i = 0; i < webhdfs.length; i++) { + //check file length + final long expected = writeStrings[i].length() + appendStrings[i].length(); + Assert.assertEquals(expected, webhdfs[i].getFileStatus(p).getLen()); + } + + //test read: check file content for each namenode + for(int i = 0; i < webhdfs.length; i++) { + final StringBuilder b = new StringBuilder(); + final FSDataInputStream in = webhdfs[i].open(p); + for(int c; (c = in.read()) != -1; ) { + b.append((char)c); + } + final int wlen = writeStrings[i].length(); + Assert.assertEquals(writeStrings[i], b.substring(0, wlen)); + Assert.assertEquals(appendStrings[i], b.substring(wlen)); + in.close(); + } + } +} From ecdf9da770b2b0efc1ca5940366aaef6c58364ff Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 14 Dec 2011 23:28:43 +0000 Subject: [PATCH 046/177] Fix treatment of NNHAStatusHeartbeat in protobuffer. 
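The hunk below adds the Java-to-protobuf direction of the heartbeat HA-status conversion; the hunk context suggests the inverse (protobuf-to-Java) conversion already exists in PBHelper. As a rough round-trip sketch only: the NNHAStatusHeartbeat constructor used here and the location of its State enum are assumptions, and imports are elided.

    // Illustrative round trip through the two PBHelper conversions.
    // The constructor and enum nesting below are assumed, not part of this patch.
    NNHAStatusHeartbeat hb =
        new NNHAStatusHeartbeat(NNHAStatusHeartbeat.State.ACTIVE, 42L);
    NNHAStatusHeartbeatProto proto = PBHelper.convert(hb);  // Java -> protobuf (added below)
    NNHAStatusHeartbeat back = PBHelper.convert(proto);     // protobuf -> Java (pre-existing)
    assert back.getState() == hb.getState();
    assert back.getTxId() == hb.getTxId();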
Committing without pre-commit review since it's a pretty trivial merge fix git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1214543 13f79535-47bb-0310-9956-ffa450edef68 --- ...atanodeProtocolServerSideTranslatorPB.java | 1 + .../hadoop/hdfs/protocolPB/PBHelper.java | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java index 7413fd1d809..fb2c95873d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java @@ -116,6 +116,7 @@ public class DatanodeProtocolServerSideTranslatorPB implements } } } + builder.setHaStatus(PBHelper.convert(response.getNameNodeHaState())); return builder.build(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java index 78f2319fed9..9823ef72c3b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -1259,4 +1259,23 @@ public class PBHelper { throw new IllegalArgumentException("Unexpected NNHAStatusHeartbeat.State:" + s.getState()); } } + + public static NNHAStatusHeartbeatProto convert(NNHAStatusHeartbeat hb) { + if (hb == null) return null; + NNHAStatusHeartbeatProto.Builder builder = + NNHAStatusHeartbeatProto.newBuilder(); + switch (hb.getState()) { + case ACTIVE: + builder.setState(NNHAStatusHeartbeatProto.State.ACTIVE); + break; + case STANDBY: + builder.setState(NNHAStatusHeartbeatProto.State.STANDBY); + break; + default: + throw new IllegalArgumentException("Unexpected NNHAStatusHeartbeat.State:" + + hb.getState()); + } + builder.setTxid(hb.getTxId()); + return builder.build(); + } } From afd0333196ebd273b7eaeedfaec00ec68a358ea8 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 15 Dec 2011 00:42:50 +0000 Subject: [PATCH 047/177] HDFS-2683. Authority-based lookup of proxy provider fails if path becomes canonicalized. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1214579 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSClient.java | 28 +++++++++++--- .../hadoop/hdfs/TestDFSClientFailover.java | 38 +++++++++++++++---- 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 11a2b6b00d3..3c5caae1a32 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -53,3 +53,5 @@ HDFS-2634. Standby needs to ingest latest edit logs before transitioning to acti HDFS-2671. NN should throw StandbyException in response to RPCs in STANDBY state (todd) HDFS-2680. DFSClient should construct failover proxy with exponential backoff (todd) + +HDFS-2683. 
Authority-based lookup of proxy provider fails if path becomes canonicalized (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 756899945d3..529ee7177a3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -313,7 +313,8 @@ public class DFSClient implements java.io.Closeable { this.socketCache = new SocketCache(dfsClientConf.socketCacheCapacity); - Class failoverProxyProviderClass = getFailoverProxyProviderClass(authority, conf); + Class failoverProxyProviderClass = getFailoverProxyProviderClass( + nameNodeUri, conf); if (nameNodeUri != null && failoverProxyProviderClass != null) { FailoverProxyProvider failoverProxyProvider = (FailoverProxyProvider) @@ -353,15 +354,32 @@ public class DFSClient implements java.io.Closeable { } } - private Class getFailoverProxyProviderClass(String authority, Configuration conf) + private Class getFailoverProxyProviderClass(URI nameNodeUri, Configuration conf) throws IOException { - String configKey = DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + authority; + if (nameNodeUri == null) { + return null; + } + String host = nameNodeUri.getHost(); + + String configKey = DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + host; try { - return conf.getClass(configKey, null); + Class ret = conf.getClass(configKey, null); + if (ret != null) { + // If we found a proxy provider, then this URI should be a logical NN. + // Given that, it shouldn't have a non-default port number. + int port = nameNodeUri.getPort(); + if (port > 0 && port != NameNode.DEFAULT_PORT) { + throw new IOException( + "Port " + port + " specified in URI " + nameNodeUri + + " but host '" + host + "' is a logical (HA) namenode" + + " and does not use port information."); + } + } + return ret; } catch (RuntimeException e) { if (e.getCause() instanceof ClassNotFoundException) { throw new IOException("Could not load failover proxy provider class " - + conf.get(configKey) + " which is configured for authority " + authority, + + conf.get(configKey) + " which is configured for authority " + nameNodeUri, e); } else { throw e; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java index a1db640c2c9..1146ae7b7a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; +import static org.junit.Assert.*; import java.io.IOException; import java.io.OutputStream; @@ -31,7 +32,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider; +import 
org.apache.hadoop.test.GenericTestUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -43,6 +46,7 @@ public class TestDFSClientFailover { private Configuration conf = new Configuration(); private MiniDFSCluster cluster; + private static final String LOGICAL_HOSTNAME = "ha-nn-uri"; @Before public void setUpCluster() throws IOException { @@ -65,10 +69,6 @@ public class TestDFSClientFailover { public void testDfsClientFailover() throws IOException, URISyntaxException { InetSocketAddress nnAddr1 = cluster.getNameNode(0).getNameNodeAddress(); InetSocketAddress nnAddr2 = cluster.getNameNode(1).getNameNodeAddress(); - String nameServiceId1 = DFSUtil.getNameServiceIdFromAddress(conf, nnAddr1, - DFS_NAMENODE_RPC_ADDRESS_KEY); - String nameServiceId2 = DFSUtil.getNameServiceIdFromAddress(conf, nnAddr2, - DFS_NAMENODE_RPC_ADDRESS_KEY); ClientProtocol nn1 = DFSUtil.createNamenode(nnAddr1, conf); ClientProtocol nn2 = DFSUtil.createNamenode(nnAddr2, conf); @@ -89,8 +89,33 @@ public class TestDFSClientFailover { cluster.getNameNode(0).stop(); AppendTestUtil.check(fs, TEST_FILE, FILE_LENGTH_TO_VERIFY); + // Check that it functions even if the URL becomes canonicalized + // to include a port number. + Path withPort = new Path("hdfs://" + LOGICAL_HOSTNAME + ":" + + NameNode.DEFAULT_PORT + "/" + TEST_FILE.toUri().getPath()); + FileSystem fs2 = withPort.getFileSystem(fs.getConf()); + assertTrue(fs2.exists(withPort)); + fs.close(); } + + /** + * Regression test for HDFS-2683. + */ + @Test + public void testLogicalUriShouldNotHavePorts() { + Configuration conf = new HdfsConfiguration(); + conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + ".foo", + ConfiguredFailoverProxyProvider.class.getName()); + Path p = new Path("hdfs://foo:12345/"); + try { + p.getFileSystem(conf).exists(p); + fail("Did not fail with fake FS"); + } catch (IOException ioe) { + GenericTestUtils.assertExceptionContains( + "does not use port information", ioe); + } + } public static FileSystem configureFailoverFs(MiniDFSCluster cluster, Configuration conf) throws IOException, URISyntaxException { @@ -99,7 +124,6 @@ public class TestDFSClientFailover { String nsId = "nameserviceId1"; - final String logicalNameNodeId = "ha-nn-uri"; String nameNodeId1 = "nn1"; String nameNodeId2 = "nn2"; @@ -114,10 +138,10 @@ public class TestDFSClientFailover { conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, nsId); conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, nsId), nameNodeId1 + "," + nameNodeId2); - conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + logicalNameNodeId, + conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + LOGICAL_HOSTNAME, ConfiguredFailoverProxyProvider.class.getName()); - FileSystem fs = FileSystem.get(new URI("hdfs://" + logicalNameNodeId), conf); + FileSystem fs = FileSystem.get(new URI("hdfs://" + LOGICAL_HOSTNAME), conf); return fs; } From 353bc3799d11a7fcdfcee96016535ccd6063cbc0 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 15 Dec 2011 21:54:00 +0000 Subject: [PATCH 048/177] HDFS-2689. HA: BookKeeperEditLogInputStream doesn't implement isInProgress(). Contributed by Aaron T. 
Myers git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1214965 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../contrib/bkjournal/BookKeeperEditLogInputStream.java | 6 ++++++ .../hadoop/hdfs/server/namenode/EditLogInputStream.java | 2 +- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 3c5caae1a32..fb18eea7805 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -55,3 +55,5 @@ HDFS-2671. NN should throw StandbyException in response to RPCs in STANDBY state HDFS-2680. DFSClient should construct failover proxy with exponential backoff (todd) HDFS-2683. Authority-based lookup of proxy provider fails if path becomes canonicalized (todd) + +HDFS-2689. HA: BookKeeperEditLogInputStream doesn't implement isInProgress() (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java index 707182ec5cc..636471a450f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperEditLogInputStream.java @@ -129,6 +129,12 @@ class BookKeeperEditLogInputStream extends EditLogInputStream { return null; } + // TODO(HA): Test this. + @Override + public boolean isInProgress() { + return true; + } + /** * Input stream implementation which can be used by * FSEditLogOp.Reader diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java index 2c4bdd53d00..7a7f8d8743a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputStream.java @@ -86,5 +86,5 @@ public abstract class EditLogInputStream implements JournalStream, Closeable { /** * Return true if this stream is in progress, false if it is finalized. */ - abstract boolean isInProgress(); + public abstract boolean isInProgress(); } From 116bf57bd673b55f91d8dde7a83fc43e11522ebd Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Fri, 16 Dec 2011 01:54:44 +0000 Subject: [PATCH 049/177] HADOOP-7928. HA: Client failover policy is incorrectly trying to fail over all IOExceptions. Contributed by Aaron T. Myers. 
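The gist: an IOException that arrives wrapped as a RemoteException was actively thrown by the remote server, so it is not evidence of a dead NameNode and must not trigger a fail-over; only network-level failures should. A simplified sketch of the decision the RetryPolicies hunk below implements; the method name and the use of RetryAction.FAIL as the non-failover outcome are illustrative assumptions.

    // Only socket errors and plain (non-Remote) IOExceptions are failover
    // candidates, and then only for idempotent methods.
    static RetryAction decide(Exception e, boolean isMethodIdempotent) {
      if (e instanceof SocketException ||
          (e instanceof IOException && !(e instanceof RemoteException))) {
        return isMethodIdempotent ? RetryAction.FAILOVER_AND_RETRY
                                  : RetryAction.FAIL;
      }
      return RetryAction.FAIL;  // e.g. RemoteException: the server answered, surface it
    }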
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1215019 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 3 + .../apache/hadoop/io/retry/RetryPolicies.java | 2 +- .../hadoop/io/retry/TestFailoverProxy.java | 24 ++++++- .../io/retry/UnreliableImplementation.java | 64 ++++++++----------- 4 files changed, 52 insertions(+), 41 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 56e11457c5f..56e1d8f823d 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -15,3 +15,6 @@ HADOOP-7922. Improve some logging for client IPC failovers and StandbyExceptions (todd) HADOOP-7921. StandbyException should extend IOException (todd) + +HADOOP-7928. HA: Client failover policy is incorrectly trying to fail over all + IOExceptions (atm) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java index 5afda594755..a96dc9ee0bc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java @@ -341,7 +341,7 @@ public class RetryPolicies { failovers == 0 ? 0 : calculateExponentialTime(delayMillis, failovers, maxDelayBase)); } else if (e instanceof SocketException || - e instanceof IOException) { + (e instanceof IOException && !(e instanceof RemoteException))) { if (isMethodIdempotent) { return RetryAction.FAILOVER_AND_RETRY; } else { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java index b52814cfc11..0a2963f7be5 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java @@ -181,7 +181,7 @@ public class TestFailoverProxy { assertEquals("impl1", unreliable.succeedsOnceThenFailsReturningString()); try { - assertEquals("impl2", unreliable.succeedsOnceThenFailsReturningString()); + unreliable.succeedsOnceThenFailsReturningString(); fail("should not have succeeded twice"); } catch (IOException e) { // Make sure we *don't* fail over since the first implementation threw an @@ -304,4 +304,26 @@ public class TestFailoverProxy { String result = unreliable.failsIfIdentifierDoesntMatch("renamed-impl1"); assertEquals("renamed-impl1", result); } + + /** + * Ensure that normal IO exceptions don't result in a failover. 
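+ * The first implementation throws a RemoteException, i.e. an IOException that
+ * the server actively produced, so the proxy should surface it to the caller
+ * rather than fail over to the second implementation.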
+ */ + @Test + public void testExpectedIOException() { + UnreliableInterface unreliable = (UnreliableInterface)RetryProxy + .create(UnreliableInterface.class, + new FlipFlopProxyProvider(UnreliableInterface.class, + new UnreliableImplementation("impl1", TypeOfExceptionToFailWith.REMOTE_EXCEPTION), + new UnreliableImplementation("impl2", TypeOfExceptionToFailWith.UNRELIABLE_EXCEPTION)), + RetryPolicies.failoverOnNetworkException( + RetryPolicies.TRY_ONCE_THEN_FAIL, 10, 1000, 10000)); + + try { + unreliable.failsIfIdentifierDoesntMatch("no-such-identifier"); + fail("Should have thrown *some* exception"); + } catch (Exception e) { + assertTrue("Expected IOE but got " + e.getClass(), + e instanceof IOException); + } + } } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java index 74a63894d80..185ed2a4426 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java @@ -19,6 +19,7 @@ package org.apache.hadoop.io.retry; import java.io.IOException; +import org.apache.hadoop.io.retry.UnreliableInterface.UnreliableException; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.StandbyException; @@ -37,7 +38,8 @@ public class UnreliableImplementation implements UnreliableInterface { public static enum TypeOfExceptionToFailWith { UNRELIABLE_EXCEPTION, STANDBY_EXCEPTION, - IO_EXCEPTION + IO_EXCEPTION, + REMOTE_EXCEPTION } public UnreliableImplementation() { @@ -95,14 +97,7 @@ public class UnreliableImplementation implements UnreliableInterface { if (succeedsOnceThenFailsCount++ < 1) { return identifier; } else { - switch (exceptionToFailWith) { - case STANDBY_EXCEPTION: - throw new StandbyException(identifier); - case UNRELIABLE_EXCEPTION: - throw new UnreliableException(identifier); - case IO_EXCEPTION: - throw new IOException(identifier); - } + throwAppropriateException(exceptionToFailWith, identifier); return null; } } @@ -113,16 +108,8 @@ public class UnreliableImplementation implements UnreliableInterface { if (succeedsTenTimesThenFailsCount++ < 10) { return identifier; } else { - switch (exceptionToFailWith) { - case STANDBY_EXCEPTION: - throw new StandbyException(identifier); - case UNRELIABLE_EXCEPTION: - throw new UnreliableException(identifier); - case IO_EXCEPTION: - throw new IOException(identifier); - default: - throw new RuntimeException(identifier); - } + throwAppropriateException(exceptionToFailWith, identifier); + return null; } } @@ -132,16 +119,8 @@ public class UnreliableImplementation implements UnreliableInterface { if (succeedsOnceThenFailsIdempotentCount++ < 1) { return identifier; } else { - switch (exceptionToFailWith) { - case STANDBY_EXCEPTION: - throw new StandbyException(identifier); - case UNRELIABLE_EXCEPTION: - throw new UnreliableException(identifier); - case IO_EXCEPTION: - throw new IOException(identifier); - default: - throw new RuntimeException(identifier); - } + throwAppropriateException(exceptionToFailWith, identifier); + return null; } } @@ -153,17 +132,24 @@ public class UnreliableImplementation implements UnreliableInterface { } else { String message = "expected '" + this.identifier + "' but received '" + identifier + "'"; - switch (exceptionToFailWith) { - case 
STANDBY_EXCEPTION: - throw new StandbyException(message); - case UNRELIABLE_EXCEPTION: - throw new UnreliableException(message); - case IO_EXCEPTION: - throw new IOException(message); - default: - throw new RuntimeException(message); - } + throwAppropriateException(exceptionToFailWith, message); + return null; } } + private static void throwAppropriateException(TypeOfExceptionToFailWith eType, + String message) throws UnreliableException, StandbyException, IOException { + switch (eType) { + case STANDBY_EXCEPTION: + throw new StandbyException(message); + case UNRELIABLE_EXCEPTION: + throw new UnreliableException(message); + case IO_EXCEPTION: + throw new IOException(message); + case REMOTE_EXCEPTION: + throw new RemoteException(IOException.class.getName(), message); + default: + throw new RuntimeException(message); + } + } } From 71071b904d0c9aec7b3713d41740f24182e81c36 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Fri, 16 Dec 2011 04:18:58 +0000 Subject: [PATCH 050/177] HDFS-2602. NN should log newly-allocated blocks without losing BlockInfo. Contributed by Aaron T. Myers git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1215036 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 2 + .../java/org/apache/hadoop/hdfs/HAUtil.java | 10 + .../server/blockmanagement/BlockManager.java | 34 ++- .../hdfs/server/namenode/FSDirectory.java | 58 ++-- .../hdfs/server/namenode/FSEditLogLoader.java | 225 +++++++++----- .../hdfs/server/namenode/FSNamesystem.java | 91 ++++-- .../hdfs/server/namenode/LeaseManager.java | 3 + .../namenode/PendingDataNodeMessages.java | 2 +- .../server/namenode/ha/EditLogTailer.java | 1 + .../apache/hadoop/hdfs/TestPersistBlocks.java | 280 ++++++++++++++++++ .../hdfs/server/namenode/TestEditLog.java | 11 +- 12 files changed, 574 insertions(+), 145 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index fb18eea7805..bee6c4c2808 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -57,3 +57,5 @@ HDFS-2680. DFSClient should construct failover proxy with exponential backoff (t HDFS-2683. Authority-based lookup of proxy provider fails if path becomes canonicalized (todd) HDFS-2689. HA: BookKeeperEditLogInputStream doesn't implement isInProgress() (atm) + +HDFS-2602. 
NN should log newly-allocated blocks without losing BlockInfo (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index b0a57863552..f30b58e9169 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -125,6 +125,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final boolean DFS_WEBHDFS_ENABLED_DEFAULT = false; public static final String DFS_PERMISSIONS_ENABLED_KEY = "dfs.permissions.enabled"; public static final boolean DFS_PERMISSIONS_ENABLED_DEFAULT = true; + public static final String DFS_PERSIST_BLOCKS_KEY = "dfs.persist.blocks"; + public static final boolean DFS_PERSIST_BLOCKS_DEFAULT = false; public static final String DFS_PERMISSIONS_SUPERUSERGROUP_KEY = "dfs.permissions.superusergroup"; public static final String DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT = "supergroup"; public static final String DFS_ADMIN = "dfs.cluster.administrators"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 6952871ccd1..24537a3d38c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -44,6 +44,16 @@ public class HAUtil { return nnMap != null && nnMap.size() > 1; } + /** + * Returns true if HA is using a shared edits directory. + * + * @param conf Configuration + * @return true if HA config is using a shared edits dir, false otherwise. + */ + public static boolean usesSharedEditsDir(Configuration conf) { + return null != conf.get(DFS_NAMENODE_SHARED_EDITS_DIR_KEY); + } + /** * Get the namenode Id by matching the {@code addressKey} * with the the address of the local node. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 50619dff57b..03a851a7b5a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -425,7 +425,7 @@ public class BlockManager { final boolean b = commitBlock((BlockInfoUnderConstruction)lastBlock, commitBlock); if(countNodes(lastBlock).liveReplicas() >= minReplication) - completeBlock(fileINode,fileINode.numBlocks()-1); + completeBlock(fileINode,fileINode.numBlocks()-1, false); return b; } @@ -437,14 +437,14 @@ public class BlockManager { * of replicas reported from data-nodes. 
*/ private BlockInfo completeBlock(final INodeFile fileINode, - final int blkIndex) throws IOException { + final int blkIndex, boolean force) throws IOException { if(blkIndex < 0) return null; BlockInfo curBlock = fileINode.getBlocks()[blkIndex]; if(curBlock.isComplete()) return curBlock; BlockInfoUnderConstruction ucBlock = (BlockInfoUnderConstruction)curBlock; - if(ucBlock.numNodes() < minReplication) + if (!force && ucBlock.numNodes() < minReplication) throw new IOException("Cannot complete block: " + "block does not satisfy minimal replication requirement."); BlockInfo completeBlock = ucBlock.convertToCompleteBlock(); @@ -455,15 +455,27 @@ public class BlockManager { } private BlockInfo completeBlock(final INodeFile fileINode, - final BlockInfo block) throws IOException { + final BlockInfo block, boolean force) throws IOException { BlockInfo[] fileBlocks = fileINode.getBlocks(); for(int idx = 0; idx < fileBlocks.length; idx++) if(fileBlocks[idx] == block) { - return completeBlock(fileINode, idx); + return completeBlock(fileINode, idx, force); } return block; } + + /** + * Force the given block in the given file to be marked as complete, + * regardless of whether enough replicas are present. This is necessary + * when tailing edit logs as a Standby. + */ + public BlockInfo forceCompleteBlock(final INodeFile fileINode, + final BlockInfoUnderConstruction block) throws IOException { + block.commitBlock(block); + return completeBlock(fileINode, block, true); + } + /** * Convert the last block of the file to an under construction block.
      * The block is converted only if the file has blocks and the last one @@ -590,8 +602,8 @@ public class BlockManager { final boolean isCorrupt = numCorruptNodes == numNodes; final int numMachines = isCorrupt ? numNodes: numNodes - numCorruptNodes; final DatanodeDescriptor[] machines = new DatanodeDescriptor[numMachines]; + int j = 0; if (numMachines > 0) { - int j = 0; for(Iterator it = blocksMap.nodeIterator(blk); it.hasNext();) { final DatanodeDescriptor d = it.next(); @@ -600,6 +612,12 @@ public class BlockManager { machines[j++] = d; } } + assert j == machines.length : + "isCorrupt: " + isCorrupt + + " numMachines: " + numMachines + + " numNodes: " + numNodes + + " numCorrupt: " + numCorruptNodes + + " numCorruptRepls: " + numCorruptReplicas; final ExtendedBlock eb = new ExtendedBlock(namesystem.getBlockPoolId(), blk); return new LocatedBlock(eb, machines, pos, isCorrupt); } @@ -1608,7 +1626,7 @@ public class BlockManager { int numCurrentReplica = countLiveNodes(storedBlock); if (storedBlock.getBlockUCState() == BlockUCState.COMMITTED && numCurrentReplica >= minReplication) - storedBlock = completeBlock(storedBlock.getINode(), storedBlock); + storedBlock = completeBlock(storedBlock.getINode(), storedBlock, false); // check whether safe replication is reached for the block // only complete blocks are counted towards that @@ -1673,7 +1691,7 @@ public class BlockManager { if(storedBlock.getBlockUCState() == BlockUCState.COMMITTED && numLiveReplicas >= minReplication) - storedBlock = completeBlock(fileINode, storedBlock); + storedBlock = completeBlock(fileINode, storedBlock, false); // check whether safe replication is reached for the block // only complete blocks are counted towards that diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 23b2e220b8f..e11690e566e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -263,34 +263,19 @@ public class FSDirectory implements Closeable { */ INode unprotectedAddFile( String path, PermissionStatus permissions, - BlockInfo[] blocks, short replication, long modificationTime, long atime, long preferredBlockSize) throws UnresolvedLinkException { INode newNode; - long diskspace = UNKNOWN_DISK_SPACE; assert hasWriteLock(); - if (blocks == null) - newNode = new INodeDirectory(permissions, modificationTime); - else { - newNode = new INodeFile(permissions, blocks.length, replication, - modificationTime, atime, preferredBlockSize); - diskspace = ((INodeFile)newNode).diskspaceConsumed(blocks); - } + newNode = new INodeFile(permissions, new BlockInfo[0], replication, + modificationTime, atime, preferredBlockSize); writeLock(); try { try { - newNode = addNode(path, newNode, diskspace); - if(newNode != null && blocks != null) { - int nrBlocks = blocks.length; - // Add file->block mapping - INodeFile newF = (INodeFile)newNode; - for (int i = 0; i < nrBlocks; i++) { - newF.setBlock(i, getBlockManager().addINode(blocks[i], newF)); - } - } + newNode = addNode(path, newNode, 0); } catch (IOException e) { return null; } @@ -391,7 +376,7 @@ public class FSDirectory implements Closeable { writeUnlock(); } } - + /** * Close file. 
*/ @@ -414,7 +399,7 @@ public class FSDirectory implements Closeable { } /** - * Remove a block to the file. + * Remove a block from the file. */ boolean removeBlock(String path, INodeFileUnderConstruction fileNode, Block block) throws IOException { @@ -422,27 +407,32 @@ public class FSDirectory implements Closeable { writeLock(); try { - // modify file-> block and blocksMap - fileNode.removeLastBlock(block); - getBlockManager().removeBlockFromMap(block); - + unprotectedRemoveBlock(path, fileNode, block); // write modified block locations to log fsImage.getEditLog().logOpenFile(path, fileNode); - if(NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("DIR* FSDirectory.removeBlock: " - +path+" with "+block - +" block is removed from the file system"); - } - - // update space consumed - INode[] pathINodes = getExistingPathINodes(path); - updateCount(pathINodes, pathINodes.length-1, 0, - -fileNode.getPreferredBlockSize()*fileNode.getReplication(), true); } finally { writeUnlock(); } return true; } + + void unprotectedRemoveBlock(String path, + INodeFileUnderConstruction fileNode, Block block) throws IOException { + // modify file-> block and blocksMap + fileNode.removeLastBlock(block); + getBlockManager().removeBlockFromMap(block); + + if(NameNode.stateChangeLog.isDebugEnabled()) { + NameNode.stateChangeLog.debug("DIR* FSDirectory.removeBlock: " + +path+" with "+block + +" block is removed from the file system"); + } + + // update space consumed + INode[] pathINodes = getExistingPathINodes(path); + updateCount(pathINodes, pathINodes.length - 1, 0, + - fileNode.getPreferredBlockSize()*fileNode.getReplication(), true); + } /** * @see #unprotectedRenameTo(String, String, long) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 80aa115df1f..c82f425b3a4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -28,6 +28,7 @@ import java.util.EnumMap; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LayoutVersion; @@ -57,6 +58,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.util.Holder; + import com.google.common.base.Joiner; @InterfaceAudience.Private @@ -137,82 +139,84 @@ public class FSEditLogLoader { numEdits++; incrOpCount(op.opCode, opCounts); switch (op.opCode) { - case OP_ADD: - case OP_CLOSE: { + case OP_ADD: { AddCloseOp addCloseOp = (AddCloseOp)op; - // versions > 0 support per file replication - // get name and replication - final short replication = fsNamesys.getBlockManager( - ).adjustReplication(addCloseOp.replication); - - long blockSize = addCloseOp.blockSize; - BlockInfo blocks[] = new BlockInfo[addCloseOp.blocks.length]; - for (int i = 0; i < addCloseOp.blocks.length; i++) { - 
if(addCloseOp.opCode == FSEditLogOpCodes.OP_ADD - && i == addCloseOp.blocks.length-1) { - blocks[i] = new BlockInfoUnderConstruction(addCloseOp.blocks[i], - replication); - } else { - blocks[i] = new BlockInfo(addCloseOp.blocks[i], replication); + // See if the file already exists (persistBlocks call) + INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path); + if (oldFile == null) { // this is OP_ADD on a new file + // versions > 0 support per file replication + // get name and replication + final short replication = fsNamesys.getBlockManager( + ).adjustReplication(addCloseOp.replication); + PermissionStatus permissions = fsNamesys.getUpgradePermission(); + if (addCloseOp.permissions != null) { + permissions = addCloseOp.permissions; } - } - - PermissionStatus permissions = fsNamesys.getUpgradePermission(); - if (addCloseOp.permissions != null) { - permissions = addCloseOp.permissions; - } - - - // Older versions of HDFS does not store the block size in inode. - // If the file has more than one block, use the size of the - // first block as the blocksize. Otherwise use the default - // block size. - if (-8 <= logVersion && blockSize == 0) { - if (blocks.length > 1) { - blockSize = blocks[0].getNumBytes(); - } else { - long first = ((blocks.length == 1)? blocks[0].getNumBytes(): 0); - blockSize = Math.max(fsNamesys.getDefaultBlockSize(), first); + long blockSize = addCloseOp.blockSize; + + if (FSNamesystem.LOG.isDebugEnabled()) { + FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + + " numblocks : " + addCloseOp.blocks.length + + " clientHolder " + addCloseOp.clientName + + " clientMachine " + addCloseOp.clientMachine); } + + // Older versions of HDFS does not store the block size in inode. + // If the file has more than one block, use the size of the + // first block as the blocksize. Otherwise use the default + // block size. + if (-8 <= logVersion && blockSize == 0) { + if (addCloseOp.blocks.length > 1) { + blockSize = addCloseOp.blocks[0].getNumBytes(); + } else { + long first = ((addCloseOp.blocks.length == 1)? + addCloseOp.blocks[0].getNumBytes(): 0); + blockSize = Math.max(fsNamesys.getDefaultBlockSize(), first); + } + } + + // TODO: We should do away with this add-then-replace dance. + + // add to the file tree + INodeFile node = (INodeFile)fsDir.unprotectedAddFile( + addCloseOp.path, permissions, + replication, addCloseOp.mtime, + addCloseOp.atime, blockSize); + + fsNamesys.prepareFileForWrite(addCloseOp.path, node, + addCloseOp.clientName, addCloseOp.clientMachine, null); + } else { // This is OP_ADD on an existing file + if (!oldFile.isUnderConstruction()) { + // This is a call to append() on an already-closed file. + fsNamesys.prepareFileForWrite(addCloseOp.path, oldFile, + addCloseOp.clientName, addCloseOp.clientMachine, null); + oldFile = getINodeFile(fsDir, addCloseOp.path); + } + + updateBlocks(fsDir, addCloseOp, oldFile); } - - - // The open lease transaction re-creates a file if necessary. - // Delete the file if it already exists. 
- if (FSNamesystem.LOG.isDebugEnabled()) { - FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + - " numblocks : " + blocks.length + - " clientHolder " + addCloseOp.clientName + - " clientMachine " + addCloseOp.clientMachine); + break; + } + case OP_CLOSE: { + AddCloseOp addCloseOp = (AddCloseOp)op; + + INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path); + if (oldFile == null) { + throw new IOException("Operation trying to close non-existent file " + + addCloseOp.path); } + + // Update in-memory data structures + updateBlocks(fsDir, addCloseOp, oldFile); - fsDir.unprotectedDelete(addCloseOp.path, addCloseOp.mtime); - - // add to the file tree - INodeFile node = (INodeFile)fsDir.unprotectedAddFile( - addCloseOp.path, permissions, - blocks, replication, - addCloseOp.mtime, addCloseOp.atime, blockSize); - if (addCloseOp.opCode == FSEditLogOpCodes.OP_ADD) { - // - // Replace current node with a INodeUnderConstruction. - // Recreate in-memory lease record. - // - INodeFileUnderConstruction cons = new INodeFileUnderConstruction( - node.getLocalNameBytes(), - node.getReplication(), - node.getModificationTime(), - node.getPreferredBlockSize(), - node.getBlocks(), - node.getPermissionStatus(), - addCloseOp.clientName, - addCloseOp.clientMachine, - null); - fsDir.replaceNode(addCloseOp.path, node, cons); - fsNamesys.leaseManager.addLease(cons.getClientName(), - addCloseOp.path); - } + // Now close the file + INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile; + // TODO: we could use removeLease(holder, path) here, but OP_CLOSE + // doesn't seem to serialize the holder... unclear why! + fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path); + INodeFile newFile = ucFile.convertToInodeFile(); + fsDir.replaceNode(addCloseOp.path, ucFile, newFile); break; } case OP_SET_REPLICATION: { @@ -404,7 +408,88 @@ public class FSEditLogLoader { } return numEdits; } - + + private static INodeFile getINodeFile(FSDirectory fsDir, String path) + throws IOException { + INode inode = fsDir.getINode(path); + if (inode != null) { + if (!(inode instanceof INodeFile)) { + throw new IOException("Operation trying to get non-file " + path); + } + } + return (INodeFile)inode; + } + + /** + * Update in-memory data structures with new block information. + * @throws IOException + */ + private void updateBlocks(FSDirectory fsDir, AddCloseOp addCloseOp, + INodeFile file) throws IOException { + + // Update the salient file attributes. + file.setAccessTime(addCloseOp.atime); + file.setModificationTimeForce(addCloseOp.mtime); + + // Update its block list + BlockInfo[] oldBlocks = file.getBlocks(); + + // Are we only updating the last block's gen stamp. 
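+ // If the logged block list has the same length as the in-memory one, the only
+ // difference accepted below is a new generation stamp on the last block (e.g.
+ // pipeline recovery); any other block-ID or genstamp mismatch is an error.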
+ boolean isGenStampUpdate = oldBlocks.length == addCloseOp.blocks.length; + + // First, update blocks in common + for (int i = 0; i < oldBlocks.length && i < addCloseOp.blocks.length; i++) { + BlockInfo oldBlock = oldBlocks[i]; + Block newBlock = addCloseOp.blocks[i]; + + boolean isLastBlock = i == oldBlocks.length - 1; + if (oldBlock.getBlockId() != newBlock.getBlockId() || + (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() && + !(isGenStampUpdate && isLastBlock))) { + throw new IOException("Mismatched block IDs or generation stamps, " + + "attempting to replace block " + oldBlock + " with " + newBlock + + " as block # " + i + "/" + addCloseOp.blocks.length + " of " + + addCloseOp.path); + } + + oldBlock.setNumBytes(newBlock.getNumBytes()); + oldBlock.setGenerationStamp(newBlock.getGenerationStamp()); + + if (oldBlock instanceof BlockInfoUnderConstruction && + (!isLastBlock || addCloseOp.opCode == FSEditLogOpCodes.OP_CLOSE)) { + fsNamesys.getBlockManager().forceCompleteBlock( + (INodeFileUnderConstruction)file, + (BlockInfoUnderConstruction)oldBlock); + } + } + + if (addCloseOp.blocks.length < oldBlocks.length) { + // We're removing a block from the file, e.g. abandonBlock(...) + if (!file.isUnderConstruction()) { + throw new IOException("Trying to remove a block from file " + + addCloseOp.path + " which is not under construction."); + } + if (addCloseOp.blocks.length != oldBlocks.length - 1) { + throw new IOException("Trying to remove more than one block from file " + + addCloseOp.path); + } + fsDir.unprotectedRemoveBlock(addCloseOp.path, + (INodeFileUnderConstruction)file, oldBlocks[oldBlocks.length - 1]); + } else if (addCloseOp.blocks.length > oldBlocks.length) { + // We're adding blocks + for (int i = oldBlocks.length; i < addCloseOp.blocks.length; i++) { + Block newBlock = addCloseOp.blocks[i]; + BlockInfo newBI = new BlockInfoUnderConstruction(newBlock, file.getReplication()); + fsNamesys.getBlockManager().addINode(newBI, file); + file.addBlock(newBI); + } + } + + if (addCloseOp.blocks.length > 0) { + fsNamesys.notifyGenStampUpdate( + addCloseOp.blocks[addCloseOp.blocks.length - 1].getGenerationStamp()); + } + } private static void dumpOpCounts( EnumMap> opCounts) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 9229926cae2..1003dc4a0f3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -52,6 +52,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_UPGRADE_PERMISSI import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_UPGRADE_PERMISSION_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERSIST_BLOCKS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERSIST_BLOCKS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_REPLICATION_DEFAULT; @@ -203,7 +205,7 @@ import com.google.common.base.Preconditions; 
@Metrics(context="dfs") public class FSNamesystem implements Namesystem, FSClusterStats, FSNamesystemMBean, NameNodeMXBean { - static final Log LOG = LogFactory.getLog(FSNamesystem.class); + public static final Log LOG = LogFactory.getLog(FSNamesystem.class); private static final ThreadLocal auditBuffer = new ThreadLocal() { @@ -252,6 +254,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, static final int DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED = 100; static int BLOCK_DELETION_INCREMENT = 1000; private boolean isPermissionEnabled; + private boolean persistBlocks; private UserGroupInformation fsOwner; private String supergroup; private PermissionStatus defaultPermission; @@ -669,6 +672,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, DFS_PERMISSIONS_ENABLED_DEFAULT); LOG.info("supergroup=" + supergroup); LOG.info("isPermissionEnabled=" + isPermissionEnabled); + + this.persistBlocks = conf.getBoolean(DFS_PERSIST_BLOCKS_KEY, + DFS_PERSIST_BLOCKS_DEFAULT); + // block allocation has to be persisted in HA using a shared edits directory + // so that the standby has up-to-date namespace information + String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); + this.persistBlocks |= HAUtil.isHAEnabled(conf, nameserviceId) && + HAUtil.usesSharedEditsDir(conf); + short filePermission = (short)conf.getInt(DFS_NAMENODE_UPGRADE_PERMISSION_KEY, DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT); this.defaultPermission = PermissionStatus.createImmutable( @@ -1403,26 +1415,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, blockManager.getDatanodeManager().getDatanodeByHost(clientMachine); if (append && myFile != null) { - // - // Replace current node with a INodeUnderConstruction. - // Recreate in-memory lease record. - // - INodeFile node = (INodeFile) myFile; - INodeFileUnderConstruction cons = new INodeFileUnderConstruction( - node.getLocalNameBytes(), - node.getReplication(), - node.getModificationTime(), - node.getPreferredBlockSize(), - node.getBlocks(), - node.getPermissionStatus(), - holder, - clientMachine, - clientNode); - dir.replaceNode(src, node, cons); - leaseManager.addLease(cons.getClientName(), src); - - // convert last block to under-construction - return blockManager.convertLastBlockToUnderConstruction(cons); + return prepareFileForWrite(src, myFile, holder, clientMachine, clientNode); } else { // Now we can add the name to the filesystem. This file has no // blocks associated with it. @@ -1450,6 +1443,39 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } return null; } + + /** + * Replace current node with a INodeUnderConstruction. + * Recreate in-memory lease record. 
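+ * Called both from the append path in FSNamesystem and from the edit log
+ * loader when it replays an OP_ADD against a file that already exists.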
+ * + * @param src path to the file + * @param file existing file object + * @param leaseHolder identifier of the lease holder on this file + * @param clientMachine identifier of the client machine + * @param clientNode if the client is collocated with a DN, that DN's descriptor + * @return the last block locations if the block is partial or null otherwise + * @throws UnresolvedLinkException + * @throws IOException + */ + public LocatedBlock prepareFileForWrite(String src, INode file, + String leaseHolder, String clientMachine, DatanodeDescriptor clientNode) + throws UnresolvedLinkException, IOException { + INodeFile node = (INodeFile) file; + INodeFileUnderConstruction cons = new INodeFileUnderConstruction( + node.getLocalNameBytes(), + node.getReplication(), + node.getModificationTime(), + node.getPreferredBlockSize(), + node.getBlocks(), + node.getPermissionStatus(), + leaseHolder, + clientMachine, + clientNode); + dir.replaceNode(src, node, cons); + leaseManager.addLease(cons.getClientName(), src); + + return blockManager.convertLastBlockToUnderConstruction(cons); + } /** * Recover lease; @@ -1700,10 +1726,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, for (DatanodeDescriptor dn : targets) { dn.incBlocksScheduled(); - } + } + dir.persistBlocks(src, pendingFile); } finally { writeUnlock(); } + if (persistBlocks) { + getEditLog().logSync(); + } // Create next block LocatedBlock b = new LocatedBlock(getExtendedBlock(newBlock), targets, fileLength); @@ -1782,10 +1812,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, NameNode.stateChangeLog.debug("BLOCK* NameSystem.abandonBlock: " + b + " is removed from pendingCreates"); } - return true; + dir.persistBlocks(src, file); } finally { writeUnlock(); } + if (persistBlocks) { + getEditLog().logSync(); + } + + return true; } // make sure that we still have the lease on this file. @@ -2594,8 +2629,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, //remove lease, close file finalizeINodeFileUnderConstruction(src, pendingFile); } else if (supportAppends) { - // If this commit does not want to close the file, persist - // blocks only if append is supported + // If this commit does not want to close the file, persist blocks + // only if append is supported or we're explicitly told to dir.persistBlocks(src, pendingFile); } } finally { @@ -3565,7 +3600,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } assert node != null : "Found a lease for nonexisting file."; assert node.isUnderConstruction() : - "Found a lease for file that is not under construction."; + "Found a lease for file " + path + " that is not under construction." 
+ + " lease=" + lease; INodeFileUnderConstruction cons = (INodeFileUnderConstruction) node; BlockInfo[] blocks = cons.getBlocks(); if(blocks == null) @@ -3881,7 +3917,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ void setGenerationStamp(long stamp) { generationStamp.setStamp(stamp); - notifyGenStampUpdate(stamp); } /** @@ -4000,7 +4035,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } finally { writeUnlock(); } - if (supportAppends) { + if (supportAppends || persistBlocks) { getEditLog().logSync(); } LOG.info("updatePipeline(" + oldBlock + ") successfully to " + newBlock); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java index 07f04ecba43..323dac06a32 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java @@ -153,6 +153,9 @@ public class LeaseManager { Lease lease = getLease(holder); if (lease != null) { removeLease(lease, src); + } else { + LOG.warn("Removing non-existent lease! holder=" + holder + + " src=" + src); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java index aafa022136e..04eb4b9ccc0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java @@ -188,7 +188,7 @@ public class PendingDataNodeMessages { */ synchronized DataNodeMessage take(long gs) { DataNodeMessage m = queue.peek(); - if (m != null && m.getTargetGs() < gs) { + if (m != null && m.getTargetGs() <= gs) { return queue.remove(); } else { return null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index e1ce570c093..8a837eea725 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -152,4 +152,5 @@ public class EditLogTailer { } } } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java new file mode 100644 index 00000000000..dd1ff016a8a --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java @@ -0,0 +1,280 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs; + +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.server.namenode.FSImage; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.io.IOUtils; +import org.apache.log4j.Level; + +import java.io.IOException; +import java.util.Random; +import static org.junit.Assert.*; +import org.junit.Test; + +/** + * A JUnit test for checking if restarting DFS preserves the + * blocks that are part of an unclosed file. + */ +public class TestPersistBlocks { + static { + ((Log4JLogger)FSImage.LOG).getLogger().setLevel(Level.ALL); + ((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL); + } + + private static final int BLOCK_SIZE = 4096; + private static final int NUM_BLOCKS = 5; + + private static final String FILE_NAME = "/data"; + private static final Path FILE_PATH = new Path(FILE_NAME); + + static final byte[] DATA_BEFORE_RESTART = new byte[BLOCK_SIZE * NUM_BLOCKS]; + static final byte[] DATA_AFTER_RESTART = new byte[BLOCK_SIZE * NUM_BLOCKS]; + static { + Random rand = new Random(); + rand.nextBytes(DATA_BEFORE_RESTART); + rand.nextBytes(DATA_AFTER_RESTART); + } + + /** check if DFS remains in proper condition after a restart */ + @Test + public void testRestartDfs() throws Exception { + final Configuration conf = new HdfsConfiguration(); + // Turn off persistent IPC, so that the DFSClient can survive NN restart + conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, + 0); + conf.setBoolean(DFSConfigKeys.DFS_PERSIST_BLOCKS_KEY, true); + MiniDFSCluster cluster = null; + + long len = 0; + FSDataOutputStream stream; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + FileSystem fs = cluster.getFileSystem(); + // Creating a file with 4096 blockSize to write multiple blocks + stream = fs.create(FILE_PATH, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE); + stream.write(DATA_BEFORE_RESTART); + stream.hflush(); + + // Wait for at least a few blocks to get through + while (len <= BLOCK_SIZE) { + FileStatus status = fs.getFileStatus(FILE_PATH); + len = status.getLen(); + Thread.sleep(100); + } + + // explicitly do NOT close the file. 
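+ // With dfs.persist.blocks enabled, each allocated block was already synced to
+ // the edit log, so the under-construction file should survive the restart.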
+ cluster.restartNameNode(); + + // Check that the file has no less bytes than before the restart + // This would mean that blocks were successfully persisted to the log + FileStatus status = fs.getFileStatus(FILE_PATH); + assertTrue("Length too short: " + status.getLen(), + status.getLen() >= len); + + // And keep writing (ensures that leases are also persisted correctly) + stream.write(DATA_AFTER_RESTART); + stream.close(); + + // Verify that the data showed up, both from before and after the restart. + FSDataInputStream readStream = fs.open(FILE_PATH); + try { + byte[] verifyBuf = new byte[DATA_BEFORE_RESTART.length]; + IOUtils.readFully(readStream, verifyBuf, 0, verifyBuf.length); + assertArrayEquals(DATA_BEFORE_RESTART, verifyBuf); + + IOUtils.readFully(readStream, verifyBuf, 0, verifyBuf.length); + assertArrayEquals(DATA_AFTER_RESTART, verifyBuf); + } finally { + IOUtils.closeStream(readStream); + } + } finally { + if (cluster != null) { cluster.shutdown(); } + } + } + + @Test + public void testRestartDfsWithAbandonedBlock() throws Exception { + final Configuration conf = new HdfsConfiguration(); + // Turn off persistent IPC, so that the DFSClient can survive NN restart + conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, + 0); + conf.setBoolean(DFSConfigKeys.DFS_PERSIST_BLOCKS_KEY, true); + MiniDFSCluster cluster = null; + + long len = 0; + FSDataOutputStream stream; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + FileSystem fs = cluster.getFileSystem(); + // Creating a file with 4096 blockSize to write multiple blocks + stream = fs.create(FILE_PATH, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE); + stream.write(DATA_BEFORE_RESTART); + stream.hflush(); + + // Wait for all of the blocks to get through + while (len < BLOCK_SIZE * (NUM_BLOCKS - 1)) { + FileStatus status = fs.getFileStatus(FILE_PATH); + len = status.getLen(); + Thread.sleep(100); + } + + // Abandon the last block + DFSClient dfsclient = DFSClientAdapter.getDFSClient((DistributedFileSystem)fs); + LocatedBlocks blocks = dfsclient.getNamenode().getBlockLocations( + FILE_NAME, 0, BLOCK_SIZE * NUM_BLOCKS); + assertEquals(NUM_BLOCKS, blocks.getLocatedBlocks().size()); + LocatedBlock b = blocks.getLastLocatedBlock(); + dfsclient.getNamenode().abandonBlock(b.getBlock(), FILE_NAME, + dfsclient.clientName); + + // explicitly do NOT close the file. + cluster.restartNameNode(); + + // Check that the file has no less bytes than before the restart + // This would mean that blocks were successfully persisted to the log + FileStatus status = fs.getFileStatus(FILE_PATH); + assertTrue("Length incorrect: " + status.getLen(), + status.getLen() != len - BLOCK_SIZE); + + // Verify the data showed up from before restart, sans abandoned block. 
+ FSDataInputStream readStream = fs.open(FILE_PATH); + try { + byte[] verifyBuf = new byte[DATA_BEFORE_RESTART.length - BLOCK_SIZE]; + IOUtils.readFully(readStream, verifyBuf, 0, verifyBuf.length); + byte[] expectedBuf = new byte[DATA_BEFORE_RESTART.length - BLOCK_SIZE]; + System.arraycopy(DATA_BEFORE_RESTART, 0, + expectedBuf, 0, expectedBuf.length); + assertArrayEquals(expectedBuf, verifyBuf); + } finally { + IOUtils.closeStream(readStream); + } + } finally { + if (cluster != null) { cluster.shutdown(); } + } + } + + @Test + public void testRestartWithPartialBlockHflushed() throws IOException { + final Configuration conf = new HdfsConfiguration(); + // Turn off persistent IPC, so that the DFSClient can survive NN restart + conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, + 0); + conf.setBoolean(DFSConfigKeys.DFS_PERSIST_BLOCKS_KEY, true); + MiniDFSCluster cluster = null; + + FSDataOutputStream stream; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + FileSystem fs = cluster.getFileSystem(); + NameNode.getAddress(conf).getPort(); + // Creating a file with 4096 blockSize to write multiple blocks + stream = fs.create(FILE_PATH, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE); + stream.write(DATA_BEFORE_RESTART); + stream.write((byte)1); + stream.hflush(); + + // explicitly do NOT close the file before restarting the NN. + cluster.restartNameNode(); + + // this will fail if the final block of the file is prematurely COMPLETEd + stream.write((byte)2); + stream.hflush(); + stream.close(); + + assertEquals(DATA_BEFORE_RESTART.length + 2, + fs.getFileStatus(FILE_PATH).getLen()); + + FSDataInputStream readStream = fs.open(FILE_PATH); + try { + byte[] verifyBuf = new byte[DATA_BEFORE_RESTART.length + 2]; + IOUtils.readFully(readStream, verifyBuf, 0, verifyBuf.length); + byte[] expectedBuf = new byte[DATA_BEFORE_RESTART.length + 2]; + System.arraycopy(DATA_BEFORE_RESTART, 0, expectedBuf, 0, + DATA_BEFORE_RESTART.length); + System.arraycopy(new byte[]{1, 2}, 0, expectedBuf, + DATA_BEFORE_RESTART.length, 2); + assertArrayEquals(expectedBuf, verifyBuf); + } finally { + IOUtils.closeStream(readStream); + } + } finally { + if (cluster != null) { cluster.shutdown(); } + } + } + + @Test + public void testRestartWithAppend() throws IOException { + final Configuration conf = new HdfsConfiguration(); + // Turn off persistent IPC, so that the DFSClient can survive NN restart + conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, + 0); + conf.setBoolean(DFSConfigKeys.DFS_PERSIST_BLOCKS_KEY, true); + MiniDFSCluster cluster = null; + + FSDataOutputStream stream; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build(); + FileSystem fs = cluster.getFileSystem(); + NameNode.getAddress(conf).getPort(); + // Creating a file with 4096 blockSize to write multiple blocks + stream = fs.create(FILE_PATH, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE); + stream.write(DATA_BEFORE_RESTART, 0, DATA_BEFORE_RESTART.length / 2); + stream.close(); + stream = fs.append(FILE_PATH, BLOCK_SIZE); + stream.write(DATA_BEFORE_RESTART, DATA_BEFORE_RESTART.length / 2, + DATA_BEFORE_RESTART.length / 2); + stream.close(); + + assertEquals(DATA_BEFORE_RESTART.length, + fs.getFileStatus(FILE_PATH).getLen()); + + cluster.restartNameNode(); + + assertEquals(DATA_BEFORE_RESTART.length, + fs.getFileStatus(FILE_PATH).getLen()); + + FSDataInputStream readStream = fs.open(FILE_PATH); + try { + byte[] verifyBuf = new 
byte[DATA_BEFORE_RESTART.length]; + IOUtils.readFully(readStream, verifyBuf, 0, verifyBuf.length); + assertArrayEquals(DATA_BEFORE_RESTART, verifyBuf); + } finally { + IOUtils.closeStream(readStream); + } + } finally { + if (cluster != null) { cluster.shutdown(); } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index 104d6527881..fe756b2c992 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -116,10 +116,12 @@ public class TestEditLog extends TestCase { int numTransactions; short replication = 3; long blockSize = 64; + final int id; - Transactions(FSNamesystem ns, int num) { + Transactions(FSNamesystem ns, int num, int id) { namesystem = ns; numTransactions = num; + this.id = id; } // add a bunch of transactions. @@ -131,8 +133,9 @@ public class TestEditLog extends TestCase { for (int i = 0; i < numTransactions; i++) { INodeFileUnderConstruction inode = new INodeFileUnderConstruction( p, replication, blockSize, 0, "", "", null); - editLog.logOpenFile("/filename" + i, inode); - editLog.logCloseFile("/filename" + i, inode); + String fileName = "/filename-" + id + "-" + i; + editLog.logOpenFile(fileName, inode); + editLog.logCloseFile(fileName, inode); editLog.logSync(); } } @@ -280,7 +283,7 @@ public class TestEditLog extends TestCase { // Create threads and make them run transactions concurrently. Thread threadId[] = new Thread[NUM_THREADS]; for (int i = 0; i < NUM_THREADS; i++) { - Transactions trans = new Transactions(namesystem, NUM_TRANSACTIONS); + Transactions trans = new Transactions(namesystem, NUM_TRANSACTIONS, i); threadId[i] = new Thread(trans, "TransactionThread-" + i); threadId[i].start(); } From cdb9f01ad4e6084ddf83e40eb3ec18a89fbbae42 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 16 Dec 2011 04:25:13 +0000 Subject: [PATCH 051/177] HDFS-2667. Fix transition from active to standby. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1215037 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hadoop/hdfs/server/namenode/FSImage.java | 11 +- .../hdfs/server/namenode/FSNamesystem.java | 6 +- .../server/namenode/FileJournalManager.java | 2 +- .../apache/hadoop/hdfs/MiniDFSCluster.java | 2 +- .../namenode/TestFileJournalManager.java | 4 +- .../namenode/ha/TestHAStateTransitions.java | 136 ++++++++++++++++++ 7 files changed, 156 insertions(+), 7 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index bee6c4c2808..4ba57020936 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -59,3 +59,5 @@ HDFS-2683. Authority-based lookup of proxy provider fails if path becomes canoni HDFS-2689. HA: BookKeeperEditLogInputStream doesn't implement isInProgress() (atm) HDFS-2602. NN should log newly-allocated blocks without losing BlockInfo (atm) + +HDFS-2667. 
Fix transition from active to standby (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 8ce90eb0e2f..54c5cf8e109 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -678,9 +678,9 @@ public class FSImage implements Closeable { for (EditLogInputStream editIn : editStreams) { LOG.info("Reading " + editIn + " expecting start txid #" + startingTxId); int thisNumLoaded = loader.loadFSEdits(editIn, startingTxId); + lastAppliedTxId = startingTxId + thisNumLoaded - 1; startingTxId += thisNumLoaded; numLoaded += thisNumLoaded; - lastAppliedTxId += thisNumLoaded; } } finally { // TODO(HA): Should this happen when called by the tailer? @@ -1117,4 +1117,13 @@ public class FSImage implements Closeable { return lastAppliedTxId; } + public long getLastAppliedOrWrittenTxId() { + return Math.max(lastAppliedTxId, + editLog != null ? editLog.getLastWrittenTxId() : 0); + } + + public void updateLastAppliedTxIdFromWritten() { + this.lastAppliedTxId = editLog.getLastWrittenTxId(); + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 1003dc4a0f3..f9c7db2268f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -535,6 +535,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, leaseManager.stopMonitor(); } dir.fsImage.editLog.close(); + // Update the fsimage with the last txid that we wrote + // so that the tailer starts from the right spot. 
+ dir.fsImage.updateLastAppliedTxIdFromWritten(); } finally { writeUnlock(); } @@ -2795,8 +2798,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new AssertionError("Invalid state: " + state.getClass()); } return new NNHAStatusHeartbeat(hbState, - Math.max(getFSImage().getLastAppliedTxId(), - getFSImage().getEditLog().getLastWrittenTxId())); + getFSImage().getLastAppliedOrWrittenTxId()); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index bbab3e58f54..182d5f763d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -304,7 +304,7 @@ class FileJournalManager implements JournalManager { for (EditLogFile elf : allLogFiles) { if (fromTxId > elf.getFirstTxId() && fromTxId <= elf.getLastTxId()) { - throw new IOException("Asked for fromTxId " + fromTxId + throw new IllegalStateException("Asked for fromTxId " + fromTxId + " which is in middle of file " + elf.file); } if (fromTxId <= elf.getFirstTxId()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 5840fbaf9dc..57dafa807eb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -1553,7 +1553,7 @@ public class MiniDFSCluster { public void transitionToStandby(int nnIndex) throws IOException, ServiceFailedException { - getHaServiceClient(nnIndex).transitionToActive(); + getHaServiceClient(nnIndex).transitionToStandby(); } /** Wait until the given namenode gets registration from all the datanodes */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java index e4ff4bb732b..0321dff4e16 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java @@ -199,7 +199,7 @@ public class TestFileJournalManager { * This should fail as edit logs must currently be treated as indevisable * units. 
*/ - @Test(expected=IOException.class) + @Test(expected=IllegalStateException.class) public void testAskForTransactionsMidfile() throws IOException { File f = new File(TestEditLog.TEST_DIR + "/filejournaltest2"); NNStorage storage = setupEdits(Collections.singletonList(f.toURI()), @@ -295,7 +295,7 @@ public class TestFileJournalManager { try { assertEquals("[]", getLogsAsString(fjm, 150)); fail("Did not throw when asking for a txn in the middle of a log"); - } catch (IOException ioe) { + } catch (IllegalStateException ioe) { GenericTestUtils.assertExceptionContains( "150 which is in the middle", ioe); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java new file mode 100644 index 00000000000..7ac3c658de9 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Test; + +/** + * Tests state transition from active->standby, and manual failover + * and failback between two namenodes. + */ +public class TestHAStateTransitions { + protected static final Log LOG = LogFactory.getLog( + TestStandbyIsHot.class); + private static final Path TEST_DIR = new Path("/test"); + private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo"); + private static final String TEST_FILE_DATA = + "Hello state transitioning world"; + + /** + * Test which takes a single node and flip flops between + * active and standby mode, making sure it doesn't + * double-play any edits. 
+ */ + @Test + public void testTransitionActiveToStandby() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + FileSystem fs = cluster.getFileSystem(0); + + fs.mkdirs(TEST_DIR); + cluster.transitionToStandby(0); + try { + fs.mkdirs(new Path("/x")); + fail("Didn't throw trying to mutate FS in standby state"); + } catch (Throwable t) { + GenericTestUtils.assertExceptionContains( + "Operation category WRITE is not supported", t); + } + cluster.transitionToActive(0); + + // Create a file, then delete the whole directory recursively. + DFSTestUtil.createFile(fs, new Path(TEST_DIR, "foo"), + 10, (short)1, 1L); + fs.delete(TEST_DIR, true); + + // Now if the standby tries to replay the last segment that it just + // wrote as active, it would fail since it's trying to create a file + // in a non-existent directory. + cluster.transitionToStandby(0); + cluster.transitionToActive(0); + + assertFalse(fs.exists(TEST_DIR)); + + } finally { + cluster.shutdown(); + } + } + + /** + * Tests manual failover back and forth between two NameNodes. + */ + @Test + public void testManualFailoverAndFailback() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + + LOG.info("Starting with NN 0 active"); + FileSystem fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + fs.mkdirs(TEST_DIR); + + LOG.info("Failing over to NN 1"); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + assertTrue(fs.exists(TEST_DIR)); + DFSTestUtil.writeFile(fs, TEST_FILE_PATH, TEST_FILE_DATA); + + LOG.info("Failing over to NN 0"); + cluster.transitionToStandby(1); + cluster.transitionToActive(0); + assertTrue(fs.exists(TEST_DIR)); + assertEquals(TEST_FILE_DATA, + DFSTestUtil.readFile(fs, TEST_FILE_PATH)); + + LOG.info("Removing test file"); + fs.delete(TEST_DIR, true); + assertFalse(fs.exists(TEST_DIR)); + + LOG.info("Failing over to NN 1"); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + assertFalse(fs.exists(TEST_DIR)); + + } finally { + cluster.shutdown(); + } + } +} From 371f4228e86f5ebffb3d8647fb30b8bdc2b777c4 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 16 Dec 2011 18:36:24 +0000 Subject: [PATCH 052/177] HDFS-2684. Fix up some failing unit tests on HA branch. Contributed by Todd Lipcon. 
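The FSImage.loadEdits() hunk in HDFS-2667 above replaces the running update "lastAppliedTxId += thisNumLoaded" with the absolute assignment "lastAppliedTxId = startingTxId + thisNumLoaded - 1". The accumulating form is only correct when lastAppliedTxId already equals startingTxId - 1 on entry to the loop body, while the absolute form pins the value to the last transaction actually read from the stream, which is what the standby's tailer needs when edits are reloaded repeatedly. A small arithmetic sketch follows; all numbers are hypothetical and only the two formulas are taken from the patch.

public class TxIdBookkeepingSketch {
  public static void main(String[] args) {
    // Hypothetical state where the tracked value has fallen out of step
    // with the stream about to be loaded.
    long lastAppliedTxId = 10;  // value the pre-patch code would start from
    long startingTxId = 12;     // first txid expected from the next stream
    int thisNumLoaded = 5;      // the stream supplied txids 12..16

    long accumulated = lastAppliedTxId + thisNumLoaded;  // pre-patch form -> 15
    long absolute = startingTxId + thisNumLoaded - 1;    // patched form   -> 16

    // Only the absolute form lands on the last txid actually read (16).
    System.out.println("accumulated=" + accumulated + " absolute=" + absolute);
  }
}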
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1215241 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hdfs/server/datanode/BPOfferService.java | 2 ++ .../hdfs/server/namenode/BackupNode.java | 5 +++++ .../hdfs/server/namenode/FSEditLog.java | 1 + .../hdfs/server/namenode/FSNamesystem.java | 4 ++-- .../apache/hadoop/hdfs/TestDFSUpgrade.java | 2 +- .../hadoop/hdfs/TestDataTransferProtocol.java | 2 +- .../TestHeartbeatHandling.java | 2 +- .../hdfs/server/namenode/TestBackupNode.java | 12 ++++++----- .../hdfs/server/namenode/TestCheckpoint.java | 20 +++++++++++-------- 10 files changed, 34 insertions(+), 18 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 4ba57020936..9a46ee15dcd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -61,3 +61,5 @@ HDFS-2689. HA: BookKeeperEditLogInputStream doesn't implement isInProgress() (at HDFS-2602. NN should log newly-allocated blocks without losing BlockInfo (atm) HDFS-2667. Fix transition from active to standby (todd) + +HDFS-2684. Fix up some failing unit tests on HA branch (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index 14507960d0b..0ef132553d7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -325,6 +325,8 @@ class BPOfferService { } else { bpRegistration = reg; } + + dn.bpRegistrationSucceeded(bpRegistration, getBlockPoolId()); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index b84d4eb34a3..3acec16874f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -199,6 +199,11 @@ public class BackupNode extends NameNode { checkpointManager.interrupt(); checkpointManager = null; } + + // Abort current log segment - otherwise the NN shutdown code + // will close it gracefully, which is incorrect. 
+ getFSImage().getEditLog().abortCurrentLogSegment(); + // Stop name-node threads super.stop(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 0f66fe644ed..f1f163eed2e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -916,6 +916,7 @@ public class FSEditLog { if (editLogStream != null) { editLogStream.abort(); editLogStream = null; + state = State.BETWEEN_LOG_SEGMENTS; } } catch (IOException e) { LOG.warn("All journals failed to abort", e); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index f9c7db2268f..02bf2118cdf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -495,7 +495,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { FSEditLog editLog = dir.fsImage.getEditLog(); - if (!editLog.isSegmentOpen()) { + if (!editLog.isOpenForWrite()) { // During startup, we're already open for write during initialization. // TODO(HA): consider adding a startup state? editLog.initJournalsForWrite(); @@ -2774,7 +2774,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, DatanodeCommand[] cmds = blockManager.getDatanodeManager().handleHeartbeat( nodeReg, blockPoolId, capacity, dfsUsed, remaining, blockPoolUsed, xceiverCount, maxTransfer, failedVolumes); - if (cmds == null) { + if (cmds == null || cmds.length == 0) { DatanodeCommand cmd = upgradeManager.getBroadcastCommand(); if (cmd != null) { cmds = new DatanodeCommand[] {cmd}; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java index 9246e6e42f6..ad3e6d8c551 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUpgrade.java @@ -52,7 +52,7 @@ import static org.junit.Assert.*; */ public class TestDFSUpgrade { - private static final int EXPECTED_TXID = 33; + private static final int EXPECTED_TXID = 49; private static final Log LOG = LogFactory.getLog(TestDFSUpgrade.class.getName()); private Configuration conf; private int testCounter = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferProtocol.java index 089ab4d837e..af0bf6a19d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDataTransferProtocol.java @@ -302,7 +302,7 @@ public class TestDataTransferProtocol extends TestCase { testWrite(firstBlock, BlockConstructionStage.PIPELINE_SETUP_CREATE, 0L, "Cannot create a RBW block", true); // test PIPELINE_SETUP_APPEND on an existing block - newGS = newBlock.getGenerationStamp() + 
1; + newGS = firstBlock.getGenerationStamp() + 1; testWrite(firstBlock, BlockConstructionStage.PIPELINE_SETUP_APPEND, newGS, "Cannot append to a RBW replica", true); // test PIPELINE_SETUP_APPEND on an existing block diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java index 45741ceae2a..2d7a122c465 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java @@ -110,7 +110,7 @@ public class TestHeartbeatHandling extends TestCase { cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem) .getCommands(); - assertEquals(null, cmds); + assertEquals(0, cmds.length); } } finally { namesystem.writeUnlock(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java index d392718ae22..545d4b5660b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java @@ -240,9 +240,9 @@ public class TestBackupNode extends TestCase { } void testCheckpoint(StartupOption op) throws Exception { - Path file1 = new Path("checkpoint.dat"); - Path file2 = new Path("checkpoint2.dat"); - Path file3 = new Path("backup.dat"); + Path file1 = new Path("/checkpoint.dat"); + Path file2 = new Path("/checkpoint2.dat"); + Path file3 = new Path("/backup.dat"); Configuration conf = new HdfsConfiguration(); short replication = (short)conf.getInt("dfs.replication", 3); @@ -341,11 +341,13 @@ public class TestBackupNode extends TestCase { TestCheckpoint.checkFile(fileSys, file3, replication); // should also be on BN right away assertTrue("file3 does not exist on BackupNode", - op != StartupOption.BACKUP || bnFS.exists(file3)); + op != StartupOption.BACKUP || + backup.getNamesystem().getFileInfo( + file3.toUri().getPath(), false) != null); } catch(IOException e) { LOG.error("Error in TestBackupNode:", e); - assertTrue(e.getLocalizedMessage(), false); + throw new AssertionError(e); } finally { if(backup != null) backup.stop(); if(fileSys != null) fileSys.close(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index 7fe193e913b..f40a89e8491 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -923,10 +923,12 @@ public class TestCheckpoint extends TestCase { throw new IOException(e); } + final int EXPECTED_TXNS_FIRST_SEG = 12; + // the following steps should have happened: - // edits_inprogress_1 -> edits_1-8 (finalized) - // fsimage_8 created - // edits_inprogress_9 created + // edits_inprogress_1 -> edits_1-12 (finalized) + // fsimage_12 created + // edits_inprogress_13 created // for(URI uri : editsDirs) { File ed = new File(uri.getPath()); @@ -938,19 +940,21 @@ 
public class TestCheckpoint extends TestCase { NNStorage.getInProgressEditsFileName(1)); assertFalse(originalEdits.exists()); File finalizedEdits = new File(curDir, - NNStorage.getFinalizedEditsFileName(1,8)); - assertTrue(finalizedEdits.exists()); + NNStorage.getFinalizedEditsFileName(1, EXPECTED_TXNS_FIRST_SEG)); + GenericTestUtils.assertExists(finalizedEdits); assertTrue(finalizedEdits.length() > Integer.SIZE/Byte.SIZE); - assertTrue(new File(ed, "current/" - + NNStorage.getInProgressEditsFileName(9)).exists()); + GenericTestUtils.assertExists(new File(ed, "current/" + + NNStorage.getInProgressEditsFileName( + EXPECTED_TXNS_FIRST_SEG + 1))); } Collection imageDirs = cluster.getNameDirs(0); for (URI uri : imageDirs) { File imageDir = new File(uri.getPath()); File savedImage = new File(imageDir, "current/" - + NNStorage.getImageFileName(8)); + + NNStorage.getImageFileName( + EXPECTED_TXNS_FIRST_SEG)); assertTrue("Should have saved image at " + savedImage, savedImage.exists()); } From 45e81ae415141c306df9746353faec2f9f151ab1 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 19 Dec 2011 05:21:24 +0000 Subject: [PATCH 053/177] HADOOP-7925. Add interface and update CLI to query current state to HAServiceProtocol. Contributed by Eli Collins. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1220611 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 3 +++ .../java/org/apache/hadoop/ha/HAAdmin.java | 22 ++++++++++++++++-- .../apache/hadoop/ha/HAServiceProtocol.java | 23 +++++++++++++++++++ .../org/apache/hadoop/ha/TestHAAdmin.java | 10 ++++++-- 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 56e1d8f823d..0547c3b3b70 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -18,3 +18,6 @@ HADOOP-7921. StandbyException should extend IOException (todd) HADOOP-7928. HA: Client failover policy is incorrectly trying to fail over all IOExceptions (atm) + +HADOOP-7925. 
Add interface and update CLI to query current state to + HAServiceProtocol (eli via todd) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index b880311da41..fff82e83b40 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -24,6 +24,7 @@ import java.util.Map; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.Tool; @@ -44,7 +45,9 @@ public class HAAdmin extends Configured implements Tool { .put("-transitionToActive", new UsageInfo("", "Transitions the daemon into Active state")) .put("-transitionToStandby", - new UsageInfo("", "Transitions the daemon into Passive state")) + new UsageInfo("", "Transitions the daemon into Standby state")) + .put("-getServiceState", + new UsageInfo("", "Returns the state of the daemon")) .put("-checkHealth", new UsageInfo("", "Requests that the daemon perform a health check.\n" + @@ -123,6 +126,19 @@ public class HAAdmin extends Configured implements Tool { return 0; } + private int getServiceState(final String[] argv) + throws IOException, ServiceFailedException { + if (argv.length != 2) { + errOut.println("getServiceState: incorrect number of arguments"); + printUsage(errOut, "-getServiceState"); + return -1; + } + + HAServiceProtocol proto = getProtocol(argv[1]); + out.println(proto.getServiceState()); + return 0; + } + /** * Return a proxy to the specified target host:port. */ @@ -155,6 +171,8 @@ public class HAAdmin extends Configured implements Tool { return transitionToActive(argv); } else if ("-transitionToStandby".equals(cmd)) { return transitionToStandby(argv); + } else if ("-getServiceState".equals(cmd)) { + return getServiceState(argv); } else if ("-checkHealth".equals(cmd)) { return checkHealth(argv); } else if ("-help".equals(cmd)) { @@ -182,7 +200,7 @@ public class HAAdmin extends Configured implements Tool { return -1; } - errOut .println(cmd + " [" + usageInfo.args + "]: " + usageInfo.help); + errOut.println(cmd + " [" + usageInfo.args + "]: " + usageInfo.help); return 1; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java index 20f0d04bc21..86f5f675ce3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java @@ -35,6 +35,24 @@ public interface HAServiceProtocol extends VersionedProtocol { */ public static final long versionID = 1L; + /** + * An HA service may be in active or standby state. + */ + public enum HAServiceState { + ACTIVE("active"), + STANDBY("standby"); + + private String name; + + HAServiceState(String name) { + this.name = name; + } + + public String toString() { + return name; + } + } + /** * Monitor the health of service. This periodically called by the HA * frameworks to monitor the health of the service. @@ -69,4 +87,9 @@ public interface HAServiceProtocol extends VersionedProtocol { * if transition from active to standby fails. 
*/ public void transitionToStandby() throws ServiceFailedException; + + /** + * Return the current state of the service. + */ + public HAServiceState getServiceState(); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java index 3cddbbe8a21..b465029d47f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java @@ -79,7 +79,7 @@ public class TestHAAdmin { assertOutputContains("transitionToActive: incorrect number of arguments"); assertEquals(-1, runTool("-transitionToActive", "x", "y")); assertOutputContains("transitionToActive: incorrect number of arguments"); -} + } @Test public void testHelp() throws Exception { @@ -99,7 +99,13 @@ public class TestHAAdmin { assertEquals(0, runTool("-transitionToStandby", "xxx")); Mockito.verify(mockProtocol).transitionToStandby(); } - + + @Test + public void testGetServiceState() throws Exception { + assertEquals(0, runTool("-getServiceState", "xxx")); + Mockito.verify(mockProtocol).getServiceState(); + } + @Test public void testCheckHealth() throws Exception { assertEquals(0, runTool("-checkHealth", "xxx")); From 329717264f8380a1f0fd2cdabd1bf0517ff1067b Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 19 Dec 2011 05:24:32 +0000 Subject: [PATCH 054/177] HDFS-2679. Add interface to query current state to HAServiceProtocol. Contributed by Eli Collins. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1220612 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hadoop/hdfs/server/namenode/NameNode.java | 7 +++++- .../server/namenode/NameNodeRpcServer.java | 7 +++++- .../hdfs/server/namenode/ha/ActiveState.java | 4 ++-- .../hdfs/server/namenode/ha/HAState.java | 22 ++++++++++++++----- .../hdfs/server/namenode/ha/StandbyState.java | 3 ++- 6 files changed, 34 insertions(+), 11 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 9a46ee15dcd..c475d886bc4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -63,3 +63,5 @@ HDFS-2602. NN should log newly-allocated blocks without losing BlockInfo (atm) HDFS-2667. Fix transition from active to standby (todd) HDFS-2684. Fix up some failing unit tests on HA branch (todd) + +HDFS-2679. 
Add interface to query current state to HAServiceProtocol (eli via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 25cbb2e08b6..7d8ecd993b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -30,6 +30,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HealthCheckFailedException; import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -908,7 +909,11 @@ public class NameNode { } state.setState(haContext, STANDBY_STATE); } - + + synchronized HAServiceState getServiceState() { + return state.getServiceState(); + } + /** Check if an operation of given category is allowed */ protected synchronized void checkOperation(final OperationCategory op) throws StandbyException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index d4c5ef08502..cfea22f5a26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -1061,7 +1061,12 @@ class NameNodeRpcServer implements NamenodeProtocols { public synchronized void transitionToStandby() throws ServiceFailedException { nn.transitionToStandby(); } - + + @Override // HAServiceProtocol + public synchronized HAServiceState getServiceState() { + return nn.getServiceState(); + } + /** * Verify version. * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java index f893cc28833..6da3b8ecead 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java @@ -20,10 +20,10 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; -import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException; /** * Active state of the namenode. 
In this state, namenode provides the namenode @@ -33,7 +33,7 @@ import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException; @InterfaceAudience.Private public class ActiveState extends HAState { public ActiveState() { - super("active"); + super(HAServiceState.ACTIVE); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java index d3ff252cbc2..7dfab914939 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.UnsupportedActionException; @@ -28,14 +29,21 @@ import org.apache.hadoop.ipc.StandbyException; */ @InterfaceAudience.Private abstract public class HAState { - protected final String name; + protected final HAServiceState state; /** * Constructor * @param name Name of the state. */ - public HAState(String name) { - this.name = name; + public HAState(HAServiceState state) { + this.state = state; + } + + /** + * @return the generic service state + */ + public HAServiceState getServiceState() { + return state; } /** @@ -92,9 +100,11 @@ abstract public class HAState { */ public abstract void checkOperation(final HAContext context, final OperationCategory op) throws StandbyException; - - @Override + + /** + * @return String representation of the service state. + */ public String toString() { - return super.toString(); + return state.toString(); } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java index aec86eae911..eb34f0f43a4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.ipc.StandbyException; @@ -39,7 +40,7 @@ import org.apache.hadoop.ipc.StandbyException; @InterfaceAudience.Private public class StandbyState extends HAState { public StandbyState() { - super("standby"); + super(HAServiceState.STANDBY); } @Override From 625d1b380aa44a60882b3cd1a032bdf231c5bdb6 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 19 Dec 2011 05:26:07 +0000 Subject: [PATCH 055/177] HDFS-2677. Web UI should indicate the NN state. Contributed by Eli Collins. 
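HADOOP-7925 and HDFS-2679 above expose the daemon's HA state through HAServiceProtocol.getServiceState() and the new HAServiceState enum, and the dfshealth.jsp change that follows reads the same enum to label the page. Below is a minimal caller sketch; only getServiceState() and HAServiceState come from the patches above, the helper class name is hypothetical, and obtaining the RPC proxy is left out.

import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;

/** Hypothetical monitoring helper built on the state query added above. */
public final class HAStateProbe {
  private HAStateProbe() {}

  /** True if the daemon behind the given proxy reports ACTIVE. */
  public static boolean isActive(HAServiceProtocol proxy) {
    return proxy.getServiceState() == HAServiceState.ACTIVE;
  }

  /** The lower-case state name ("active" or "standby"), as HAAdmin -getServiceState prints it. */
  public static String stateName(HAServiceProtocol proxy) {
    return proxy.getServiceState().toString();
  }
}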
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1220613 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index c475d886bc4..caee9333980 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -65,3 +65,5 @@ HDFS-2667. Fix transition from active to standby (todd) HDFS-2684. Fix up some failing unit tests on HA branch (todd) HDFS-2679. Add interface to query current state to HAServiceProtocol (eli via todd) + +HDFS-2677. Web UI should indicate the NN state. (eli via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp index 648200ce8a7..b6d96a0ff3e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp @@ -20,6 +20,7 @@ <%@ page contentType="text/html; charset=UTF-8" import="org.apache.hadoop.util.ServletUtil" + import="org.apache.hadoop.ha.HAServiceProtocol.HAServiceState" %> <%! //for java.io.Serializable @@ -30,6 +31,7 @@ NameNode nn = NameNodeHttpServer.getNameNodeFromContext(application); FSNamesystem fsn = nn.getNamesystem(); String namenodeRole = nn.getRole().toString(); + String namenodeState = HAServiceState.ACTIVE.equals(nn.getServiceState()) ? "active" : "standby"; String namenodeLabel = nn.getNameNodeAddress().getHostName() + ":" + nn.getNameNodeAddress().getPort(); %> @@ -40,7 +42,7 @@ Hadoop <%=namenodeRole%> <%=namenodeLabel%> -

<h1><%=namenodeRole%> '<%=namenodeLabel%>'</h1>
+<h1><%=namenodeRole%> '<%=namenodeLabel%>' (<%=namenodeState%>)</h1>
 <%= NamenodeJspHelper.getVersionTable(fsn) %>
 Browse the filesystem
      From 35206c716e85547360063c6566697a529d9bd09d Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Mon, 19 Dec 2011 07:19:36 +0000 Subject: [PATCH 056/177] Merge trunk into HA branch. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1220631 13f79535-47bb-0310-9956-ffa450edef68 From 4bb0456c66ae3461ac368647c698c753534b0225 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 19 Dec 2011 20:57:13 +0000 Subject: [PATCH 057/177] HDFS-2678. When a FailoverProxyProvider is used, DFSClient should not retry connection ten times before failing over. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1220942 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../java/org/apache/hadoop/hdfs/DFSConfigKeys.java | 2 ++ .../namenode/ha/ConfiguredFailoverProxyProvider.java | 10 +++++++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index caee9333980..19c8638d2d4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -67,3 +67,5 @@ HDFS-2684. Fix up some failing unit tests on HA branch (todd) HDFS-2679. Add interface to query current state to HAServiceProtocol (eli via todd) HDFS-2677. Web UI should indicate the NN state. (eli via todd) + +HDFS-2678. When a FailoverProxyProvider is used, DFSClient should not retry connection ten times before failing over (atm via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index f30b58e9169..b17dc5b0f4f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -55,6 +55,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT = 500; public static final String DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY = "dfs.client.failover.sleep.max.millis"; public static final int DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT = 15000; + public static final String DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_KEY = "dfs.client.failover.connection.retries"; + public static final int DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_DEFAULT = 0; public static final String DFS_NAMENODE_BACKUP_ADDRESS_KEY = "dfs.namenode.backup.address"; public static final String DFS_NAMENODE_BACKUP_ADDRESS_DEFAULT = "localhost:50100"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index 8239c5e03b0..75cd2086843 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -29,6 +29,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; +import 
org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.io.retry.FailoverProxyProvider; @@ -87,7 +89,13 @@ public class ConfiguredFailoverProxyProvider implements FailoverProxyProvider, @Override public synchronized void setConf(Configuration conf) { - this.conf = conf; + this.conf = new Configuration(conf); + int maxRetries = this.conf.getInt( + DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_KEY, + DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_DEFAULT); + this.conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, + maxRetries); try { ugi = UserGroupInformation.getCurrentUser(); From 009dfed8d7b3056cd8915f74d84dc06850cb8a05 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 19 Dec 2011 21:32:55 +0000 Subject: [PATCH 058/177] HADOOP-7932. Make client connection retries on socket time outs configurable. Contributed by Uma Maheswara Rao G. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1220957 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 3 ++ .../fs/CommonConfigurationKeysPublic.java | 5 +++ .../java/org/apache/hadoop/ipc/Client.java | 26 ++++++++++---- .../java/org/apache/hadoop/ipc/TestIPC.java | 34 +++++++++++++++++++ 4 files changed, 61 insertions(+), 7 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 0547c3b3b70..1663eee16c5 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -21,3 +21,6 @@ HADOOP-7928. HA: Client failover policy is incorrectly trying to fail over all HADOOP-7925. Add interface and update CLI to query current state to HAServiceProtocol (eli via todd) + +HADOOP-7932. Make client connection retries on socket time outs configurable. 
+ (Uma Maheswara Rao G via todd) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java index 3ea4ed70c2f..534046a9abb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java @@ -172,6 +172,11 @@ public class CommonConfigurationKeysPublic { /** Default value for IPC_CLIENT_CONNECT_MAX_RETRIES_KEY */ public static final int IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT = 10; /** See core-default.xml */ + public static final String IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY = + "ipc.client.connect.max.retries.on.timeouts"; + /** Default value for IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY */ + public static final int IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT = 45; + /** See core-default.xml */ public static final String IPC_CLIENT_TCPNODELAY_KEY = "ipc.client.tcpnodelay"; /** Defalt value for IPC_CLIENT_TCPNODELAY_KEY */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java index a6c2b472825..c6364939111 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java @@ -48,6 +48,7 @@ import org.apache.commons.logging.*; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.ipc.RpcPayloadHeader.*; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Text; @@ -224,6 +225,8 @@ public class Client { private int maxIdleTime; //connections will be culled if it was idle for //maxIdleTime msecs private int maxRetries; //the max. no. of retries for socket connections + // the max. no. of retries for socket connections on time out exceptions + private int maxRetriesOnSocketTimeouts; private boolean tcpNoDelay; // if T then disable Nagle's Algorithm private boolean doPing; //do we need to send ping message private int pingInterval; // how often sends ping to the server in msecs @@ -247,6 +250,7 @@ public class Client { this.rpcTimeout = remoteId.getRpcTimeout(); this.maxIdleTime = remoteId.getMaxIdleTime(); this.maxRetries = remoteId.getMaxRetries(); + this.maxRetriesOnSocketTimeouts = remoteId.getMaxRetriesOnSocketTimeouts(); this.tcpNoDelay = remoteId.getTcpNoDelay(); this.doPing = remoteId.getDoPing(); this.pingInterval = remoteId.getPingInterval(); @@ -475,11 +479,8 @@ public class Client { if (updateAddress()) { timeoutFailures = ioFailures = 0; } - /* - * The max number of retries is 45, which amounts to 20s*45 = 15 - * minutes retries. - */ - handleConnectionFailure(timeoutFailures++, 45, toe); + handleConnectionFailure(timeoutFailures++, + maxRetriesOnSocketTimeouts, toe); } catch (IOException ie) { if (updateAddress()) { timeoutFailures = ioFailures = 0; @@ -1263,6 +1264,8 @@ public class Client { private int maxIdleTime; //connections will be culled if it was idle for //maxIdleTime msecs private int maxRetries; //the max. no. 
of retries for socket connections + // the max. no. of retries for socket connections on time out exceptions + private int maxRetriesOnSocketTimeouts; private boolean tcpNoDelay; // if T then disable Nagle's Algorithm private boolean doPing; //do we need to send ping message private int pingInterval; // how often sends ping to the server in msecs @@ -1270,8 +1273,8 @@ public class Client { ConnectionId(InetSocketAddress address, Class protocol, UserGroupInformation ticket, int rpcTimeout, String serverPrincipal, int maxIdleTime, - int maxRetries, boolean tcpNoDelay, - boolean doPing, int pingInterval) { + int maxRetries, int maxRetriesOnSocketTimeouts, + boolean tcpNoDelay, boolean doPing, int pingInterval) { this.protocol = protocol; this.address = address; this.ticket = ticket; @@ -1279,6 +1282,7 @@ public class Client { this.serverPrincipal = serverPrincipal; this.maxIdleTime = maxIdleTime; this.maxRetries = maxRetries; + this.maxRetriesOnSocketTimeouts = maxRetriesOnSocketTimeouts; this.tcpNoDelay = tcpNoDelay; this.doPing = doPing; this.pingInterval = pingInterval; @@ -1312,6 +1316,11 @@ public class Client { return maxRetries; } + /** max connection retries on socket time outs */ + public int getMaxRetriesOnSocketTimeouts() { + return maxRetriesOnSocketTimeouts; + } + boolean getTcpNoDelay() { return tcpNoDelay; } @@ -1343,6 +1352,9 @@ public class Client { rpcTimeout, remotePrincipal, conf.getInt("ipc.client.connection.maxidletime", 10000), // 10s conf.getInt("ipc.client.connect.max.retries", 10), + conf.getInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT), conf.getBoolean("ipc.client.tcpnodelay", false), doPing, (doPing ? 
Client.getPingInterval(conf) : 0)); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java index 1f3e67a4f9a..efb2dc1126d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java @@ -20,7 +20,9 @@ package org.apache.hadoop.ipc; import org.apache.commons.logging.*; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; @@ -590,6 +592,38 @@ public class TestIPC { Server.RECEIVED_HTTP_REQ_RESPONSE.getBytes()); } + @Test + public void testConnectionRetriesOnSocketTimeoutExceptions() throws Exception { + Configuration conf = new Configuration(); + // set max retries to 0 + conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + 0); + assertRetriesOnSocketTimeouts(conf, 1); + + // set max retries to 3 + conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + 3); + assertRetriesOnSocketTimeouts(conf, 4); + } + + private void assertRetriesOnSocketTimeouts(Configuration conf, + int maxTimeoutRetries) throws IOException, InterruptedException { + SocketFactory mockFactory = Mockito.mock(SocketFactory.class); + doThrow(new SocketTimeoutException()).when(mockFactory).createSocket(); + Client client = new Client(IntWritable.class, conf, mockFactory); + InetSocketAddress address = new InetSocketAddress("127.0.0.1", 9090); + try { + client.call(new IntWritable(RANDOM.nextInt()), address, null, null, 0, + conf); + fail("Not throwing the SocketTimeoutException"); + } catch (SocketTimeoutException e) { + Mockito.verify(mockFactory, Mockito.times(maxTimeoutRetries)) + .createSocket(); + } + } + private void doIpcVersionTest( byte[] requestData, byte[] expectedResponse) throws Exception { From 57ef902bbc798eb25e86a7d41571b0be4f2270f0 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 19 Dec 2011 21:49:39 +0000 Subject: [PATCH 059/177] HDFS-2682. When a FailoverProxyProvider is used, Client should not retry for 45 times if it is timing out to connect to server. Contributed by Uma Maheswara Rao G. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1220965 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java | 2 ++ .../namenode/ha/ConfiguredFailoverProxyProvider.java | 8 +++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 19c8638d2d4..fdd6d6e06eb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -69,3 +69,5 @@ HDFS-2679. Add interface to query current state to HAServiceProtocol (eli via to HDFS-2677. Web UI should indicate the NN state. (eli via todd) HDFS-2678. When a FailoverProxyProvider is used, DFSClient should not retry connection ten times before failing over (atm via todd) + +HDFS-2682. 
When a FailoverProxyProvider is used, Client should not retry for 45 times if it is timing out to connect to server. (Uma Maheswara Rao G via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index b17dc5b0f4f..a5debe0a29d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -57,6 +57,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT = 15000; public static final String DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_KEY = "dfs.client.failover.connection.retries"; public static final int DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_DEFAULT = 0; + public static final String DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY = "dfs.client.failover.connection.retries.on.timeouts"; + public static final int DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT = 0; public static final String DFS_NAMENODE_BACKUP_ADDRESS_KEY = "dfs.namenode.backup.address"; public static final String DFS_NAMENODE_BACKUP_ADDRESS_DEFAULT = "localhost:50100"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index 75cd2086843..65e4655b52a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -21,7 +21,6 @@ import java.io.Closeable; import java.io.IOException; import java.net.InetSocketAddress; import java.util.ArrayList; -import java.util.Collection; import java.util.List; import java.util.Map; @@ -96,6 +95,13 @@ public class ConfiguredFailoverProxyProvider implements FailoverProxyProvider, this.conf.setInt( CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, maxRetries); + + int maxRetriesOnSocketTimeouts = this.conf.getInt( + DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT); + this.conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + maxRetriesOnSocketTimeouts); try { ugi = UserGroupInformation.getCurrentUser(); From 36d1c49486587c2dbb193e8538b1d4510c462fa6 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 21 Dec 2011 03:03:23 +0000 Subject: [PATCH 060/177] HDFS-2693. Fix synchronization issues around state transition. Contributed by Todd Lipcon. 
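The HDFS-2682 change above, together with the ipc.client.connect.max.retries.on.timeouts key it builds on, replaces the hard-coded 45 connect-timeout retries with configuration: Client now reads the IPC key (default 45), and ConfiguredFailoverProxyProvider overrides it from dfs.client.failover.connection.retries.on.timeouts (default 0) so an HA client fails over instead of stalling for roughly 15 minutes. A minimal client-side sketch; the logical URI hdfs://ha-nn and the value 2 are illustrative, and the usual HA proxy-provider keys are assumed to be configured elsewhere:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.DFSConfigKeys;
    import org.apache.hadoop.hdfs.HdfsConfiguration;

    public class FailoverRetryExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new HdfsConfiguration();
        // HA client: the failover proxy provider copies this value into the IPC key,
        // so a couple of timed-out connects are enough to trigger failover.
        conf.setInt(
            DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY, 2);
        // Non-HA client: tune the IPC-level key directly instead.
        conf.setInt(
            CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, 2);
        FileSystem fs = FileSystem.get(URI.create("hdfs://ha-nn"), conf);
        System.out.println(fs.getUri());
      }
    }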
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1221582 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/HAUtil.java | 12 ++ .../server/blockmanagement/BlockManager.java | 26 ++-- .../hdfs/server/namenode/BackupNode.java | 29 ++-- .../hdfs/server/namenode/FSNamesystem.java | 133 ++++++++++++++++-- .../hadoop/hdfs/server/namenode/NameNode.java | 37 ++++- .../server/namenode/NameNodeRpcServer.java | 66 +-------- .../server/namenode/ha/EditLogTailer.java | 48 ++++--- .../hdfs/server/namenode/ha/HAContext.java | 27 ++++ .../hdfs/server/namenode/ha/HAState.java | 13 +- .../hdfs/server/namenode/ha/StandbyState.java | 3 + .../apache/hadoop/hdfs/MiniDFSCluster.java | 19 +++ .../hadoop/hdfs/TestDFSClientFailover.java | 14 +- .../hadoop/hdfs/TestFileCorruption.java | 10 +- .../hdfs/server/namenode/NameNodeAdapter.java | 12 +- .../hdfs/server/namenode/TestBackupNode.java | 3 + .../server/namenode/ha/TestEditLogTailer.java | 2 + .../ha/TestEditLogsDuringFailover.java | 2 + .../namenode/ha/TestHAStateTransitions.java | 59 ++++++++ .../server/namenode/ha/TestStandbyIsHot.java | 3 + .../namenode/metrics/TestNameNodeMetrics.java | 14 +- .../apache/hadoop/test/GenericTestUtils.java | 32 +++++ 22 files changed, 432 insertions(+), 134 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index fdd6d6e06eb..6ffb0dfc4d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -71,3 +71,5 @@ HDFS-2677. Web UI should indicate the NN state. (eli via todd) HDFS-2678. When a FailoverProxyProvider is used, DFSClient should not retry connection ten times before failing over (atm via todd) HDFS-2682. When a FailoverProxyProvider is used, Client should not retry for 45 times if it is timing out to connect to server. (Uma Maheswara Rao G via todd) + +HDFS-2693. Fix synchronization issues around state transition (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 24537a3d38c..6a619712c48 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -99,4 +99,16 @@ public class HAUtil { return null; } + /** + * This is used only by tests at the moment. + * @return true if the NN should allow read operations while in standby mode. 
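shouldAllowStandbyReads and the setter added below gate the new dfs.ha.allow.stale.reads flag (default false); when a test enables it, StandbyState.checkOperation lets READ operations through even though the standby namespace may lag behind the active. The HA tests later in this patch enable it along these lines (condensed fragment, not a full test class):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.HAUtil;
    import org.apache.hadoop.hdfs.MiniDFSCluster;
    import org.apache.hadoop.hdfs.MiniDFSNNTopology;

    Configuration conf = new Configuration();
    HAUtil.setAllowStandbyReads(conf, true);   // same as conf.setBoolean("dfs.ha.allow.stale.reads", true)
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .build();
    // Reads served by the standby NN now pass checkOperation(OperationCategory.READ)
    // instead of failing with StandbyException.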
+ */ + public static boolean shouldAllowStandbyReads(Configuration conf) { + return conf.getBoolean("dfs.ha.allow.stale.reads", false); + } + + public static void setAllowStandbyReads(Configuration conf, boolean val) { + conf.setBoolean("dfs.ha.allow.stale.reads", val); + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 03a851a7b5a..abefbb562d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -817,22 +817,18 @@ public class BlockManager { */ public void findAndMarkBlockAsCorrupt(final ExtendedBlock blk, final DatanodeInfo dn) throws IOException { - namesystem.writeLock(); - try { - final BlockInfo storedBlock = getStoredBlock(blk.getLocalBlock()); - if (storedBlock == null) { - // Check if the replica is in the blockMap, if not - // ignore the request for now. This could happen when BlockScanner - // thread of Datanode reports bad block before Block reports are sent - // by the Datanode on startup - NameNode.stateChangeLog.info("BLOCK* findAndMarkBlockAsCorrupt: " - + blk + " not found."); - return; - } - markBlockAsCorrupt(storedBlock, dn); - } finally { - namesystem.writeUnlock(); + assert namesystem.hasWriteLock(); + final BlockInfo storedBlock = getStoredBlock(blk.getLocalBlock()); + if (storedBlock == null) { + // Check if the replica is in the blockMap, if not + // ignore the request for now. This could happen when BlockScanner + // thread of Datanode reports bad block before Block reports are sent + // by the Datanode on startup + NameNode.stateChangeLog.info("BLOCK* findAndMarkBlockAsCorrupt: " + + blk + " not found."); + return; } + markBlockAsCorrupt(storedBlock, dn); } private void markBlockAsCorrupt(BlockInfo storedBlock, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index 3acec16874f..c54743962ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -244,18 +244,17 @@ public class BackupNode extends NameNode { @Override public void startLogSegment(NamenodeRegistration registration, long txid) throws IOException { - nn.checkOperation(OperationCategory.JOURNAL); + namesystem.checkOperation(OperationCategory.JOURNAL); verifyRequest(registration); - verifyRequest(registration); - getBNImage().namenodeStartedLogSegment(txid); + getBNImage().namenodeStartedLogSegment(txid); } @Override public void journal(NamenodeRegistration nnReg, long firstTxId, int numTxns, byte[] records) throws IOException { - nn.checkOperation(OperationCategory.JOURNAL); + namesystem.checkOperation(OperationCategory.JOURNAL); verifyRequest(nnReg); if(!nnRpcAddress.equals(nnReg.getAddress())) throw new IOException("Journal request from unexpected name-node: " @@ -401,13 +400,21 @@ public class BackupNode extends NameNode { return clusterId; } - @Override // NameNode - protected void checkOperation(OperationCategory op) - throws StandbyException { - if (OperationCategory.JOURNAL != op) { - 
String msg = "Operation category " + op - + " is not supported at the BackupNode"; - throw new StandbyException(msg); + @Override + protected NameNodeHAContext createHAContext() { + return new BNHAContext(); + } + + private class BNHAContext extends NameNodeHAContext { + @Override // NameNode + public void checkOperation(OperationCategory op) + throws StandbyException { + if (OperationCategory.JOURNAL != op && + !(OperationCategory.READ == op && allowStaleStandbyReads)) { + String msg = "Operation category " + op + + " is not supported at the BackupNode"; + throw new StandbyException(msg); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 02bf2118cdf..4c4aac3e951 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -149,6 +149,7 @@ import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport; import org.apache.hadoop.hdfs.server.common.Util; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; +import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.BlockReceivedDeleteMessage; import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.BlockReportMessage; import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.CommitBlockSynchronizationMessage; @@ -170,6 +171,7 @@ import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -563,6 +565,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, dir.fsImage.editLog.close(); } + + void checkOperation(OperationCategory op) throws StandbyException { + haContext.checkOperation(op); + } + public static Collection getNamespaceDirs(Configuration conf) { return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY); } @@ -793,7 +800,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return serverDefaults.getBlockSize(); } - FsServerDefaults getServerDefaults() { + FsServerDefaults getServerDefaults() throws StandbyException { + checkOperation(OperationCategory.READ); return serverDefaults; } @@ -820,6 +828,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, HdfsFileStatus resultingStat = null; writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { throw new SafeModeException("Cannot set permission for " + src, safeMode); } @@ -849,6 +859,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, HdfsFileStatus resultingStat = null; writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { throw new SafeModeException("Cannot set owner for " + src, safeMode); } @@ -939,13 +951,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } else { // second attempt is with write lock writeLock(); // writelock is needed to set accesstime } - - // if the namenode is in safemode, then 
do not update access time - if (isInSafeMode()) { - doAccessTime = false; - } - try { + checkOperation(OperationCategory.READ); + + // if the namenode is in safemode, then do not update access time + if (isInSafeMode()) { + doAccessTime = false; + } + long now = now(); INodeFile inode = dir.getFileINode(src); if (inode == null) { @@ -1013,6 +1026,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, HdfsFileStatus resultingStat = null; writeLock(); try { + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot concat " + target, safeMode); } @@ -1144,6 +1158,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } writeLock(); try { + checkOperation(OperationCategory.WRITE); + // Write access is required to set access and modification times if (isPermissionEnabled) { checkPathAccess(src, FsAction.WRITE); @@ -1174,6 +1190,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, HdfsFileStatus resultingStat = null; writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (!createParent) { verifyParentDir(link); } @@ -1243,6 +1261,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, final boolean isFile; writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { throw new SafeModeException("Cannot set replication for " + src, safeMode); } @@ -1273,6 +1293,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws IOException, UnresolvedLinkException { readLock(); try { + checkOperation(OperationCategory.READ); if (isPermissionEnabled) { checkTraverse(filename); } @@ -1315,6 +1336,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, FileNotFoundException, ParentNotDirectoryException, IOException { writeLock(); try { + checkOperation(OperationCategory.WRITE); + startFileInternal(src, permissions, holder, clientMachine, flag, createParent, replication, blockSize); } finally { @@ -1495,6 +1518,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws IOException { writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { throw new SafeModeException( "Cannot recover the lease of " + src, safeMode); @@ -1614,6 +1639,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LocatedBlock lb = null; writeLock(); try { + checkOperation(OperationCategory.WRITE); + lb = startFileInternal(src, null, holder, clientMachine, EnumSet.of(CreateFlag.APPEND), false, blockManager.maxReplication, 0); @@ -1678,6 +1705,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { throw new SafeModeException("Cannot add block to " + src, safeMode); } @@ -1711,6 +1740,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // Allocate a new block and record it in the INode. 
writeLock(); try { + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot add block to " + src, safeMode); } @@ -1757,6 +1787,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, final List chosen; readLock(); try { + checkOperation(OperationCategory.WRITE); //check safe mode if (isInSafeMode()) { throw new SafeModeException("Cannot add datanode; src=" + src @@ -1798,6 +1829,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, UnresolvedLinkException, IOException { writeLock(); try { + checkOperation(OperationCategory.WRITE); // // Remove the block from the pending creates list // @@ -1873,6 +1905,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, boolean success = false; writeLock(); try { + checkOperation(OperationCategory.WRITE); + success = completeFileInternal(src, holder, ExtendedBlock.getLocalBlock(last)); } finally { @@ -2012,6 +2046,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } writeLock(); try { + checkOperation(OperationCategory.WRITE); + status = renameToInternal(src, dst); if (status && auditLog.isInfoEnabled() && isExternalInvocation()) { resultingStat = dir.getFileInfo(dst, false); @@ -2067,6 +2103,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } writeLock(); try { + checkOperation(OperationCategory.WRITE); + renameToInternal(src, dst, options); if (auditLog.isInfoEnabled() && isExternalInvocation()) { resultingStat = dir.getFileInfo(dst, false); @@ -2145,6 +2183,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot delete " + src, safeMode); } @@ -2222,11 +2261,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * * @return object containing information regarding the file * or null if file not found + * @throws StandbyException */ HdfsFileStatus getFileInfo(String src, boolean resolveLink) - throws AccessControlException, UnresolvedLinkException { + throws AccessControlException, UnresolvedLinkException, + StandbyException { readLock(); try { + checkOperation(OperationCategory.READ); + if (!DFSUtil.isValidName(src)) { throw new InvalidPathException("Invalid file name: " + src); } @@ -2250,6 +2293,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } writeLock(); try { + checkOperation(OperationCategory.WRITE); + status = mkdirsInternal(src, permissions, createParent); } finally { writeUnlock(); @@ -2304,9 +2349,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } ContentSummary getContentSummary(String src) throws AccessControlException, - FileNotFoundException, UnresolvedLinkException { + FileNotFoundException, UnresolvedLinkException, StandbyException { readLock(); try { + checkOperation(OperationCategory.READ); + if (isPermissionEnabled) { checkPermission(src, false, null, null, null, FsAction.READ_EXECUTE); } @@ -2325,6 +2372,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws IOException, UnresolvedLinkException { writeLock(); try { + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot set quota on " + path, safeMode); } @@ -2349,6 +2397,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, + src + " for " + clientName); writeLock(); try { + checkOperation(OperationCategory.WRITE); if (isInSafeMode()) { throw new SafeModeException("Cannot fsync 
file " + src, safeMode); } @@ -2558,6 +2607,20 @@ public class FSNamesystem implements Namesystem, FSClusterStats, String src = ""; writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (haContext.getState().equals(NameNode.STANDBY_STATE)) { + // TODO(HA) we'll never get here, since we check for WRITE operation above! + if (isGenStampInFuture(newgenerationstamp)) { + LOG.info("Required GS=" + newgenerationstamp + + ", Queuing commitBlockSynchronization message"); + getPendingDataNodeMessages().queueMessage( + new PendingDataNodeMessages.CommitBlockSynchronizationMessage( + lastblock, newgenerationstamp, newlength, closeFile, deleteblock, + newtargets, newgenerationstamp)); + return; + } + } + if (isInSafeMode()) { throw new SafeModeException( "Cannot commitBlockSynchronization while in safe mode", @@ -2658,6 +2721,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void renewLease(String holder) throws IOException { writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { throw new SafeModeException("Cannot renew lease for " + holder, safeMode); } @@ -2685,6 +2750,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, DirectoryListing dl; readLock(); try { + checkOperation(OperationCategory.READ); + if (isPermissionEnabled) { if (dir.isDir(src)) { checkPathAccess(src, FsAction.READ_EXECUTE); @@ -3699,6 +3766,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws IOException { writeLock(); try { + checkOperation(OperationCategory.CHECKPOINT); + if (isInSafeMode()) { throw new SafeModeException("Checkpoint not started", safeMode); } @@ -3715,6 +3784,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, CheckpointSignature sig) throws IOException { readLock(); try { + checkOperation(OperationCategory.CHECKPOINT); + if (isInSafeMode()) { throw new SafeModeException("Checkpoint not ended", safeMode); } @@ -3976,6 +4047,28 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return pendingFile; } + /** + * Client is reporting some bad block locations. 
+ */ + void reportBadBlocks(LocatedBlock[] blocks) throws IOException { + writeLock(); + try { + checkOperation(OperationCategory.WRITE); + + NameNode.stateChangeLog.info("*DIR* NameNode.reportBadBlocks"); + for (int i = 0; i < blocks.length; i++) { + ExtendedBlock blk = blocks[i].getBlock(); + DatanodeInfo[] nodes = blocks[i].getLocations(); + for (int j = 0; j < nodes.length; j++) { + DatanodeInfo dn = nodes[j]; + blockManager.findAndMarkBlockAsCorrupt(blk, dn); + } + } + } finally { + writeUnlock(); + } + } + /** * Get a new generation stamp together with an access token for * a block under construction @@ -3993,6 +4086,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LocatedBlock locatedBlock; writeLock(); try { + checkOperation(OperationCategory.WRITE); + // check vadility of parameters checkUCBlock(block, clientName); @@ -4022,6 +4117,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws IOException { writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { throw new SafeModeException("Pipeline not updated", safeMode); } @@ -4222,6 +4319,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, readLock(); try { + checkOperation(OperationCategory.READ); + if (!isPopulatingReplQueues()) { throw new IOException("Cannot run listCorruptFileBlocks because " + "replication queues have not been initialized."); @@ -4314,6 +4413,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, Token token; writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { throw new SafeModeException("Cannot issue delegation token", safeMode); } @@ -4358,6 +4459,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, long expiryTime; writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { throw new SafeModeException("Cannot renew delegation token", safeMode); } @@ -4388,6 +4491,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throws IOException { writeLock(); try { + checkOperation(OperationCategory.WRITE); + if (isInSafeMode()) { throw new SafeModeException("Cannot cancel delegation token", safeMode); } @@ -4727,4 +4832,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, public EditLogTailer getEditLogTailer() { return editLogTailer; } + + @VisibleForTesting + void setFsLockForTests(ReentrantReadWriteLock lock) { + this.fsLock = lock; + } + + @VisibleForTesting + ReentrantReadWriteLock getFsLockForTests() { + return fsLock; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 7d8ecd993b3..54d4d2f2901 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -182,6 +182,7 @@ public class NameNode { private HAState state; private final boolean haEnabled; private final HAContext haContext; + protected boolean allowStaleStandbyReads; /** httpServer */ @@ -531,7 +532,8 @@ public class NameNode { this.role = role; String nsId = getNameServiceId(conf); this.haEnabled = HAUtil.isHAEnabled(conf, nsId); - this.haContext = new NameNodeHAContext(); + this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); + this.haContext = createHAContext(); try { initializeGenericKeys(conf, 
nsId); initialize(conf); @@ -553,6 +555,10 @@ public class NameNode { } } + protected HAContext createHAContext() { + return new NameNodeHAContext(); + } + /** * Wait for service to finish. * (Normally, it runs forever.) @@ -914,11 +920,6 @@ public class NameNode { return state.getServiceState(); } - /** Check if an operation of given category is allowed */ - protected synchronized void checkOperation(final OperationCategory op) - throws StandbyException { - state.checkOperation(haContext, op); - } /** * Class used as expose {@link NameNode} as context to {@link HAState} @@ -928,7 +929,7 @@ public class NameNode { * appropriate action is needed todo either shutdown the node or recover * from failure. */ - private class NameNodeHAContext implements HAContext { + protected class NameNodeHAContext implements HAContext { @Override public void setState(HAState s) { state = s; @@ -961,6 +962,28 @@ public class NameNode { // TODO(HA): Are we guaranteed to be the only active here? namesystem.stopStandbyServices(); } + + @Override + public void writeLock() { + namesystem.writeLock(); + } + + @Override + public void writeUnlock() { + namesystem.writeUnlock(); + } + + /** Check if an operation of given category is allowed */ + @Override + public void checkOperation(final OperationCategory op) + throws StandbyException { + state.checkOperation(haContext, op); + } + + @Override + public boolean allowStaleReads() { + return allowStaleStandbyReads; + } } public boolean isStandbyState() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index cfea22f5a26..b7433ef7b65 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -126,7 +126,7 @@ class NameNodeRpcServer implements NamenodeProtocols { private static final Log stateChangeLog = NameNode.stateChangeLog; // Dependencies from other parts of NN. 
- private final FSNamesystem namesystem; + protected final FSNamesystem namesystem; protected final NameNode nn; private final NameNodeMetrics metrics; @@ -318,7 +318,9 @@ class NameNodeRpcServer implements NamenodeProtocols { public void errorReport(NamenodeRegistration registration, int errorCode, String msg) throws IOException { - nn.checkOperation(OperationCategory.WRITE); + // nn.checkOperation(OperationCategory.WRITE); + // TODO: I dont think this should be checked - it's just for logging + // and dropping backups verifyRequest(registration); LOG.info("Error report from " + registration + ": " + msg); if(errorCode == FATAL) @@ -346,28 +348,24 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // NamenodeProtocol public void endCheckpoint(NamenodeRegistration registration, CheckpointSignature sig) throws IOException { - nn.checkOperation(OperationCategory.CHECKPOINT); namesystem.endCheckpoint(registration, sig); } @Override // ClientProtocol public Token getDelegationToken(Text renewer) throws IOException { - nn.checkOperation(OperationCategory.WRITE); return namesystem.getDelegationToken(renewer); } @Override // ClientProtocol public long renewDelegationToken(Token token) throws InvalidToken, IOException { - nn.checkOperation(OperationCategory.WRITE); return namesystem.renewDelegationToken(token); } @Override // ClientProtocol public void cancelDelegationToken(Token token) throws IOException { - nn.checkOperation(OperationCategory.WRITE); namesystem.cancelDelegationToken(token); } @@ -376,7 +374,6 @@ class NameNodeRpcServer implements NamenodeProtocols { long offset, long length) throws IOException { - nn.checkOperation(OperationCategory.READ); metrics.incrGetBlockLocations(); return namesystem.getBlockLocations(getClientMachine(), src, offset, length); @@ -384,7 +381,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public FsServerDefaults getServerDefaults() throws IOException { - nn.checkOperation(OperationCategory.READ); return namesystem.getServerDefaults(); } @@ -396,7 +392,6 @@ class NameNodeRpcServer implements NamenodeProtocols { boolean createParent, short replication, long blockSize) throws IOException { - nn.checkOperation(OperationCategory.WRITE); String clientMachine = getClientMachine(); if (stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.create: file " @@ -417,7 +412,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public LocatedBlock append(String src, String clientName) throws IOException { - nn.checkOperation(OperationCategory.WRITE); String clientMachine = getClientMachine(); if (stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.append: file " @@ -430,7 +424,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public boolean recoverLease(String src, String clientName) throws IOException { - nn.checkOperation(OperationCategory.WRITE); String clientMachine = getClientMachine(); return namesystem.recoverLease(src, clientName, clientMachine); } @@ -438,21 +431,18 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public boolean setReplication(String src, short replication) throws IOException { - nn.checkOperation(OperationCategory.WRITE); return namesystem.setReplication(src, replication); } @Override // ClientProtocol public void setPermission(String src, FsPermission permissions) throws IOException { - nn.checkOperation(OperationCategory.WRITE); 
namesystem.setPermission(src, permissions); } @Override // ClientProtocol public void setOwner(String src, String username, String groupname) throws IOException { - nn.checkOperation(OperationCategory.WRITE); namesystem.setOwner(src, username, groupname); } @@ -462,7 +452,6 @@ class NameNodeRpcServer implements NamenodeProtocols { ExtendedBlock previous, DatanodeInfo[] excludedNodes) throws IOException { - nn.checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*BLOCK* NameNode.addBlock: file " +src+" for "+clientName); @@ -486,7 +475,6 @@ class NameNodeRpcServer implements NamenodeProtocols { final DatanodeInfo[] existings, final DatanodeInfo[] excludes, final int numAdditionalNodes, final String clientName ) throws IOException { - nn.checkOperation(OperationCategory.WRITE); if (LOG.isDebugEnabled()) { LOG.debug("getAdditionalDatanode: src=" + src + ", blk=" + blk @@ -514,7 +502,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public void abandonBlock(ExtendedBlock b, String src, String holder) throws IOException { - nn.checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*BLOCK* NameNode.abandonBlock: " +b+" of file "+src); @@ -527,7 +514,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public boolean complete(String src, String clientName, ExtendedBlock last) throws IOException { - nn.checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.complete: " + src + " for " + clientName); @@ -543,22 +529,12 @@ class NameNodeRpcServer implements NamenodeProtocols { */ @Override // ClientProtocol, DatanodeProtocol public void reportBadBlocks(LocatedBlock[] blocks) throws IOException { - nn.checkOperation(OperationCategory.WRITE); - stateChangeLog.info("*DIR* NameNode.reportBadBlocks"); - for (int i = 0; i < blocks.length; i++) { - ExtendedBlock blk = blocks[i].getBlock(); - DatanodeInfo[] nodes = blocks[i].getLocations(); - for (int j = 0; j < nodes.length; j++) { - DatanodeInfo dn = nodes[j]; - namesystem.getBlockManager().findAndMarkBlockAsCorrupt(blk, dn); - } - } + namesystem.reportBadBlocks(blocks); } @Override // ClientProtocol public LocatedBlock updateBlockForPipeline(ExtendedBlock block, String clientName) throws IOException { - nn.checkOperation(OperationCategory.WRITE); return namesystem.updateBlockForPipeline(block, clientName); } @@ -567,7 +543,6 @@ class NameNodeRpcServer implements NamenodeProtocols { public void updatePipeline(String clientName, ExtendedBlock oldBlock, ExtendedBlock newBlock, DatanodeID[] newNodes) throws IOException { - nn.checkOperation(OperationCategory.WRITE); namesystem.updatePipeline(clientName, oldBlock, newBlock, newNodes); } @@ -576,18 +551,6 @@ class NameNodeRpcServer implements NamenodeProtocols { long newgenerationstamp, long newlength, boolean closeFile, boolean deleteblock, DatanodeID[] newtargets) throws IOException { - nn.checkOperation(OperationCategory.WRITE); - if (nn.isStandbyState()) { - if (namesystem.isGenStampInFuture(newgenerationstamp)) { - LOG.info("Required GS=" + newgenerationstamp - + ", Queuing commitBlockSynchronization message"); - namesystem.getPendingDataNodeMessages().queueMessage( - new PendingDataNodeMessages.CommitBlockSynchronizationMessage( - block, newgenerationstamp, newlength, closeFile, deleteblock, - newtargets, newgenerationstamp)); - return; - } - } 
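With the per-method nn.checkOperation calls removed from NameNodeRpcServer in this hunk, each guarded operation now performs its own check inside FSNamesystem while the namesystem lock is held, so a state transition (which also takes the write lock) can never interleave with it. The recurring shape, sketched here with an illustrative method name rather than an excerpt from the patch:

    void someMutation(String src) throws IOException {
      writeLock();
      try {
        checkOperation(OperationCategory.WRITE);   // throws StandbyException on a standby NN
        if (isInSafeMode()) {
          throw new SafeModeException("Cannot modify " + src, safeMode);
        }
        // ... mutate the namespace and log the edit ...
      } finally {
        writeUnlock();
      }
    }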
namesystem.commitBlockSynchronization(block, newgenerationstamp, newlength, closeFile, deleteblock, newtargets); } @@ -595,14 +558,12 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public long getPreferredBlockSize(String filename) throws IOException { - nn.checkOperation(OperationCategory.READ); return namesystem.getPreferredBlockSize(filename); } @Deprecated @Override // ClientProtocol public boolean rename(String src, String dst) throws IOException { - nn.checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.rename: " + src + " to " + dst); } @@ -619,14 +580,12 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public void concat(String trg, String[] src) throws IOException { - nn.checkOperation(OperationCategory.WRITE); namesystem.concat(trg, src); } @Override // ClientProtocol public void rename2(String src, String dst, Options.Rename... options) throws IOException { - nn.checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.rename: " + src + " to " + dst); } @@ -640,7 +599,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public boolean delete(String src, boolean recursive) throws IOException { - nn.checkOperation(OperationCategory.WRITE); if (stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* Namenode.delete: src=" + src + ", recursive=" + recursive); @@ -665,7 +623,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public boolean mkdirs(String src, FsPermission masked, boolean createParent) throws IOException { - nn.checkOperation(OperationCategory.WRITE); if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*DIR* NameNode.mkdirs: " + src); } @@ -680,14 +637,12 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public void renewLease(String clientName) throws IOException { - nn.checkOperation(OperationCategory.WRITE); namesystem.renewLease(clientName); } @Override // ClientProtocol public DirectoryListing getListing(String src, byte[] startAfter, boolean needLocation) throws IOException { - nn.checkOperation(OperationCategory.READ); DirectoryListing files = namesystem.getListing( src, startAfter, needLocation); if (files != null) { @@ -699,21 +654,19 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public HdfsFileStatus getFileInfo(String src) throws IOException { - nn.checkOperation(OperationCategory.READ); metrics.incrFileInfoOps(); return namesystem.getFileInfo(src, true); } @Override // ClientProtocol public HdfsFileStatus getFileLinkInfo(String src) throws IOException { - nn.checkOperation(OperationCategory.READ); metrics.incrFileInfoOps(); return namesystem.getFileInfo(src, false); } @Override // ClientProtocol public long[] getStats() throws IOException { - nn.checkOperation(OperationCategory.READ); + namesystem.checkOperation(OperationCategory.READ); return namesystem.getStats(); } @@ -793,7 +746,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public CorruptFileBlocks listCorruptFileBlocks(String path, String cookie) throws IOException { - nn.checkOperation(OperationCategory.READ); String[] cookieTab = new String[] { cookie }; Collection fbs = namesystem.listCorruptFileBlocks(path, cookieTab); @@ -820,34 +772,29 @@ class NameNodeRpcServer implements NamenodeProtocols { 
@Override // ClientProtocol public ContentSummary getContentSummary(String path) throws IOException { - nn.checkOperation(OperationCategory.READ); return namesystem.getContentSummary(path); } @Override // ClientProtocol public void setQuota(String path, long namespaceQuota, long diskspaceQuota) throws IOException { - nn.checkOperation(OperationCategory.WRITE); namesystem.setQuota(path, namespaceQuota, diskspaceQuota); } @Override // ClientProtocol public void fsync(String src, String clientName) throws IOException { - nn.checkOperation(OperationCategory.WRITE); namesystem.fsync(src, clientName); } @Override // ClientProtocol public void setTimes(String src, long mtime, long atime) throws IOException { - nn.checkOperation(OperationCategory.WRITE); namesystem.setTimes(src, mtime, atime); } @Override // ClientProtocol public void createSymlink(String target, String link, FsPermission dirPerms, boolean createParent) throws IOException { - nn.checkOperation(OperationCategory.WRITE); metrics.incrCreateSymlinkOps(); /* We enforce the MAX_PATH_LENGTH limit even though a symlink target * URI may refer to a non-HDFS file system. @@ -867,7 +814,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public String getLinkTarget(String path) throws IOException { - nn.checkOperation(OperationCategory.READ); metrics.incrGetLinkTargetOps(); try { HdfsFileStatus stat = namesystem.getFileInfo(path, false); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index 8a837eea725..b7b1adb479c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -85,27 +85,37 @@ public class EditLogTailer { Preconditions.checkState(tailerThread == null || !tailerThread.isAlive(), "Tailer thread should not be running once failover starts"); - doTailEdits(); + try { + doTailEdits(); + } catch (InterruptedException e) { + throw new IOException(e); + } } - private void doTailEdits() throws IOException { - // TODO(HA) in a transition from active to standby, - // the following is wrong and ends up causing all of the - // last log segment to get re-read - long lastTxnId = image.getLastAppliedTxId(); - - if (LOG.isDebugEnabled()) { - LOG.debug("lastTxnId: " + lastTxnId); - } - Collection streams = editLog - .selectInputStreams(lastTxnId + 1, 0, false); - if (LOG.isDebugEnabled()) { - LOG.debug("edit streams to load from: " + streams.size()); - } - - long editsLoaded = image.loadEdits(streams, namesystem); - if (LOG.isDebugEnabled()) { - LOG.debug("editsLoaded: " + editsLoaded); + private void doTailEdits() throws IOException, InterruptedException { + // Write lock needs to be interruptible here because the + // transitionToActive RPC takes the write lock before calling + // tailer.stop() -- so if we're not interruptible, it will + // deadlock. 
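The namesystem-side helper invoked just below, writeLockInterruptibly(), is not visible in this excerpt; given the fsLock field that this patch exposes to tests, it presumably amounts to an interruptible acquire of the same ReentrantReadWriteLock, roughly:

    // Assumed shape only; the real FSNamesystem method is outside this excerpt.
    void writeLockInterruptibly() throws InterruptedException {
      this.fsLock.writeLock().lockInterruptibly();
    }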
+ namesystem.writeLockInterruptibly(); + try { + long lastTxnId = image.getLastAppliedTxId(); + + if (LOG.isDebugEnabled()) { + LOG.debug("lastTxnId: " + lastTxnId); + } + Collection streams = editLog + .selectInputStreams(lastTxnId + 1, 0, false); + if (LOG.isDebugEnabled()) { + LOG.debug("edit streams to load from: " + streams.size()); + } + + long editsLoaded = image.loadEdits(streams, namesystem); + if (LOG.isDebugEnabled()) { + LOG.debug("editsLoaded: " + editsLoaded); + } + } finally { + namesystem.writeUnlock(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java index 58d7773d514..dce1cfb34a8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java @@ -3,6 +3,8 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; +import org.apache.hadoop.ipc.StandbyException; /** * Context that is to be used by {@link HAState} for getting/setting the @@ -27,4 +29,29 @@ public interface HAContext { /** Stop the services when exiting standby state */ public void stopStandbyServices() throws IOException; + + /** + * Take a write-lock on the underlying namesystem + * so that no concurrent state transitions or edits + * can be made. + */ + void writeLock(); + + /** + * Unlock the lock taken by {@link #writeLock()} + */ + void writeUnlock(); + + /** + * Verify that the given operation category is allowed in the + * current state. This is to allow NN implementations (eg BackupNode) + * to override it with node-specific handling. 
+ */ + void checkOperation(OperationCategory op) throws StandbyException; + + /** + * @return true if the node should allow stale reads (ie reads + * while the namespace is not up to date) + */ + boolean allowStaleReads(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java index 7dfab914939..20c09d5f4e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java @@ -54,9 +54,14 @@ abstract public class HAState { */ protected final void setStateInternal(final HAContext context, final HAState s) throws ServiceFailedException { - exitState(context); - context.setState(s); - s.enterState(context); + context.writeLock(); + try { + exitState(context); + context.setState(s); + s.enterState(context); + } finally { + context.writeUnlock(); + } } /** @@ -107,4 +112,4 @@ abstract public class HAState { public String toString() { return state.toString(); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java index eb34f0f43a4..b22b2e43ed8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java @@ -73,6 +73,9 @@ public class StandbyState extends HAState { @Override public void checkOperation(HAContext context, OperationCategory op) throws StandbyException { + if (op == OperationCategory.READ && context.allowStaleReads()) { + return; + } String msg = "Operation category " + op + " is not supported in state " + context.getState(); throw new StandbyException(msg); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 57dafa807eb..f4732986364 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -307,6 +307,14 @@ public class MiniDFSCluster { private boolean waitSafeMode = true; private boolean federation; + /** + * A unique instance identifier for the cluster. This + * is used to disambiguate HA filesystems in the case where + * multiple MiniDFSClusters are used in the same test suite. 
+ */ + private int instanceId; + private static int instanceCount = 0; + /** * Stores the information related to a namenode in the cluster */ @@ -325,6 +333,9 @@ public class MiniDFSCluster { */ public MiniDFSCluster() { nameNodes = new NameNodeInfo[0]; // No namenode in the cluster + synchronized (MiniDFSCluster.class) { + instanceId = instanceCount++; + } } /** @@ -510,6 +521,10 @@ public class MiniDFSCluster { boolean waitSafeMode, boolean setupHostsFile, MiniDFSNNTopology nnTopology) throws IOException { + synchronized (MiniDFSCluster.class) { + instanceId = instanceCount++; + } + this.conf = conf; base_dir = new File(determineDfsBaseDir()); data_dir = new File(base_dir, "data"); @@ -737,6 +752,10 @@ public class MiniDFSCluster { } return uri; } + + public int getInstanceId() { + return instanceId; + } /** * @return Configuration of for the given namenode diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java index 1146ae7b7a2..90739693f5c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java @@ -46,7 +46,7 @@ public class TestDFSClientFailover { private Configuration conf = new Configuration(); private MiniDFSCluster cluster; - private static final String LOGICAL_HOSTNAME = "ha-nn-uri"; + private static final String LOGICAL_HOSTNAME = "ha-nn-uri-%d"; @Before public void setUpCluster() throws IOException { @@ -91,7 +91,8 @@ public class TestDFSClientFailover { // Check that it functions even if the URL becomes canonicalized // to include a port number. - Path withPort = new Path("hdfs://" + LOGICAL_HOSTNAME + ":" + + Path withPort = new Path("hdfs://" + + getLogicalHostname(cluster) + ":" + NameNode.DEFAULT_PORT + "/" + TEST_FILE.toUri().getPath()); FileSystem fs2 = withPort.getFileSystem(fs.getConf()); assertTrue(fs2.exists(withPort)); @@ -126,6 +127,7 @@ public class TestDFSClientFailover { String nameNodeId1 = "nn1"; String nameNodeId2 = "nn2"; + String logicalName = getLogicalHostname(cluster); conf = new Configuration(conf); String address1 = "hdfs://" + nnAddr1.getHostName() + ":" + nnAddr1.getPort(); @@ -138,11 +140,15 @@ public class TestDFSClientFailover { conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, nsId); conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, nsId), nameNodeId1 + "," + nameNodeId2); - conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + LOGICAL_HOSTNAME, + conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." 
+ logicalName, ConfiguredFailoverProxyProvider.class.getName()); - FileSystem fs = FileSystem.get(new URI("hdfs://" + LOGICAL_HOSTNAME), conf); + FileSystem fs = FileSystem.get(new URI("hdfs://" + logicalName), conf); return fs; } + private static String getLogicalHostname(MiniDFSCluster cluster) { + return String.format(LOGICAL_HOSTNAME, cluster.getInstanceId()); + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java index d5ba1992a8f..2c25855accb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java @@ -146,8 +146,14 @@ public class TestFileCorruption extends TestCase { // report corrupted block by the third datanode DatanodeRegistration dnR = DataNodeTestUtils.getDNRegistrationForBP(dataNode, blk.getBlockPoolId()); - cluster.getNamesystem().getBlockManager().findAndMarkBlockAsCorrupt( - blk, new DatanodeInfo(dnR)); + FSNamesystem ns = cluster.getNamesystem(); + ns.writeLock(); + try { + cluster.getNamesystem().getBlockManager().findAndMarkBlockAsCorrupt( + blk, new DatanodeInfo(dnR)); + } finally { + ns.writeUnlock(); + } // open the file fs.open(FILE_PATH); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index c7cc61dc137..d05df3eaef1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; +import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.permission.PermissionStatus; @@ -29,7 +30,9 @@ import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.security.AccessControlException; +import org.mockito.Mockito; /** * This is a utility class to expose NameNode functionality for unit tests. 
@@ -52,7 +55,8 @@ public class NameNodeAdapter { } public static HdfsFileStatus getFileInfo(NameNode namenode, String src, - boolean resolveLink) throws AccessControlException, UnresolvedLinkException { + boolean resolveLink) throws AccessControlException, UnresolvedLinkException, + StandbyException { return namenode.getNamesystem().getFileInfo(src, resolveLink); } @@ -134,4 +138,10 @@ public class NameNodeAdapter { public static long[] getStats(final FSNamesystem fsn) { return fsn.getStats(); } + + public static ReentrantReadWriteLock spyOnFsLock(FSNamesystem fsn) { + ReentrantReadWriteLock spy = Mockito.spy(fsn.getFsLockForTests()); + fsn.setFsLockForTests(spy); + return spy; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java index 545d4b5660b..67f821288df 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java @@ -33,6 +33,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; @@ -120,6 +121,7 @@ public class TestBackupNode extends TestCase { */ public void testBackupNodeTailsEdits() throws Exception { Configuration conf = new HdfsConfiguration(); + HAUtil.setAllowStandbyReads(conf, true); MiniDFSCluster cluster = null; FileSystem fileSys = null; BackupNode backup = null; @@ -245,6 +247,7 @@ public class TestBackupNode extends TestCase { Path file3 = new Path("/backup.dat"); Configuration conf = new HdfsConfiguration(); + HAUtil.setAllowStandbyReads(conf, true); short replication = (short)conf.getInt("dfs.replication", 3); int numDatanodes = Math.max(3, replication); conf.set(DFSConfigKeys.DFS_BLOCKREPORT_INITIAL_DELAY_KEY, "0"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java index b22ef02b864..4c398916790 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java @@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; @@ -52,6 +53,7 @@ public class TestEditLogTailer { public void testTailer() throws IOException, InterruptedException, ServiceFailedException { Configuration conf = new HdfsConfiguration(); + HAUtil.setAllowStandbyReads(conf, true); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java index 1bbe33b72d8..952df211a74 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java @@ -29,6 +29,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; @@ -52,6 +53,7 @@ public class TestEditLogsDuringFailover { @Test public void testStartup() throws Exception { Configuration conf = new Configuration(); + HAUtil.setAllowStandbyReads(conf, true); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(0) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index 7ac3c658de9..6eac5756b61 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.junit.Assert.*; +import java.util.concurrent.locks.ReentrantReadWriteLock; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -28,8 +30,12 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; +import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; import org.junit.Test; +import org.mockito.Mockito; /** * Tests state transition from active->standby, and manual failover @@ -133,4 +139,57 @@ public class TestHAStateTransitions { cluster.shutdown(); } } + + /** + * Regression test for HDFS-2693: when doing state transitions, we need to + * lock the FSNamesystem so that we don't end up doing any writes while it's + * "in between" states. + * This test case starts up several client threads which do mutation operations + * while flipping a NN back and forth from active to standby. 
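The test added below drives out this race by replacing the namesystem's write lock with a Mockito spy that sleeps for a random interval before delegating to the real method, so active/standby transitions interleave with in-flight client mutations. As a minimal, self-contained illustration of that idiom (the class and variable names here are invented for the example; only the Mockito calls mirror what the patch does):

    import java.util.Random;
    import java.util.concurrent.locks.ReentrantReadWriteLock;
    import org.mockito.Mockito;
    import org.mockito.invocation.InvocationOnMock;
    import org.mockito.stubbing.Answer;

    public class DelayedLockExample {
      public static void main(String[] args) {
        final Random random = new Random();
        ReentrantReadWriteLock spyLock = Mockito.spy(new ReentrantReadWriteLock());
        // Sleep up to 50 ms before each writeLock() call, then delegate to the
        // real method, widening the window in which races can surface.
        Mockito.doAnswer(new Answer<Object>() {
          @Override
          public Object answer(InvocationOnMock invocation) throws Throwable {
            Thread.sleep(random.nextInt(50));
            return invocation.callRealMethod();
          }
        }).when(spyLock).writeLock();

        spyLock.writeLock().lock();   // acquired only after the injected delay
        spyLock.writeLock().unlock();
      }
    }

The patch applies the same pattern through NameNodeAdapter.spyOnFsLock() and GenericTestUtils.SleepAnswer, both shown further down.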
+ */ + @Test(timeout=120000) + public void testTransitionSynchronization() throws Exception { + Configuration conf = new Configuration(); + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + try { + cluster.waitActive(); + ReentrantReadWriteLock spyLock = NameNodeAdapter.spyOnFsLock( + cluster.getNameNode(0).getNamesystem()); + Mockito.doAnswer(new GenericTestUtils.SleepAnswer(50)) + .when(spyLock).writeLock(); + + final FileSystem fs = TestDFSClientFailover.configureFailoverFs( + cluster, conf); + + TestContext ctx = new TestContext(); + for (int i = 0; i < 50; i++) { + final int finalI = i; + ctx.addThread(new RepeatingTestThread(ctx) { + @Override + public void doAnAction() throws Exception { + Path p = new Path("/test-" + finalI); + fs.mkdirs(p); + fs.delete(p, true); + } + }); + } + + ctx.addThread(new RepeatingTestThread(ctx) { + @Override + public void doAnAction() throws Exception { + cluster.transitionToStandby(0); + Thread.sleep(50); + cluster.transitionToActive(0); + } + }); + ctx.startThreads(); + ctx.waitFor(20000); + ctx.stop(); + } finally { + cluster.shutdown(); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java index 036e914cee9..22604275882 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.AppendTestUtil; import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.TestDFSClientFailover; @@ -54,6 +55,8 @@ public class TestStandbyIsHot { @Test public void testStandbyIsHot() throws Exception { Configuration conf = new Configuration(); + // We read from the standby to watch block locations + HAUtil.setAllowStandbyReads(conf, true); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(3) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java index c8c528d0bbd..b9f49145569 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java @@ -167,7 +167,12 @@ public class TestNameNodeMetrics extends TestCase { // Corrupt first replica of the block LocatedBlock block = NameNodeAdapter.getBlockLocations( cluster.getNameNode(), file.toString(), 0, 1).get(0); - bm.findAndMarkBlockAsCorrupt(block.getBlock(), block.getLocations()[0]); + cluster.getNamesystem().writeLock(); + try { + bm.findAndMarkBlockAsCorrupt(block.getBlock(), block.getLocations()[0]); + } finally { + cluster.getNamesystem().writeUnlock(); + } updateMetrics(); MetricsRecordBuilder rb = getMetrics(NS_METRICS); 
assertGauge("CorruptBlocks", 1L, rb); @@ -204,7 +209,12 @@ public class TestNameNodeMetrics extends TestCase { // Corrupt the only replica of the block to result in a missing block LocatedBlock block = NameNodeAdapter.getBlockLocations( cluster.getNameNode(), file.toString(), 0, 1).get(0); - bm.findAndMarkBlockAsCorrupt(block.getBlock(), block.getLocations()[0]); + cluster.getNamesystem().writeLock(); + try { + bm.findAndMarkBlockAsCorrupt(block.getBlock(), block.getLocations()[0]); + } finally { + cluster.getNamesystem().writeUnlock(); + } updateMetrics(); MetricsRecordBuilder rb = getMetrics(NS_METRICS); assertGauge("UnderReplicatedBlocks", 1L, rb); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index 13e96830847..ea2b11e2b17 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -20,6 +20,7 @@ package org.apache.hadoop.test; import java.io.File; import java.io.IOException; import java.util.Arrays; +import java.util.Random; import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeoutException; @@ -176,4 +177,35 @@ public abstract class GenericTestUtils { } } + /** + * An Answer implementation which sleeps for a random number of milliseconds + * between 0 and a configurable value before delegating to the real + * implementation of the method. This can be useful for drawing out race + * conditions. + */ + public static class SleepAnswer implements Answer { + private final int maxSleepTime; + private static Random r = new Random(); + + public SleepAnswer(int maxSleepTime) { + this.maxSleepTime = maxSleepTime; + } + + @Override + public Object answer(InvocationOnMock invocation) throws Throwable { + boolean interrupted = false; + try { + Thread.sleep(r.nextInt(maxSleepTime)); + } catch (InterruptedException ie) { + interrupted = true; + } + try { + return invocation.callRealMethod(); + } finally { + if (interrupted) { + Thread.currentThread().interrupt(); + } + } + } + } } From 31c91706f7d17da006ef2d6c541f8dd092fae077 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 21 Dec 2011 04:32:40 +0000 Subject: [PATCH 061/177] HDFS-1972. Fencing mechanism for block invalidations and replications. Contributed by Todd Lipcon. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1221608 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../server/blockmanagement/BlockManager.java | 354 ++++++++++--- .../BlockPlacementPolicyDefault.java | 2 +- .../blockmanagement/DatanodeDescriptor.java | 62 +++ .../blockmanagement/DatanodeManager.java | 23 + .../blockmanagement/InvalidateBlocks.java | 5 + .../blockmanagement/NumberReplicas.java | 19 +- .../PendingReplicationBlocks.java | 8 + .../hdfs/server/datanode/BPOfferService.java | 10 + .../hdfs/server/datanode/BPServiceActor.java | 35 +- .../datanode/FSDatasetAsyncDiskService.java | 8 + .../hdfs/server/namenode/FSNamesystem.java | 50 +- .../hdfs/server/namenode/ha/ActiveState.java | 6 + .../hdfs/server/namenode/ha/HAState.java | 3 + .../hdfs/server/namenode/ha/StandbyState.java | 5 + .../apache/hadoop/hdfs/MiniDFSCluster.java | 25 + .../blockmanagement/BlockManagerTestUtil.java | 22 + .../hdfs/server/datanode/DataNodeAdapter.java | 25 + .../hdfs/server/namenode/NameNodeAdapter.java | 5 + .../server/namenode/ha/TestDNFencing.java | 469 ++++++++++++++++++ .../ha/TestDNFencingWithReplication.java | 204 ++++++++ .../server/namenode/ha/TestEditLogTailer.java | 11 +- .../server/namenode/ha/TestStandbyIsHot.java | 62 ++- 23 files changed, 1302 insertions(+), 113 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 6ffb0dfc4d8..4c7fd33bdec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -73,3 +73,5 @@ HDFS-2678. When a FailoverProxyProvider is used, DFSClient should not retry conn HDFS-2682. When a FailoverProxyProvider is used, Client should not retry for 45 times if it is timing out to connect to server. (Uma Maheswara Rao G via todd) HDFS-2693. Fix synchronization issues around state transition (todd) + +HDFS-1972. Fencing mechanism for block invalidations and replications (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index abefbb562d3..8db9490a53f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -28,6 +28,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -68,6 +69,8 @@ import org.apache.hadoop.net.Node; import org.apache.hadoop.util.Daemon; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.google.common.collect.Sets; /** * Keeps information related to the blocks stored in the Hadoop cluster. 
@@ -91,6 +94,7 @@ public class BlockManager { private volatile long underReplicatedBlocksCount = 0L; private volatile long scheduledReplicationBlocksCount = 0L; private volatile long excessBlocksCount = 0L; + private volatile long postponedMisreplicatedBlocksCount = 0L; /** Used by metrics */ public long getPendingReplicationBlocksCount() { @@ -116,6 +120,10 @@ public class BlockManager { public long getExcessBlocksCount() { return excessBlocksCount; } + /** Used by metrics */ + public long getPostponedMisreplicatedBlocksCount() { + return postponedMisreplicatedBlocksCount; + } /**replicationRecheckInterval is how often namenode checks for new replication work*/ private final long replicationRecheckInterval; @@ -134,6 +142,15 @@ public class BlockManager { /** Blocks to be invalidated. */ private final InvalidateBlocks invalidateBlocks; + + /** + * After a failover, over-replicated blocks may not be handled + * until all of the replicas have done a block report to the + * new active. This is to make sure that this NameNode has been + * notified of all block deletions that might have been pending + * when the failover happened. + */ + private final Set postponedMisreplicatedBlocks = Sets.newHashSet(); // // Keeps a TreeSet for every named node. Each treeset contains @@ -316,49 +333,15 @@ public class BlockManager { out.println("Metasave: Blocks waiting for replication: " + neededReplications.size()); for (Block block : neededReplications) { - List containingNodes = - new ArrayList(); - List containingLiveReplicasNodes = - new ArrayList(); - - NumberReplicas numReplicas = new NumberReplicas(); - // source node returned is not used - chooseSourceDatanode(block, containingNodes, - containingLiveReplicasNodes, numReplicas); - assert containingLiveReplicasNodes.size() == numReplicas.liveReplicas(); - int usableReplicas = numReplicas.liveReplicas() + - numReplicas.decommissionedReplicas(); - - if (block instanceof BlockInfo) { - String fileName = ((BlockInfo)block).getINode().getFullPathName(); - out.print(fileName + ": "); - } - // l: == live:, d: == decommissioned c: == corrupt e: == excess - out.print(block + ((usableReplicas > 0)? "" : " MISSING") + - " (replicas:" + - " l: " + numReplicas.liveReplicas() + - " d: " + numReplicas.decommissionedReplicas() + - " c: " + numReplicas.corruptReplicas() + - " e: " + numReplicas.excessReplicas() + ") "); - - Collection corruptNodes = - corruptReplicas.getNodes(block); - - for (Iterator jt = blocksMap.nodeIterator(block); - jt.hasNext();) { - DatanodeDescriptor node = jt.next(); - String state = ""; - if (corruptNodes != null && corruptNodes.contains(node)) { - state = "(corrupt)"; - } else if (node.isDecommissioned() || - node.isDecommissionInProgress()) { - state = "(decommissioned)"; - } - out.print(" " + node + state + " : "); - } - out.println(""); + dumpBlockMeta(block, out); } } + + // Dump any postponed over-replicated blocks + out.println("Mis-replicated blocks that have been postponed:"); + for (Block block : postponedMisreplicatedBlocks) { + dumpBlockMeta(block, out); + } // Dump blocks from pendingReplication pendingReplications.metaSave(out); @@ -369,6 +352,58 @@ public class BlockManager { // Dump all datanodes getDatanodeManager().datanodeDump(out); } + + /** + * Dump the metadata for the given block in a human-readable + * form. 
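The postponement rule described above, that nothing may be invalidated while any replica sits on a DataNode whose block contents are still considered stale, reduces to a small decision function; the same shape appears in the invalidateBlock() and processOverReplicatedBlock() changes later in this hunk. A simplified, stand-alone rendering (the real BlockManager logic also accounts for corrupt, decommissioned and excess replicas):

    enum ReplicaAction { POSTPONE, INVALIDATE, KEEP }

    final class InvalidationRule {
      // Simplified: decide what to do about one excess replica of a block.
      static ReplicaAction decide(int liveReplicas, int replicasOnStaleNodes) {
        if (replicasOnStaleNodes > 0) {
          // A DN hosting a copy has not block-reported since the failover; its
          // pending deletions are unknown, so defer rather than risk data loss.
          return ReplicaAction.POSTPONE;
        }
        if (liveReplicas >= 1) {
          return ReplicaAction.INVALIDATE;  // at least one trusted live copy remains
        }
        return ReplicaAction.KEEP;
      }
    }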
+ */ + private void dumpBlockMeta(Block block, PrintWriter out) { + List containingNodes = + new ArrayList(); + List containingLiveReplicasNodes = + new ArrayList(); + + NumberReplicas numReplicas = new NumberReplicas(); + // source node returned is not used + chooseSourceDatanode(block, containingNodes, + containingLiveReplicasNodes, numReplicas); + assert containingLiveReplicasNodes.size() == numReplicas.liveReplicas(); + int usableReplicas = numReplicas.liveReplicas() + + numReplicas.decommissionedReplicas(); + + if (block instanceof BlockInfo) { + String fileName = ((BlockInfo)block).getINode().getFullPathName(); + out.print(fileName + ": "); + } + // l: == live:, d: == decommissioned c: == corrupt e: == excess + out.print(block + ((usableReplicas > 0)? "" : " MISSING") + + " (replicas:" + + " l: " + numReplicas.liveReplicas() + + " d: " + numReplicas.decommissionedReplicas() + + " c: " + numReplicas.corruptReplicas() + + " e: " + numReplicas.excessReplicas() + ") "); + + Collection corruptNodes = + corruptReplicas.getNodes(block); + + for (Iterator jt = blocksMap.nodeIterator(block); + jt.hasNext();) { + DatanodeDescriptor node = jt.next(); + String state = ""; + if (corruptNodes != null && corruptNodes.contains(node)) { + state = "(corrupt)"; + } else if (node.isDecommissioned() || + node.isDecommissionInProgress()) { + state = "(decommissioned)"; + } + + if (node.areBlockContentsStale()) { + state += " (block deletions maybe out of date)"; + } + out.print(" " + node + state + " : "); + } + out.println(""); + } /** @return maxReplicationStreams */ public int getMaxReplicationStreams() { @@ -782,6 +817,14 @@ public class BlockManager { node.resetBlocks(); invalidateBlocks.remove(node.getStorageID()); + + // If the DN hasn't block-reported since the most recent + // failover, then we may have been holding up on processing + // over-replicated blocks because of it. But we can now + // process those blocks. + if (node.areBlockContentsStale()) { + rescanPostponedMisreplicatedBlocks(); + } } /** @@ -879,10 +922,17 @@ public class BlockManager { + " because datanode " + dn.getName() + " does not exist."); } - // Check how many copies we have of the block. If we have at least one - // copy on a live node, then we can delete it. - int count = countNodes(blk).liveReplicas(); - if (count >= 1) { + // Check how many copies we have of the block + NumberReplicas nr = countNodes(blk); + if (nr.replicasOnStaleNodes() > 0) { + NameNode.stateChangeLog.info("BLOCK* invalidateBlocks: postponing " + + "invalidation of block " + blk + " on " + dn + " because " + + nr.replicasOnStaleNodes() + " replica(s) are located on nodes " + + "with potentially out-of-date block reports."); + postponeBlock(blk); + + } else if (nr.liveReplicas() >= 1) { + // If we have at least one copy on a live node, then we can delete it. addToInvalidates(blk, dn); removeStoredBlock(blk, node); if(NameNode.stateChangeLog.isDebugEnabled()) { @@ -895,6 +945,13 @@ public class BlockManager { } } + private void postponeBlock(Block blk) { + if (postponedMisreplicatedBlocks.add(blk)) { + postponedMisreplicatedBlocksCount++; + } + } + + void updateState() { pendingReplicationBlocksCount = pendingReplications.size(); underReplicatedBlocksCount = neededReplications.size(); @@ -933,7 +990,7 @@ public class BlockManager { * * @return number of blocks scheduled for replication during this iteration. 
*/ - private int computeReplicationWork(int blocksToProcess) throws IOException { + int computeReplicationWork(int blocksToProcess) throws IOException { List> blocksToReplicate = null; namesystem.writeLock(); try { @@ -984,8 +1041,10 @@ public class BlockManager { NumberReplicas numReplicas = new NumberReplicas(); srcNode = chooseSourceDatanode( block, containingNodes, liveReplicaNodes, numReplicas); - if(srcNode == null) // block can not be replicated from any node + if(srcNode == null) { // block can not be replicated from any node + LOG.debug("Block " + block + " cannot be repl from any node"); continue; + } assert liveReplicaNodes.size() == numReplicas.liveReplicas(); // do not schedule more if enough replicas is already pending @@ -1235,7 +1294,7 @@ public class BlockManager { srcNode = node; } if(numReplicas != null) - numReplicas.initialize(live, decommissioned, corrupt, excess); + numReplicas.initialize(live, decommissioned, corrupt, excess, 0); return srcNode; } @@ -1316,6 +1375,19 @@ public class BlockManager { } else { processReport(node, newReport); } + + // Now that we have an up-to-date block report, we know that any + // deletions from a previous NN iteration have been accounted for. + boolean staleBefore = node.areBlockContentsStale(); + node.receivedBlockReport(); + if (staleBefore && !node.areBlockContentsStale()) { + LOG.info("BLOCK* processReport: " + + "Received first block report from " + node + + " after becoming active. Its block contents are no longer" + + " considered stale."); + rescanPostponedMisreplicatedBlocks(); + } + } finally { endTime = Util.now(); namesystem.writeUnlock(); @@ -1328,6 +1400,37 @@ public class BlockManager { + ", processing time: " + (endTime - startTime) + " msecs"); } + /** + * Rescan the list of blocks which were previously postponed. 
+ */ + private void rescanPostponedMisreplicatedBlocks() { + for (Iterator it = postponedMisreplicatedBlocks.iterator(); + it.hasNext();) { + Block b = it.next(); + + BlockInfo bi = blocksMap.getStoredBlock(b); + if (bi == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("BLOCK* rescanPostponedMisreplicatedBlocks: " + + "Postponed mis-replicated block " + b + " no longer found " + + "in block map."); + } + it.remove(); + postponedMisreplicatedBlocksCount--; + continue; + } + MisReplicationResult res = processMisReplicatedBlock(bi); + if (LOG.isDebugEnabled()) { + LOG.debug("BLOCK* rescanPostponedMisreplicatedBlocks: " + + "Re-scanned block " + b + ", result is " + res); + } + if (res != MisReplicationResult.POSTPONE) { + it.remove(); + postponedMisreplicatedBlocksCount--; + } + } + } + private void processReport(final DatanodeDescriptor node, final BlockListAsLongs report) throws IOException { // Normal case: @@ -1505,8 +1608,9 @@ public class BlockManager { // Ignore replicas already scheduled to be removed from the DN if(invalidateBlocks.contains(dn.getStorageID(), block)) { - assert storedBlock.findDatanode(dn) < 0 : "Block " + block - + " in recentInvalidatesSet should not appear in DN " + dn; +/* TODO: following assertion is incorrect, see HDFS-2668 +assert storedBlock.findDatanode(dn) < 0 : "Block " + block + + " in recentInvalidatesSet should not appear in DN " + dn; */ return storedBlock; } @@ -1773,41 +1877,81 @@ public class BlockManager { public void processMisReplicatedBlocks() { assert namesystem.hasWriteLock(); - long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0; + long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0, nrPostponed = 0; neededReplications.clear(); for (BlockInfo block : blocksMap.getBlocks()) { - INodeFile fileINode = block.getINode(); - if (fileINode == null) { - // block does not belong to any file - nrInvalid++; - addToInvalidates(block); - continue; - } - // calculate current replication - short expectedReplication = fileINode.getReplication(); - NumberReplicas num = countNodes(block); - int numCurrentReplica = num.liveReplicas(); - // add to under-replicated queue if need to be - if (isNeededReplication(block, expectedReplication, numCurrentReplica)) { - if (neededReplications.add(block, numCurrentReplica, num - .decommissionedReplicas(), expectedReplication)) { - nrUnderReplicated++; - } - } - - if (numCurrentReplica > expectedReplication) { - // over-replicated block + MisReplicationResult res = processMisReplicatedBlock(block); + LOG.info("block " + block + ": " + res); + switch (res) { + case UNDER_REPLICATED: + nrUnderReplicated++; + break; + case OVER_REPLICATED: nrOverReplicated++; - processOverReplicatedBlock(block, expectedReplication, null, null); + break; + case INVALID: + nrInvalid++; + break; + case POSTPONE: + nrPostponed++; + postponeBlock(block); + break; + case OK: + break; + default: + throw new AssertionError("Invalid enum value: " + res); } } - + LOG.info("Total number of blocks = " + blocksMap.size()); LOG.info("Number of invalid blocks = " + nrInvalid); LOG.info("Number of under-replicated blocks = " + nrUnderReplicated); - LOG.info("Number of over-replicated blocks = " + nrOverReplicated); + LOG.info("Number of over-replicated blocks = " + nrOverReplicated + + ((nrPostponed > 0) ? ( " (" + nrPostponed + " postponed)") : "")); } + /** + * Process a single possibly misreplicated block. This adds it to the + * appropriate queues if necessary, and returns a result code indicating + * what happened with it. 
+ */ + private MisReplicationResult processMisReplicatedBlock(BlockInfo block) { + INodeFile fileINode = block.getINode(); + if (fileINode == null) { + // block does not belong to any file + addToInvalidates(block); + return MisReplicationResult.INVALID; + } + // calculate current replication + short expectedReplication = fileINode.getReplication(); + NumberReplicas num = countNodes(block); + int numCurrentReplica = num.liveReplicas(); + // add to under-replicated queue if need to be + if (isNeededReplication(block, expectedReplication, numCurrentReplica)) { + if (neededReplications.add(block, numCurrentReplica, num + .decommissionedReplicas(), expectedReplication)) { + return MisReplicationResult.UNDER_REPLICATED; + } + } + + if (numCurrentReplica > expectedReplication) { + if (num.replicasOnStaleNodes() > 0) { + // If any of the replicas of this block are on nodes that are + // considered "stale", then these replicas may in fact have + // already been deleted. So, we cannot safely act on the + // over-replication until a later point in time, when + // the "stale" nodes have block reported. + return MisReplicationResult.POSTPONE; + } + + // over-replicated block + processOverReplicatedBlock(block, expectedReplication, null, null); + return MisReplicationResult.OVER_REPLICATED; + } + + return MisReplicationResult.OK; + } + /** Set replication for the blocks. */ public void setReplication(final short oldRepl, final short newRepl, final String src, final Block... blocks) throws IOException { @@ -1851,6 +1995,14 @@ public class BlockManager { for (Iterator it = blocksMap.nodeIterator(block); it.hasNext();) { DatanodeDescriptor cur = it.next(); + if (cur.areBlockContentsStale()) { + LOG.info("BLOCK* processOverReplicatedBlock: " + + "Postponing processing of over-replicated block " + + block + " since datanode " + cur + " does not yet have up-to-date " + + "block information."); + postponeBlock(block); + return; + } LightWeightLinkedSet excessBlocks = excessReplicateMap.get(cur .getStorageID()); if (excessBlocks == null || !excessBlocks.contains(block)) { @@ -2153,13 +2305,15 @@ public class BlockManager { } /** - * Return the number of nodes that are live and decommissioned. + * Return the number of nodes hosting a given block, grouped + * by the state of those replicas. 
*/ public NumberReplicas countNodes(Block b) { - int count = 0; + int decommissioned = 0; int live = 0; int corrupt = 0; int excess = 0; + int stale = 0; Iterator nodeIter = blocksMap.nodeIterator(b); Collection nodesCorrupt = corruptReplicas.getNodes(b); while (nodeIter.hasNext()) { @@ -2167,7 +2321,7 @@ public class BlockManager { if ((nodesCorrupt != null) && (nodesCorrupt.contains(node))) { corrupt++; } else if (node.isDecommissionInProgress() || node.isDecommissioned()) { - count++; + decommissioned++; } else { LightWeightLinkedSet blocksExcess = excessReplicateMap.get(node .getStorageID()); @@ -2177,8 +2331,11 @@ public class BlockManager { live++; } } + if (node.areBlockContentsStale()) { + stale++; + } } - return new NumberReplicas(live, count, corrupt, excess); + return new NumberReplicas(live, decommissioned, corrupt, excess, stale); } /** @@ -2323,10 +2480,14 @@ public class BlockManager { } public void removeBlock(Block block) { + assert namesystem.hasWriteLock(); block.setNumBytes(BlockCommand.NO_ACK); addToInvalidates(block); corruptReplicas.removeFromCorruptReplicasMap(block); blocksMap.removeBlock(block); + if (postponedMisreplicatedBlocks.remove(block)) { + postponedMisreplicatedBlocksCount--; + } } public BlockInfo getStoredBlock(Block block) { @@ -2387,8 +2548,10 @@ public class BlockManager { namesystem.writeLock(); try { // blocks should not be replicated or removed if safe mode is on - if (namesystem.isInSafeMode()) + if (namesystem.isInSafeMode()) { + LOG.debug("In safemode, not computing replication work"); return 0; + } // get blocks to invalidate for the nodeId assert nodeId != null; return invalidateBlocks.invalidateWork(nodeId); @@ -2571,6 +2734,19 @@ public class BlockManager { return workFound; } + /** + * Clear all queues that hold decisions previously made by + * this NameNode. + */ + public void clearQueues() { + neededReplications.clear(); + pendingReplications.clear(); + excessReplicateMap.clear(); + invalidateBlocks.clear(); + datanodeManager.clearPendingQueues(); + }; + + private static class ReplicationWork { private Block block; @@ -2601,4 +2777,22 @@ public class BlockManager { this.targets = null; } } + + /** + * A simple result enum for the result of + * {@link BlockManager#processMisReplicatedBlock(BlockInfo)}. + */ + enum MisReplicationResult { + /** The block should be invalidated since it belongs to a deleted file. */ + INVALID, + /** The block is currently under-replicated. */ + UNDER_REPLICATED, + /** The block is currently over-replicated. */ + OVER_REPLICATED, + /** A decision can't currently be made about this block. 
*/ + POSTPONE, + /** The block is properly replicated */ + OK + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index ef2a2b32063..a9f95d234ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -60,7 +60,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { initialize(conf, stats, clusterMap); } - BlockPlacementPolicyDefault() { + protected BlockPlacementPolicyDefault() { } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index d927f052975..807213ed17d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -94,6 +94,10 @@ public class DatanodeDescriptor extends DatanodeInfo { boolean contains(E e) { return blockq.contains(e); } + + synchronized void clear() { + blockq.clear(); + } } private volatile BlockInfo blockList = null; @@ -103,6 +107,24 @@ public class DatanodeDescriptor extends DatanodeInfo { public boolean isAlive = false; public boolean needKeyUpdate = false; + /** + * Set to false on any NN failover, and reset to true + * whenever a block report is received. + */ + private boolean heartbeatedSinceFailover = false; + + /** + * At startup or at any failover, the DNs in the cluster may + * have pending block deletions from a previous incarnation + * of the NameNode. Thus, we consider their block contents + * stale until we have received a block report. When a DN + * is considered stale, any replicas on it are transitively + * considered stale. If any block has at least one stale replica, + * then no invalidations will be processed for this block. + * See HDFS-1972. 
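The two flags described above interact in a fixed order: a failover marks the node stale, a later heartbeat records that the node has checked in with the new active, and only a block report arriving after that heartbeat clears the stale bit. A stand-alone model of that lifecycle (field and method names mirror those added to DatanodeDescriptor below, but this is not the real class):

    final class StaleState {
      private boolean heartbeatedSinceFailover = false;
      private boolean blockContentsStale = true;   // stale until proven fresh

      void markStaleAfterFailover() {        // called for every DN on NN failover
        heartbeatedSinceFailover = false;
        blockContentsStale = true;
      }
      void heartbeatReceived() {             // the patch sets this in updateHeartbeat()
        heartbeatedSinceFailover = true;
      }
      void receivedBlockReport() {           // a report only counts once the DN has
        if (heartbeatedSinceFailover) {      // heartbeated this NN since the failover
          blockContentsStale = false;
        }
      }
      boolean areBlockContentsStale() { return blockContentsStale; }
    }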
+ */ + private boolean blockContentsStale = true; + // A system administrator can tune the balancer bandwidth parameter // (dfs.balance.bandwidthPerSec) dynamically by calling // "dfsadmin -setBalanacerBandwidth ", at which point the @@ -281,6 +303,14 @@ public class DatanodeDescriptor extends DatanodeInfo { this.invalidateBlocks.clear(); this.volumeFailures = 0; } + + public void clearBlockQueues() { + synchronized (invalidateBlocks) { + this.invalidateBlocks.clear(); + this.recoverBlocks.clear(); + this.replicateBlocks.clear(); + } + } public int numBlocks() { return numBlocks; @@ -298,6 +328,7 @@ public class DatanodeDescriptor extends DatanodeInfo { this.lastUpdate = System.currentTimeMillis(); this.xceiverCount = xceiverCount; this.volumeFailures = volFailures; + this.heartbeatedSinceFailover = true; rollBlocksScheduled(lastUpdate); } @@ -564,5 +595,36 @@ public class DatanodeDescriptor extends DatanodeInfo { this.bandwidth = bandwidth; } + public boolean areBlockContentsStale() { + return blockContentsStale; + } + public void markStaleAfterFailover() { + heartbeatedSinceFailover = false; + blockContentsStale = true; + } + + public void receivedBlockReport() { + if (heartbeatedSinceFailover) { + blockContentsStale = false; + } + } + + @Override + public String dumpDatanode() { + StringBuilder sb = new StringBuilder(super.dumpDatanode()); + int repl = replicateBlocks.size(); + if (repl > 0) { + sb.append(" ").append(repl).append(" blocks to be replicated;"); + } + int inval = invalidateBlocks.size(); + if (inval > 0) { + sb.append(" ").append(inval).append(" blocks to be invalidated;"); + } + int recover = recoverBlocks.size(); + if (recover > 0) { + sb.append(" ").append(recover).append(" blocks to be recovered;"); + } + return sb.toString(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 0996fb71203..51ce52f1b1d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -945,4 +945,27 @@ public class DatanodeManager { } } } + + public void markAllDatanodesStale() { + LOG.info("Marking all datandoes as stale"); + synchronized (datanodeMap) { + for (DatanodeDescriptor dn : datanodeMap.values()) { + dn.markStaleAfterFailover(); + } + } + } + + /** + * Clear any actions that are queued up to be sent to the DNs + * on their next heartbeats. This includes block invalidations, + * recoveries, and replication requests. 
+ */ + public void clearPendingQueues() { + synchronized (datanodeMap) { + for (DatanodeDescriptor dn : datanodeMap.values()) { + dn.clearBlockQueues(); + } + } + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java index 2c6b46f0509..5c7e0bdca19 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/InvalidateBlocks.java @@ -160,4 +160,9 @@ class InvalidateBlocks { numBlocks -= toInvalidate.size(); return toInvalidate; } + + synchronized void clear() { + node2blocks.clear(); + numBlocks = 0; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java index 52f62587b1c..9e5c8dfd5ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/NumberReplicas.java @@ -26,20 +26,22 @@ public class NumberReplicas { private int decommissionedReplicas; private int corruptReplicas; private int excessReplicas; + private int replicasOnStaleNodes; NumberReplicas() { - initialize(0, 0, 0, 0); + initialize(0, 0, 0, 0, 0); } - NumberReplicas(int live, int decommissioned, int corrupt, int excess) { - initialize(live, decommissioned, corrupt, excess); + NumberReplicas(int live, int decommissioned, int corrupt, int excess, int stale) { + initialize(live, decommissioned, corrupt, excess, stale); } - void initialize(int live, int decommissioned, int corrupt, int excess) { + void initialize(int live, int decommissioned, int corrupt, int excess, int stale) { liveReplicas = live; decommissionedReplicas = decommissioned; corruptReplicas = corrupt; excessReplicas = excess; + replicasOnStaleNodes = stale; } public int liveReplicas() { @@ -54,4 +56,13 @@ public class NumberReplicas { public int excessReplicas() { return excessReplicas; } + + /** + * @return the number of replicas which are on stale nodes. + * This is not mutually exclusive with the other counts -- ie a + * replica may count as both "live" and "stale". 
+ */ + public int replicasOnStaleNodes() { + return replicasOnStaleNodes; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java index e07cf9bb2a0..e200ed0deab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingReplicationBlocks.java @@ -104,6 +104,14 @@ class PendingReplicationBlocks { } } + + public void clear() { + synchronized (pendingReplications) { + pendingReplications.clear(); + timedOutItems.clear(); + } + } + /** * The total number of blocks that are undergoing replication */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index 5b905966e6a..e944ba1ea79 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -498,6 +498,16 @@ class BPOfferService { } } + /** + * Run an immediate deletion report on this thread. Used by tests. + */ + @VisibleForTesting + void triggerDeletionReportForTests() throws IOException { + for (BPServiceActor actor : bpServices) { + actor.triggerDeletionReportForTests(); + } + } + /** * Run an immediate heartbeat from all actors. Used by tests. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index 2d1439ebdee..0459ceb2e0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -281,8 +281,18 @@ class BPServiceActor implements Runnable { */ @VisibleForTesting void triggerBlockReportForTests() throws IOException { + synchronized (receivedAndDeletedBlockList) { lastBlockReport = 0; - blockReport(); + lastHeartbeat = 0; + receivedAndDeletedBlockList.notifyAll(); + while (lastBlockReport == 0) { + try { + receivedAndDeletedBlockList.wait(100); + } catch (InterruptedException e) { + return; + } + } + } } @VisibleForTesting @@ -290,6 +300,29 @@ class BPServiceActor implements Runnable { synchronized (receivedAndDeletedBlockList) { lastHeartbeat = 0; receivedAndDeletedBlockList.notifyAll(); + while (lastHeartbeat == 0) { + try { + receivedAndDeletedBlockList.wait(100); + } catch (InterruptedException e) { + return; + } + } + } + } + + @VisibleForTesting + void triggerDeletionReportForTests() throws IOException { + synchronized (receivedAndDeletedBlockList) { + lastDeletedReport = 0; + receivedAndDeletedBlockList.notifyAll(); + + while (lastDeletedReport == 0) { + try { + receivedAndDeletedBlockList.wait(100); + } catch (InterruptedException e) { + return; + } + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetAsyncDiskService.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetAsyncDiskService.java index 408a6afc472..89272b2ecfa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetAsyncDiskService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/FSDatasetAsyncDiskService.java @@ -107,6 +107,14 @@ class FSDatasetAsyncDiskService { } + synchronized long countPendingDeletions() { + long count = 0; + for (ThreadPoolExecutor exec : executors.values()) { + count += exec.getTaskCount() - exec.getCompletedTaskCount(); + } + return count; + } + /** * Execute the task sometime in the future, using ThreadPools. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 4c4aac3e951..4746bdb448e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -71,6 +71,7 @@ import java.io.FileNotFoundException; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; +import java.io.StringWriter; import java.lang.management.ManagementFactory; import java.net.InetAddress; import java.net.URI; @@ -508,6 +509,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats, "taking over writer role in edits logs."); editLogTailer.catchupDuringFailover(); + LOG.info("Reprocessing replication and invalidation queues..."); + blockManager.getDatanodeManager().markAllDatanodesStale(); + blockManager.clearQueues(); + blockManager.processMisReplicatedBlocks(); + + if (LOG.isDebugEnabled()) { + LOG.debug("NameNode metadata after re-processing " + + "replication and invalidation queues during failover:\n" + + metaSaveAsString()); + } + long nextTxId = dir.fsImage.getLastAppliedTxId() + 1; LOG.info("Will take over writing edit logs at txnid " + nextTxId); @@ -523,7 +535,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeUnlock(); } } - + /** * Stop services required in active state * @throws InterruptedException @@ -781,14 +793,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, File file = new File(System.getProperty("hadoop.log.dir"), filename); PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(file, true))); - - long totalInodes = this.dir.totalInodes(); - long totalBlocks = this.getBlocksTotal(); - out.println(totalInodes + " files and directories, " + totalBlocks - + " blocks = " + (totalInodes + totalBlocks) + " total"); - - blockManager.metaSave(out); - + metaSave(out); out.flush(); out.close(); } finally { @@ -796,6 +801,25 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } + private void metaSave(PrintWriter out) { + assert hasWriteLock(); + long totalInodes = this.dir.totalInodes(); + long totalBlocks = this.getBlocksTotal(); + out.println(totalInodes + " files and directories, " + totalBlocks + + " blocks = " + (totalInodes + totalBlocks) + " total"); + + blockManager.metaSave(out); + } + + private String metaSaveAsString() { + StringWriter sw = new StringWriter(); + PrintWriter pw = new PrintWriter(sw); + metaSave(pw); + pw.flush(); + return sw.toString(); + } + + long getDefaultBlockSize() { return serverDefaults.getBlockSize(); } @@ -3605,6 
+3629,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, @Override public boolean isPopulatingReplQueues() { + if (!haContext.getState().shouldPopulateReplQueues()) { + return false; + } // safeMode is volatile, and may be set to null at any time SafeModeInfo safeMode = this.safeMode; if (safeMode == null) @@ -3938,6 +3965,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return blockManager.getExcessBlocksCount(); } + @Metric + public long getPostponedMisreplicatedBlocks() { + return blockManager.getPostponedMisreplicatedBlocksCount(); + } + @Metric public int getBlockCapacity() { return blockManager.getCapacity(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java index 6da3b8ecead..a61e134cc72 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ActiveState.java @@ -41,6 +41,11 @@ public class ActiveState extends HAState { return; // Other than journal all operations are allowed in active state } + @Override + public boolean shouldPopulateReplQueues() { + return true; + } + @Override public void setState(HAContext context, HAState s) throws ServiceFailedException { if (s == NameNode.STANDBY_STATE) { @@ -67,4 +72,5 @@ public class ActiveState extends HAState { throw new ServiceFailedException("Failed to stop active services", e); } } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java index 20c09d5f4e7..2f0b6ff1a6d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java @@ -106,9 +106,12 @@ abstract public class HAState { public abstract void checkOperation(final HAContext context, final OperationCategory op) throws StandbyException; + public abstract boolean shouldPopulateReplQueues(); + /** * @return String representation of the service state. 
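The new shouldPopulateReplQueues() hook lets the HA state veto replication-queue population before the existing safe-mode logic is consulted: ActiveState answers true, StandbyState answers false, and FSNamesystem.isPopulatingReplQueues() returns false immediately on a standby. A compact stand-in illustration (placeholder types, not the real HAState hierarchy; the boolean parameter stands in for whatever the safe-mode check would decide):

    interface ReplQueueGate { boolean shouldPopulateReplQueues(); }

    final class ActiveGate implements ReplQueueGate {
      public boolean shouldPopulateReplQueues() { return true; }
    }
    final class StandbyGate implements ReplQueueGate {
      public boolean shouldPopulateReplQueues() { return false; }
    }

    final class ReplQueueCheck {
      private final ReplQueueGate haState;
      ReplQueueCheck(ReplQueueGate haState) { this.haState = haState; }
      // The HA state is consulted first; a standby never builds replication
      // queues, whatever the (stand-in) safe-mode result would allow.
      boolean isPopulatingReplQueues(boolean safeModeWouldAllow) {
        return haState.shouldPopulateReplQueues() && safeModeWouldAllow;
      }
    }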
*/ + @Override public String toString() { return state.toString(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java index b22b2e43ed8..ec0dcec9964 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java @@ -80,5 +80,10 @@ public class StandbyState extends HAState { + context.getState(); throw new StandbyException(msg); } + + @Override + public boolean shouldPopulateReplQueues() { + return false; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index f4732986364..ab7d0197d68 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hdfs.protocolR23Compatible.ClientNamenodeWireProtocol; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.apache.hadoop.hdfs.server.datanode.DataStorage; import org.apache.hadoop.hdfs.server.datanode.FSDatasetInterface; @@ -1574,6 +1575,30 @@ public class MiniDFSCluster { ServiceFailedException { getHaServiceClient(nnIndex).transitionToStandby(); } + + + public void triggerBlockReports() + throws IOException { + for (DataNode dn : getDataNodes()) { + DataNodeAdapter.triggerBlockReport(dn); + } + } + + + public void triggerDeletionReports() + throws IOException { + for (DataNode dn : getDataNodes()) { + DataNodeAdapter.triggerDeletionReport(dn); + } + } + + public void triggerHeartbeats() + throws IOException { + for (DataNode dn : getDataNodes()) { + DataNodeAdapter.triggerHeartbeat(dn); + } + } + /** Wait until the given namenode gets registration from all the datanodes */ public void waitActive(int nnIndex) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java index 2d2406b12b9..38de3deba81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java @@ -122,4 +122,26 @@ public class BlockManagerTestUtil { return blockManager.computeDatanodeWork(); } + public static int computeInvalidationWork(BlockManager bm) { + return bm.computeInvalidateWork(Integer.MAX_VALUE); + } + + /** + * Compute all the replication and invalidation work for the + * given BlockManager. + * + * This differs from the above functions in that it computes + * replication work for all DNs rather than a particular subset, + * regardless of invalidation/replication limit configurations. 
+ * + * NB: you may want to set + * {@link DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY} to + * a high value to ensure that all work is calculated. + */ + public static int computeAllPendingWork(BlockManager bm) + throws IOException { + int work = computeInvalidationWork(bm); + work += bm.computeReplicationWork(Integer.MAX_VALUE); + return work; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeAdapter.java index 8595c94d340..f58c630d9cf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeAdapter.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.server.datanode; +import java.io.IOException; + /** * WARNING!! This is TEST ONLY class: it never has to be used * for ANY development purposes. @@ -42,4 +44,27 @@ public class DataNodeAdapter { boolean heartbeatsDisabledForTests) { dn.setHeartbeatsDisabledForTests(heartbeatsDisabledForTests); } + + public static void triggerDeletionReport(DataNode dn) throws IOException { + for (BPOfferService bpos : dn.getAllBpOs()) { + bpos.triggerDeletionReportForTests(); + } + } + + public static void triggerHeartbeat(DataNode dn) throws IOException { + for (BPOfferService bpos : dn.getAllBpOs()) { + bpos.triggerHeartbeatForTests(); + } + } + + public static void triggerBlockReport(DataNode dn) throws IOException { + for (BPOfferService bpos : dn.getAllBpOs()) { + bpos.triggerBlockReportForTests(); + } + } + + public static long getPendingAsyncDeletions(DataNode dn) { + FSDataset fsd = (FSDataset)dn.getFSDataset(); + return fsd.asyncDiskService.countPendingDeletions(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index d05df3eaef1..551588425b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -81,6 +81,11 @@ public class NameNodeAdapter { namenode.getNamesystem().leaveSafeMode(checkForUpgrades); } + public static void abortEditLogs(NameNode nn) { + FSEditLog el = nn.getFSImage().getEditLog(); + el.abortCurrentLogSegment(); + } + /** * Get the internal RPC server instance. * @return rpc server diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java new file mode 100644 index 00000000000..8ac86ca5dda --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java @@ -0,0 +1,469 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; +import java.net.URISyntaxException; +import java.util.Collection; +import java.util.List; +import java.util.concurrent.TimeoutException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault; +import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; +import org.apache.hadoop.hdfs.server.namenode.FSClusterStats; +import org.apache.hadoop.hdfs.server.namenode.FSInodeInfo; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.log4j.Level; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.google.common.base.Supplier; +import com.google.common.collect.Lists; + + +public class TestDNFencing { + + protected static final Log LOG = LogFactory.getLog( + TestDNFencing.class); + private static final String TEST_FILE_DATA = "hello highly available world"; + private static final String TEST_FILE = "/testStandbyIsHot"; + private static final Path TEST_FILE_PATH = new Path(TEST_FILE); + private static final int SMALL_BLOCK = 1024; + + private Configuration conf; + private MiniDFSCluster cluster; + private NameNode nn1, nn2; + private FileSystem fs; + + static { + ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); + ((Log4JLogger)LogFactory.getLog(BlockManager.class)).getLogger().setLevel(Level.ALL); + ((Log4JLogger)NameNode.stateChangeLog).getLogger().setLevel(Level.ALL); + } + + @Before + public void setupCluster() throws Exception { + conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, SMALL_BLOCK); + // Bump up replication interval so that we only run replication + // checks explicitly. 
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 600); + // Increase max streams so that we re-replicate quickly. + conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 1000); + // See RandomDeleterPolicy javadoc. + conf.setClass("dfs.block.replicator.classname", RandomDeleterPolicy.class, + BlockPlacementPolicy.class); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(3) + .build(); + nn1 = cluster.getNameNode(0); + nn2 = cluster.getNameNode(1); + + cluster.waitActive(); + cluster.transitionToActive(0); + // Trigger block reports so that the first NN trusts all + // of the DNs, and will issue deletions + cluster.triggerBlockReports(); + nn2.getNamesystem().getEditLogTailer().setSleepTime(250); + nn2.getNamesystem().getEditLogTailer().interrupt(); + fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + } + + @After + public void shutdownCluster() throws Exception { + if (cluster != null) { + banner("Shutting down cluster. NN1 metadata:"); + doMetasave(nn1); + banner("Shutting down cluster. NN2 metadata:"); + doMetasave(nn2); + cluster.shutdown(); + } + } + + + @Test + public void testDnFencing() throws Exception { + // Create a file with replication level 3. + DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30*SMALL_BLOCK, (short)3, 1L); + ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, TEST_FILE_PATH); + + // Drop its replication count to 1, so it becomes over-replicated. + // Then compute the invalidation of the extra blocks and trigger + // heartbeats so the invalidations are flushed to the DNs. + nn1.getRpcServer().setReplication(TEST_FILE, (short) 1); + BlockManagerTestUtil.computeInvalidationWork( + nn1.getNamesystem().getBlockManager()); + cluster.triggerHeartbeats(); + + // Transition nn2 to active even though nn1 still thinks it's active. + banner("Failing to NN2 but let NN1 continue to think it's active"); + NameNodeAdapter.abortEditLogs(nn1); + NameNodeAdapter.enterSafeMode(nn1, false); + cluster.transitionToActive(1); + + // Check that the standby picked up the replication change. + assertEquals(1, + nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication()); + + // Dump some info for debugging purposes. + banner("NN2 Metadata immediately after failover"); + doMetasave(nn2); + + // Even though NN2 considers the blocks over-replicated, it should + // post-pone the block invalidation because the DNs are still "stale". + assertEquals(30, nn2.getNamesystem().getPostponedMisreplicatedBlocks()); + + banner("Triggering heartbeats and block reports so that fencing is completed"); + cluster.triggerHeartbeats(); + cluster.triggerBlockReports(); + + banner("Metadata after nodes have all block-reported"); + doMetasave(nn2); + + // The blocks should no longer be postponed. 
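+    // ("Postponed" blocks are the over-replicated replicas whose invalidation the
+    // new active deferred while the DNs were still considered stale; once every DN
+    // has heartbeated and block-reported to NN2, the count should drop to zero.)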
+ assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks()); + + // Wait for NN2 to enact its deletions (replication monitor has to run, etc) + BlockManagerTestUtil.computeInvalidationWork( + nn2.getNamesystem().getBlockManager()); + cluster.triggerHeartbeats(); + waitForDNDeletions(cluster); + cluster.triggerDeletionReports(); + assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks()); + assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks()); + + banner("Making sure the file is still readable"); + FileSystem fs2 = cluster.getFileSystem(1); + DFSTestUtil.readFile(fs2, TEST_FILE_PATH); + + banner("Waiting for the actual block files to get deleted from DNs."); + waitForTrueReplication(cluster, block, 1); + } + + /** + * Test case which restarts the standby node in such a way that, + * when it exits safemode, it will want to invalidate a bunch + * of over-replicated block replicas. Ensures that if we failover + * at this point it won't lose data. + */ + @Test + public void testNNClearsCommandsOnFailoverAfterStartup() + throws Exception { + // Make lots of blocks to increase chances of triggering a bug. + DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30*SMALL_BLOCK, (short)3, 1L); + + banner("Shutting down NN2"); + cluster.shutdownNameNode(1); + + banner("Setting replication to 1, rolling edit log."); + nn1.getRpcServer().setReplication(TEST_FILE, (short) 1); + nn1.getRpcServer().rollEditLog(); + + // Start NN2 again. When it starts up, it will see all of the + // blocks as over-replicated, since it has the metadata for + // replication=1, but the DNs haven't yet processed the deletions. + banner("Starting NN2 again."); + cluster.restartNameNode(1); + nn2 = cluster.getNameNode(1); + + banner("triggering BRs"); + cluster.triggerBlockReports(); + + // We expect that both NN1 and NN2 will have some number of + // deletions queued up for the DNs. + banner("computing invalidation on nn1"); + BlockManagerTestUtil.computeInvalidationWork( + nn1.getNamesystem().getBlockManager()); + + banner("computing invalidation on nn2"); + BlockManagerTestUtil.computeInvalidationWork( + nn2.getNamesystem().getBlockManager()); + + // Dump some info for debugging purposes. + banner("Metadata immediately before failover"); + doMetasave(nn2); + + + // Transition nn2 to active even though nn1 still thinks it's active + banner("Failing to NN2 but let NN1 continue to think it's active"); + NameNodeAdapter.abortEditLogs(nn1); + NameNodeAdapter.enterSafeMode(nn1, false); + + cluster.transitionToActive(1); + + // Check that the standby picked up the replication change. + assertEquals(1, + nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication()); + + // Dump some info for debugging purposes. + banner("Metadata immediately after failover"); + doMetasave(nn2); + + banner("Triggering heartbeats and block reports so that fencing is completed"); + cluster.triggerHeartbeats(); + cluster.triggerBlockReports(); + + banner("Metadata after nodes have all block-reported"); + doMetasave(nn2); + + // The block should no longer be postponed. 
+ assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks()); + + // Wait for NN2 to enact its deletions (replication monitor has to run, etc) + BlockManagerTestUtil.computeInvalidationWork( + nn2.getNamesystem().getBlockManager()); + + waitForNNToIssueDeletions(nn2); + cluster.triggerHeartbeats(); + waitForDNDeletions(cluster); + cluster.triggerDeletionReports(); + assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks()); + assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks()); + + banner("Making sure the file is still readable"); + FileSystem fs2 = cluster.getFileSystem(1); + DFSTestUtil.readFile(fs2, TEST_FILE_PATH); + } + + /** + * Test case that reduces replication of a file with a lot of blocks + * and then fails over right after those blocks enter the DN invalidation + * queues on the active. Ensures that fencing is correct and no replicas + * are lost. + */ + @Test + public void testNNClearsCommandsOnFailoverWithReplChanges() + throws Exception { + // Make lots of blocks to increase chances of triggering a bug. + DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30*SMALL_BLOCK, (short)1, 1L); + + banner("rolling NN1's edit log, forcing catch-up"); + TestEditLogTailer.waitForStandbyToCatchUp(nn1, nn2); + + // Get some new replicas reported so that NN2 now considers + // them over-replicated and schedules some more deletions + nn1.getRpcServer().setReplication(TEST_FILE, (short) 2); + while (BlockManagerTestUtil.getComputedDatanodeWork( + nn1.getNamesystem().getBlockManager()) > 0) { + LOG.info("Getting more replication work computed"); + } + BlockManager bm1 = nn1.getNamesystem().getBlockManager(); + while (bm1.getPendingReplicationBlocksCount() > 0) { + BlockManagerTestUtil.updateState(bm1); + cluster.triggerHeartbeats(); + Thread.sleep(1000); + } + + banner("triggering BRs"); + cluster.triggerBlockReports(); + + nn1.getRpcServer().setReplication(TEST_FILE, (short) 1); + + + banner("computing invalidation on nn1"); + + BlockManagerTestUtil.computeInvalidationWork( + nn1.getNamesystem().getBlockManager()); + doMetasave(nn1); + + banner("computing invalidation on nn2"); + BlockManagerTestUtil.computeInvalidationWork( + nn2.getNamesystem().getBlockManager()); + doMetasave(nn2); + + // Dump some info for debugging purposes. + banner("Metadata immediately before failover"); + doMetasave(nn2); + + + // Transition nn2 to active even though nn1 still thinks it's active + banner("Failing to NN2 but let NN1 continue to think it's active"); + NameNodeAdapter.abortEditLogs(nn1); + NameNodeAdapter.enterSafeMode(nn1, false); + + + BlockManagerTestUtil.computeInvalidationWork( + nn2.getNamesystem().getBlockManager()); + cluster.transitionToActive(1); + + // Check that the standby picked up the replication change. + assertEquals(1, + nn2.getRpcServer().getFileInfo(TEST_FILE).getReplication()); + + // Dump some info for debugging purposes. + banner("Metadata immediately after failover"); + doMetasave(nn2); + + banner("Triggering heartbeats and block reports so that fencing is completed"); + cluster.triggerHeartbeats(); + cluster.triggerBlockReports(); + + banner("Metadata after nodes have all block-reported"); + doMetasave(nn2); + + // The block should no longer be postponed. 
+ assertEquals(0, nn2.getNamesystem().getPostponedMisreplicatedBlocks()); + + // Wait for NN2 to enact its deletions (replication monitor has to run, etc) + BlockManagerTestUtil.computeInvalidationWork( + nn2.getNamesystem().getBlockManager()); + + waitForNNToIssueDeletions(nn2); + cluster.triggerHeartbeats(); + waitForDNDeletions(cluster); + cluster.triggerDeletionReports(); + assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks()); + assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks()); + + banner("Making sure the file is still readable"); + FileSystem fs2 = cluster.getFileSystem(1); + DFSTestUtil.readFile(fs2, TEST_FILE_PATH); + } + + /** + * Print a big banner in the test log to make debug easier. + */ + private void banner(String string) { + LOG.info("\n\n\n\n================================================\n" + + string + "\n" + + "==================================================\n\n"); + } + + private void doMetasave(NameNode nn2) { + nn2.getNamesystem().writeLock(); + try { + PrintWriter pw = new PrintWriter(System.err); + nn2.getNamesystem().getBlockManager().metaSave(pw); + pw.flush(); + } finally { + nn2.getNamesystem().writeUnlock(); + } + } + + private void waitForTrueReplication(final MiniDFSCluster cluster, + final ExtendedBlock block, final int waitFor) throws Exception { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + try { + return getTrueReplication(cluster, block) == waitFor; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }, 500, 10000); + } + + private int getTrueReplication(MiniDFSCluster cluster, ExtendedBlock block) + throws IOException { + int count = 0; + for (DataNode dn : cluster.getDataNodes()) { + if (dn.getFSDataset().getStoredBlock(block.getBlockPoolId(), block.getBlockId()) != null) { + count++; + } + } + return count; + } + + private void waitForDNDeletions(final MiniDFSCluster cluster) + throws TimeoutException, InterruptedException { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + for (DataNode dn : cluster.getDataNodes()) { + if (DataNodeAdapter.getPendingAsyncDeletions(dn) > 0) { + return false; + } + } + return true; + } + }, 1000, 10000); + + } + + private void waitForNNToIssueDeletions(final NameNode nn) + throws Exception { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + LOG.info("Waiting for NN to issue block deletions to DNs"); + return nn.getNamesystem().getBlockManager().getPendingDeletionBlocksCount() == 0; + } + }, 250, 10000); + } + + /** + * A BlockPlacementPolicy which, rather than using space available, makes + * random decisions about which excess replica to delete. This is because, + * in the test cases, the two NNs will usually (but not quite always) + * make the same decision of which replica to delete. The fencing issues + * are exacerbated when the two NNs make different decisions, which can + * happen in "real life" when they have slightly out-of-sync heartbeat + * information regarding disk usage. + */ + public static class RandomDeleterPolicy extends BlockPlacementPolicyDefault { + + public RandomDeleterPolicy() { + super(); + } + + @Override + public DatanodeDescriptor chooseReplicaToDelete(FSInodeInfo inode, + Block block, short replicationFactor, + Collection first, + Collection second) { + + Collection chooseFrom = + !first.isEmpty() ? 
first : second; + + List l = Lists.newArrayList(chooseFrom); + return l.get(DFSUtil.getRandom().nextInt(l.size())); + } + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java new file mode 100644 index 00000000000..b500c602850 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java @@ -0,0 +1,204 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.IOException; +import java.util.concurrent.TimeoutException; + +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; +import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; +import org.apache.log4j.Level; +import org.junit.Assert; +import org.junit.Test; + +import com.google.common.base.Supplier; + + +/** + * Stress-test for potential bugs when replication is changing + * on blocks during a failover. + */ +public class TestDNFencingWithReplication { + static { + ((Log4JLogger)FSNamesystem.auditLog).getLogger().setLevel(Level.WARN); + ((Log4JLogger)Server.LOG).getLogger().setLevel(Level.FATAL); + ((Log4JLogger)LogFactory.getLog( + "org.apache.hadoop.io.retry.RetryInvocationHandler")) + .getLogger().setLevel(Level.FATAL); + } + + private static final int NUM_THREADS = 20; + // How long should the test try to run for. In practice + // it runs for ~20-30s longer than this constant due to startup/ + // shutdown time. 
+ private static final long RUNTIME = 35000; + private static final int BLOCK_SIZE = 1024; + + private static class ReplicationToggler extends RepeatingTestThread { + private final FileSystem fs; + private final Path path; + + public ReplicationToggler(TestContext ctx, FileSystem fs, Path p) { + super(ctx); + this.fs = fs; + this.path = p; + } + + @Override + public void doAnAction() throws Exception { + fs.setReplication(path, (short)1); + waitForReplicas(1); + fs.setReplication(path, (short)2); + waitForReplicas(2); + } + + private void waitForReplicas(final int replicas) throws Exception { + try { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + try { + BlockLocation[] blocks = fs.getFileBlockLocations(path, 0, 10); + Assert.assertEquals(1, blocks.length); + return blocks[0].getHosts().length == replicas; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }, 100, 60000); + } catch (TimeoutException te) { + throw new IOException("Timed out waiting for " + replicas + " replicas " + + "on path " + path); + } + } + + public String toString() { + return "Toggler for " + path; + } + } + + @Test + public void testFencingStress() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000); + conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + // Increase max streams so that we re-replicate quickly. + conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 1000); + + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(3) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + + final NameNode nn1 = cluster.getNameNode(0); + final NameNode nn2 = cluster.getNameNode(1); + nn2.getNamesystem().getEditLogTailer().setSleepTime(250); + nn2.getNamesystem().getEditLogTailer().interrupt(); + + FileSystem fs = TestDFSClientFailover.configureFailoverFs( + cluster, conf); + TestContext togglers = new TestContext(); + for (int i = 0; i < NUM_THREADS; i++) { + Path p = new Path("/test-" + i); + DFSTestUtil.createFile(fs, p, BLOCK_SIZE*10, (short)3, (long)i); + togglers.addThread(new ReplicationToggler(togglers, fs, p)); + } + + // Start a separate thread which will make sure that replication + // happens quickly by triggering deletion reports and replication + // work calculation frequently. 
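+      // (This thread repeatedly triggers DN deletion reports and heartbeats and
+      // forces both NNs to compute pending replication/invalidation work, so
+      // deletions and re-replication keep pace with the failovers driven by the
+      // second thread below.)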
+ TestContext triggerCtx = new TestContext(); + triggerCtx.addThread(new RepeatingTestThread(triggerCtx) { + + @Override + public void doAnAction() throws Exception { + for (DataNode dn : cluster.getDataNodes()) { + DataNodeAdapter.triggerDeletionReport(dn); + DataNodeAdapter.triggerHeartbeat(dn); + } + for (int i = 0; i < 2; i++) { + NameNode nn = cluster.getNameNode(i); + BlockManagerTestUtil.computeAllPendingWork( + nn.getNamesystem().getBlockManager()); + } + Thread.sleep(500); + } + }); + + triggerCtx.addThread(new RepeatingTestThread(triggerCtx) { + + @Override + public void doAnAction() throws Exception { + System.err.println("==============================\n" + + "Failing over from 0->1\n" + + "=================================="); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + Thread.sleep(5000); + System.err.println("==============================\n" + + "Failing over from 1->0\n" + + "=================================="); + + cluster.transitionToStandby(1); + cluster.transitionToActive(0); + Thread.sleep(5000); + } + }); + + triggerCtx.startThreads(); + togglers.startThreads(); + + togglers.waitFor(RUNTIME); + togglers.stop(); + triggerCtx.stop(); + + // CHeck that the files can be read without throwing + for (int i = 0; i < NUM_THREADS; i++) { + Path p = new Path("/test-" + i); + DFSTestUtil.readFile(fs, p); + } + } finally { + System.err.println("===========================\n\n\n\n"); + cluster.shutdown(); + } + + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java index 4c398916790..876a632bc5f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java @@ -108,8 +108,7 @@ public class TestEditLogTailer { long activeTxId = active.getNamesystem().getFSImage().getEditLog() .getLastWrittenTxId(); - // TODO: we should really just ask for a log roll here - doSaveNamespace(active); + active.getRpcServer().rollEditLog(); long start = System.currentTimeMillis(); while (System.currentTimeMillis() - start < NN_LAG_TIMEOUT) { @@ -124,12 +123,4 @@ public class TestEditLogTailer { " (currently at " + standby.getNamesystem().getFSImage().getLastAppliedTxId() + ")"); } - - private static void doSaveNamespace(NameNode nn) - throws IOException { - NameNodeAdapter.enterSafeMode(nn, false); - NameNodeAdapter.saveNamespace(nn); - NameNodeAdapter.leaveSafeMode(nn, false); - } - } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java index 22604275882..298bdffa2c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -24,6 +24,7 @@ import java.util.concurrent.TimeoutException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -33,10 +34,17 @@ 
import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.log4j.Level; +import org.junit.Assert; import org.junit.Test; import com.google.common.base.Supplier; @@ -52,6 +60,12 @@ public class TestStandbyIsHot { private static final String TEST_FILE = "/testStandbyIsHot"; private static final Path TEST_FILE_PATH = new Path(TEST_FILE); + static { + ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); + ((Log4JLogger)LogFactory.getLog(BlockManager.class)).getLogger().setLevel(Level.ALL); + ((Log4JLogger)NameNode.stateChangeLog).getLogger().setLevel(Level.ALL); + } + @Test public void testStandbyIsHot() throws Exception { Configuration conf = new Configuration(); @@ -79,19 +93,40 @@ public class TestStandbyIsHot { nn1.getRpcServer().rollEditLog(); System.err.println("=================================="); - waitForBlockLocations(nn2, TEST_FILE, 3); - - nn1.stop(); - cluster.transitionToActive(1); + // Block locations should show up on standby. + LOG.info("Waiting for block locations to appear on standby node"); + waitForBlockLocations(cluster, nn2, TEST_FILE, 3); - assertEquals(TEST_FILE_DATA, DFSTestUtil.readFile(fs, TEST_FILE_PATH)); + // Trigger immediate heartbeats and block reports so + // that the active "trusts" all of the DNs + cluster.triggerHeartbeats(); + cluster.triggerBlockReports(); + + // Change replication + LOG.info("Changing replication to 1"); + fs.setReplication(TEST_FILE_PATH, (short)1); + waitForBlockLocations(cluster, nn1, TEST_FILE, 1); + + nn1.getRpcServer().rollEditLog(); + + LOG.info("Waiting for lowered replication to show up on standby"); + waitForBlockLocations(cluster, nn2, TEST_FILE, 1); + + // Change back to 3 + LOG.info("Changing replication to 3"); + fs.setReplication(TEST_FILE_PATH, (short)3); + nn1.getRpcServer().rollEditLog(); + + LOG.info("Waiting for higher replication to show up on standby"); + waitForBlockLocations(cluster, nn2, TEST_FILE, 3); } finally { cluster.shutdown(); } } - private void waitForBlockLocations(final NameNode nn, + static void waitForBlockLocations(final MiniDFSCluster cluster, + final NameNode nn, final String path, final int expectedReplicas) throws Exception { GenericTestUtils.waitFor(new Supplier() { @@ -100,8 +135,19 @@ public class TestStandbyIsHot { public Boolean get() { try { LocatedBlocks locs = NameNodeAdapter.getBlockLocations(nn, path, 0, 1000); - LOG.info("Got locs: " + locs); - return locs.getLastLocatedBlock().getLocations().length == expectedReplicas; + DatanodeInfo[] dnis = locs.getLastLocatedBlock().getLocations(); + for (DatanodeInfo dni : dnis) { + Assert.assertNotNull(dni); + } + int numReplicas = dnis.length; + + LOG.info("Got " + numReplicas + " locs: " + locs); + if (numReplicas > expectedReplicas) { + for (DataNode dn : cluster.getDataNodes()) { + DataNodeAdapter.triggerDeletionReport(dn); + } + } + return 
numReplicas == expectedReplicas; } catch (IOException e) { LOG.warn("No block locations yet: " + e.getMessage()); return false; From 15ebdd5664de4cc54676b728f933de0f6fc157be Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 30 Dec 2011 00:25:21 +0000 Subject: [PATCH 062/177] HDFS-2714. Fix test cases which use standalone FSNamesystems. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1225708 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java | 5 ++++- .../apache/hadoop/hdfs/server/namenode/TestEditLogRace.java | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 4c7fd33bdec..a322ca56332 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -75,3 +75,5 @@ HDFS-2682. When a FailoverProxyProvider is used, Client should not retry for 45 HDFS-2693. Fix synchronization issues around state transition (todd) HDFS-1972. Fencing mechanism for block invalidations and replications (todd) + +HDFS-2714. Fix test cases which use standalone FSNamesystems (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 4746bdb448e..a85b6c921ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -579,7 +579,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void checkOperation(OperationCategory op) throws StandbyException { - haContext.checkOperation(op); + if (haContext != null) { + // null in some unit tests + haContext.checkOperation(op); + } } public static Collection getNamespaceDirs(Configuration conf) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java index d3d64594ac6..a855f8ddc65 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java @@ -375,6 +375,7 @@ public class TestEditLogRace { true); LOG.info("mkdirs complete"); } catch (Throwable ioe) { + LOG.fatal("Got exception", ioe); deferredException.set(ioe); waitToEnterFlush.countDown(); } @@ -469,6 +470,7 @@ public class TestEditLogRace { true); LOG.info("mkdirs complete"); } catch (Throwable ioe) { + LOG.fatal("Got exception", ioe); deferredException.set(ioe); waitToEnterSync.countDown(); } From 20a6560bdfd8c4e3b6c3ac9b0f0f62d32e3a2191 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 30 Dec 2011 00:30:27 +0000 Subject: [PATCH 063/177] HDFS-2692. Fix bugs related to failover from/into safe mode. Contributed by Todd Lipcon. 
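(Summary of the diff below: image and edit-log loading now run under the FSNamesystem write lock, generation-stamp notifications are deferred until setBlockTotal() has been called, the safemode consistency assert is replaced by an explicit check with a descriptive message, the edit log tailer records its last error and rethrows fatal causes, and the new TestHASafeMode exercises standby safemode behavior and failover into safemode.)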
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1225709 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../server/blockmanagement/BlockManager.java | 5 +- .../hdfs/server/namenode/Checkpointer.java | 15 +- .../hdfs/server/namenode/FSEditLogLoader.java | 31 +- .../hdfs/server/namenode/FSNamesystem.java | 46 ++- .../server/namenode/ha/EditLogTailer.java | 14 + .../apache/hadoop/hdfs/MiniDFSCluster.java | 3 +- .../server/namenode/ha/TestDNFencing.java | 4 +- .../server/namenode/ha/TestHASafeMode.java | 334 ++++++++++++++++++ 9 files changed, 422 insertions(+), 32 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index a322ca56332..db4c8e881c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -77,3 +77,5 @@ HDFS-2693. Fix synchronization issues around state transition (todd) HDFS-1972. Fencing mechanism for block invalidations and replications (todd) HDFS-2714. Fix test cases which use standalone FSNamesystems (todd) + +HDFS-2692. Fix bugs related to failover from/into safe mode. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 9dba5a0e8b4..ec978f6ea10 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2481,7 +2481,10 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block public void removeBlock(Block block) { assert namesystem.hasWriteLock(); - block.setNumBytes(BlockCommand.NO_ACK); + // TODO(HA): the following causes some problems for HA: + // the SBN doesn't get block deletions until the next + // BR... 
+ // block.setNumBytes(BlockCommand.NO_ACK); addToInvalidates(block); corruptReplicas.removeFromCorruptReplicasMap(block); blocksMap.removeBlock(block); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java index 6fcf3b17a7d..4f485916b5d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java @@ -243,11 +243,16 @@ class Checkpointer extends Daemon { long txid = bnImage.getLastAppliedTxId(); - backupNode.namesystem.dir.setReady(); - backupNode.namesystem.setBlockTotal(); - - bnImage.saveFSImageInAllDirs(backupNode.getNamesystem(), txid); - bnStorage.writeAll(); + backupNode.namesystem.writeLock(); + try { + backupNode.namesystem.dir.setReady(); + backupNode.namesystem.setBlockTotal(); + + bnImage.saveFSImageInAllDirs(backupNode.getNamesystem(), txid); + bnStorage.writeAll(); + } finally { + backupNode.namesystem.writeUnlock(); + } if(cpCmd.needToReturnImage()) { TransferFsImage.uploadImageFromStorage( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index c82f425b3a4..6e9ea8e2875 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -65,6 +65,7 @@ import com.google.common.base.Joiner; @InterfaceStability.Evolving public class FSEditLogLoader { private final FSNamesystem fsNamesys; + private long maxGenStamp = 0; public FSEditLogLoader(FSNamesystem fsNamesys) { this.fsNamesys = fsNamesys; @@ -78,14 +79,19 @@ public class FSEditLogLoader { int loadFSEdits(EditLogInputStream edits, long expectedStartingTxId) throws IOException { long startTime = now(); - int numEdits = loadFSEdits(edits, true, expectedStartingTxId); - FSImage.LOG.info("Edits file " + edits.getName() - + " of size " + edits.length() + " edits # " + numEdits - + " loaded in " + (now()-startTime)/1000 + " seconds."); - return numEdits; + fsNamesys.writeLock(); + try { + int numEdits = loadFSEdits(edits, true, expectedStartingTxId); + FSImage.LOG.info("Edits file " + edits.getName() + + " of size " + edits.length() + " edits # " + numEdits + + " loaded in " + (now()-startTime)/1000 + " seconds."); + return numEdits; + } finally { + fsNamesys.writeUnlock(); + } } - int loadFSEdits(EditLogInputStream edits, boolean closeOnExit, + private int loadFSEdits(EditLogInputStream edits, boolean closeOnExit, long expectedStartingTxId) throws IOException { int numEdits = 0; @@ -95,6 +101,13 @@ public class FSEditLogLoader { numEdits = loadEditRecords(logVersion, edits, false, expectedStartingTxId); } finally { + fsNamesys.setBlockTotal(); + // Delay the notification of genstamp updates until after + // setBlockTotal() above. Otherwise, we will mark blocks + // as "safe" before they've been incorporated in the expected + // totalBlocks and threshold for SafeMode -- triggering an + // assertion failure and/or exiting safemode too early! 
+ fsNamesys.notifyGenStampUpdate(maxGenStamp); if(closeOnExit) { edits.close(); } @@ -485,9 +498,9 @@ public class FSEditLogLoader { } } - if (addCloseOp.blocks.length > 0) { - fsNamesys.notifyGenStampUpdate( - addCloseOp.blocks[addCloseOp.blocks.length - 1].getGenerationStamp()); + // Record the max genstamp seen + for (Block b : addCloseOp.blocks) { + maxGenStamp = Math.max(maxGenStamp, b.getGenerationStamp()); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index a85b6c921ad..16740fca899 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -418,6 +418,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, startOpt = StartupOption.REGULAR; } boolean success = false; + writeLock(); try { // We shouldn't be calling saveNamespace if we've come up in standby state. if (fsImage.recoverTransitionRead(startOpt, this) && !haEnabled) { @@ -434,6 +435,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (!success) { fsImage.close(); } + writeUnlock(); } dir.imageLoadComplete(); } @@ -3244,9 +3246,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @return true if in safe mode */ private synchronized boolean isOn() { - assert isConsistent() : " SafeMode: Inconsistent filesystem state: " - + "Total num of blocks, active blocks, or " - + "total safe blocks don't match."; + doConsistencyCheck(); return this.reached >= 0; } @@ -3362,6 +3362,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Check and trigger safe mode if needed. */ private void checkMode() { + // Have to have write-lock since leaving safemode initializes + // repl queues, which requires write lock + assert hasWriteLock(); if (needEnter()) { enter(); // check if we are ready to initialize replication queues @@ -3541,16 +3544,26 @@ public class FSNamesystem implements Namesystem, FSClusterStats, /** * Checks consistency of the class state. - * This is costly and currently called only in assert. - * @throws IOException + * This is costly so only runs if asserts are enabled. */ - private boolean isConsistent() { - if (blockTotal == -1 && blockSafe == -1) { - return true; // manual safe mode - } + private void doConsistencyCheck() { + boolean assertsOn = false; + assert assertsOn = true; // set to true if asserts are on + if (!assertsOn) return; + + int activeBlocks = blockManager.getActiveBlockCount(); - return (blockTotal == activeBlocks) || - (blockSafe >= 0 && blockSafe <= blockTotal); + if (blockTotal == -1 && blockSafe == -1) { + return; // manual safe mode + } + if ((blockTotal != activeBlocks) && + !(blockSafe >= 0 && blockSafe <= blockTotal)) { + throw new AssertionError( + " SafeMode: Inconsistent filesystem state: " + + "SafeMode data: blockTotal=" + blockTotal + + " blockSafe=" + blockSafe + "; " + + "BlockManager data: active=" + activeBlocks); + } } } @@ -3663,7 +3676,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, /** * Set the total number of blocks in the system. 
*/ - void setBlockTotal() { + public void setBlockTotal() { // safeMode is volatile, and may be set to null at any time SafeModeInfo safeMode = this.safeMode; if (safeMode == null) @@ -4822,10 +4835,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } public void notifyGenStampUpdate(long gs) { - LOG.info("=> notified of genstamp update for: " + gs); + if (LOG.isDebugEnabled()) { + LOG.debug("Generation stamp " + gs + " has been reached. " + + "Processing pending messages from DataNodes..."); + } DataNodeMessage msg = pendingDatanodeMessages.take(gs); while (msg != null) { - LOG.info("processing message: " + msg); + if (LOG.isDebugEnabled()) { + LOG.debug("Processing previously pending message: " + msg); + } try { switch (msg.getType()) { case BLOCK_RECEIVED_DELETE: diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index b7b1adb479c..9bded332d14 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -49,6 +49,8 @@ public class EditLogTailer { private final FSImage image; private final FSEditLog editLog; + private volatile Throwable lastError = null; + public EditLogTailer(FSNamesystem namesystem) { this.tailerThread = new EditLogTailerThread(); this.namesystem = namesystem; @@ -81,6 +83,11 @@ public class EditLogTailer { tailerThread.interrupt(); } + @VisibleForTesting + public Throwable getLastError() { + return lastError; + } + public void catchupDuringFailover() throws IOException { Preconditions.checkState(tailerThread == null || !tailerThread.isAlive(), @@ -146,12 +153,19 @@ public class EditLogTailer { try { doTailEdits(); } catch (IOException e) { + if (e.getCause() instanceof RuntimeException) { + throw (RuntimeException)e.getCause(); + } else if (e.getCause() instanceof Error) { + throw (Error)e.getCause(); + } + // Will try again LOG.info("Got error, will try again.", e); } } catch (Throwable t) { // TODO(HA): What should we do in this case? Shutdown the standby NN? 
LOG.error("Edit log tailer received throwable", t); + lastError = t; } try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index ab7d0197d68..5bdc9300b3e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -1452,7 +1452,8 @@ public class MiniDFSCluster { sizes = NameNodeAdapter.getStats(nameNode.getNamesystem()); boolean isUp = false; synchronized (this) { - isUp = ((!nameNode.isInSafeMode() || !waitSafeMode) && sizes[0] != 0); + isUp = ((!nameNode.isInSafeMode() || !waitSafeMode) && + sizes[ClientProtocol.GET_STATS_CAPACITY_IDX] != 0); } return isUp; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java index 8ac86ca5dda..c3186292d99 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java @@ -410,7 +410,7 @@ public class TestDNFencing { return count; } - private void waitForDNDeletions(final MiniDFSCluster cluster) + static void waitForDNDeletions(final MiniDFSCluster cluster) throws TimeoutException, InterruptedException { GenericTestUtils.waitFor(new Supplier() { @Override @@ -426,7 +426,7 @@ public class TestDNFencing { } - private void waitForNNToIssueDeletions(final NameNode nn) + static void waitForNNToIssueDeletions(final NameNode nn) throws Exception { GenericTestUtils.waitFor(new Supplier() { @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java new file mode 100644 index 00000000000..b69d7c6db4c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -0,0 +1,334 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests that exercise safemode in an HA cluster. + */ +public class TestHASafeMode { + private static final Log LOG = LogFactory.getLog(TestHASafeMode.class); + private static final int BLOCK_SIZE = 1024; + private NameNode nn0; + private NameNode nn1; + private FileSystem fs; + private MiniDFSCluster cluster; + + @Before + public void setupCluster() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(3) + .waitSafeMode(false) + .build(); + cluster.waitActive(); + + nn0 = cluster.getNameNode(0); + nn1 = cluster.getNameNode(1); + fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + + cluster.transitionToActive(0); + } + + @After + public void shutdownCluster() throws IOException { + if (cluster != null) { + assertNull(nn1.getNamesystem().getEditLogTailer().getLastError()); + cluster.shutdown(); + } + } + + private void restartStandby() throws IOException { + cluster.shutdownNameNode(1); + // Set the safemode extension to be lengthy, so that the tests + // can check the safemode message after the safemode conditions + // have been achieved, without being racy. + cluster.getConfiguration(1).setInt( + DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 30000); + cluster.restartNameNode(1); + nn1 = cluster.getNameNode(1); + nn1.getNamesystem().getEditLogTailer().setSleepTime(250); + nn1.getNamesystem().getEditLogTailer().interrupt(); + } + + /** + * Tests the case where, while a standby is down, more blocks are + * added to the namespace, but not rolled. So, when it starts up, + * it receives notification about the new blocks during + * the safemode extension period. + */ + @Test + public void testBlocksAddedBeforeStandbyRestart() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil.createFile(fs, new Path("/test"), 3*BLOCK_SIZE, (short) 3, 1L); + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup. + nn0.getRpcServer().rollEditLog(); + + banner("Creating some blocks that won't be in the edit log"); + DFSTestUtil.createFile(fs, new Path("/test2"), 5*BLOCK_SIZE, (short) 3, 1L); + + banner("Restarting standby"); + restartStandby(); + + // We expect it to be stuck in safemode (not the extension) because + // the block reports are delayed (since they include blocks + // from /test2 which are too-high genstamps. 
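+    // (DN reports that mention generation stamps the SBN has not yet read from its
+    // edit stream are held in the pending DataNode message queue and only replayed
+    // once notifyGenStampUpdate() reaches that genstamp -- see the FSEditLogLoader
+    // change earlier in this patch.)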
+ String status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 0 needs additional 3 blocks to reach")); + + banner("Waiting for standby to catch up to active namespace"); + TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 8 has reached the threshold 0.9990 of " + + "total blocks 8. Safe mode will be turned off automatically")); + } + + /** + * Similar to {@link #testBlocksAddedBeforeStandbyRestart()} except that + * the new blocks are allocated after the SBN has restarted. So, the + * blocks were not present in the original block reports at startup + * but are reported separately by blockReceived calls. + */ + @Test + public void testBlocksAddedWhileInSafeMode() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil.createFile(fs, new Path("/test"), 3*BLOCK_SIZE, (short) 3, 1L); + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup. + nn0.getRpcServer().rollEditLog(); + + banner("Restarting standby"); + restartStandby(); + + String status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 3 has reached the threshold 0.9990 of " + + "total blocks 3. Safe mode will be turned off automatically")); + + // Create a few blocks which will send blockReceived calls to the + // SBN. + banner("Creating some blocks while SBN is in safe mode"); + DFSTestUtil.createFile(fs, new Path("/test2"), 5*BLOCK_SIZE, (short) 3, 1L); + + + banner("Waiting for standby to catch up to active namespace"); + TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 8 has reached the threshold 0.9990 of " + + "total blocks 8. Safe mode will be turned off automatically")); + } + + /** + * Test for the following case proposed by ATM: + * 1. Both NNs are up, one is active. There are 100 blocks. Both are + * out of safemode. + * 2. 10 block deletions get processed by NN1. NN2 enqueues these DN messages + * until it next reads from a checkpointed edits file. + * 3. NN2 gets restarted. Its queues are lost. + * 4. NN2 comes up, reads from all the finalized edits files. Concludes there + * should still be 100 blocks. + * 5. NN2 receives a block report from all the DNs, which only accounts for + * 90 blocks. It doesn't leave safemode. + * 6. NN1 dies or is transitioned to standby. + * 7. NN2 is transitioned to active. It reads all the edits from NN1. It now + * knows there should only be 90 blocks, but it's still in safemode. + * 8. NN2 doesn't ever recheck whether it should leave safemode. + * + * This is essentially the inverse of {@link #testBlocksAddedWhileStandbyShutdown()} + */ + @Test + public void testBlocksRemovedBeforeStandbyRestart() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil.createFile(fs, new Path("/test"), 5*BLOCK_SIZE, (short) 3, 1L); + + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup. 
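+    // (Only finalized edit segments are read at SBN startup -- see step 4 of the
+    // scenario described above -- which is why the roll is needed here.)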
+ nn0.getRpcServer().rollEditLog(); + + // Delete those blocks again, so they won't get reported to the SBN + // once it starts up + banner("Removing the blocks without rolling the edit log"); + fs.delete(new Path("/test"), true); + BlockManagerTestUtil.computeAllPendingWork( + nn0.getNamesystem().getBlockManager()); + cluster.triggerHeartbeats(); + + banner("Restarting standby"); + restartStandby(); + String status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 0 needs additional 5 blocks to reach")); + + banner("Waiting for standby to catch up to active namespace"); + TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 0 has reached the threshold 0.9990 of " + + "total blocks 0. Safe mode will be turned off automatically")); + } + + /** + * Similar to {@link #testBlocksRemovedBeforeStandbyRestart()} except that + * the blocks are removed after the SBN has restarted. So, the + * blocks were present in the original block reports at startup + * but are deleted separately later by deletion reports. + */ + @Test + public void testBlocksRemovedWhileInSafeMode() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil.createFile(fs, new Path("/test"), 10*BLOCK_SIZE, (short) 3, 1L); + + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup. + nn0.getRpcServer().rollEditLog(); + + banner("Restarting standby"); + restartStandby(); + + // It will initially have all of the blocks necessary. + String status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 10 has reached the threshold 0.9990 of " + + "total blocks 10. Safe mode will be turned off automatically")); + + // Delete those blocks while the SBN is in safe mode - this + // should reduce it back below the threshold + banner("Removing the blocks without rolling the edit log"); + fs.delete(new Path("/test"), true); + BlockManagerTestUtil.computeAllPendingWork( + nn0.getNamesystem().getBlockManager()); + + banner("Triggering deletions on DNs and Deletion Reports"); + cluster.triggerHeartbeats(); + TestDNFencing.waitForDNDeletions(cluster); + cluster.triggerDeletionReports(); + + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 0 needs additional 10 blocks")); + + banner("Waiting for standby to catch up to active namespace"); + TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 0 has reached the threshold 0.9990 of " + + "total blocks 0. Safe mode will be turned off automatically")); + } + + /** + * Set up a namesystem with several edits, both deletions and + * additions, and failover to a new NN while that NN is in + * safemode. Ensure that it will exit safemode. 
+ */ + @Test + public void testComplexFailoverIntoSafemode() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil.createFile(fs, new Path("/test"), 3*BLOCK_SIZE, (short) 3, 1L); + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup and enter safemode. + nn0.getRpcServer().rollEditLog(); + + banner("Creating some blocks that won't be in the edit log"); + DFSTestUtil.createFile(fs, new Path("/test2"), 5*BLOCK_SIZE, (short) 3, 1L); + + banner("Deleting the original blocks"); + fs.delete(new Path("/test"), true); + + banner("Restarting standby"); + restartStandby(); + + // We expect it to be stuck in safemode (not the extension) because + // the block reports are delayed (since they include blocks + // from /test2 which are too-high genstamps. + String status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 0 needs additional 3 blocks to reach")); + + // Initiate a failover into it while it's in safemode + banner("Initiating a failover into NN1 in safemode"); + NameNodeAdapter.abortEditLogs(nn0); + cluster.transitionToActive(1); + + status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 5 has reached the threshold 0.9990 of " + + "total blocks 5. Safe mode will be turned off automatically")); + } + + /** + * Print a big banner in the test log to make debug easier. + */ + static void banner(String string) { + LOG.info("\n\n\n\n================================================\n" + + string + "\n" + + "==================================================\n\n"); + } + +} From 737df8b67b972155b12ed615e23f3f1e8e4e9ca9 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Sat, 31 Dec 2011 01:53:23 +0000 Subject: [PATCH 064/177] HDFS-2716. Configuration needs to allow different dfs.http.addresses for each HA NN. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1226020 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 28 ++++++------ .../hadoop/hdfs/server/namenode/NameNode.java | 18 +++++--- .../server/namenode/SecondaryNameNode.java | 11 ++++- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 43 ++++++++++++++++--- 5 files changed, 77 insertions(+), 25 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index db4c8e881c1..e733586305a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -79,3 +79,5 @@ HDFS-1972. Fencing mechanism for block invalidations and replications (todd) HDFS-2714. Fix test cases which use standalone FSNamesystems (todd) HDFS-2692. Fix bugs related to failover from/into safe mode. (todd) + +HDFS-2716. 
Configuration needs to allow different dfs.http.addresses for each HA NN (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 8bc17205131..29cb3b3339f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -337,7 +337,7 @@ public class DFSUtil { * @param nsId the nameservice ID to look at, or null for non-federated * @return collection of namenode Ids */ - static Collection getNameNodeIds(Configuration conf, String nsId) { + public static Collection getNameNodeIds(Configuration conf, String nsId) { String key = addSuffix(DFS_HA_NAMENODES_KEY, nsId); return conf.getTrimmedStringCollection(key); } @@ -644,24 +644,28 @@ public class DFSUtil { DFS_NAMENODE_HTTPS_ADDRESS_KEY : DFS_NAMENODE_HTTP_ADDRESS_KEY; String httpAddressDefault = (securityOn && httpsAddress) ? DFS_NAMENODE_HTTPS_ADDRESS_DEFAULT : DFS_NAMENODE_HTTP_ADDRESS_DEFAULT; + + String suffixes[]; if (namenodeAddr != null) { // if non-default namenode, try reverse look up // the nameServiceID if it is available - String nameServiceId = DFSUtil.getNameServiceIdFromAddress( - conf, namenodeAddr, + suffixes = getSuffixIDs(conf, namenodeAddr, DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); + } else { + suffixes = new String[2]; + } - if (nameServiceId != null) { - httpAddress = conf.get(DFSUtil.addKeySuffixes( - httpAddressKey, nameServiceId)); - } + return getSuffixedConf(conf, httpAddressKey, httpAddressDefault, suffixes); + } + + private static String getSuffixedConf(Configuration conf, + String key, String defaultVal, String[] suffixes) { + String ret = conf.get(DFSUtil.addKeySuffixes(key, suffixes)); + if (ret != null) { + return ret; } - // else - Use non-federation style configuration - if (httpAddress == null) { - httpAddress = conf.get(httpAddressKey, httpAddressDefault); - } - return httpAddress; + return conf.get(key, defaultVal); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 54d4d2f2901..fc0c22eeeae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -531,11 +531,12 @@ public class NameNode { this.conf = conf; this.role = role; String nsId = getNameServiceId(conf); + String namenodeId = HAUtil.getNameNodeId(conf, nsId); this.haEnabled = HAUtil.isHAEnabled(conf, nsId); this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); this.haContext = createHAContext(); try { - initializeGenericKeys(conf, nsId); + initializeGenericKeys(conf, nsId, namenodeId); initialize(conf); if (!haEnabled) { state = ACTIVE_STATE; @@ -852,17 +853,24 @@ public class NameNode { * @param conf * Configuration object to lookup specific key and to set the value * to the key passed. Note the conf object is modified - * @param nameserviceId name service Id + * @param nameserviceId name service Id (to distinguish federated NNs) + * @param namenodeId the namenode ID (to distinguish HA NNs) * @see DFSUtil#setGenericConf(Configuration, String, String, String...) 
*/ - public static void initializeGenericKeys(Configuration conf, String - nameserviceId) { - String namenodeId = HAUtil.getNameNodeId(conf, nameserviceId); + public static void initializeGenericKeys(Configuration conf, + String nameserviceId, String namenodeId) { if ((nameserviceId == null || nameserviceId.isEmpty()) && (namenodeId == null || namenodeId.isEmpty())) { return; } + if (nameserviceId != null) { + conf.set(DFS_FEDERATION_NAMESERVICE_ID, nameserviceId); + } + if (namenodeId != null) { + conf.set(DFS_HA_NAMENODE_ID_KEY, namenodeId); + } + DFSUtil.setGenericConf(conf, nameserviceId, namenodeId, NAMESERVICE_SPECIFIC_KEYS); if (conf.get(DFS_NAMENODE_RPC_ADDRESS_KEY) != null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index 11f77cc08b0..9231f11d8b8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -46,6 +46,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DFSUtil.ErrorSimulator; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; @@ -177,8 +178,14 @@ public class SecondaryNameNode implements Runnable { public SecondaryNameNode(Configuration conf, CommandLineOpts commandLineOpts) throws IOException { try { - NameNode.initializeGenericKeys(conf, - DFSUtil.getSecondaryNameServiceId(conf)); + String nsId = DFSUtil.getSecondaryNameServiceId(conf); + if (HAUtil.isHAEnabled(conf, nsId)) { + LOG.fatal("Cannot use SecondaryNameNode in an HA cluster." + + " The Standby Namenode will perform checkpointing."); + shutdown(); + return; + } + NameNode.initializeGenericKeys(conf, nsId, null); initialize(conf, commandLineOpts); } catch(IOException e) { shutdown(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index 5fb5bd70e8d..9773a50d54a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -258,20 +258,51 @@ public class TestDFSUtil { * copied to generic keys when the namenode starts. 
*/ @Test - public void testConfModification() { + public void testConfModificationFederationOnly() { final HdfsConfiguration conf = new HdfsConfiguration(); - conf.set(DFS_FEDERATION_NAMESERVICES, "nn1"); - conf.set(DFS_FEDERATION_NAMESERVICE_ID, "nn1"); - final String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); + String nsId = "ns1"; + + conf.set(DFS_FEDERATION_NAMESERVICES, nsId); + conf.set(DFS_FEDERATION_NAMESERVICE_ID, nsId); // Set the nameservice specific keys with nameserviceId in the config key for (String key : NameNode.NAMESERVICE_SPECIFIC_KEYS) { // Note: value is same as the key - conf.set(DFSUtil.addKeySuffixes(key, nameserviceId), key); + conf.set(DFSUtil.addKeySuffixes(key, nsId), key); } // Initialize generic keys from specific keys - NameNode.initializeGenericKeys(conf, nameserviceId); + NameNode.initializeGenericKeys(conf, nsId, null); + + // Retrieve the keys without nameserviceId and Ensure generic keys are set + // to the correct value + for (String key : NameNode.NAMESERVICE_SPECIFIC_KEYS) { + assertEquals(key, conf.get(key)); + } + } + + /** + * Test to ensure nameservice specific keys in the configuration are + * copied to generic keys when the namenode starts. + */ + @Test + public void testConfModificationFederationAndHa() { + final HdfsConfiguration conf = new HdfsConfiguration(); + String nsId = "ns1"; + String nnId = "nn1"; + + conf.set(DFS_FEDERATION_NAMESERVICES, nsId); + conf.set(DFS_FEDERATION_NAMESERVICE_ID, nsId); + conf.set(DFS_HA_NAMENODES_KEY + "." + nsId, nnId); + + // Set the nameservice specific keys with nameserviceId in the config key + for (String key : NameNode.NAMESERVICE_SPECIFIC_KEYS) { + // Note: value is same as the key + conf.set(DFSUtil.addKeySuffixes(key, nsId, nnId), key); + } + + // Initialize generic keys from specific keys + NameNode.initializeGenericKeys(conf, nsId, nnId); // Retrieve the keys without nameserviceId and Ensure generic keys are set // to the correct value From d004ddee76694ca701bc3296b71c2164b76fa042 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 4 Jan 2012 19:50:41 +0000 Subject: [PATCH 065/177] HDFS-2720. Fix MiniDFSCluster HA support to work properly on Windows. Contributed by Uma Maheswara Rao G. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1227284 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../apache/hadoop/hdfs/MiniDFSCluster.java | 29 +++++++++++++------ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index e733586305a..23b0eb7ea0b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -81,3 +81,5 @@ HDFS-2714. Fix test cases which use standalone FSNamesystems (todd) HDFS-2692. Fix bugs related to failover from/into safe mode. (todd) HDFS-2716. Configuration needs to allow different dfs.http.addresses for each HA NN (todd) + +HDFS-2720. Fix MiniDFSCluster HA support to work properly on Windows. 
(Uma Maheswara Rao G via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 5bdc9300b3e..9b6328374a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -601,10 +601,13 @@ public class MiniDFSCluster { } } - // Now start all the NNs in this nameservice. + // Now format first NN and copy the storage directory from that node to the others. int i = 0; + Collection prevNNDirs = null; + int nnCounterForFormat = nnCounter; for (NNConf nn : nameservice.getNNs()) { - initNameNodeConf(conf, nsId, nn.getNnId(), manageNameDfsDirs, nnCounter); + initNameNodeConf(conf, nsId, nn.getNnId(), manageNameDfsDirs, + nnCounterForFormat); boolean formatThisOne = format; if (format && i++ > 0) { @@ -613,11 +616,21 @@ public class MiniDFSCluster { // block pool ID, etc. Instead, copy the name dirs // from the first one. formatThisOne = false; - copyNameDirs(getConfiguration(nnCounter - 1), conf); + assert (null != prevNNDirs); + copyNameDirs(prevNNDirs, FSNamesystem.getNamespaceDirs(conf), conf); } - createNameNode(nnCounter++, conf, numDataNodes, formatThisOne, - operation, clusterId, nsId, nn.getNnId()); + nnCounterForFormat++; + if (formatThisOne) { + NameNode.format(conf); + } + prevNNDirs = FSNamesystem.getNamespaceDirs(conf); + } + // Start all Namenodes + for (NNConf nn : nameservice.getNNs()) { + initNameNodeConf(conf, nsId, nn.getNnId(), manageNameDfsDirs, nnCounter); + createNameNode(nnCounter++, conf, numDataNodes, false, operation, + clusterId, nsId, nn.getNnId()); } } @@ -655,10 +668,8 @@ public class MiniDFSCluster { } } - private void copyNameDirs(Configuration srcConf, Configuration dstConf) - throws IOException { - Collection srcDirs = FSNamesystem.getNamespaceDirs(srcConf); - Collection dstDirs = FSNamesystem.getNamespaceDirs(dstConf); + private void copyNameDirs(Collection srcDirs, Collection dstDirs, + Configuration dstConf) throws IOException { URI srcDir = Lists.newArrayList(srcDirs).get(0); FileSystem dstFS = FileSystem.getLocal(dstConf).getRaw(); for (URI dstDir : dstDirs) { From 5b8dcb20a2fad2e7e9dee56c451f68f9d865b5ae Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 5 Jan 2012 00:22:54 +0000 Subject: [PATCH 066/177] HDFS-2291. Allow the StandbyNode to make checkpoints in an HA setup. Contributed by Todd Lipcon. 
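For reviewers, a rough sketch of how the pieces below fit together. This is
illustrative only and not part of the patch: the constants and classes
(DFSConfigKeys, CheckpointConf, StandbyCheckpointer, TransferFsImage) come from
the diff, while the numeric values and the local variables in the snippet are
made-up examples.

    // Assumes org.apache.hadoop.conf.Configuration and the HDFS classes
    // added or modified in this patch are on the classpath.
    Configuration conf = new Configuration();
    // New key in this patch; standby checkpointing is on by default.
    conf.setBoolean(DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY, true);
    // Existing checkpoint knobs, now also read by the standby via CheckpointConf.
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 100000L);  // example value
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 3600L);  // example value

    CheckpointConf cc = new CheckpointConf(conf);
    long uncheckpointedTxns = 150000L;  // example: txns applied since the last checkpoint
    long secsSinceLast = 600L;          // example: seconds since the last checkpoint
    boolean needCheckpoint =
        uncheckpointedTxns >= cc.getTxnCount()
        || secsSinceLast >= cc.getPeriod();
    // When needCheckpoint is true (and a failover is not imminent), the
    // StandbyCheckpointer saves the namespace locally and then uploads the
    // image to the active NN with TransferFsImage.uploadImageFromStorage().
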
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1227411 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 2 + .../hdfs/server/namenode/CheckpointConf.java | 78 +++++ .../hdfs/server/namenode/Checkpointer.java | 38 +-- .../hdfs/server/namenode/FSEditLog.java | 2 +- .../hadoop/hdfs/server/namenode/FSImage.java | 6 +- .../hdfs/server/namenode/FSNamesystem.java | 36 ++ .../hdfs/server/namenode/GetImageServlet.java | 6 +- .../hdfs/server/namenode/NNStorage.java | 2 +- .../hadoop/hdfs/server/namenode/NameNode.java | 7 + .../SaveNamespaceCancelledException.java | 4 +- .../server/namenode/SecondaryNameNode.java | 57 +--- .../hdfs/server/namenode/TransferFsImage.java | 42 ++- .../server/namenode/ha/EditLogTailer.java | 4 +- .../hdfs/server/namenode/ha/HAContext.java | 6 +- .../hdfs/server/namenode/ha/HAState.java | 30 ++ .../namenode/ha/StandbyCheckpointer.java | 313 ++++++++++++++++++ .../hdfs/server/namenode/ha/StandbyState.java | 5 + .../apache/hadoop/hdfs/MiniDFSCluster.java | 10 + .../apache/hadoop/hdfs/MiniDFSNNTopology.java | 28 ++ .../hdfs/server/namenode/FSImageTestUtil.java | 18 +- .../hdfs/server/namenode/NameNodeAdapter.java | 7 + .../hdfs/server/namenode/TestCheckpoint.java | 16 +- .../ha/TestEditLogsDuringFailover.java | 2 +- .../namenode/ha/TestStandbyCheckpoints.java | 240 ++++++++++++++ 25 files changed, 864 insertions(+), 97 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 23b0eb7ea0b..795e65876f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -83,3 +83,5 @@ HDFS-2692. Fix bugs related to failover from/into safe mode. (todd) HDFS-2716. Configuration needs to allow different dfs.http.addresses for each HA NN (todd) HDFS-2720. Fix MiniDFSCluster HA support to work properly on Windows. (Uma Maheswara Rao G via todd) + +HDFS-2291. Allow the StandbyNode to make checkpoints in an HA setup. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index a5debe0a29d..b3fee6fc511 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -331,4 +331,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys { // HA related configuration public static final String DFS_HA_NAMENODES_KEY = "dfs.ha.namenodes"; public static final String DFS_HA_NAMENODE_ID_KEY = "dfs.ha.namenode.id"; + public static final String DFS_HA_STANDBY_CHECKPOINTS_KEY = "dfs.ha.standby.checkpoints"; + public static final boolean DFS_HA_STANDBY_CHECKPOINTS_DEFAULT = true; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java new file mode 100644 index 00000000000..8b3cf04d741 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CheckpointConf.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; + +import com.google.common.collect.ImmutableList; + +@InterfaceAudience.Private +public class CheckpointConf { + private static final Log LOG = LogFactory.getLog(CheckpointConf.class); + + /** How often to checkpoint regardless of number of txns */ + private final long checkpointPeriod; // in seconds + + /** How often to poll the NN to check checkpointTxnCount */ + private final long checkpointCheckPeriod; // in seconds + + /** checkpoint once every this many transactions, regardless of time */ + private final long checkpointTxnCount; + + + public CheckpointConf(Configuration conf) { + checkpointCheckPeriod = conf.getLong( + DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, + DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_DEFAULT); + + checkpointPeriod = conf.getLong(DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, + DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT); + checkpointTxnCount = conf.getLong(DFS_NAMENODE_CHECKPOINT_TXNS_KEY, + DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT); + warnForDeprecatedConfigs(conf); + } + + private static void warnForDeprecatedConfigs(Configuration conf) { + for (String key : ImmutableList.of( + "fs.checkpoint.size", + "dfs.namenode.checkpoint.size")) { + if (conf.get(key) != null) { + LOG.warn("Configuration key " + key + " is deprecated! Ignoring..." + + " Instead please specify a value for " + + DFS_NAMENODE_CHECKPOINT_TXNS_KEY); + } + } + } + + public long getPeriod() { + return checkpointPeriod; + } + + public long getCheckPeriod() { + return Math.min(checkpointCheckPeriod, checkpointPeriod); + } + + public long getTxnCount() { + return checkpointTxnCount; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java index 4f485916b5d..6ae931fd44f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Checkpointer.java @@ -29,7 +29,6 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.protocol.CheckpointCommand; import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; @@ -58,17 +57,16 @@ class Checkpointer extends Daemon { private BackupNode backupNode; volatile boolean shouldRun; - private long checkpointPeriod; // in seconds - // Transactions count to trigger the checkpoint - private long checkpointTxnCount; private String infoBindAddress; + private CheckpointConf checkpointConf; + private BackupImage getFSImage() { return (BackupImage)backupNode.getFSImage(); } - private NamenodeProtocol getNamenode(){ + private NamenodeProtocol getRemoteNamenodeProxy(){ return backupNode.namenode; } @@ -89,26 +87,24 @@ class Checkpointer extends Daemon { /** * Initialize checkpoint. */ - @SuppressWarnings("deprecation") private void initialize(Configuration conf) throws IOException { // Create connection to the namenode. 
shouldRun = true; // Initialize other scheduling parameters from the configuration - checkpointPeriod = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, - DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT); - checkpointTxnCount = conf.getLong(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, - DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT); - SecondaryNameNode.warnForDeprecatedConfigs(conf); + checkpointConf = new CheckpointConf(conf); // Pull out exact http address for posting url to avoid ip aliasing issues String fullInfoAddr = conf.get(DFS_NAMENODE_BACKUP_HTTP_ADDRESS_KEY, DFS_NAMENODE_BACKUP_HTTP_ADDRESS_DEFAULT); infoBindAddress = fullInfoAddr.substring(0, fullInfoAddr.indexOf(":")); - LOG.info("Checkpoint Period : " + checkpointPeriod + " secs " + - "(" + checkpointPeriod/60 + " min)"); - LOG.info("Transactions count is : " + checkpointTxnCount + ", to trigger checkpoint"); + LOG.info("Checkpoint Period : " + + checkpointConf.getPeriod() + " secs " + + "(" + checkpointConf.getPeriod()/60 + " min)"); + LOG.info("Transactions count is : " + + checkpointConf.getTxnCount() + + ", to trigger checkpoint"); } /** @@ -125,8 +121,8 @@ class Checkpointer extends Daemon { public void run() { // Check the size of the edit log once every 5 minutes. long periodMSec = 5 * 60; // 5 minutes - if(checkpointPeriod < periodMSec) { - periodMSec = checkpointPeriod; + if(checkpointConf.getPeriod() < periodMSec) { + periodMSec = checkpointConf.getPeriod(); } periodMSec *= 1000; @@ -142,7 +138,7 @@ class Checkpointer extends Daemon { shouldCheckpoint = true; } else { long txns = countUncheckpointedTxns(); - if(txns >= checkpointTxnCount) + if(txns >= checkpointConf.getTxnCount()) shouldCheckpoint = true; } if(shouldCheckpoint) { @@ -165,7 +161,7 @@ class Checkpointer extends Daemon { } private long countUncheckpointedTxns() throws IOException { - long curTxId = getNamenode().getTransactionID(); + long curTxId = getRemoteNamenodeProxy().getTransactionID(); long uncheckpointedTxns = curTxId - getFSImage().getStorage().getMostRecentCheckpointTxId(); assert uncheckpointedTxns >= 0; @@ -183,7 +179,7 @@ class Checkpointer extends Daemon { bnImage.freezeNamespaceAtNextRoll(); NamenodeCommand cmd = - getNamenode().startCheckpoint(backupNode.getRegistration()); + getRemoteNamenodeProxy().startCheckpoint(backupNode.getRegistration()); CheckpointCommand cpCmd = null; switch(cmd.getAction()) { case NamenodeProtocol.ACT_SHUTDOWN: @@ -207,7 +203,7 @@ class Checkpointer extends Daemon { long lastApplied = bnImage.getLastAppliedTxId(); LOG.debug("Doing checkpoint. 
Last applied: " + lastApplied); RemoteEditLogManifest manifest = - getNamenode().getEditLogManifest(bnImage.getLastAppliedTxId() + 1); + getRemoteNamenodeProxy().getEditLogManifest(bnImage.getLastAppliedTxId() + 1); if (!manifest.getLogs().isEmpty()) { RemoteEditLog firstRemoteLog = manifest.getLogs().get(0); @@ -260,7 +256,7 @@ class Checkpointer extends Daemon { bnStorage, txid); } - getNamenode().endCheckpoint(backupNode.getRegistration(), sig); + getRemoteNamenodeProxy().endCheckpoint(backupNode.getRegistration(), sig); if (backupNode.getRole() == NamenodeRole.BACKUP) { bnImage.convergeJournalSpool(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index f1f163eed2e..19f9f5117aa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -285,7 +285,7 @@ public class FSEditLog { /** * @return true if the log is open in read mode. */ - synchronized boolean isOpenForRead() { + public synchronized boolean isOpenForRead() { return state == State.OPEN_FOR_READING; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 54c5cf8e109..b92a37eae8c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -808,7 +808,7 @@ public class FSImage implements Closeable { * Save the contents of the FS image to a new image file in each of the * current storage directories. 
*/ - synchronized void saveNamespace(FSNamesystem source) throws IOException { + public synchronized void saveNamespace(FSNamesystem source) throws IOException { assert editLog != null : "editLog must be initialized"; storage.attemptRestoreRemovedStorage(); @@ -817,7 +817,7 @@ public class FSImage implements Closeable { if (editLogWasOpen) { editLog.endCurrentLogSegment(true); } - long imageTxId = editLog.getLastWrittenTxId(); + long imageTxId = getLastAppliedOrWrittenTxId(); try { saveFSImageInAllDirs(source, imageTxId); storage.writeAll(); @@ -834,7 +834,7 @@ public class FSImage implements Closeable { } - void cancelSaveNamespace(String reason) + public void cancelSaveNamespace(String reason) throws InterruptedException { SaveNamespaceContext ctx = curSaveNamespaceContext; if (ctx != null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index ac20b0ee4d8..b4f522e00b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -47,6 +47,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DAT import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_MIN_DATANODES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_UPGRADE_PERMISSION_KEY; @@ -112,6 +114,7 @@ import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -159,6 +162,7 @@ import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer; import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; import org.apache.hadoop.hdfs.server.namenode.ha.HAState; +import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer; import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; @@ -261,6 +265,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private UserGroupInformation fsOwner; private String supergroup; private PermissionStatus defaultPermission; + private boolean standbyShouldCheckpoint; // Scan interval is not configurable. 
private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL = @@ -321,11 +326,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ private EditLogTailer editLogTailer = null; + /** + * Used when this NN is in standby state to perform checkpoints. + */ + private StandbyCheckpointer standbyCheckpointer; + /** * Reference to the NN's HAContext object. This is only set once * {@link #startCommonServices(Configuration, HAContext)} is called. */ private HAContext haContext; + + private final Configuration conf; PendingDataNodeMessages getPendingDataNodeMessages() { return pendingDatanodeMessages; @@ -381,6 +393,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @throws IOException on bad configuration */ FSNamesystem(Configuration conf, FSImage fsImage) throws IOException { + this.conf = conf; try { initialize(conf, fsImage); } catch(IOException e) { @@ -568,11 +581,30 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } editLogTailer = new EditLogTailer(this); editLogTailer.start(); + if (standbyShouldCheckpoint) { + standbyCheckpointer = new StandbyCheckpointer(conf, this); + standbyCheckpointer.start(); + } + } + + + /** + * Called while the NN is in Standby state, but just about to be + * asked to enter Active state. This cancels any checkpoints + * currently being taken. + */ + void prepareToStopStandbyServices() throws ServiceFailedException { + if (standbyCheckpointer != null) { + standbyCheckpointer.cancelAndPreventCheckpoints(); + } } /** Stop services required in standby state */ void stopStandbyServices() throws IOException { LOG.info("Stopping services started for standby state"); + if (standbyCheckpointer != null) { + standbyCheckpointer.stop(); + } if (editLogTailer != null) { editLogTailer.stop(); } @@ -728,6 +760,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, DFS_SUPPORT_APPEND_DEFAULT); this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf); + + this.standbyShouldCheckpoint = conf.getBoolean( + DFS_HA_STANDBY_CHECKPOINTS_KEY, + DFS_HA_STANDBY_CHECKPOINTS_DEFAULT); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java index 8753b270f1d..b9860032e6f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java @@ -124,16 +124,18 @@ public class GetImageServlet extends HttpServlet { final long txid = parsedParams.getTxId(); if (! 
currentlyDownloadingCheckpoints.add(txid)) { - throw new IOException( + response.sendError(HttpServletResponse.SC_CONFLICT, "Another checkpointer is already in the process of uploading a" + " checkpoint made at transaction ID " + txid); + return null; } try { if (nnImage.getStorage().findImageFile(txid) != null) { - throw new IOException( + response.sendError(HttpServletResponse.SC_CONFLICT, "Another checkpointer already uploaded an checkpoint " + "for txid " + txid); + return null; } // issue a HTTP get request to download the new fsimage diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java index e763f6f6828..0dcf1e6f629 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorage.java @@ -463,7 +463,7 @@ public class NNStorage extends Storage implements Closeable { /** * Return the transaction ID of the last checkpoint. */ - long getMostRecentCheckpointTxId() { + public long getMostRecentCheckpointTxId() { return mostRecentCheckpointTxId; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index fc0c22eeeae..c9af0ba05bf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -543,6 +543,7 @@ public class NameNode { } else { state = STANDBY_STATE;; } + state.prepareToEnterState(haContext); state.enterState(haContext); } catch (IOException e) { this.stop(); @@ -965,6 +966,11 @@ public class NameNode { namesystem.startStandbyServices(); } + @Override + public void prepareToStopStandbyServices() throws ServiceFailedException { + namesystem.prepareToStopStandbyServices(); + } + @Override public void stopStandbyServices() throws IOException { // TODO(HA): Are we guaranteed to be the only active here? 
@@ -992,6 +998,7 @@ public class NameNode { public boolean allowStaleReads() { return allowStaleStandbyReads; } + } public boolean isStandbyState() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceCancelledException.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceCancelledException.java index 2731275f261..5b49f0ee47c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceCancelledException.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SaveNamespaceCancelledException.java @@ -18,8 +18,10 @@ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; +import org.apache.hadoop.classification.InterfaceAudience;; -class SaveNamespaceCancelledException extends IOException { +@InterfaceAudience.Private +public class SaveNamespaceCancelledException extends IOException { private static final long serialVersionUID = 1L; SaveNamespaceCancelledException(String cancelReason) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index 9231f11d8b8..da41917ff99 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -45,6 +45,7 @@ import org.apache.hadoop.fs.FileSystem; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.DFSUtil.ErrorSimulator; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -117,16 +118,8 @@ public class SecondaryNameNode implements Runnable { private Collection checkpointDirs; private Collection checkpointEditsDirs; - - /** How often to checkpoint regardless of number of txns */ - private long checkpointPeriod; // in seconds - - /** How often to poll the NN to check checkpointTxnCount */ - private long checkpointCheckPeriod; // in seconds - - /** checkpoint once every this many transactions, regardless of time */ - private long checkpointTxnCount; + private CheckpointConf checkpointConf; private FSNamesystem namesystem; @@ -136,9 +129,9 @@ public class SecondaryNameNode implements Runnable { + "\nName Node Address : " + nameNodeAddr + "\nStart Time : " + new Date(starttime) + "\nLast Checkpoint Time : " + (lastCheckpointTime == 0? 
"--": new Date(lastCheckpointTime)) - + "\nCheckpoint Period : " + checkpointPeriod + " seconds" - + "\nCheckpoint Size : " + StringUtils.byteDesc(checkpointTxnCount) - + " (= " + checkpointTxnCount + " bytes)" + + "\nCheckpoint Period : " + checkpointConf.getPeriod() + " seconds" + + "\nCheckpoint Size : " + StringUtils.byteDesc(checkpointConf.getTxnCount()) + + " (= " + checkpointConf.getTxnCount() + " bytes)" + "\nCheckpoint Dirs : " + checkpointDirs + "\nCheckpoint Edits Dirs: " + checkpointEditsDirs; } @@ -243,16 +236,8 @@ public class SecondaryNameNode implements Runnable { namesystem = new FSNamesystem(conf, checkpointImage); // Initialize other scheduling parameters from the configuration - checkpointCheckPeriod = conf.getLong( - DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, - DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_DEFAULT); - - checkpointPeriod = conf.getLong(DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, - DFS_NAMENODE_CHECKPOINT_PERIOD_DEFAULT); - checkpointTxnCount = conf.getLong(DFS_NAMENODE_CHECKPOINT_TXNS_KEY, - DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT); - warnForDeprecatedConfigs(conf); - + checkpointConf = new CheckpointConf(conf); + // initialize the webserver for uploading files. // Kerberized SSL servers must be run from the host principal... UserGroupInformation httpUGI = @@ -307,21 +292,9 @@ public class SecondaryNameNode implements Runnable { conf.set(DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY, infoBindAddress + ":" +infoPort); LOG.info("Secondary Web-server up at: " + infoBindAddress + ":" +infoPort); LOG.info("Secondary image servlet up at: " + infoBindAddress + ":" + imagePort); - LOG.info("Checkpoint Period :" + checkpointPeriod + " secs " + - "(" + checkpointPeriod/60 + " min)"); - LOG.info("Log Size Trigger :" + checkpointTxnCount + " txns"); - } - - static void warnForDeprecatedConfigs(Configuration conf) { - for (String key : ImmutableList.of( - "fs.checkpoint.size", - "dfs.namenode.checkpoint.size")) { - if (conf.get(key) != null) { - LOG.warn("Configuration key " + key + " is deprecated! Ignoring..." + - " Instead please specify a value for " + - DFS_NAMENODE_CHECKPOINT_TXNS_KEY); - } - } + LOG.info("Checkpoint Period :" + checkpointConf.getPeriod() + " secs " + + "(" + checkpointConf.getPeriod()/60 + " min)"); + LOG.info("Log Size Trigger :" + checkpointConf.getTxnCount() + " txns"); } /** @@ -372,7 +345,7 @@ public class SecondaryNameNode implements Runnable { // Poll the Namenode (once every checkpointCheckPeriod seconds) to find the // number of transactions in the edit log that haven't yet been checkpointed. 
// - long period = Math.min(checkpointCheckPeriod, checkpointPeriod); + long period = checkpointConf.getCheckPeriod(); while (shouldRun) { try { @@ -391,7 +364,7 @@ public class SecondaryNameNode implements Runnable { long now = System.currentTimeMillis(); if (shouldCheckpointBasedOnCount() || - now >= lastCheckpointTime + 1000 * checkpointPeriod) { + now >= lastCheckpointTime + 1000 * checkpointConf.getPeriod()) { doCheckpoint(); lastCheckpointTime = now; } @@ -585,13 +558,13 @@ public class SecondaryNameNode implements Runnable { switch (opts.getCommand()) { case CHECKPOINT: long count = countUncheckpointedTxns(); - if (count > checkpointTxnCount || + if (count > checkpointConf.getTxnCount() || opts.shouldForceCheckpoint()) { doCheckpoint(); } else { System.err.println("EditLog size " + count + " transactions is " + "smaller than configured checkpoint " + - "interval " + checkpointTxnCount + " transactions."); + "interval " + checkpointConf.getTxnCount() + " transactions."); System.err.println("Skipping checkpoint."); } break; @@ -637,7 +610,7 @@ public class SecondaryNameNode implements Runnable { } boolean shouldCheckpointBasedOnCount() throws IOException { - return countUncheckpointedTxns() >= checkpointTxnCount; + return countUncheckpointedTxns() >= checkpointConf.getTxnCount(); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java index cc8dccaf1ae..985d85ba981 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java @@ -24,8 +24,11 @@ import java.security.MessageDigest; import java.util.List; import java.lang.Math; +import javax.servlet.http.HttpServletResponse; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; @@ -41,7 +44,8 @@ import com.google.common.collect.Lists; /** * This class provides fetching a specified file from the NameNode. */ -class TransferFsImage { +@InterfaceAudience.Private +public class TransferFsImage { public final static String CONTENT_LENGTH = "Content-Length"; public final static String MD5_HEADER = "X-MD5-Digest"; @@ -103,7 +107,7 @@ class TransferFsImage { * @param storage the storage directory to transfer the image from * @param txid the transaction ID of the image to be uploaded */ - static void uploadImageFromStorage(String fsName, + public static void uploadImageFromStorage(String fsName, InetSocketAddress imageListenAddress, NNStorage storage, long txid) throws IOException { @@ -111,7 +115,20 @@ class TransferFsImage { txid, imageListenAddress, storage); // this doesn't directly upload an image, but rather asks the NN // to connect back to the 2NN to download the specified image. 
- TransferFsImage.getFileClient(fsName, fileid, null, null, false); + try { + TransferFsImage.getFileClient(fsName, fileid, null, null, false); + } catch (HttpGetFailedException e) { + if (e.getResponseCode() == HttpServletResponse.SC_CONFLICT) { + // this is OK - this means that a previous attempt to upload + // this checkpoint succeeded even though we thought it failed. + LOG.info("Image upload with txid " + txid + + " conflicted with a previous image upload to the " + + "same NameNode. Continuing...", e); + return; + } else { + throw e; + } + } LOG.info("Uploaded image with txid " + txid + " to namenode at " + fsName); } @@ -194,10 +211,11 @@ class TransferFsImage { HttpURLConnection connection = (HttpURLConnection) url.openConnection(); if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) { - throw new IOException( + throw new HttpGetFailedException( "Image transfer servlet at " + url + " failed with status code " + connection.getResponseCode() + - "\nResponse message:\n" + connection.getResponseMessage()); + "\nResponse message:\n" + connection.getResponseMessage(), + connection); } long advertisedSize; @@ -289,5 +307,19 @@ class TransferFsImage { String header = connection.getHeaderField(MD5_HEADER); return (header != null) ? new MD5Hash(header) : null; } + + public static class HttpGetFailedException extends IOException { + private static final long serialVersionUID = 1L; + private final int responseCode; + + HttpGetFailedException(String msg, HttpURLConnection connection) throws IOException { + super(msg); + this.responseCode = connection.getResponseCode(); + } + + public int getResponseCode() { + return responseCode; + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index 9bded332d14..53e96a73a32 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -46,7 +46,6 @@ public class EditLogTailer { private final EditLogTailerThread tailerThread; private final FSNamesystem namesystem; - private final FSImage image; private final FSEditLog editLog; private volatile Throwable lastError = null; @@ -54,7 +53,6 @@ public class EditLogTailer { public EditLogTailer(FSNamesystem namesystem) { this.tailerThread = new EditLogTailerThread(); this.namesystem = namesystem; - this.image = namesystem.getFSImage(); this.editLog = namesystem.getEditLog(); } @@ -106,6 +104,8 @@ public class EditLogTailer { // deadlock. 
namesystem.writeLockInterruptibly(); try { + FSImage image = namesystem.getFSImage(); + long lastTxnId = image.getLastAppliedTxId(); if (LOG.isDebugEnabled()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java index dce1cfb34a8..6b070b25f54 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAContext.java @@ -3,6 +3,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; import org.apache.hadoop.ipc.StandbyException; @@ -26,7 +27,10 @@ public interface HAContext { /** Start the services required in standby state */ public void startStandbyServices() throws IOException; - + + /** Prepare to exit the standby state */ + public void prepareToStopStandbyServices() throws ServiceFailedException; + /** Stop the services when exiting standby state */ public void stopStandbyServices() throws IOException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java index 2f0b6ff1a6d..20ea854b461 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/HAState.java @@ -54,6 +54,8 @@ abstract public class HAState { */ protected final void setStateInternal(final HAContext context, final HAState s) throws ServiceFailedException { + prepareToExitState(context); + s.prepareToEnterState(context); context.writeLock(); try { exitState(context); @@ -64,6 +66,18 @@ abstract public class HAState { } } + /** + * Method to be overridden by subclasses to prepare to enter a state. + * This method is called without the context being locked, + * and after {@link #prepareToExitState(HAContext)} has been called + * for the previous state, but before {@link #exitState(HAContext)} + * has been called for the previous state. + * @param context HA context + * @throws ServiceFailedException on precondition failure + */ + public void prepareToEnterState(final HAContext context) + throws ServiceFailedException {} + /** * Method to be overridden by subclasses to perform steps necessary for * entering a state. @@ -73,6 +87,22 @@ abstract public class HAState { public abstract void enterState(final HAContext context) throws ServiceFailedException; + /** + * Method to be overridden by subclasses to prepare to exit a state. + * This method is called without the context being locked. + * This is used by the standby state to cancel any checkpoints + * that are going on. It can also be used to check any preconditions + * for the state transition. + * + * This method should not make any destructuve changes to the state + * (eg stopping threads) since {@link #prepareToEnterState(HAContext)} + * may subsequently cancel the state transition. 
+ * @param context HA context + * @throws ServiceFailedException on precondition failure + */ + public void prepareToExitState(final HAContext context) + throws ServiceFailedException {} + /** * Method to be overridden by subclasses to perform steps necessary for * exiting a state. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java new file mode 100644 index 00000000000..ee7921db4f8 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java @@ -0,0 +1,313 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.security.PrivilegedAction; +import java.util.ArrayList; +import java.util.Collection; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.server.namenode.CheckpointConf; +import org.apache.hadoop.hdfs.server.namenode.FSImage; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.SaveNamespaceCancelledException; +import org.apache.hadoop.hdfs.server.namenode.TransferFsImage; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.UserGroupInformation; +import static org.apache.hadoop.hdfs.server.common.Util.now; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +/** + * Thread which runs inside the NN when it's in Standby state, + * periodically waking up to take a checkpoint of the namespace. + * When it takes a checkpoint, it saves it to its local + * storage and then uploads it to the remote NameNode. 
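+ *
+ * A checkpoint is triggered either when the number of un-checkpointed
+ * transactions reaches the threshold configured by
+ * DFS_NAMENODE_CHECKPOINT_TXNS_KEY, or when DFS_NAMENODE_CHECKPOINT_PERIOD_KEY
+ * seconds have passed since the previous checkpoint, whichever comes first.
+ * Checkpoints are skipped or cancelled when a failover is about to happen
+ * (see cancelAndPreventCheckpoints), and the whole mechanism is disabled when
+ * dfs.ha.standby.checkpoints is set to false.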
+ */ +@InterfaceAudience.Private +public class StandbyCheckpointer { + private static final Log LOG = LogFactory.getLog(StandbyCheckpointer.class); + private static final long PREVENT_AFTER_CANCEL_MS = 2*60*1000L; + private final CheckpointConf checkpointConf; + private final FSNamesystem namesystem; + private long lastCheckpointTime; + private final CheckpointerThread thread; + private String activeNNAddress; + private InetSocketAddress myNNAddress; + + // Keep track of how many checkpoints were canceled. + // This is for use in tests. + private static int canceledCount = 0; + + public StandbyCheckpointer(Configuration conf, FSNamesystem ns) { + this.namesystem = ns; + this.checkpointConf = new CheckpointConf(conf); + this.thread = new CheckpointerThread(); + + setNameNodeAddresses(conf); + } + + /** + * Determine the address of the NN we are checkpointing + * as well as our own HTTP address from the configuration. + */ + private void setNameNodeAddresses(Configuration conf) { + String nsId = DFSUtil.getNamenodeNameServiceId(conf); + Collection nnIds = DFSUtil.getNameNodeIds(conf, nsId); + String myNNId = conf.get(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY); + Preconditions.checkArgument(nnIds != null, + "Could not determine namenode ids in namespace '%s'", + nsId); + Preconditions.checkArgument(nnIds.size() == 2, + "Expected exactly 2 NameNodes in this namespace. Instead, got: '%s'", + Joiner.on("','").join(nnIds)); + Preconditions.checkState(myNNId != null && !myNNId.isEmpty(), + "Could not determine own NN ID"); + + ArrayList nnSet = Lists.newArrayList(nnIds); + nnSet.remove(myNNId); + assert nnSet.size() == 1; + String activeNN = nnSet.get(0); + + // Look up the address of the active NN. + Configuration confForActive = new Configuration(conf); + NameNode.initializeGenericKeys(confForActive, nsId, activeNN); + activeNNAddress = DFSUtil.getInfoServer(null, confForActive, true); + + // Look up our own address. + String myAddrString = DFSUtil.getInfoServer(null, conf, true); + + // Sanity-check. + Preconditions.checkArgument(checkAddress(activeNNAddress), + "Bad address for active NN: %s", activeNNAddress); + Preconditions.checkArgument(checkAddress(activeNNAddress), + "Bad address for standby NN: %s", myNNAddress); + + myNNAddress = NetUtils.createSocketAddr(myAddrString); + } + + /** + * Ensure that the given address is valid and has a port + * specified. 
+ */ + private boolean checkAddress(String addrStr) { + InetSocketAddress addr = NetUtils.createSocketAddr(addrStr); + return addr.getPort() != 0; + } + + public void start() { + LOG.info("Starting standby checkpoint thread...\n" + + "Checkpointing active NN at " + activeNNAddress + "\n" + + "Serving checkpoints at " + myNNAddress); + thread.start(); + } + + public void stop() throws IOException { + thread.setShouldRun(false); + thread.interrupt(); + try { + thread.join(); + } catch (InterruptedException e) { + LOG.warn("Edit log tailer thread exited with an exception"); + throw new IOException(e); + } + } + + private void doCheckpoint() throws InterruptedException, IOException { + long txid; + + namesystem.writeLockInterruptibly(); + try { + assert namesystem.getEditLog().isOpenForRead() : + "Standby Checkpointer should only attempt a checkpoint when " + + "NN is in standby mode, but the edit logs are in an unexpected state"; + + FSImage img = namesystem.getFSImage(); + + long prevCheckpointTxId = img.getStorage().getMostRecentCheckpointTxId(); + long thisCheckpointTxId = img.getLastAppliedOrWrittenTxId(); + assert thisCheckpointTxId >= prevCheckpointTxId; + if (thisCheckpointTxId == prevCheckpointTxId) { + LOG.info("A checkpoint was triggered but the Standby Node has not " + + "received any transactions since the last checkpoint at txid " + + thisCheckpointTxId + ". Skipping..."); + return; + } + + img.saveNamespace(namesystem); + txid = img.getStorage().getMostRecentCheckpointTxId(); + assert txid == thisCheckpointTxId : "expected to save checkpoint at txid=" + + thisCheckpointTxId + " but instead saved at txid=" + txid; + } finally { + namesystem.writeUnlock(); + } + + // Upload the saved checkpoint back to the active + TransferFsImage.uploadImageFromStorage( + activeNNAddress, myNNAddress, + namesystem.getFSImage().getStorage(), txid); + } + + /** + * Cancel any checkpoint that's currently being made, + * and prevent any new checkpoints from starting for the next + * minute or so. + */ + public void cancelAndPreventCheckpoints() throws ServiceFailedException { + try { + thread.preventCheckpointsFor(PREVENT_AFTER_CANCEL_MS); + // TODO: there is a really narrow race here if we are just + // about to start a checkpoint - this won't cancel it! + namesystem.getFSImage().cancelSaveNamespace( + "About to exit standby state"); + } catch (InterruptedException e) { + throw new ServiceFailedException( + "Interrupted while trying to cancel checkpoint"); + } + } + + @VisibleForTesting + static int getCanceledCount() { + return canceledCount; + } + + private long countUncheckpointedTxns() { + FSImage img = namesystem.getFSImage(); + return img.getLastAppliedOrWrittenTxId() - + img.getStorage().getMostRecentCheckpointTxId(); + } + + private class CheckpointerThread extends Thread { + private volatile boolean shouldRun = true; + private volatile long preventCheckpointsUntil = 0; + + private CheckpointerThread() { + super("Standby State Checkpointer"); + } + + private void setShouldRun(boolean shouldRun) { + this.shouldRun = shouldRun; + } + + @Override + public void run() { + // We have to make sure we're logged in as far as JAAS + // is concerned, in order to use kerberized SSL properly. + // This code copied from SecondaryNameNode - TODO: refactor + // to a utility function. 
+ if (UserGroupInformation.isSecurityEnabled()) { + UserGroupInformation ugi = null; + try { + ugi = UserGroupInformation.getLoginUser(); + } catch (IOException e) { + LOG.error("Exception while getting login user", e); + Runtime.getRuntime().exit(-1); + } + ugi.doAs(new PrivilegedAction() { + @Override + public Object run() { + doWork(); + return null; + } + }); + } else { + doWork(); + } + } + + /** + * Prevent checkpoints from occurring for some time period + * in the future. This is used when preparing to enter active + * mode. We need to not only cancel any concurrent checkpoint, + * but also prevent any checkpoints from racing to start just + * after the cancel call. + * + * @param delayMs the number of MS for which checkpoints will be + * prevented + */ + private void preventCheckpointsFor(long delayMs) { + preventCheckpointsUntil = now() + delayMs; + } + + private void doWork() { + // Reset checkpoint time so that we don't always checkpoint + // on startup. + lastCheckpointTime = now(); + while (shouldRun) { + try { + Thread.sleep(1000 * checkpointConf.getCheckPeriod()); + } catch (InterruptedException ie) { + } + if (!shouldRun) { + break; + } + try { + // We may have lost our ticket since last checkpoint, log in again, just in case + if (UserGroupInformation.isSecurityEnabled()) { + UserGroupInformation.getCurrentUser().reloginFromKeytab(); + } + + long now = now(); + long uncheckpointed = countUncheckpointedTxns(); + long secsSinceLast = (now - lastCheckpointTime)/1000; + + boolean needCheckpoint = false; + if (uncheckpointed >= checkpointConf.getTxnCount()) { + LOG.info("Triggering checkpoint because there have been " + + uncheckpointed + " txns since the last checkpoint, which " + + "exceeds the configured threshold " + + checkpointConf.getTxnCount()); + needCheckpoint = true; + } else if (secsSinceLast >= checkpointConf.getPeriod()) { + LOG.info("Triggering checkpoint because it has been " + + secsSinceLast + " seconds since the last checkpoint, which " + + "exceeds the configured interval " + checkpointConf.getPeriod()); + needCheckpoint = true; + } + if (needCheckpoint && now < preventCheckpointsUntil) { + LOG.info("But skipping this checkpoint since we are about to failover!"); + canceledCount++; + } else if (needCheckpoint) { + doCheckpoint(); + lastCheckpointTime = now; + } + } catch (SaveNamespaceCancelledException ce) { + LOG.info("Checkpoint was cancelled: " + ce.getMessage()); + canceledCount++; + } catch (InterruptedException ie) { + // Probably requested shutdown. 
+ continue; + } catch (Throwable t) { + LOG.error("Exception in doCheckpoint", t); + } + } + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java index ec0dcec9964..80f42e60fea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java @@ -61,6 +61,11 @@ public class StandbyState extends HAState { } } + @Override + public void prepareToExitState(HAContext context) throws ServiceFailedException { + context.prepareToStopStandbyServices(); + } + @Override public void exitState(HAContext context) throws ServiceFailedException { try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 9b6328374a2..6b800a9637f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -538,6 +538,16 @@ public class MiniDFSCluster { conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, StaticMapping.class, DNSToSwitchMapping.class); + // In an HA cluster, in order for the StandbyNode to perform checkpoints, + // it needs to know the HTTP port of the Active. So, if ephemeral ports + // are chosen, disable checkpoints for the test. + if (!nnTopology.allHttpPortsSpecified() && + nnTopology.isHA()) { + LOG.info("MiniDFSCluster disabling checkpointing in the Standby node " + + "since no HTTP ports have been specified."); + conf.setBoolean(DFS_HA_STANDBY_CHECKPOINTS_KEY, false); + } + federation = nnTopology.isFederated(); createNameNodesAndSetConf( nnTopology, manageNameDfsDirs, format, operation, clusterId, conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java index 407ec8f5d10..fc9bb64f9ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java @@ -106,6 +106,34 @@ public class MiniDFSNNTopology { public boolean isFederated() { return nameservices.size() > 1 || federation; } + + /** + * @return true if at least one of the nameservices + * in the topology has HA enabled. + */ + public boolean isHA() { + for (NSConf ns : nameservices) { + if (ns.getNNs().size() > 1) { + return true; + } + } + return false; + } + + /** + * @return true if all of the NNs in the cluster have their HTTP + * port specified to be non-ephemeral. 
+ */ + public boolean allHttpPortsSpecified() { + for (NSConf ns : nameservices) { + for (NNConf nn : ns.getNNs()) { + if (nn.getHttpPort() == 0) { + return false; + } + } + } + return true; + } public List getNameservices() { return nameservices; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java index 9e9af7af617..2e4e932b386 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java @@ -195,9 +195,10 @@ public abstract class FSImageTestUtil { * Create an aborted in-progress log in the given directory, containing * only a specified number of "mkdirs" operations. */ - public static void createAbortedLogWithMkdirs(File editsLogDir, int numDirs) - throws IOException { + public static void createAbortedLogWithMkdirs(File editsLogDir, int numDirs, + long firstTxId) throws IOException { FSEditLog editLog = FSImageTestUtil.createStandaloneEditLog(editsLogDir); + editLog.setNextTxId(firstTxId); editLog.openForWrite(); PermissionStatus perms = PermissionStatus.createImmutable("fakeuser", "fakegroup", @@ -399,10 +400,15 @@ public abstract class FSImageTestUtil { * Assert that the NameNode has checkpoints at the expected * transaction IDs. */ - static void assertNNHasCheckpoints(MiniDFSCluster cluster, + public static void assertNNHasCheckpoints(MiniDFSCluster cluster, List txids) { + assertNNHasCheckpoints(cluster, 0, txids); + } + + public static void assertNNHasCheckpoints(MiniDFSCluster cluster, + int nnIdx, List txids) { - for (File nameDir : getNameNodeCurrentDirs(cluster)) { + for (File nameDir : getNameNodeCurrentDirs(cluster, nnIdx)) { // Should have fsimage_N for the three checkpoints for (long checkpointTxId : txids) { File image = new File(nameDir, @@ -412,9 +418,9 @@ public abstract class FSImageTestUtil { } } - static List getNameNodeCurrentDirs(MiniDFSCluster cluster) { + public static List getNameNodeCurrentDirs(MiniDFSCluster cluster, int nnIdx) { List nameDirs = Lists.newArrayList(); - for (URI u : cluster.getNameDirs(0)) { + for (URI u : cluster.getNameDirs(nnIdx)) { nameDirs.add(new File(u.getPath(), "current")); } return nameDirs; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index 551588425b4..8223e7c60c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.test.GenericTestUtils; import org.mockito.Mockito; /** @@ -149,4 +150,10 @@ public class NameNodeAdapter { fsn.setFsLockForTests(spy); return spy; } + + public static FSImage spyOnFsImage(NameNode nn1) { + FSImage spy = Mockito.spy(nn1.getNamesystem().dir.fsImage); + nn1.getNamesystem().dir.fsImage = spy; + return spy; + } } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index f40a89e8491..54550164b88 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -1339,17 +1339,11 @@ public class TestCheckpoint extends TestCase { // Let the first one finish delayer.proceed(); - // Letting the first node continue should catch an exception + // Letting the first node continue, it should try to upload the + // same image, and gracefully ignore it, while logging an + // error message. checkpointThread.join(); - try { - checkpointThread.propagateExceptions(); - fail("Didn't throw!"); - } catch (Exception ioe) { - assertTrue("Unexpected exception: " + - StringUtils.stringifyException(ioe), - ioe.toString().contains("Another checkpointer already uploaded")); - LOG.info("Caught expected exception", ioe); - } + checkpointThread.propagateExceptions(); // primary should still consider fsimage_4 the latest assertEquals(4, storage.getMostRecentCheckpointTxId()); @@ -1791,7 +1785,7 @@ public class TestCheckpoint extends TestCase { private void assertParallelFilesInvariant(MiniDFSCluster cluster, ImmutableList secondaries) throws Exception { List allCurrentDirs = Lists.newArrayList(); - allCurrentDirs.addAll(getNameNodeCurrentDirs(cluster)); + allCurrentDirs.addAll(getNameNodeCurrentDirs(cluster, 0)); for (SecondaryNameNode snn : secondaries) { allCurrentDirs.addAll(getCheckpointCurrentDirs(snn)); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java index 952df211a74..a245301dd90 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogsDuringFailover.java @@ -129,7 +129,7 @@ public class TestEditLogsDuringFailover { // Create a fake in-progress edit-log in the shared directory URI sharedUri = cluster.getSharedEditsDir(0, 1); File sharedDir = new File(sharedUri.getPath(), "current"); - FSImageTestUtil.createAbortedLogWithMkdirs(sharedDir, NUM_DIRS_IN_LOG); + FSImageTestUtil.createAbortedLogWithMkdirs(sharedDir, NUM_DIRS_IN_LOG, 1); assertEditFiles(Collections.singletonList(sharedUri), NNStorage.getInProgressEditsFileName(1)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java new file mode 100644 index 00000000000..905dd03c60d --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -0,0 +1,240 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.hdfs.server.namenode.FSImage; +import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NNStorage; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; + + +public class TestStandbyCheckpoints { + private static final int NUM_DIRS_IN_LOG = 200000; + private MiniDFSCluster cluster; + private NameNode nn0, nn1; + private FileSystem fs; + + @Before + public void setupCluster() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5); + + MiniDFSNNTopology topology = new MiniDFSNNTopology() + .addNameservice(new MiniDFSNNTopology.NSConf(null) + .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10001)) + .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10002))); + + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(topology) + .numDataNodes(0) + .build(); + cluster.waitActive(); + + nn0 = cluster.getNameNode(0); + nn1 = cluster.getNameNode(1); + fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + + nn1.getNamesystem().getEditLogTailer().setSleepTime(250); + nn1.getNamesystem().getEditLogTailer().interrupt(); + + cluster.transitionToActive(0); + } + + @After + public void shutdownCluster() throws IOException { + if (cluster != null) { + cluster.shutdown(); + } + } + + @Test + public void testSBNCheckpoints() throws Exception { + doEdits(0, 10); + + TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + // Once the standby catches up, it should notice that it needs to + // do a checkpoint and save one to its local directories. + waitForCheckpoint(1, ImmutableList.of(0, 12)); + + // It should also upload it back to the active. + waitForCheckpoint(0, ImmutableList.of(0, 12)); + } + + /** + * Test for the case when both of the NNs in the cluster are + * in the standby state, and thus are both creating checkpoints + * and uploading them to each other. 
+ * In this circumstance, they should receive the error from the + * other node indicating that the other node already has a + * checkpoint for the given txid, but this should not cause + * an abort, etc. + */ + @Test + public void testBothNodesInStandbyState() throws Exception { + doEdits(0, 10); + + cluster.transitionToStandby(0); + + // Transitioning to standby closed the edit log on the active, + // so the standby will catch up. Then, both will be in standby mode + // with enough uncheckpointed txns to cause a checkpoint, and they + // will each try to take a checkpoint and upload to each other. + waitForCheckpoint(1, ImmutableList.of(0, 12)); + waitForCheckpoint(0, ImmutableList.of(0, 12)); + + assertEquals(12, nn0.getNamesystem().getFSImage().getStorage() + .getMostRecentCheckpointTxId()); + assertEquals(12, nn1.getNamesystem().getFSImage().getStorage() + .getMostRecentCheckpointTxId()); + + List dirs = Lists.newArrayList(); + dirs.addAll(FSImageTestUtil.getNameNodeCurrentDirs(cluster, 0)); + dirs.addAll(FSImageTestUtil.getNameNodeCurrentDirs(cluster, 1)); + // TODO: this failed once because it caught a ckpt file -- maybe + // this is possible if one of the NNs is really fast and the other is slow? + // need to loop this to suss out the race. + FSImageTestUtil.assertParallelFilesAreIdentical(dirs, ImmutableSet.of()); + } + + /** + * Test for the case when the SBN is configured to checkpoint based + * on a time period, but no transactions are happening on the + * active. Thus, it would want to save a second checkpoint at the + * same txid, which is a no-op. This test makes sure this doesn't + * cause any problem. + */ + @Test + public void testCheckpointWhenNoNewTransactionsHappened() + throws Exception { + // Checkpoint as fast as we can, in a tight loop. + cluster.getConfiguration(1).setInt( + DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0); + cluster.restartNameNode(1); + nn1 = cluster.getNameNode(1); + nn1.getNamesystem().getEditLogTailer().setSleepTime(250); + nn1.getNamesystem().getEditLogTailer().interrupt(); + + FSImage spyImage1 = NameNodeAdapter.spyOnFsImage(nn1); + + // We shouldn't save any checkpoints at txid=0 + Thread.sleep(1000); + Mockito.verify(spyImage1, Mockito.never()) + .saveNamespace((FSNamesystem) Mockito.anyObject()); + + // Roll the primary and wait for the standby to catch up + TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + Thread.sleep(2000); + + // We should make exactly one checkpoint at this new txid. + Mockito.verify(spyImage1, Mockito.times(1)) + .saveNamespace((FSNamesystem) Mockito.anyObject()); + } + + /** + * Test cancellation of ongoing checkpoints when failover happens + * mid-checkpoint. + */ + @Test + public void testCheckpointCancellation() throws Exception { + cluster.transitionToStandby(0); + + // Create an edit log in the shared edits dir with a lot + // of mkdirs operations. This is solely so that the image is + // large enough to take a non-trivial amount of time to load. + // (only ~15MB) + URI sharedUri = cluster.getSharedEditsDir(0, 1); + File sharedDir = new File(sharedUri.getPath(), "current"); + File tmpDir = new File(MiniDFSCluster.getBaseDirectory(), + "testCheckpointCancellation-tmp"); + FSImageTestUtil.createAbortedLogWithMkdirs(tmpDir, NUM_DIRS_IN_LOG, + 3); + String fname = NNStorage.getInProgressEditsFileName(3); + new File(tmpDir, fname).renameTo(new File(sharedDir, fname)); + + // Checkpoint as fast as we can, in a tight loop. 
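
The "tight loop" used here and in testCheckpointWhenNoNewTransactionsHappened is plain tuning of the standard checkpoint settings. Pulled together in one place, and assuming the usual Configuration and DFSConfigKeys imports:

    // Checkpoint settings these tests rely on, gathered into one helper.
    static Configuration aggressiveCheckpointConf() {
      Configuration conf = new Configuration();
      // Re-evaluate the checkpoint trigger every second.
      conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
      // Checkpoint after only five uncheckpointed transactions...
      conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5);
      // ...and treat the time-based threshold as always exceeded.
      conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0);
      return conf;
    }
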
+ cluster.getConfiguration(1).setInt( + DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0); + cluster.restartNameNode(1); + nn1 = cluster.getNameNode(1); + nn1.getNamesystem().getEditLogTailer().setSleepTime(250); + nn1.getNamesystem().getEditLogTailer().interrupt(); + + cluster.transitionToActive(0); + + for (int i = 0; i < 10; i++) { + + doEdits(i*10, i*10 + 10); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + cluster.transitionToStandby(1); + cluster.transitionToActive(0); + } + + assertTrue(StandbyCheckpointer.getCanceledCount() > 0); + } + + + private void doEdits(int start, int stop) throws IOException { + for (int i = start; i < stop; i++) { + Path p = new Path("/test" + i); + fs.mkdirs(p); + } + } + + private void waitForCheckpoint(int nnIdx, List txids) + throws InterruptedException { + long start = System.currentTimeMillis(); + while (true) { + try { + FSImageTestUtil.assertNNHasCheckpoints(cluster, nnIdx, txids); + return; + } catch (AssertionError err) { + if (System.currentTimeMillis() - start > 10000) { + throw err; + } else { + Thread.sleep(300); + } + } + } + } +} From fb9cdcfa60cb641faded2d3843c81e969ac33147 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Thu, 5 Jan 2012 21:02:59 +0000 Subject: [PATCH 067/177] =?UTF-8?q?HADOOP-7924.=20=E2=80=A9FailoverControl?= =?UTF-8?q?ler=20for=20client-based=20configuration.=20Contributed=20by=20?= =?UTF-8?q?Eli=20Collins?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1227836 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 2 + .../hadoop-common/src/main/bin/hadoop | 5 + .../apache/hadoop/ha/FailoverController.java | 132 ++++++++++ .../hadoop/ha/FailoverFailedException.java | 38 +++ .../java/org/apache/hadoop/ha/HAAdmin.java | 29 +- .../hadoop/ha/HealthCheckFailedException.java | 19 +- .../hadoop/ha/ServiceFailedException.java | 18 +- .../hadoop/ha/TestFailoverController.java | 247 ++++++++++++++++++ .../org/apache/hadoop/ha/TestHAAdmin.java | 5 + 9 files changed, 457 insertions(+), 38 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverFailedException.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 1663eee16c5..107572ae495 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -24,3 +24,5 @@ HADOOP-7925. Add interface and update CLI to query current state to HADOOP-7932. Make client connection retries on socket time outs configurable. (Uma Maheswara Rao G via todd) + +HADOOP-7924. 
FailoverController for client-based configuration (eli) diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop index 4ca78972aa3..e57ea31fbde 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop @@ -36,6 +36,7 @@ function print_usage(){ echo " classpath prints the class path needed to get the" echo " Hadoop jar and the required libraries" echo " daemonlog get/set the log level for each daemon" + echo " haadmin run a HA admin client" echo " or" echo " CLASSNAME run the class named CLASSNAME" echo "" @@ -95,6 +96,10 @@ case $COMMAND in CLASS=org.apache.hadoop.tools.HadoopArchives CLASSPATH=${CLASSPATH}:${TOOL_PATH} HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" + elif [ "$COMMAND" = "haadmin" ] ; then + CLASS=org.apache.hadoop.ha.HAAdmin + CLASSPATH=${CLASSPATH}:${TOOL_PATH} + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" elif [[ "$COMMAND" = -* ]] ; then # class and package names cannot begin with a - echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'" diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java new file mode 100644 index 00000000000..cc60de66a39 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ha; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; + +/** + * The FailOverController is responsible for electing an active service + * on startup or when the current active is changing (eg due to failure), + * monitoring the health of a service, and performing a fail-over when a + * new active service is either manually selected by a user or elected. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class FailoverController { + + private static final Log LOG = LogFactory.getLog(FailoverController.class); + + /** + * Perform pre-failover checks on the given service we plan to + * failover to, eg to prevent failing over to a service (eg due + * to it being inaccessible, already active, not healthy, etc). 
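
Stepping back from the individual checks for a moment, a caller drives this class through a single entry point; the sketch below mirrors what HAAdmin's new -failover subcommand (later in this patch) does. The host:port targets are placeholders, and getProtocol stands in for however the caller obtains its HAServiceProtocol RPC proxies:

    HAServiceProtocol from = getProtocol("nn1.example.com:8020");  // currently active
    HAServiceProtocol to   = getProtocol("nn2.example.com:8020");  // intended new active
    try {
      // Runs the pre-failover checks, demotes 'from', promotes 'to', and
      // attempts a failback to 'from' if the promotion fails.
      FailoverController.failover(from, "nn1", to, "nn2");
    } catch (FailoverFailedException ffe) {
      // The failover (and possibly the failback) could not be completed.
      System.err.println("Failover failed: " + ffe.getLocalizedMessage());
    }
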
+ * + * @param toSvc service to make active + * @param toSvcName name of service to make active + * @throws FailoverFailedException if we should avoid failover + */ + private static void preFailoverChecks(HAServiceProtocol toSvc, + String toSvcName) + throws FailoverFailedException { + HAServiceState toSvcState; + try { + toSvcState = toSvc.getServiceState(); + } catch (Exception e) { + String msg = "Unable to get service state for " + toSvcName; + LOG.error(msg, e); + throw new FailoverFailedException(msg, e); + } + if (!toSvcState.equals(HAServiceState.STANDBY)) { + throw new FailoverFailedException( + "Can't failover to an active service"); + } + try { + toSvc.monitorHealth(); + } catch (HealthCheckFailedException hce) { + throw new FailoverFailedException( + "Can't failover to an unhealthy service", hce); + } + // TODO(HA): ask toSvc if it's capable. Eg not in SM. + } + + /** + * Failover from service 1 to service 2. If the failover fails + * then try to failback. + * + * @param fromSvc currently active service + * @param fromSvcName name of currently active service + * @param toSvc service to make active + * @param toSvcName name of service to make active + * @throws FailoverFailedException if the failover fails + */ + public static void failover(HAServiceProtocol fromSvc, String fromSvcName, + HAServiceProtocol toSvc, String toSvcName) + throws FailoverFailedException { + preFailoverChecks(toSvc, toSvcName); + + // Try to make fromSvc standby + try { + fromSvc.transitionToStandby(); + } catch (ServiceFailedException sfe) { + LOG.warn("Unable to make " + fromSvcName + " standby (" + + sfe.getMessage() + ")"); + } catch (Exception e) { + LOG.warn("Unable to make " + fromSvcName + + " standby (unable to connect)", e); + // TODO(HA): fence fromSvc and unfence on failback + } + + // Try to make toSvc active + boolean failed = false; + Throwable cause = null; + try { + toSvc.transitionToActive(); + } catch (ServiceFailedException sfe) { + LOG.error("Unable to make " + toSvcName + " active (" + + sfe.getMessage() + "). Failing back"); + failed = true; + cause = sfe; + } catch (Exception e) { + LOG.error("Unable to make " + toSvcName + + " active (unable to connect). Failing back", e); + failed = true; + cause = e; + } + + // Try to failback if we failed to make toSvc active + if (failed) { + String msg = "Unable to failover to " + toSvcName; + try { + fromSvc.transitionToActive(); + } catch (ServiceFailedException sfe) { + msg = "Failback to " + fromSvcName + " failed (" + + sfe.getMessage() + ")"; + LOG.fatal(msg); + } catch (Exception e) { + msg = "Failback to " + fromSvcName + " failed (unable to connect)"; + LOG.fatal(msg); + } + throw new FailoverFailedException(msg, cause); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverFailedException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverFailedException.java new file mode 100644 index 00000000000..09982b4f7ea --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverFailedException.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ha; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Exception thrown to indicate service failover has failed. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class FailoverFailedException extends Exception { + private static final long serialVersionUID = 1L; + + public FailoverFailedException(final String message) { + super(message); + } + + public FailoverFailedException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index fff82e83b40..2dc5c1f39a3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -24,7 +24,6 @@ import java.util.Map; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.Tool; @@ -46,6 +45,9 @@ public class HAAdmin extends Configured implements Tool { new UsageInfo("", "Transitions the daemon into Active state")) .put("-transitionToStandby", new UsageInfo("", "Transitions the daemon into Standby state")) + .put("-failover", + new UsageInfo(" ", + "Failover from the first daemon to the second")) .put("-getServiceState", new UsageInfo("", "Returns the state of the daemon")) .put("-checkHealth", @@ -94,7 +96,6 @@ public class HAAdmin extends Configured implements Tool { return 0; } - private int transitionToStandby(final String[] argv) throws IOException, ServiceFailedException { if (argv.length != 2) { @@ -107,7 +108,27 @@ public class HAAdmin extends Configured implements Tool { proto.transitionToStandby(); return 0; } - + + private int failover(final String[] argv) + throws IOException, ServiceFailedException { + if (argv.length != 3) { + errOut.println("failover: incorrect number of arguments"); + printUsage(errOut, "-failover"); + return -1; + } + + HAServiceProtocol proto1 = getProtocol(argv[1]); + HAServiceProtocol proto2 = getProtocol(argv[2]); + try { + FailoverController.failover(proto1, argv[1], proto2, argv[2]); + out.println("Failover from "+argv[1]+" to "+argv[2]+" successful"); + } catch (FailoverFailedException ffe) { + errOut.println("Failover failed: " + ffe.getLocalizedMessage()); + return 1; + } + return 0; + } + private int checkHealth(final String[] argv) throws IOException, ServiceFailedException { if (argv.length != 2) { @@ -171,6 +192,8 @@ public class HAAdmin extends Configured implements Tool { return transitionToActive(argv); } else if ("-transitionToStandby".equals(cmd)) { return transitionToStandby(argv); + } else if ("-failover".equals(cmd)) { + return failover(argv); } else if ("-getServiceState".equals(cmd)) { return getServiceState(argv); } else if 
("-checkHealth".equals(cmd)) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java index a73e4ef3c2f..4d888be480e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java @@ -21,34 +21,17 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; /** - * Exception thrown to indicate that health check of a service - * failed. + * Exception thrown to indicate that health check of a service failed. */ @InterfaceAudience.Public @InterfaceStability.Evolving public class HealthCheckFailedException extends Exception { private static final long serialVersionUID = 1L; - /** - * Constructs exception with the specified detail message. - * @param message the detail message (which is saved for later retrieval - * by the {@link #getMessage()} method). - */ public HealthCheckFailedException(final String message) { super(message); } - /** - * Constructs a new exception with the specified detail message and - * cause. - * - * @param message the detail message (which is saved for later retrieval - * by the {@link #getMessage()} method). - * @param cause the cause (which is saved for later retrieval by the - * {@link #getCause()} method). (A null value is - * permitted, and indicates that the cause is nonexistent or - * unknown.) - */ public HealthCheckFailedException(String message, Throwable cause) { super(message, cause); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java index e0f8cfc837c..788a8430732 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java @@ -30,27 +30,11 @@ import org.apache.hadoop.classification.InterfaceStability; public class ServiceFailedException extends Exception { private static final long serialVersionUID = 1L; - /** - * Constructs exception with the specified detail message. - * @param message the detail message (which is saved for later retrieval - * by the {@link #getMessage()} method). - */ public ServiceFailedException(final String message) { super(message); } - /** - * Constructs a new exception with the specified detail message and - * cause. - * - * @param message the detail message (which is saved for later retrieval - * by the {@link #getMessage()} method). - * @param cause the cause (which is saved for later retrieval by the - * {@link #getCause()} method). (A null value is - * permitted, and indicates that the cause is nonexistent or - * unknown.) 
- */ public ServiceFailedException(String message, Throwable cause) { - super(message, cause); + super(message, cause); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java new file mode 100644 index 00000000000..f4a6ff2427f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java @@ -0,0 +1,247 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ha; + +import java.io.IOException; +import java.net.InetSocketAddress; + +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verify; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.ipc.ProtocolSignature; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.net.NetUtils; + +import org.junit.Test; +import static org.junit.Assert.*; + +public class TestFailoverController { + + private class DummyService implements HAServiceProtocol { + HAServiceState state; + + DummyService(HAServiceState state) { + this.state = state; + } + + @Override + public long getProtocolVersion(String protocol, long clientVersion) + throws IOException { + return 0; + } + + @Override + public ProtocolSignature getProtocolSignature(String protocol, + long clientVersion, int clientMethodsHash) throws IOException { + return null; + } + + @Override + public void monitorHealth() throws HealthCheckFailedException { + // Do nothing + } + + @Override + public void transitionToActive() throws ServiceFailedException { + state = HAServiceState.ACTIVE; + } + + @Override + public void transitionToStandby() throws ServiceFailedException { + state = HAServiceState.STANDBY; + } + + @Override + public HAServiceState getServiceState() { + return state; + } + } + + @Test + public void testFailoverAndFailback() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE); + DummyService svc2 = new DummyService(HAServiceState.STANDBY); + + FailoverController.failover(svc1, "svc1", svc2, "svc2"); + assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); + assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); + + FailoverController.failover(svc2, "svc2", svc1, "svc1"); + assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); + assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); + } + + @Test + public void testFailoverFromStandbyToStandby() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.STANDBY); + DummyService svc2 = new 
DummyService(HAServiceState.STANDBY); + + FailoverController.failover(svc1, "svc1", svc2, "svc2"); + assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); + assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); + } + + @Test + public void testFailoverFromActiveToActive() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE); + DummyService svc2 = new DummyService(HAServiceState.ACTIVE); + + try { + FailoverController.failover(svc1, "svc1", svc2, "svc2"); + fail("Can't failover to an already active service"); + } catch (FailoverFailedException ffe) { + // Expected + } + + assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); + assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); + } + + @Test + public void testFailoverToUnhealthyServiceFails() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE); + DummyService svc2 = new DummyService(HAServiceState.STANDBY) { + @Override + public void monitorHealth() throws HealthCheckFailedException { + throw new HealthCheckFailedException("Failed!"); + } + }; + + try { + FailoverController.failover(svc1, "svc1", svc2, "svc2"); + fail("Failover to unhealthy service"); + } catch (FailoverFailedException ffe) { + // Expected + } + assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); + assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); + } + + @Test + public void testFailoverFromFaultyServiceSucceeds() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE) { + @Override + public void transitionToStandby() throws ServiceFailedException { + throw new ServiceFailedException("Failed!"); + } + }; + DummyService svc2 = new DummyService(HAServiceState.STANDBY); + + try { + FailoverController.failover(svc1, "svc1", svc2, "svc2"); + } catch (FailoverFailedException ffe) { + fail("Faulty active prevented failover"); + } + // svc1 still thinks they're active, that's OK, we'll fence them + assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); + assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); + } + + private HAServiceProtocol getProtocol(String target) + throws IOException { + InetSocketAddress addr = NetUtils.createSocketAddr(target); + Configuration conf = new Configuration(); + // Lower the timeout so we quickly fail to connect + conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 1); + return (HAServiceProtocol)RPC.getProxy( + HAServiceProtocol.class, HAServiceProtocol.versionID, addr, conf); + } + + @Test + public void testFailoverFromNonExistantServiceSucceeds() throws Exception { + HAServiceProtocol svc1 = getProtocol("localhost:1234"); + DummyService svc2 = new DummyService(HAServiceState.STANDBY); + + try { + FailoverController.failover(svc1, "svc1", svc2, "svc2"); + } catch (FailoverFailedException ffe) { + fail("Non-existant active prevented failover"); + } + + // Don't check svc1 (we can't reach it, but that's OK, we'll fence) + assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); + } + + @Test + public void testFailoverToNonExistantServiceFails() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE); + HAServiceProtocol svc2 = getProtocol("localhost:1234"); + + try { + FailoverController.failover(svc1, "svc1", svc2, "svc2"); + fail("Failed over to a non-existant standby"); + } catch (FailoverFailedException ffe) { + // Expected + } + + assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); + } + + @Test + public void 
testFailoverToFaultyServiceFailsbackOK() throws Exception { + DummyService svc1 = spy(new DummyService(HAServiceState.ACTIVE)); + DummyService svc2 = new DummyService(HAServiceState.STANDBY) { + @Override + public void transitionToActive() throws ServiceFailedException { + throw new ServiceFailedException("Failed!"); + } + }; + + try { + FailoverController.failover(svc1, "svc1", svc2, "svc2"); + fail("Failover to already active service"); + } catch (FailoverFailedException ffe) { + // Expected + } + + // svc1 went standby then back to active + verify(svc1).transitionToStandby(); + verify(svc1).transitionToActive(); + assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); + assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); + } + + @Test + public void testFailbackToFaultyServiceFails() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE) { + @Override + public void transitionToActive() throws ServiceFailedException { + throw new ServiceFailedException("Failed!"); + } + }; + DummyService svc2 = new DummyService(HAServiceState.STANDBY) { + @Override + public void transitionToActive() throws ServiceFailedException { + throw new ServiceFailedException("Failed!"); + } + }; + + try { + FailoverController.failover(svc1, "svc1", svc2, "svc2"); + fail("Failover to already active service"); + } catch (FailoverFailedException ffe) { + // Expected + } + + assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); + assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java index b465029d47f..ca3d9eccaf3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java @@ -100,6 +100,11 @@ public class TestHAAdmin { Mockito.verify(mockProtocol).transitionToStandby(); } + @Test + public void testFailover() throws Exception { + assertEquals(0, runTool("-failover", "xxx", "yyy")); + } + @Test public void testGetServiceState() throws Exception { assertEquals(0, runTool("-getServiceState", "xxx")); From 20af88280a83ac1c7df97f476849303d662efe48 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Fri, 6 Jan 2012 05:37:24 +0000 Subject: [PATCH 068/177] Fix bad merge which mixed up two configuration options. 
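
The bad merge had passed the values read from the Configuration in the wrong positions of the ConnectionId constructor call, which the compiler cannot catch because they are all plain ints. One way to make that kind of positional slip harder to reintroduce (illustrative only; the actual fix below simply reorders the arguments) is to bind each key to a named local before constructing the ConnectionId:

    int maxIdleTime = conf.getInt(
        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_DEFAULT);
    int maxRetries = conf.getInt(
        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY,
        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT);
    int maxRetriesOnSocketTimeouts = conf.getInt(
        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY,
        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT);
    // Passing named values keeps each setting visibly attached to its key.
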
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1227962 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/main/java/org/apache/hadoop/ipc/Client.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java index 5fe97eac1b8..4ad70538ffd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java @@ -1351,13 +1351,13 @@ public class Client { conf.getBoolean(CommonConfigurationKeys.IPC_CLIENT_PING_KEY, true); return new ConnectionId(addr, protocol, ticket, rpcTimeout, remotePrincipal, - conf.getInt( - CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, - CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT), conf.getInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_DEFAULT), conf.getInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_DEFAULT), + conf.getInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT), conf.getBoolean(CommonConfigurationKeysPublic.IPC_CLIENT_TCPNODELAY_KEY, CommonConfigurationKeysPublic.IPC_CLIENT_TCPNODELAY_DEFAULT), doPing, From 9a07ba8945407cd8f63169faf9e0faa4311d38c7 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 6 Jan 2012 20:44:05 +0000 Subject: [PATCH 069/177] HDFS-2709. Appropriately handle error conditions in EditLogTailer. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1228390 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/BackupImage.java | 4 +- .../namenode/EditLogFileInputStream.java | 26 +- .../namenode/EditLogInputException.java | 45 ++ .../hdfs/server/namenode/FSEditLogLoader.java | 584 +++++++++--------- .../hadoop/hdfs/server/namenode/FSImage.java | 32 +- .../server/namenode/FileJournalManager.java | 36 +- .../server/namenode/ha/EditLogTailer.java | 65 +- .../hdfs/server/namenode/NameNodeAdapter.java | 9 + .../hdfs/server/namenode/TestEditLog.java | 8 +- .../hdfs/server/namenode/TestEditLogRace.java | 2 +- .../namenode/TestFileJournalManager.java | 37 +- .../namenode/TestSecurityTokenEditLog.java | 2 +- .../server/namenode/ha/TestEditLogTailer.java | 27 +- .../namenode/ha/TestFailureToReadEdits.java | 190 ++++++ .../server/namenode/ha/TestHASafeMode.java | 11 +- .../server/namenode/ha/TestStandbyIsHot.java | 8 + 17 files changed, 722 insertions(+), 366 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputException.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 795e65876f2..e48312daf47 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -85,3 +85,5 @@ HDFS-2716. 
Configuration needs to allow different dfs.http.addresses for each HA HDFS-2720. Fix MiniDFSCluster HA support to work properly on Windows. (Uma Maheswara Rao G via todd) HDFS-2291. Allow the StandbyNode to make checkpoints in an HA setup. (todd) + +HDFS-2709. Appropriately handle error conditions in EditLogTailer (atm via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java index 4e28d83a528..ece013fa55a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupImage.java @@ -217,7 +217,7 @@ public class BackupImage extends FSImage { int logVersion = storage.getLayoutVersion(); backupInputStream.setBytes(data, logVersion); - int numLoaded = logLoader.loadEditRecords(logVersion, backupInputStream, + long numLoaded = logLoader.loadEditRecords(logVersion, backupInputStream, true, lastAppliedTxId + 1); if (numLoaded != numTxns) { throw new IOException("Batch of txns starting at txnid " + @@ -310,7 +310,7 @@ public class BackupImage extends FSImage { + " txns from in-progress stream " + stream); FSEditLogLoader loader = new FSEditLogLoader(namesystem); - int numLoaded = loader.loadFSEdits(stream, lastAppliedTxId + 1); + long numLoaded = loader.loadFSEdits(stream, lastAppliedTxId + 1); lastAppliedTxId += numLoaded; assert numLoaded == remainingTxns : "expected to load " + remainingTxns + " but loaded " + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java index 3857db236c6..a27fa9490e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java @@ -68,7 +68,8 @@ class EditLogFileInputStream extends EditLogInputStream { * header */ EditLogFileInputStream(File name, long firstTxId, long lastTxId, - boolean isInProgress) throws LogHeaderCorruptException, IOException { + boolean isInProgress) + throws LogHeaderCorruptException, IOException { file = name; fStream = new FileInputStream(name); @@ -88,6 +89,24 @@ class EditLogFileInputStream extends EditLogInputStream { this.isInProgress = isInProgress; } + /** + * Skip over a number of transactions. Subsequent calls to + * {@link EditLogFileInputStream#readOp()} will begin after these skipped + * transactions. If more transactions are requested to be skipped than remain + * in the edit log, all edit log ops in the log will be skipped and subsequent + * calls to {@link EditLogInputStream#readOp} will return null. + * + * @param transactionsToSkip number of transactions to skip over. 
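
Concretely, under the contract described above: if the stream is positioned so that the next operation is the one for txid 1, then skipTransactions(2) consumes the ops for txids 1 and 2, and the following readOp() returns the op for txid 3; asking to skip more operations than the log still holds simply drains the stream.

    // 'in' is an EditLogFileInputStream positioned at txid 1 (schematic).
    in.skipTransactions(2);          // consumes the ops for txids 1 and 2
    FSEditLogOp next = in.readOp();  // op for txid 3, or null if the log is drained
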
+ * @throws IOException if there's an error while reading an operation + */ + public void skipTransactions(long transactionsToSkip) throws IOException { + assert firstTxId != HdfsConstants.INVALID_TXID && + lastTxId != HdfsConstants.INVALID_TXID; + for (long i = 0; i < transactionsToSkip; i++) { + reader.readOp(); + } + } + @Override public long getFirstTxId() throws IOException { return firstTxId; @@ -179,14 +198,13 @@ class EditLogFileInputStream extends EditLogInputStream { throw new LogHeaderCorruptException( "Reached EOF when reading log header"); } - if (logVersion < HdfsConstants.LAYOUT_VERSION) { // future version + if (logVersion < HdfsConstants.LAYOUT_VERSION || // future version + logVersion > Storage.LAST_UPGRADABLE_LAYOUT_VERSION) { // unsupported throw new LogHeaderCorruptException( "Unexpected version of the file system log file: " + logVersion + ". Current version = " + HdfsConstants.LAYOUT_VERSION + "."); } - assert logVersion <= Storage.LAST_UPGRADABLE_LAYOUT_VERSION : - "Unsupported version " + logVersion; return logVersion; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputException.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputException.java new file mode 100644 index 00000000000..56edf8cb22c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogInputException.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Thrown when there's a failure to read an edit log op from disk when loading + * edits. 
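
The transaction count carried by the exception is what lets a caller credit partial progress before failing; a schematic sketch of the intended consumption pattern (the edit-log tailing code changed elsewhere in this patch is the real consumer):

    long numLoaded = 0;
    try {
      numLoaded = loader.loadFSEdits(stream, lastAppliedTxId + 1);
    } catch (EditLogInputException elie) {
      // Some edits may already have been applied before the read failed;
      // count those so the next attempt resumes from the right transaction.
      numLoaded = elie.getNumEditsLoaded();
    } finally {
      lastAppliedTxId += numLoaded;
    }
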
+ */ +@InterfaceAudience.Private +public class EditLogInputException extends IOException { + + private static final long serialVersionUID = 1L; + + private final long numEditsLoaded; + + public EditLogInputException(String message, Throwable cause, + long numEditsLoaded) { + super(message, cause); + this.numEditsLoaded = numEditsLoaded; + } + + public long getNumEditsLoaded() { + return numEditsLoaded; + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 6e9ea8e2875..e1394e630bf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -58,6 +58,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.util.Holder; +import org.apache.hadoop.io.IOUtils; import com.google.common.base.Joiner; @@ -76,52 +77,41 @@ public class FSEditLogLoader { * This is where we apply edits that we've been writing to disk all * along. */ - int loadFSEdits(EditLogInputStream edits, long expectedStartingTxId) - throws IOException { - long startTime = now(); + long loadFSEdits(EditLogInputStream edits, long expectedStartingTxId) + throws IOException { + long numEdits = 0; + int logVersion = edits.getVersion(); + fsNamesys.writeLock(); try { - int numEdits = loadFSEdits(edits, true, expectedStartingTxId); + long startTime = now(); + numEdits = loadEditRecords(logVersion, edits, false, + expectedStartingTxId); FSImage.LOG.info("Edits file " + edits.getName() + " of size " + edits.length() + " edits # " + numEdits + " loaded in " + (now()-startTime)/1000 + " seconds."); - return numEdits; - } finally { - fsNamesys.writeUnlock(); - } - } - - private int loadFSEdits(EditLogInputStream edits, boolean closeOnExit, - long expectedStartingTxId) - throws IOException { - int numEdits = 0; - int logVersion = edits.getVersion(); - - try { - numEdits = loadEditRecords(logVersion, edits, false, - expectedStartingTxId); } finally { fsNamesys.setBlockTotal(); + // Delay the notification of genstamp updates until after // setBlockTotal() above. Otherwise, we will mark blocks // as "safe" before they've been incorporated in the expected // totalBlocks and threshold for SafeMode -- triggering an // assertion failure and/or exiting safemode too early! 
fsNamesys.notifyGenStampUpdate(maxGenStamp); - if(closeOnExit) { - edits.close(); - } + + edits.close(); + fsNamesys.writeUnlock(); } return numEdits; } - @SuppressWarnings("deprecation") - int loadEditRecords(int logVersion, EditLogInputStream in, boolean closeOnExit, + long loadEditRecords(int logVersion, EditLogInputStream in, boolean closeOnExit, long expectedStartingTxId) - throws IOException { + throws IOException, EditLogInputException { FSDirectory fsDir = fsNamesys.dir; - int numEdits = 0; + long numEdits = 0; EnumMap> opCounts = new EnumMap>(FSEditLogOpCodes.class); @@ -136,9 +126,19 @@ public class FSEditLogLoader { long txId = expectedStartingTxId - 1; try { - FSEditLogOp op; - while ((op = in.readOp()) != null) { - recentOpcodeOffsets[numEdits % recentOpcodeOffsets.length] = + while (true) { + FSEditLogOp op; + try { + if ((op = in.readOp()) == null) { + break; + } + } catch (IOException ioe) { + String errorMessage = formatEditLogReplayError(in, recentOpcodeOffsets); + FSImage.LOG.error(errorMessage); + throw new EditLogInputException(errorMessage, + ioe, numEdits); + } + recentOpcodeOffsets[(int)(numEdits % recentOpcodeOffsets.length)] = in.getPosition(); if (LayoutVersion.supports(Feature.STORED_TXIDS, logVersion)) { long thisTxId = op.txid; @@ -149,279 +149,291 @@ public class FSEditLogLoader { txId = thisTxId; } - numEdits++; incrOpCount(op.opCode, opCounts); - switch (op.opCode) { - case OP_ADD: { - AddCloseOp addCloseOp = (AddCloseOp)op; - - // See if the file already exists (persistBlocks call) - INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path); - if (oldFile == null) { // this is OP_ADD on a new file - // versions > 0 support per file replication - // get name and replication - final short replication = fsNamesys.getBlockManager( - ).adjustReplication(addCloseOp.replication); - PermissionStatus permissions = fsNamesys.getUpgradePermission(); - if (addCloseOp.permissions != null) { - permissions = addCloseOp.permissions; - } - long blockSize = addCloseOp.blockSize; - - if (FSNamesystem.LOG.isDebugEnabled()) { - FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + - " numblocks : " + addCloseOp.blocks.length + - " clientHolder " + addCloseOp.clientName + - " clientMachine " + addCloseOp.clientMachine); - } - - // Older versions of HDFS does not store the block size in inode. - // If the file has more than one block, use the size of the - // first block as the blocksize. Otherwise use the default - // block size. - if (-8 <= logVersion && blockSize == 0) { - if (addCloseOp.blocks.length > 1) { - blockSize = addCloseOp.blocks[0].getNumBytes(); - } else { - long first = ((addCloseOp.blocks.length == 1)? - addCloseOp.blocks[0].getNumBytes(): 0); - blockSize = Math.max(fsNamesys.getDefaultBlockSize(), first); - } - } - - // TODO: We should do away with this add-then-replace dance. - - // add to the file tree - INodeFile node = (INodeFile)fsDir.unprotectedAddFile( - addCloseOp.path, permissions, - replication, addCloseOp.mtime, - addCloseOp.atime, blockSize); - - fsNamesys.prepareFileForWrite(addCloseOp.path, node, - addCloseOp.clientName, addCloseOp.clientMachine, null); - } else { // This is OP_ADD on an existing file - if (!oldFile.isUnderConstruction()) { - // This is a call to append() on an already-closed file. 
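
The block-size fallback a few lines up (for edit logs written before the inode recorded a block size) survives unchanged into the new applyEditLogOp method below. Guarded by (-8 <= logVersion && blockSize == 0), the rule in isolation is just:

    // Illustrative helper, with blocks represented by their sizes in bytes.
    static long inferBlockSize(long[] blockSizes, long defaultBlockSize) {
      if (blockSizes.length > 1) {
        return blockSizes[0];                      // use the first block's size
      }
      long first = (blockSizes.length == 1) ? blockSizes[0] : 0;
      return Math.max(defaultBlockSize, first);    // otherwise at least the default
    }
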
- fsNamesys.prepareFileForWrite(addCloseOp.path, oldFile, - addCloseOp.clientName, addCloseOp.clientMachine, null); - oldFile = getINodeFile(fsDir, addCloseOp.path); - } - - updateBlocks(fsDir, addCloseOp, oldFile); - } - break; - } - case OP_CLOSE: { - AddCloseOp addCloseOp = (AddCloseOp)op; - - INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path); - if (oldFile == null) { - throw new IOException("Operation trying to close non-existent file " + - addCloseOp.path); - } - - // Update in-memory data structures - updateBlocks(fsDir, addCloseOp, oldFile); - - // Now close the file - INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile; - // TODO: we could use removeLease(holder, path) here, but OP_CLOSE - // doesn't seem to serialize the holder... unclear why! - fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path); - INodeFile newFile = ucFile.convertToInodeFile(); - fsDir.replaceNode(addCloseOp.path, ucFile, newFile); - break; - } - case OP_SET_REPLICATION: { - SetReplicationOp setReplicationOp = (SetReplicationOp)op; - short replication = fsNamesys.getBlockManager().adjustReplication( - setReplicationOp.replication); - fsDir.unprotectedSetReplication(setReplicationOp.path, - replication, null); - break; - } - case OP_CONCAT_DELETE: { - ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op; - fsDir.unprotectedConcat(concatDeleteOp.trg, concatDeleteOp.srcs, - concatDeleteOp.timestamp); - break; - } - case OP_RENAME_OLD: { - RenameOldOp renameOp = (RenameOldOp)op; - HdfsFileStatus dinfo = fsDir.getFileInfo(renameOp.dst, false); - fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, - renameOp.timestamp); - fsNamesys.unprotectedChangeLease(renameOp.src, renameOp.dst, dinfo); - break; - } - case OP_DELETE: { - DeleteOp deleteOp = (DeleteOp)op; - fsDir.unprotectedDelete(deleteOp.path, deleteOp.timestamp); - break; - } - case OP_MKDIR: { - MkdirOp mkdirOp = (MkdirOp)op; - PermissionStatus permissions = fsNamesys.getUpgradePermission(); - if (mkdirOp.permissions != null) { - permissions = mkdirOp.permissions; - } - - fsDir.unprotectedMkdir(mkdirOp.path, permissions, - mkdirOp.timestamp); - break; - } - case OP_SET_GENSTAMP: { - SetGenstampOp setGenstampOp = (SetGenstampOp)op; - fsNamesys.setGenerationStamp(setGenstampOp.genStamp); - break; - } - case OP_SET_PERMISSIONS: { - SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op; - fsDir.unprotectedSetPermission(setPermissionsOp.src, - setPermissionsOp.permissions); - break; - } - case OP_SET_OWNER: { - SetOwnerOp setOwnerOp = (SetOwnerOp)op; - fsDir.unprotectedSetOwner(setOwnerOp.src, setOwnerOp.username, - setOwnerOp.groupname); - break; - } - case OP_SET_NS_QUOTA: { - SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op; - fsDir.unprotectedSetQuota(setNSQuotaOp.src, - setNSQuotaOp.nsQuota, - HdfsConstants.QUOTA_DONT_SET); - break; - } - case OP_CLEAR_NS_QUOTA: { - ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op; - fsDir.unprotectedSetQuota(clearNSQuotaOp.src, - HdfsConstants.QUOTA_RESET, - HdfsConstants.QUOTA_DONT_SET); - break; - } - - case OP_SET_QUOTA: - SetQuotaOp setQuotaOp = (SetQuotaOp)op; - fsDir.unprotectedSetQuota(setQuotaOp.src, - setQuotaOp.nsQuota, - setQuotaOp.dsQuota); - break; - - case OP_TIMES: { - TimesOp timesOp = (TimesOp)op; - - fsDir.unprotectedSetTimes(timesOp.path, - timesOp.mtime, - timesOp.atime, true); - break; - } - case OP_SYMLINK: { - SymlinkOp symlinkOp = (SymlinkOp)op; - fsDir.unprotectedSymlink(symlinkOp.path, symlinkOp.value, - symlinkOp.mtime, symlinkOp.atime, - 
symlinkOp.permissionStatus); - break; - } - case OP_RENAME: { - RenameOp renameOp = (RenameOp)op; - - HdfsFileStatus dinfo = fsDir.getFileInfo(renameOp.dst, false); - fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, - renameOp.timestamp, renameOp.options); - fsNamesys.unprotectedChangeLease(renameOp.src, renameOp.dst, dinfo); - break; - } - case OP_GET_DELEGATION_TOKEN: { - GetDelegationTokenOp getDelegationTokenOp - = (GetDelegationTokenOp)op; - - fsNamesys.getDelegationTokenSecretManager() - .addPersistedDelegationToken(getDelegationTokenOp.token, - getDelegationTokenOp.expiryTime); - break; - } - case OP_RENEW_DELEGATION_TOKEN: { - RenewDelegationTokenOp renewDelegationTokenOp - = (RenewDelegationTokenOp)op; - fsNamesys.getDelegationTokenSecretManager() - .updatePersistedTokenRenewal(renewDelegationTokenOp.token, - renewDelegationTokenOp.expiryTime); - break; - } - case OP_CANCEL_DELEGATION_TOKEN: { - CancelDelegationTokenOp cancelDelegationTokenOp - = (CancelDelegationTokenOp)op; - fsNamesys.getDelegationTokenSecretManager() - .updatePersistedTokenCancellation( - cancelDelegationTokenOp.token); - break; - } - case OP_UPDATE_MASTER_KEY: { - UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op; - fsNamesys.getDelegationTokenSecretManager() - .updatePersistedMasterKey(updateMasterKeyOp.key); - break; - } - case OP_REASSIGN_LEASE: { - ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op; - - Lease lease = fsNamesys.leaseManager.getLease( - reassignLeaseOp.leaseHolder); - INodeFileUnderConstruction pendingFile = - (INodeFileUnderConstruction) fsDir.getFileINode( - reassignLeaseOp.path); - fsNamesys.reassignLeaseInternal(lease, - reassignLeaseOp.path, reassignLeaseOp.newHolder, pendingFile); - break; - } - case OP_START_LOG_SEGMENT: - case OP_END_LOG_SEGMENT: { - // no data in here currently. - break; - } - case OP_DATANODE_ADD: - case OP_DATANODE_REMOVE: - break; - default: - throw new IOException("Invalid operation read " + op.opCode); + try { + applyEditLogOp(op, fsDir, logVersion); + } catch (Throwable t) { + // Catch Throwable because in the case of a truly corrupt edits log, any + // sort of error might be thrown (NumberFormat, NullPointer, EOF, etc.) + String errorMessage = formatEditLogReplayError(in, recentOpcodeOffsets); + FSImage.LOG.error(errorMessage); + throw new IOException(errorMessage, t); } + numEdits++; } - } catch (IOException ex) { check203UpgradeFailure(logVersion, ex); } finally { if(closeOnExit) in.close(); } - } catch (Throwable t) { - // Catch Throwable because in the case of a truly corrupt edits log, any - // sort of error might be thrown (NumberFormat, NullPointer, EOF, etc.) 
- StringBuilder sb = new StringBuilder(); - sb.append("Error replaying edit log at offset " + in.getPosition()); - if (recentOpcodeOffsets[0] != -1) { - Arrays.sort(recentOpcodeOffsets); - sb.append("\nRecent opcode offsets:"); - for (long offset : recentOpcodeOffsets) { - if (offset != -1) { - sb.append(' ').append(offset); - } - } - } - String errorMessage = sb.toString(); - FSImage.LOG.error(errorMessage); - throw new IOException(errorMessage, t); } finally { fsDir.writeUnlock(); fsNamesys.writeUnlock(); - } - if (FSImage.LOG.isDebugEnabled()) { - dumpOpCounts(opCounts); + if (FSImage.LOG.isDebugEnabled()) { + dumpOpCounts(opCounts); + } } return numEdits; } + @SuppressWarnings("deprecation") + private void applyEditLogOp(FSEditLogOp op, FSDirectory fsDir, + int logVersion) throws IOException { + switch (op.opCode) { + case OP_ADD: { + AddCloseOp addCloseOp = (AddCloseOp)op; + + // See if the file already exists (persistBlocks call) + INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path); + if (oldFile == null) { // this is OP_ADD on a new file + // versions > 0 support per file replication + // get name and replication + final short replication = fsNamesys.getBlockManager( + ).adjustReplication(addCloseOp.replication); + PermissionStatus permissions = fsNamesys.getUpgradePermission(); + if (addCloseOp.permissions != null) { + permissions = addCloseOp.permissions; + } + long blockSize = addCloseOp.blockSize; + + if (FSNamesystem.LOG.isDebugEnabled()) { + FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + + " numblocks : " + addCloseOp.blocks.length + + " clientHolder " + addCloseOp.clientName + + " clientMachine " + addCloseOp.clientMachine); + } + + // Older versions of HDFS does not store the block size in inode. + // If the file has more than one block, use the size of the + // first block as the blocksize. Otherwise use the default + // block size. + if (-8 <= logVersion && blockSize == 0) { + if (addCloseOp.blocks.length > 1) { + blockSize = addCloseOp.blocks[0].getNumBytes(); + } else { + long first = ((addCloseOp.blocks.length == 1)? + addCloseOp.blocks[0].getNumBytes(): 0); + blockSize = Math.max(fsNamesys.getDefaultBlockSize(), first); + } + } + + // TODO: We should do away with this add-then-replace dance. + + // add to the file tree + INodeFile node = (INodeFile)fsDir.unprotectedAddFile( + addCloseOp.path, permissions, + replication, addCloseOp.mtime, + addCloseOp.atime, blockSize); + + fsNamesys.prepareFileForWrite(addCloseOp.path, node, + addCloseOp.clientName, addCloseOp.clientMachine, null); + } else { // This is OP_ADD on an existing file + if (!oldFile.isUnderConstruction()) { + // This is a call to append() on an already-closed file. + fsNamesys.prepareFileForWrite(addCloseOp.path, oldFile, + addCloseOp.clientName, addCloseOp.clientMachine, null); + oldFile = getINodeFile(fsDir, addCloseOp.path); + } + + updateBlocks(fsDir, addCloseOp, oldFile); + } + break; + } + case OP_CLOSE: { + AddCloseOp addCloseOp = (AddCloseOp)op; + + INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path); + if (oldFile == null) { + throw new IOException("Operation trying to close non-existent file " + + addCloseOp.path); + } + + // Update in-memory data structures + updateBlocks(fsDir, addCloseOp, oldFile); + + // Now close the file + INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile; + // TODO: we could use removeLease(holder, path) here, but OP_CLOSE + // doesn't seem to serialize the holder... unclear why! 
+ fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path); + INodeFile newFile = ucFile.convertToInodeFile(); + fsDir.replaceNode(addCloseOp.path, ucFile, newFile); + break; + } + case OP_SET_REPLICATION: { + SetReplicationOp setReplicationOp = (SetReplicationOp)op; + short replication = fsNamesys.getBlockManager().adjustReplication( + setReplicationOp.replication); + fsDir.unprotectedSetReplication(setReplicationOp.path, + replication, null); + break; + } + case OP_CONCAT_DELETE: { + ConcatDeleteOp concatDeleteOp = (ConcatDeleteOp)op; + fsDir.unprotectedConcat(concatDeleteOp.trg, concatDeleteOp.srcs, + concatDeleteOp.timestamp); + break; + } + case OP_RENAME_OLD: { + RenameOldOp renameOp = (RenameOldOp)op; + HdfsFileStatus dinfo = fsDir.getFileInfo(renameOp.dst, false); + fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, + renameOp.timestamp); + fsNamesys.unprotectedChangeLease(renameOp.src, renameOp.dst, dinfo); + break; + } + case OP_DELETE: { + DeleteOp deleteOp = (DeleteOp)op; + fsDir.unprotectedDelete(deleteOp.path, deleteOp.timestamp); + break; + } + case OP_MKDIR: { + MkdirOp mkdirOp = (MkdirOp)op; + PermissionStatus permissions = fsNamesys.getUpgradePermission(); + if (mkdirOp.permissions != null) { + permissions = mkdirOp.permissions; + } + + fsDir.unprotectedMkdir(mkdirOp.path, permissions, + mkdirOp.timestamp); + break; + } + case OP_SET_GENSTAMP: { + SetGenstampOp setGenstampOp = (SetGenstampOp)op; + fsNamesys.setGenerationStamp(setGenstampOp.genStamp); + break; + } + case OP_SET_PERMISSIONS: { + SetPermissionsOp setPermissionsOp = (SetPermissionsOp)op; + fsDir.unprotectedSetPermission(setPermissionsOp.src, + setPermissionsOp.permissions); + break; + } + case OP_SET_OWNER: { + SetOwnerOp setOwnerOp = (SetOwnerOp)op; + fsDir.unprotectedSetOwner(setOwnerOp.src, setOwnerOp.username, + setOwnerOp.groupname); + break; + } + case OP_SET_NS_QUOTA: { + SetNSQuotaOp setNSQuotaOp = (SetNSQuotaOp)op; + fsDir.unprotectedSetQuota(setNSQuotaOp.src, + setNSQuotaOp.nsQuota, + HdfsConstants.QUOTA_DONT_SET); + break; + } + case OP_CLEAR_NS_QUOTA: { + ClearNSQuotaOp clearNSQuotaOp = (ClearNSQuotaOp)op; + fsDir.unprotectedSetQuota(clearNSQuotaOp.src, + HdfsConstants.QUOTA_RESET, + HdfsConstants.QUOTA_DONT_SET); + break; + } + + case OP_SET_QUOTA: + SetQuotaOp setQuotaOp = (SetQuotaOp)op; + fsDir.unprotectedSetQuota(setQuotaOp.src, + setQuotaOp.nsQuota, + setQuotaOp.dsQuota); + break; + + case OP_TIMES: { + TimesOp timesOp = (TimesOp)op; + + fsDir.unprotectedSetTimes(timesOp.path, + timesOp.mtime, + timesOp.atime, true); + break; + } + case OP_SYMLINK: { + SymlinkOp symlinkOp = (SymlinkOp)op; + fsDir.unprotectedSymlink(symlinkOp.path, symlinkOp.value, + symlinkOp.mtime, symlinkOp.atime, + symlinkOp.permissionStatus); + break; + } + case OP_RENAME: { + RenameOp renameOp = (RenameOp)op; + + HdfsFileStatus dinfo = fsDir.getFileInfo(renameOp.dst, false); + fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, + renameOp.timestamp, renameOp.options); + fsNamesys.unprotectedChangeLease(renameOp.src, renameOp.dst, dinfo); + break; + } + case OP_GET_DELEGATION_TOKEN: { + GetDelegationTokenOp getDelegationTokenOp + = (GetDelegationTokenOp)op; + + fsNamesys.getDelegationTokenSecretManager() + .addPersistedDelegationToken(getDelegationTokenOp.token, + getDelegationTokenOp.expiryTime); + break; + } + case OP_RENEW_DELEGATION_TOKEN: { + RenewDelegationTokenOp renewDelegationTokenOp + = (RenewDelegationTokenOp)op; + fsNamesys.getDelegationTokenSecretManager() + 
.updatePersistedTokenRenewal(renewDelegationTokenOp.token, + renewDelegationTokenOp.expiryTime); + break; + } + case OP_CANCEL_DELEGATION_TOKEN: { + CancelDelegationTokenOp cancelDelegationTokenOp + = (CancelDelegationTokenOp)op; + fsNamesys.getDelegationTokenSecretManager() + .updatePersistedTokenCancellation( + cancelDelegationTokenOp.token); + break; + } + case OP_UPDATE_MASTER_KEY: { + UpdateMasterKeyOp updateMasterKeyOp = (UpdateMasterKeyOp)op; + fsNamesys.getDelegationTokenSecretManager() + .updatePersistedMasterKey(updateMasterKeyOp.key); + break; + } + case OP_REASSIGN_LEASE: { + ReassignLeaseOp reassignLeaseOp = (ReassignLeaseOp)op; + + Lease lease = fsNamesys.leaseManager.getLease( + reassignLeaseOp.leaseHolder); + INodeFileUnderConstruction pendingFile = + (INodeFileUnderConstruction) fsDir.getFileINode( + reassignLeaseOp.path); + fsNamesys.reassignLeaseInternal(lease, + reassignLeaseOp.path, reassignLeaseOp.newHolder, pendingFile); + break; + } + case OP_START_LOG_SEGMENT: + case OP_END_LOG_SEGMENT: { + // no data in here currently. + break; + } + case OP_DATANODE_ADD: + case OP_DATANODE_REMOVE: + break; + default: + throw new IOException("Invalid operation read " + op.opCode); + } + } + + private static String formatEditLogReplayError(EditLogInputStream in, + long recentOpcodeOffsets[]) { + StringBuilder sb = new StringBuilder(); + sb.append("Error replaying edit log at offset " + in.getPosition()); + if (recentOpcodeOffsets[0] != -1) { + Arrays.sort(recentOpcodeOffsets); + sb.append("\nRecent opcode offsets:"); + for (long offset : recentOpcodeOffsets) { + if (offset != -1) { + sb.append(' ').append(offset); + } + } + } + return sb.toString(); + } + private static INodeFile getINodeFile(FSDirectory fsDir, String path) throws IOException { INode inode = fsDir.getINode(path); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index b92a37eae8c..8eb4dede34c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -665,11 +665,11 @@ public class FSImage implements Closeable { * @return the number of transactions loaded */ public long loadEdits(Iterable editStreams, - FSNamesystem target) throws IOException { + FSNamesystem target) throws IOException, EditLogInputException { LOG.debug("About to load edits:\n " + Joiner.on("\n ").join(editStreams)); long startingTxId = getLastAppliedTxId() + 1; - int numLoaded = 0; + long numLoaded = 0; try { FSEditLogLoader loader = new FSEditLogLoader(target); @@ -677,20 +677,28 @@ public class FSImage implements Closeable { // Load latest edits for (EditLogInputStream editIn : editStreams) { LOG.info("Reading " + editIn + " expecting start txid #" + startingTxId); - int thisNumLoaded = loader.loadFSEdits(editIn, startingTxId); - lastAppliedTxId = startingTxId + thisNumLoaded - 1; - startingTxId += thisNumLoaded; - numLoaded += thisNumLoaded; + long thisNumLoaded = 0; + try { + thisNumLoaded = loader.loadFSEdits(editIn, startingTxId); + } catch (EditLogInputException elie) { + thisNumLoaded = elie.getNumEditsLoaded(); + throw elie; + } finally { + // Update lastAppliedTxId even in case of error, since some ops may + // have been successfully applied before the error. 
+ lastAppliedTxId = startingTxId + thisNumLoaded - 1; + startingTxId += thisNumLoaded; + numLoaded += thisNumLoaded; + } } } finally { - // TODO(HA): Should this happen when called by the tailer? FSEditLog.closeAllStreams(editStreams); + // update the counts + // TODO(HA): this may be very slow -- we probably want to + // update them as we go for HA. + target.dir.updateCountForINodeWithQuota(); } - - // update the counts - // TODO(HA): this may be very slow -- we probably want to - // update them as we go for HA. - target.dir.updateCountForINodeWithQuota(); + return numLoaded; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index 182d5f763d3..06b8eff3fa9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -147,6 +147,7 @@ class FileJournalManager implements JournalManager { ret.add(new RemoteEditLog(elf.firstTxId, elf.lastTxId)); } else if ((firstTxId > elf.getFirstTxId()) && (firstTxId <= elf.getLastTxId())) { + // Note that this behavior is different from getLogFiles below. throw new IllegalStateException("Asked for firstTxId " + firstTxId + " which is in the middle of file " + elf.file); } @@ -194,20 +195,21 @@ class FileJournalManager implements JournalManager { synchronized public EditLogInputStream getInputStream(long fromTxId) throws IOException { for (EditLogFile elf : getLogFiles(fromTxId)) { - if (elf.getFirstTxId() == fromTxId) { + if (elf.containsTxId(fromTxId)) { if (elf.isInProgress()) { elf.validateLog(); } if (LOG.isTraceEnabled()) { LOG.trace("Returning edit stream reading from " + elf); } - return new EditLogFileInputStream(elf.getFile(), + EditLogFileInputStream elfis = new EditLogFileInputStream(elf.getFile(), elf.getFirstTxId(), elf.getLastTxId(), elf.isInProgress()); + elfis.skipTransactions(fromTxId - elf.getFirstTxId()); + return elfis; } } - throw new IOException("Cannot find editlog file with " + fromTxId - + " as first first txid"); + throw new IOException("Cannot find editlog file containing " + fromTxId); } @Override @@ -223,7 +225,7 @@ class FileJournalManager implements JournalManager { LOG.warn("Gap in transactions in " + sd.getRoot() + ". Gap is " + fromTxId + " - " + (elf.getFirstTxId() - 1)); break; - } else if (fromTxId == elf.getFirstTxId()) { + } else if (elf.containsTxId(fromTxId)) { if (elf.isInProgress()) { elf.validateLog(); } @@ -231,22 +233,12 @@ class FileJournalManager implements JournalManager { if (elf.isCorrupt()) { break; } + numTxns += elf.getLastTxId() + 1 - fromTxId; fromTxId = elf.getLastTxId() + 1; - numTxns += fromTxId - elf.getFirstTxId(); if (elf.isInProgress()) { break; } - } else if (elf.getFirstTxId() < fromTxId && - elf.getLastTxId() >= fromTxId) { - // Middle of a log segment - this should never happen - // since getLogFiles checks for it. But we should be - // paranoid about this case since it might result in - // overlapping txid ranges, etc, if we had a bug. 
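As a concrete illustration of the mid-segment behaviour added above (the txid values are invented for the example, not taken from the patch):

// Illustrative walk-through for a finalized segment holding txids 1..100:
//   containsTxId(5)             -> 1 <= 5 && 5 <= 100, so true
//   getInputStream(5)           -> opens the segment and calls
//                                  skipTransactions(5 - 1); the first op
//                                  returned by readOp() then has txid 5
//   getNumberOfTransactions(5)  -> counts getLastTxId() + 1 - fromTxId
//                                  = 100 + 1 - 5 = 96 transactions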
- IOException ioe = new IOException("txid " + fromTxId + - " falls in the middle of file " + elf); - LOG.error("Broken invariant in edit log file management", ioe); - throw ioe; } } @@ -302,12 +294,8 @@ class FileJournalManager implements JournalManager { List logFiles = Lists.newArrayList(); for (EditLogFile elf : allLogFiles) { - if (fromTxId > elf.getFirstTxId() - && fromTxId <= elf.getLastTxId()) { - throw new IllegalStateException("Asked for fromTxId " + fromTxId - + " which is in middle of file " + elf.file); - } - if (fromTxId <= elf.getFirstTxId()) { + if (fromTxId <= elf.getFirstTxId() || + elf.containsTxId(fromTxId)) { logFiles.add(elf); } } @@ -389,6 +377,10 @@ class FileJournalManager implements JournalManager { long getLastTxId() { return lastTxId; } + + boolean containsTxId(long txId) { + return firstTxId <= txId && txId <= lastTxId; + } /** * Count the number of valid transactions in a log. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index 53e96a73a32..097332b1404 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -25,6 +25,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hdfs.server.namenode.EditLogInputException; import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream; import org.apache.hadoop.hdfs.server.namenode.FSEditLog; import org.apache.hadoop.hdfs.server.namenode.FSImage; @@ -46,9 +47,9 @@ public class EditLogTailer { private final EditLogTailerThread tailerThread; private final FSNamesystem namesystem; - private final FSEditLog editLog; + private FSEditLog editLog; - private volatile Throwable lastError = null; + private volatile Runtime runtime = Runtime.getRuntime(); public EditLogTailer(FSNamesystem namesystem) { this.tailerThread = new EditLogTailerThread(); @@ -82,8 +83,18 @@ public class EditLogTailer { } @VisibleForTesting - public Throwable getLastError() { - return lastError; + FSEditLog getEditLog() { + return editLog; + } + + @VisibleForTesting + void setEditLog(FSEditLog editLog) { + this.editLog = editLog; + } + + @VisibleForTesting + synchronized void setRuntime(Runtime runtime) { + this.runtime = runtime; } public void catchupDuringFailover() throws IOException { @@ -111,13 +122,31 @@ public class EditLogTailer { if (LOG.isDebugEnabled()) { LOG.debug("lastTxnId: " + lastTxnId); } - Collection streams = editLog - .selectInputStreams(lastTxnId + 1, 0, false); + Collection streams; + try { + streams = editLog.selectInputStreams(lastTxnId + 1, 0, false); + } catch (IOException ioe) { + // This is acceptable. If we try to tail edits in the middle of an edits + // log roll, i.e. the last one has been finalized but the new inprogress + // edits file hasn't been started yet. + LOG.warn("Edits tailer failed to find any streams. 
Will try again " + + "later.", ioe); + return; + } if (LOG.isDebugEnabled()) { LOG.debug("edit streams to load from: " + streams.size()); } - long editsLoaded = image.loadEdits(streams, namesystem); + // Once we have streams to load, errors encountered are legitimate cause + // for concern, so we don't catch them here. Simple errors reading from + // disk are ignored. + long editsLoaded = 0; + try { + editsLoaded = image.loadEdits(streams, namesystem); + } catch (EditLogInputException elie) { + LOG.warn("Error while reading edits from disk. Will try again.", elie); + editsLoaded = elie.getNumEditsLoaded(); + } if (LOG.isDebugEnabled()) { LOG.debug("editsLoaded: " + editsLoaded); } @@ -150,22 +179,14 @@ public class EditLogTailer { public void run() { while (shouldRun) { try { - try { - doTailEdits(); - } catch (IOException e) { - if (e.getCause() instanceof RuntimeException) { - throw (RuntimeException)e.getCause(); - } else if (e.getCause() instanceof Error) { - throw (Error)e.getCause(); - } - - // Will try again - LOG.info("Got error, will try again.", e); - } + doTailEdits(); + } catch (InterruptedException ie) { + // interrupter should have already set shouldRun to false + continue; } catch (Throwable t) { - // TODO(HA): What should we do in this case? Shutdown the standby NN? - LOG.error("Edit log tailer received throwable", t); - lastError = t; + LOG.error("Error encountered while tailing edits. Shutting down " + + "standby NN.", t); + runtime.exit(1); } try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index 8223e7c60c1..800cb542c60 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.ipc.Server; @@ -156,4 +157,12 @@ public class NameNodeAdapter { nn1.getNamesystem().dir.fsImage = spy; return spy; } + + public static String getMkdirOpPath(FSEditLogOp op) { + if (op.opCode == FSEditLogOpCodes.OP_MKDIR) { + return ((MkdirOp) op).path; + } else { + return null; + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index fe756b2c992..f36b5d20516 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -147,7 +147,7 @@ public class TestEditLog extends TestCase { public void testPreTxIdEditLogNoEdits() throws Exception { FSNamesystem namesys = Mockito.mock(FSNamesystem.class); namesys.dir = Mockito.mock(FSDirectory.class); - int numEdits = testLoad( + long numEdits = testLoad( 
StringUtils.hexStringToByte("ffffffed"), // just version number namesys); assertEquals(0, numEdits); @@ -166,7 +166,7 @@ public class TestEditLog extends TestCase { cluster.waitActive(); final FSNamesystem namesystem = cluster.getNamesystem(); - int numEdits = testLoad(HADOOP20_SOME_EDITS, namesystem); + long numEdits = testLoad(HADOOP20_SOME_EDITS, namesystem); assertEquals(3, numEdits); // Sanity check the edit HdfsFileStatus fileInfo = namesystem.getFileInfo("/myfile", false); @@ -177,7 +177,7 @@ public class TestEditLog extends TestCase { } } - private int testLoad(byte[] data, FSNamesystem namesys) throws IOException { + private long testLoad(byte[] data, FSNamesystem namesys) throws IOException { FSEditLogLoader loader = new FSEditLogLoader(namesys); return loader.loadFSEdits(new EditLogByteInputStream(data), 1); } @@ -315,7 +315,7 @@ public class TestEditLog extends TestCase { assertTrue("Expect " + editFile + " exists", editFile.exists()); System.out.println("Verifying file: " + editFile); - int numEdits = loader.loadFSEdits( + long numEdits = loader.loadFSEdits( new EditLogFileInputStream(editFile), 3); int numLeases = namesystem.leaseManager.countLease(); System.out.println("Number of outstanding leases " + numLeases); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java index a855f8ddc65..da66b45da2a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java @@ -237,7 +237,7 @@ public class TestEditLogRace { System.out.println("Verifying file: " + editFile); FSEditLogLoader loader = new FSEditLogLoader(namesystem); - int numEditsThisLog = loader.loadFSEdits(new EditLogFileInputStream(editFile), + long numEditsThisLog = loader.loadFSEdits(new EditLogFileInputStream(editFile), startTxId); System.out.println("Number of edits: " + numEditsThisLog); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java index 0321dff4e16..275c3fa38ae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java @@ -30,6 +30,7 @@ import java.io.FilenameFilter; import java.io.IOException; import org.junit.Test; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; +import org.apache.hadoop.hdfs.server.namenode.JournalManager.CorruptionException; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.test.GenericTestUtils; import static org.apache.hadoop.hdfs.server.namenode.TestEditLog.setupEdits; @@ -194,12 +195,12 @@ public class TestFileJournalManager { } /** - * Try to make a request with a start transaction id which doesn't - * match the start ID of some log segment. - * This should fail as edit logs must currently be treated as indevisable - * units. + * Make requests with starting transaction ids which don't match the beginning + * txid of some log segments. + * + * This should succeed. 
*/ - @Test(expected=IllegalStateException.class) + @Test public void testAskForTransactionsMidfile() throws IOException { File f = new File(TestEditLog.TEST_DIR + "/filejournaltest2"); NNStorage storage = setupEdits(Collections.singletonList(f.toURI()), @@ -207,7 +208,12 @@ public class TestFileJournalManager { StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); FileJournalManager jm = new FileJournalManager(sd); - jm.getNumberOfTransactions(2); + + // 10 rolls, so 11 rolled files, 110 txids total. + final int TOTAL_TXIDS = 10 * 11; + for (int txid = 1; txid <= TOTAL_TXIDS; txid++) { + assertEquals((TOTAL_TXIDS - txid) + 1, jm.getNumberOfTransactions(txid)); + } } /** @@ -303,6 +309,25 @@ public class TestFileJournalManager { "", getLogsAsString(fjm, 9999)); } + /** + * Make sure that we starting reading the correct op when we request a stream + * with a txid in the middle of an edit log file. + */ + @Test + public void testReadFromMiddleOfEditLog() throws CorruptionException, + IOException { + File f = new File(TestEditLog.TEST_DIR + "/filejournaltest2"); + NNStorage storage = setupEdits(Collections.singletonList(f.toURI()), + 10); + StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); + + FileJournalManager jm = new FileJournalManager(sd); + + EditLogInputStream elis = jm.getInputStream(5); + FSEditLogOp op = elis.readOp(); + assertEquals("read unexpected op", op.getTransactionId(), 5); + } + private static String getLogsAsString( FileJournalManager fjm, long firstTxId) throws IOException { return Joiner.on(",").join(fjm.getRemoteEditLogs(firstTxId)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java index d4fd72d3b06..c0012be5baa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java @@ -141,7 +141,7 @@ public class TestSecurityTokenEditLog extends TestCase { System.out.println("Verifying file: " + editFile); FSEditLogLoader loader = new FSEditLogLoader(namesystem); - int numEdits = loader.loadFSEdits( + long numEdits = loader.loadFSEdits( new EditLogFileInputStream(editFile), 1); assertEquals("Verification for " + editFile, expectedTransactions, numEdits); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java index 876a632bc5f..5f7170dd0d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java @@ -101,9 +101,21 @@ public class TestEditLogTailer { private static String getDirPath(int suffix) { return DIR_PREFIX + suffix; } - + + /** + * Trigger an edits log roll on the active and then wait for the standby to + * catch up to all the edits done by the active. This method will check + * repeatedly for up to NN_LAG_TIMEOUT milliseconds, and then fail throwing + * {@link CouldNotCatchUpException}. 
+ * + * @param active active NN + * @param standby standby NN which should catch up to active + * @throws IOException if an error occurs rolling the edit log + * @throws CouldNotCatchUpException if the standby doesn't catch up to the + * active in NN_LAG_TIMEOUT milliseconds + */ static void waitForStandbyToCatchUp(NameNode active, - NameNode standby) throws InterruptedException, IOException { + NameNode standby) throws InterruptedException, IOException, CouldNotCatchUpException { long activeTxId = active.getNamesystem().getFSImage().getEditLog() .getLastWrittenTxId(); @@ -119,8 +131,15 @@ public class TestEditLogTailer { } Thread.sleep(SLEEP_TIME); } - Assert.fail("Standby did not catch up to txid " + activeTxId + - " (currently at " + + throw new CouldNotCatchUpException("Standby did not catch up to txid " + + activeTxId + " (currently at " + standby.getNamesystem().getFSImage().getLastAppliedTxId() + ")"); } + + public static class CouldNotCatchUpException extends IOException { + + public CouldNotCatchUpException(String message) { + super(message); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java new file mode 100644 index 00000000000..b1105517548 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java @@ -0,0 +1,190 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import static org.mockito.Matchers.anyBoolean; +import static org.mockito.Matchers.anyInt; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.Collection; +import java.util.LinkedList; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.HAUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream; +import org.apache.hadoop.hdfs.server.namenode.FSEditLog; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.hdfs.server.namenode.ha.TestEditLogTailer.CouldNotCatchUpException; +import org.junit.Test; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +public class TestFailureToReadEdits { + private static final String TEST_DIR1 = "/test1"; + private static final String TEST_DIR2 = "/test2"; + private static final String TEST_DIR3 = "/test3"; + + /** + * Test that the standby NN won't double-replay earlier edits if it encounters + * a failure to read a later edit. + */ + @Test + public void testFailuretoReadEdits() throws IOException, + ServiceFailedException, URISyntaxException, InterruptedException { + Configuration conf = new Configuration(); + HAUtil.setAllowStandbyReads(conf, true); + + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + + try { + cluster.waitActive(); + cluster.transitionToActive(0); + + Runtime mockRuntime = mock(Runtime.class); + + NameNode nn1 = cluster.getNameNode(0); + NameNode nn2 = cluster.getNameNode(1); + nn2.getNamesystem().getEditLogTailer().setSleepTime(250); + nn2.getNamesystem().getEditLogTailer().interrupt(); + nn2.getNamesystem().getEditLogTailer().setRuntime(mockRuntime); + + FileSystem fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + fs.mkdirs(new Path(TEST_DIR1)); + TestEditLogTailer.waitForStandbyToCatchUp(nn1, nn2); + + // If these two ops are applied twice, the first op will throw an + // exception the second time its replayed. + fs.setOwner(new Path(TEST_DIR1), "foo", "bar"); + fs.delete(new Path(TEST_DIR1), true); + + // This op should get applied just fine. + fs.mkdirs(new Path(TEST_DIR2)); + + // This is the op the mocking will cause to fail to be read. 
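// How the failure is injected (see LimitedEditLogAnswer below):
// selectInputStreams() on the spied FSEditLog is stubbed so that each
// returned EditLogInputStream is wrapped in a Mockito spy whose readOp()
// throws an IOException as soon as it reads the MKDIR op for TEST_DIR3;
// calling setThrowExceptionOnRead(false) later lets that op through.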
+ fs.mkdirs(new Path(TEST_DIR3)); + + FSEditLog spyEditLog = spy(nn2.getNamesystem().getEditLogTailer() + .getEditLog()); + LimitedEditLogAnswer answer = new LimitedEditLogAnswer(); + doAnswer(answer).when(spyEditLog).selectInputStreams( + anyLong(), anyLong(), anyBoolean()); + nn2.getNamesystem().getEditLogTailer().setEditLog(spyEditLog); + + try { + TestEditLogTailer.waitForStandbyToCatchUp(nn1, nn2); + fail("Standby fully caught up, but should not have been able to"); + } catch (CouldNotCatchUpException e) { + verify(mockRuntime, times(0)).exit(anyInt()); + } + + // Null because it was deleted. + assertNull(NameNodeAdapter.getFileInfo(nn2, + TEST_DIR1, false)); + // Should have been successfully created. + assertTrue(NameNodeAdapter.getFileInfo(nn2, + TEST_DIR2, false).isDir()); + // Null because it hasn't been created yet. + assertNull(NameNodeAdapter.getFileInfo(nn2, + TEST_DIR3, false)); + + // Now let the standby read ALL the edits. + answer.setThrowExceptionOnRead(false); + TestEditLogTailer.waitForStandbyToCatchUp(nn1, nn2); + + // Null because it was deleted. + assertNull(NameNodeAdapter.getFileInfo(nn2, + TEST_DIR1, false)); + // Should have been successfully created. + assertTrue(NameNodeAdapter.getFileInfo(nn2, + TEST_DIR2, false).isDir()); + // Should now have been successfully created. + assertTrue(NameNodeAdapter.getFileInfo(nn2, + TEST_DIR3, false).isDir()); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } + + private static class LimitedEditLogAnswer + implements Answer> { + + private boolean throwExceptionOnRead = true; + + @SuppressWarnings("unchecked") + @Override + public Collection answer(InvocationOnMock invocation) + throws Throwable { + Collection streams = (Collection) + invocation.callRealMethod(); + + if (!throwExceptionOnRead) { + return streams; + } else { + Collection ret = new LinkedList(); + for (EditLogInputStream stream : streams) { + EditLogInputStream spyStream = spy(stream); + doAnswer(new Answer() { + + @Override + public FSEditLogOp answer(InvocationOnMock invocation) + throws Throwable { + FSEditLogOp op = (FSEditLogOp) invocation.callRealMethod(); + if (throwExceptionOnRead && + TEST_DIR3.equals(NameNodeAdapter.getMkdirOpPath(op))) { + throw new IOException("failed to read op creating " + TEST_DIR3); + } else { + return op; + } + } + + }).when(spyStream).readOp(); + ret.add(spyStream); + } + return ret; + } + } + + public void setThrowExceptionOnRead(boolean throwExceptionOnRead) { + this.throwExceptionOnRead = throwExceptionOnRead; + } + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index b69d7c6db4c..d168bc8cbce 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -17,7 +17,11 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; -import static org.junit.Assert.*; +import static org.junit.Assert.assertTrue; +import static org.mockito.Matchers.anyInt; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import java.io.IOException; @@ -48,6 +52,7 @@ public class TestHASafeMode { private NameNode nn1; private FileSystem fs; private MiniDFSCluster cluster; + private Runtime 
mockRuntime = mock(Runtime.class); @Before public void setupCluster() throws Exception { @@ -64,6 +69,8 @@ public class TestHASafeMode { nn0 = cluster.getNameNode(0); nn1 = cluster.getNameNode(1); fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + + nn0.getNamesystem().getEditLogTailer().setRuntime(mockRuntime); cluster.transitionToActive(0); } @@ -71,7 +78,7 @@ public class TestHASafeMode { @After public void shutdownCluster() throws IOException { if (cluster != null) { - assertNull(nn1.getNamesystem().getEditLogTailer().getLastError()); + verify(mockRuntime, times(0)).exit(anyInt()); cluster.shutdown(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java index 298bdffa2c7..a9d09ca7a7a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -18,6 +18,10 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.junit.Assert.*; +import static org.mockito.Matchers.anyInt; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; import java.io.IOException; import java.util.concurrent.TimeoutException; @@ -75,12 +79,15 @@ public class TestStandbyIsHot { .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(3) .build(); + Runtime mockRuntime = mock(Runtime.class); try { cluster.waitActive(); cluster.transitionToActive(0); NameNode nn1 = cluster.getNameNode(0); NameNode nn2 = cluster.getNameNode(1); + + nn2.getNamesystem().getEditLogTailer().setRuntime(mockRuntime); nn2.getNamesystem().getEditLogTailer().setSleepTime(250); nn2.getNamesystem().getEditLogTailer().interrupt(); @@ -121,6 +128,7 @@ public class TestStandbyIsHot { waitForBlockLocations(cluster, nn2, TEST_FILE, 3); } finally { + verify(mockRuntime, times(0)).exit(anyInt()); cluster.shutdown(); } } From 2d0b340c4739dff11d1c04982d4bd3a4dcd97ce9 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Sat, 7 Jan 2012 00:17:42 +0000 Subject: [PATCH 070/177] HADOOP-7961. Move HA fencing to common. 
Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1228510 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/pom.xml | 4 ++ .../ha/BadFencingConfigurationException.java | 0 .../org/apache/hadoop}/ha/FenceMethod.java | 0 .../org/apache/hadoop}/ha/NodeFencer.java | 0 .../apache/hadoop}/ha/ShellCommandFencer.java | 0 .../apache/hadoop}/ha/SshFenceByTcpPort.java | 45 +++++++------------ .../org/apache/hadoop}/ha/StreamPumper.java | 0 .../org/apache/hadoop}/ha/TestNodeFencer.java | 1 - .../hadoop}/ha/TestShellCommandFencer.java | 8 ++-- .../hadoop}/ha/TestSshFenceByTcpPort.java | 20 +++++---- hadoop-hdfs-project/hadoop-hdfs/pom.xml | 4 -- 11 files changed, 34 insertions(+), 48 deletions(-) rename {hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode => hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop}/ha/BadFencingConfigurationException.java (100%) rename {hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode => hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop}/ha/FenceMethod.java (100%) rename {hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode => hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop}/ha/NodeFencer.java (100%) rename {hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode => hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop}/ha/ShellCommandFencer.java (100%) rename {hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode => hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop}/ha/SshFenceByTcpPort.java (88%) rename {hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode => hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop}/ha/StreamPumper.java (100%) rename {hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode => hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop}/ha/TestNodeFencer.java (98%) rename {hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode => hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop}/ha/TestShellCommandFencer.java (95%) rename {hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode => hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop}/ha/TestSshFenceByTcpPort.java (88%) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 6a5e56fb486..23d61f825b3 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -264,6 +264,10 @@ json-simple compile + + com.jcraft + jsch + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BadFencingConfigurationException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/BadFencingConfigurationException.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/BadFencingConfigurationException.java rename to hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/BadFencingConfigurationException.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/FenceMethod.java 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FenceMethod.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/FenceMethod.java rename to hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FenceMethod.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/NodeFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/NodeFencer.java rename to hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ShellCommandFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ShellCommandFencer.java rename to hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/SshFenceByTcpPort.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java similarity index 88% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/SshFenceByTcpPort.java rename to hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java index bb01d53d035..ecccb4e1f3c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/SshFenceByTcpPort.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java @@ -27,7 +27,6 @@ import java.util.regex.Pattern; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import com.google.common.annotations.VisibleForTesting; import com.jcraft.jsch.ChannelExec; @@ -36,11 +35,11 @@ import com.jcraft.jsch.JSchException; import com.jcraft.jsch.Session; /** - * This fencing implementation sshes to the target node and uses fuser - * to kill the process listening on the NameNode's TCP port. This is - * more accurate than using "jps" since it doesn't require parsing, - * and will work even if there are multiple NameNodes running on the - * same machine.


+ * This fencing implementation sshes to the target node and uses
+ * fuser to kill the process listening on the service's
+ * TCP port. This is more accurate than using "jps" since it doesn't
+ * require parsing, and will work even if there are multiple service
+ * processes running on the same machine.
  * It returns a successful status code if:
  *
  *
  • fuser indicates it successfully killed a process, or
@@ -49,7 +48,7 @@ import com.jcraft.jsch.Session;
  *
  * This fencing mechanism is configured as following in the fencing method
  * list:
- *   sshfence([username@]nnhost[:ssh-port][, target-nn-port])
+ *   sshfence([username@]nnhost[:ssh-port], target-port)
  * where the first argument specifies the username, host, and port to ssh
  * into, and the second argument specifies the port on which the target
  * NN process is listening on.
@@ -58,9 +57,6 @@ import com.jcraft.jsch.Session;
  * other-nn as the current user on the standard SSH port,
  * then kill whatever process is listening on port 8020.
  *
- * If no target-nn-port is specified, it is assumed that the
- * target NameNode is listening on the same port as the local NameNode.
- *
        * In order to achieve passwordless SSH, the operator must also configure * dfs.namenode.ha.fencing.ssh.private-key-files to point to an * SSH key that has passphrase-less access to the given username and host. @@ -117,10 +113,8 @@ public class SshFenceByTcpPort extends Configured } LOG.info("Connected to " + args.host); - int targetPort = args.targetPort != null ? - args.targetPort : getDefaultNNPort(); try { - return doFence(session, targetPort); + return doFence(session, args.targetPort); } catch (JSchException e) { LOG.warn("Unable to achieve fencing on remote host", e); return false; @@ -142,14 +136,14 @@ public class SshFenceByTcpPort extends Configured return session; } - private boolean doFence(Session session, int nnPort) throws JSchException { + private boolean doFence(Session session, int port) throws JSchException { try { - LOG.info("Looking for process running on port " + nnPort); + LOG.info("Looking for process running on port " + port); int rc = execCommand(session, - "PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp " + nnPort); + "PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp " + port); if (rc == 0) { LOG.info("Successfully killed process that was " + - "listening on port " + nnPort); + "listening on port " + port); // exit code 0 indicates the process was successfully killed. return true; } else if (rc == 1) { @@ -157,7 +151,7 @@ public class SshFenceByTcpPort extends Configured // or that fuser didn't have root privileges in order to find it // (eg running as a different user) LOG.info( - "Indeterminate response from trying to kill NameNode. " + + "Indeterminate response from trying to kill service. " + "Verifying whether it is running using nc..."); rc = execCommand(session, "nc -z localhost 8020"); if (rc == 0) { @@ -234,10 +228,6 @@ public class SshFenceByTcpPort extends Configured return getConf().getTrimmedStringCollection(CONF_IDENTITIES_KEY); } - private int getDefaultNNPort() { - return NameNode.getAddress(getConf()).getPort(); - } - /** * Container for the parsed arg line for this fencing method. */ @@ -251,8 +241,7 @@ public class SshFenceByTcpPort extends Configured final String user; final String host; final int sshPort; - - final Integer targetPort; + final int targetPort; public Args(String args) throws BadFencingConfigurationException { if (args == null) { @@ -260,7 +249,7 @@ public class SshFenceByTcpPort extends Configured "Must specify args for ssh fencing configuration"); } String[] argList = args.split(",\\s*"); - if (argList.length > 2 || argList.length == 0) { + if (argList.length != 2) { throw new BadFencingConfigurationException( "Incorrect number of arguments: " + args); } @@ -287,11 +276,7 @@ public class SshFenceByTcpPort extends Configured } // Parse target port. 
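For reference, with the stricter two-argument parsing the following argument strings behave as shown (mirroring the updated TestSshFenceByTcpPort cases further down):

//   "foo@bar.com:1234, 5678" -> user "foo", host "bar.com", ssh port 1234,
//                               target port 5678
//   "bar.com, 8020"          -> current user, default ssh port 22,
//                               target port 8020
//   "bar.com" or "foo.com,"  -> rejected with BadFencingConfigurationException
//                               (exactly two comma-separated args are required)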
- if (argList.length > 1) { - targetPort = parseConfiggedPort(argList[1]); - } else { - targetPort = null; - } + targetPort = parseConfiggedPort(argList[1]); } private Integer parseConfiggedPort(String portStr) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StreamPumper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/StreamPumper.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StreamPumper.java rename to hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/StreamPumper.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNodeFencer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java similarity index 98% rename from hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNodeFencer.java rename to hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java index 5481ea23228..deb37523432 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNodeFencer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java @@ -23,7 +23,6 @@ import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.test.GenericTestUtils; import org.junit.Before; import org.junit.Test; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestShellCommandFencer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java similarity index 95% rename from hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestShellCommandFencer.java rename to hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java index 3b942560cb6..4388ad71443 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestShellCommandFencer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java @@ -20,7 +20,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.junit.Assert.*; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.StringUtils; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -61,7 +61,6 @@ public class TestShellCommandFencer { assertFalse(fencer.tryFence("xxxxxxxxxxxx")); } - @Test public void testCheckArgs() { try { @@ -70,8 +69,9 @@ public class TestShellCommandFencer { new NodeFencer(conf); fail("Didn't throw when passing no args to shell"); } catch (BadFencingConfigurationException confe) { - GenericTestUtils.assertExceptionContains( - "No argument passed", confe); + assertTrue( + "Unexpected exception:" + StringUtils.stringifyException(confe), + confe.getMessage().contains("No argument passed")); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSshFenceByTcpPort.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java similarity index 88% rename from 
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSshFenceByTcpPort.java rename to hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java index a249a45d5c9..fb8fc3a54a8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestSshFenceByTcpPort.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java @@ -61,37 +61,37 @@ public class TestSshFenceByTcpPort { SshFenceByTcpPort fence = new SshFenceByTcpPort(); fence.setConf(conf); // Connect to Google's DNS server - not running ssh! - assertFalse(fence.tryFence("8.8.8.8")); + assertFalse(fence.tryFence("8.8.8.8, 1234")); } @Test public void testArgsParsing() throws BadFencingConfigurationException { - Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234"); + Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234, 5678"); assertEquals("foo", args.user); assertEquals("bar.com", args.host); assertEquals(1234, args.sshPort); - assertNull(args.targetPort); + assertEquals(5678, args.targetPort); - args = new SshFenceByTcpPort.Args("foo@bar.com"); + args = new SshFenceByTcpPort.Args("foo@bar.com, 1234"); assertEquals("foo", args.user); assertEquals("bar.com", args.host); assertEquals(22, args.sshPort); - assertNull(args.targetPort); + assertEquals(1234, args.targetPort); - args = new SshFenceByTcpPort.Args("bar.com"); + args = new SshFenceByTcpPort.Args("bar.com, 1234"); assertEquals(System.getProperty("user.name"), args.user); assertEquals("bar.com", args.host); assertEquals(22, args.sshPort); - assertNull(args.targetPort); + assertEquals(1234, args.targetPort); args = new SshFenceByTcpPort.Args("bar.com:1234, 12345"); assertEquals(System.getProperty("user.name"), args.user); assertEquals("bar.com", args.host); assertEquals(1234, args.sshPort); - assertEquals(Integer.valueOf(12345), args.targetPort); + assertEquals(12345, args.targetPort); args = new SshFenceByTcpPort.Args("bar, 8020"); - assertEquals(Integer.valueOf(8020), args.targetPort); + assertEquals(8020, args.targetPort); } @Test @@ -101,6 +101,8 @@ public class TestSshFenceByTcpPort { assertBadArgs("bar.com:"); assertBadArgs("bar.com:x"); assertBadArgs("foo.com, x"); + assertBadArgs("foo.com,"); + assertBadArgs("foo.com, "); } private void assertBadArgs(String argStr) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index f0971d6dc53..532218251bc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -109,10 +109,6 @@ ant provided - - com.jcraft - jsch - From fcf1039cdc8bb7253935ef7361a0dfc026155f8f Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Sat, 7 Jan 2012 03:29:35 +0000 Subject: [PATCH 071/177] Previous commit didn't fixup package statements. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1228561 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt | 2 ++ .../apache/hadoop/ha/BadFencingConfigurationException.java | 4 ++-- .../src/main/java/org/apache/hadoop/ha/FenceMethod.java | 4 ++-- .../src/main/java/org/apache/hadoop/ha/NodeFencer.java | 2 +- .../main/java/org/apache/hadoop/ha/ShellCommandFencer.java | 2 +- .../src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java | 2 +- .../src/main/java/org/apache/hadoop/ha/StreamPumper.java | 2 +- 7 files changed, 10 insertions(+), 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 107572ae495..571fbcd7658 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -26,3 +26,5 @@ HADOOP-7932. Make client connection retries on socket time outs configurable. (Uma Maheswara Rao G via todd) HADOOP-7924. 
FailoverController for client-based configuration (eli) + +HADOOP-7961. Move HA fencing to common. (eli) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/BadFencingConfigurationException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/BadFencingConfigurationException.java index 4540d9994de..3d3b1ba53cc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/BadFencingConfigurationException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/BadFencingConfigurationException.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hdfs.server.namenode.ha; +package org.apache.hadoop.ha; import java.io.IOException; @@ -33,4 +33,4 @@ class BadFencingConfigurationException extends IOException { public BadFencingConfigurationException(String msg, Throwable cause) { super(msg, cause); } -} \ No newline at end of file +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FenceMethod.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FenceMethod.java index e4c79a229ee..c448241a835 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FenceMethod.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FenceMethod.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hdfs.server.namenode.ha; +package org.apache.hadoop.ha; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -60,4 +60,4 @@ public interface FenceMethod { * determined to be invalid only at runtime */ public boolean tryFence(String args) throws BadFencingConfigurationException; -} \ No newline at end of file +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java index 279a26acd9c..61ef950c05b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hdfs.server.namenode.ha; +package org.apache.hadoop.ha; import java.util.List; import java.util.Map; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java index 96e12287466..07d11629a4c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.hadoop.hdfs.server.namenode.ha; +package org.apache.hadoop.ha; import java.io.IOException; import java.lang.reflect.Field; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java index ecccb4e1f3c..48bb59c2943 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hdfs.server.namenode.ha; +package org.apache.hadoop.ha; import java.io.IOException; import java.net.InetAddress; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/StreamPumper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/StreamPumper.java index 94802214ef3..8bc16af2afa 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/StreamPumper.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/StreamPumper.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hdfs.server.namenode.ha; +package org.apache.hadoop.ha; import java.io.BufferedReader; import java.io.IOException; From 89937fe7c72618446672f2e809510ebc2041fc93 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Sat, 7 Jan 2012 03:42:56 +0000 Subject: [PATCH 072/177] Previous commit missed test packages. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1228566 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/test/java/org/apache/hadoop/ha/TestNodeFencer.java | 2 +- .../java/org/apache/hadoop/ha/TestShellCommandFencer.java | 2 +- .../test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java index deb37523432..93baf0dc2f6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hdfs.server.namenode.ha; +package org.apache.hadoop.ha; import static org.junit.Assert.*; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java index 4388ad71443..f02aa8a14b0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.hadoop.hdfs.server.namenode.ha; +package org.apache.hadoop.ha; import static org.junit.Assert.*; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java index fb8fc3a54a8..d88d892ed85 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java @@ -15,14 +15,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hdfs.server.namenode.ha; +package org.apache.hadoop.ha; import static org.junit.Assert.*; import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hdfs.server.namenode.ha.SshFenceByTcpPort.Args; +import org.apache.hadoop.ha.SshFenceByTcpPort.Args; import org.apache.log4j.Level; import org.junit.Assume; import org.junit.Test; From 736b1860ab7f64de13419c6385534892b1542c1d Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 9 Jan 2012 04:12:37 +0000 Subject: [PATCH 073/177] HDFS-2730. Refactor shared HA-related test code into HATestUtil class. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1229023 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hadoop/hdfs/TestDFSClientFailover.java | 44 +---- .../hdfs/server/namenode/ha/HATestUtil.java | 162 ++++++++++++++++++ .../server/namenode/ha/TestDNFencing.java | 44 +---- .../ha/TestDNFencingWithReplication.java | 3 +- .../server/namenode/ha/TestEditLogTailer.java | 50 +----- .../namenode/ha/TestFailureToReadEdits.java | 13 +- .../server/namenode/ha/TestHASafeMode.java | 13 +- .../namenode/ha/TestHAStateTransitions.java | 5 +- .../namenode/ha/TestStandbyCheckpoints.java | 7 +- .../server/namenode/ha/TestStandbyIsHot.java | 3 +- 11 files changed, 198 insertions(+), 148 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index e48312daf47..f57d2d35fdf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -87,3 +87,5 @@ HDFS-2720. Fix MiniDFSCluster HA support to work properly on Windows. (Uma Mahes HDFS-2291. Allow the StandbyNode to make checkpoints in an HA setup. (todd) HDFS-2709. Appropriately handle error conditions in EditLogTailer (atm via todd) + +HDFS-2730. 
Refactor shared HA-related test code into HATestUtil class (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java index 90739693f5c..d06a606e54b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java @@ -18,14 +18,12 @@ package org.apache.hadoop.hdfs; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; + import static org.junit.Assert.*; import java.io.IOException; import java.io.OutputStream; import java.net.InetSocketAddress; -import java.net.URI; import java.net.URISyntaxException; import org.apache.hadoop.conf.Configuration; @@ -34,6 +32,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider; +import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.apache.hadoop.test.GenericTestUtils; import org.junit.After; import org.junit.Before; @@ -46,7 +45,6 @@ public class TestDFSClientFailover { private Configuration conf = new Configuration(); private MiniDFSCluster cluster; - private static final String LOGICAL_HOSTNAME = "ha-nn-uri-%d"; @Before public void setUpCluster() throws IOException { @@ -83,7 +81,7 @@ public class TestDFSClientFailover { out1.close(); out2.close(); - FileSystem fs = configureFailoverFs(cluster, conf); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); AppendTestUtil.check(fs, TEST_FILE, FILE_LENGTH_TO_VERIFY); cluster.getNameNode(0).stop(); @@ -92,7 +90,7 @@ public class TestDFSClientFailover { // Check that it functions even if the URL becomes canonicalized // to include a port number. 
Path withPort = new Path("hdfs://" + - getLogicalHostname(cluster) + ":" + + HATestUtil.getLogicalHostname(cluster) + ":" + NameNode.DEFAULT_PORT + "/" + TEST_FILE.toUri().getPath()); FileSystem fs2 = withPort.getFileSystem(fs.getConf()); assertTrue(fs2.exists(withPort)); @@ -117,38 +115,4 @@ public class TestDFSClientFailover { "does not use port information", ioe); } } - - public static FileSystem configureFailoverFs(MiniDFSCluster cluster, Configuration conf) - throws IOException, URISyntaxException { - InetSocketAddress nnAddr1 = cluster.getNameNode(0).getNameNodeAddress(); - InetSocketAddress nnAddr2 = cluster.getNameNode(1).getNameNodeAddress(); - - String nsId = "nameserviceId1"; - - String nameNodeId1 = "nn1"; - String nameNodeId2 = "nn2"; - String logicalName = getLogicalHostname(cluster); - - conf = new Configuration(conf); - String address1 = "hdfs://" + nnAddr1.getHostName() + ":" + nnAddr1.getPort(); - String address2 = "hdfs://" + nnAddr2.getHostName() + ":" + nnAddr2.getPort(); - conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, - nsId, nameNodeId1), address1); - conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, - nsId, nameNodeId2), address2); - - conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, nsId); - conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, nsId), - nameNodeId1 + "," + nameNodeId2); - conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + logicalName, - ConfiguredFailoverProxyProvider.class.getName()); - - FileSystem fs = FileSystem.get(new URI("hdfs://" + logicalName), conf); - return fs; - } - - private static String getLogicalHostname(MiniDFSCluster cluster) { - return String.format(LOGICAL_HOSTNAME, cluster.getInstanceId()); - } - } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java new file mode 100644 index 00000000000..1b7b62dade7 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.concurrent.TimeoutException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.test.GenericTestUtils; + +import com.google.common.base.Supplier; + +/** + * Static utility functions useful for testing HA. + */ +public abstract class HATestUtil { + private static Log LOG = LogFactory.getLog(HATestUtil.class); + + private static final String LOGICAL_HOSTNAME = "ha-nn-uri-%d"; + + /** + * Trigger an edits log roll on the active and then wait for the standby to + * catch up to all the edits done by the active. This method will check + * repeatedly for up to NN_LAG_TIMEOUT milliseconds, and then fail throwing + * {@link CouldNotCatchUpException} + * + * @param active active NN + * @param standby standby NN which should catch up to active + * @throws IOException if an error occurs rolling the edit log + * @throws CouldNotCatchUpException if the standby doesn't catch up to the + * active in NN_LAG_TIMEOUT milliseconds + */ + static void waitForStandbyToCatchUp(NameNode active, + NameNode standby) throws InterruptedException, IOException, CouldNotCatchUpException { + + long activeTxId = active.getNamesystem().getFSImage().getEditLog() + .getLastWrittenTxId(); + + active.getRpcServer().rollEditLog(); + + long start = System.currentTimeMillis(); + while (System.currentTimeMillis() - start < TestEditLogTailer.NN_LAG_TIMEOUT) { + long nn2HighestTxId = standby.getNamesystem().getFSImage() + .getLastAppliedTxId(); + if (nn2HighestTxId >= activeTxId) { + return; + } + Thread.sleep(TestEditLogTailer.SLEEP_TIME); + } + throw new CouldNotCatchUpException("Standby did not catch up to txid " + + activeTxId + " (currently at " + + standby.getNamesystem().getFSImage().getLastAppliedTxId() + ")"); + } + + /** + * Wait for the datanodes in the cluster to process any block + * deletions that have already been asynchronously queued. + */ + static void waitForDNDeletions(final MiniDFSCluster cluster) + throws TimeoutException, InterruptedException { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + for (DataNode dn : cluster.getDataNodes()) { + if (DataNodeAdapter.getPendingAsyncDeletions(dn) > 0) { + return false; + } + } + return true; + } + }, 1000, 10000); + + } + + /** + * Wait for the NameNode to issue any deletions that are already + * pending (i.e. 
for the pendingDeletionBlocksCount to go to 0) + */ + static void waitForNNToIssueDeletions(final NameNode nn) + throws Exception { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + LOG.info("Waiting for NN to issue block deletions to DNs"); + return nn.getNamesystem().getBlockManager().getPendingDeletionBlocksCount() == 0; + } + }, 250, 10000); + } + + public static class CouldNotCatchUpException extends IOException { + private static final long serialVersionUID = 1L; + + public CouldNotCatchUpException(String message) { + super(message); + } + } + + public static FileSystem configureFailoverFs(MiniDFSCluster cluster, Configuration conf) + throws IOException, URISyntaxException { + InetSocketAddress nnAddr1 = cluster.getNameNode(0).getNameNodeAddress(); + InetSocketAddress nnAddr2 = cluster.getNameNode(1).getNameNodeAddress(); + + String nsId = "nameserviceId1"; + + String nameNodeId1 = "nn1"; + String nameNodeId2 = "nn2"; + String logicalName = getLogicalHostname(cluster); + + conf = new Configuration(conf); + String address1 = "hdfs://" + nnAddr1.getHostName() + ":" + nnAddr1.getPort(); + String address2 = "hdfs://" + nnAddr2.getHostName() + ":" + nnAddr2.getPort(); + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, + nsId, nameNodeId1), address1); + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, + nsId, nameNodeId2), address2); + + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, nsId); + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, nsId), + nameNodeId1 + "," + nameNodeId2); + conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + logicalName, + ConfiguredFailoverProxyProvider.class.getName()); + + FileSystem fs = FileSystem.get(new URI("hdfs://" + logicalName), conf); + return fs; + } + + + public static String getLogicalHostname(MiniDFSCluster cluster) { + return String.format(LOGICAL_HOSTNAME, cluster.getInstanceId()); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java index c3186292d99..9a2149a281c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java @@ -25,7 +25,6 @@ import java.io.StringWriter; import java.net.URISyntaxException; import java.util.Collection; import java.util.List; -import java.util.concurrent.TimeoutException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -39,7 +38,6 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.hdfs.TestDFSClientFailover; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; @@ -48,7 +46,6 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy; import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.datanode.DataNode; -import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.FSClusterStats; 
import org.apache.hadoop.hdfs.server.namenode.FSInodeInfo; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; @@ -111,7 +108,7 @@ public class TestDNFencing { cluster.triggerBlockReports(); nn2.getNamesystem().getEditLogTailer().setSleepTime(250); nn2.getNamesystem().getEditLogTailer().interrupt(); - fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + fs = HATestUtil.configureFailoverFs(cluster, conf); } @After @@ -172,7 +169,7 @@ public class TestDNFencing { BlockManagerTestUtil.computeInvalidationWork( nn2.getNamesystem().getBlockManager()); cluster.triggerHeartbeats(); - waitForDNDeletions(cluster); + HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks()); assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks()); @@ -258,9 +255,9 @@ public class TestDNFencing { BlockManagerTestUtil.computeInvalidationWork( nn2.getNamesystem().getBlockManager()); - waitForNNToIssueDeletions(nn2); + HATestUtil.waitForNNToIssueDeletions(nn2); cluster.triggerHeartbeats(); - waitForDNDeletions(cluster); + HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks()); assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks()); @@ -283,7 +280,7 @@ public class TestDNFencing { DFSTestUtil.createFile(fs, TEST_FILE_PATH, 30*SMALL_BLOCK, (short)1, 1L); banner("rolling NN1's edit log, forcing catch-up"); - TestEditLogTailer.waitForStandbyToCatchUp(nn1, nn2); + HATestUtil.waitForStandbyToCatchUp(nn1, nn2); // Get some new replicas reported so that NN2 now considers // them over-replicated and schedules some more deletions @@ -353,9 +350,9 @@ public class TestDNFencing { BlockManagerTestUtil.computeInvalidationWork( nn2.getNamesystem().getBlockManager()); - waitForNNToIssueDeletions(nn2); + HATestUtil.waitForNNToIssueDeletions(nn2); cluster.triggerHeartbeats(); - waitForDNDeletions(cluster); + HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); assertEquals(0, nn2.getNamesystem().getUnderReplicatedBlocks()); assertEquals(0, nn2.getNamesystem().getPendingReplicationBlocks()); @@ -410,33 +407,6 @@ public class TestDNFencing { return count; } - static void waitForDNDeletions(final MiniDFSCluster cluster) - throws TimeoutException, InterruptedException { - GenericTestUtils.waitFor(new Supplier() { - @Override - public Boolean get() { - for (DataNode dn : cluster.getDataNodes()) { - if (DataNodeAdapter.getPendingAsyncDeletions(dn) > 0) { - return false; - } - } - return true; - } - }, 1000, 10000); - - } - - static void waitForNNToIssueDeletions(final NameNode nn) - throws Exception { - GenericTestUtils.waitFor(new Supplier() { - @Override - public Boolean get() { - LOG.info("Waiting for NN to issue block deletions to DNs"); - return nn.getNamesystem().getBlockManager().getPendingDeletionBlocksCount() == 0; - } - }, 250, 10000); - } - /** * A BlockPlacementPolicy which, rather than using space available, makes * random decisions about which excess replica to delete. 
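Taken together, the HATestUtil additions and the TestDNFencing changes above show how tests on this branch are expected to consume the new helper class. The following is only a rough usage sketch, not part of the patch: the class name ExampleHATestUsage, its method, and the "/example" path are invented for illustration, cluster construction is elided, and it assumes the package-private HATestUtil helpers are called from within the same org.apache.hadoop.hdfs.server.namenode.ha package.

    package org.apache.hadoop.hdfs.server.namenode.ha;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hdfs.MiniDFSCluster;
    import org.apache.hadoop.hdfs.server.namenode.NameNode;

    // Hypothetical example, not part of this patch.
    class ExampleHATestUsage {
      void exerciseStandbyCatchUp(MiniDFSCluster cluster, Configuration conf)
          throws Exception {
        NameNode active = cluster.getNameNode(0);
        NameNode standby = cluster.getNameNode(1);

        // Client that fails over between the two NameNodes, wired up by HATestUtil.
        FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
        fs.mkdirs(new Path("/example"));  // the edit is logged on the active NN

        // Roll the active's edit log, then poll the standby's last applied txid
        // until it reaches the active's, or fail with CouldNotCatchUpException.
        HATestUtil.waitForStandbyToCatchUp(active, standby);
      }
    }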
This is because, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java index b500c602850..8fc9d49eb05 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.hdfs.TestDFSClientFailover; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; @@ -132,7 +131,7 @@ public class TestDNFencingWithReplication { nn2.getNamesystem().getEditLogTailer().setSleepTime(250); nn2.getNamesystem().getEditLogTailer().interrupt(); - FileSystem fs = TestDFSClientFailover.configureFailoverFs( + FileSystem fs = HATestUtil.configureFailoverFs( cluster, conf); TestContext togglers = new TestContext(); for (int i = 0; i < NUM_THREADS; i++) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java index 5f7170dd0d6..7c3e38b18a5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java @@ -34,15 +34,14 @@ import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.log4j.Level; -import org.junit.Assert; import org.junit.Test; public class TestEditLogTailer { private static final String DIR_PREFIX = "/dir"; private static final int DIRS_TO_MAKE = 20; - private static final long SLEEP_TIME = 1000; - private static final long NN_LAG_TIMEOUT = 10 * 1000; + static final long SLEEP_TIME = 1000; + static final long NN_LAG_TIMEOUT = 10 * 1000; static { ((Log4JLogger)FSImage.LOG).getLogger().setLevel(Level.ALL); @@ -74,7 +73,7 @@ public class TestEditLogTailer { true); } - waitForStandbyToCatchUp(nn1, nn2); + HATestUtil.waitForStandbyToCatchUp(nn1, nn2); for (int i = 0; i < DIRS_TO_MAKE / 2; i++) { assertTrue(NameNodeAdapter.getFileInfo(nn2, @@ -87,7 +86,7 @@ public class TestEditLogTailer { true); } - waitForStandbyToCatchUp(nn1, nn2); + HATestUtil.waitForStandbyToCatchUp(nn1, nn2); for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) { assertTrue(NameNodeAdapter.getFileInfo(nn2, @@ -101,45 +100,4 @@ public class TestEditLogTailer { private static String getDirPath(int suffix) { return DIR_PREFIX + suffix; } - - /** - * Trigger an edits log roll on the active and then wait for the standby to - * catch up to all the edits done by the active. This method will check - * repeatedly for up to NN_LAG_TIMEOUT milliseconds, and then fail throwing - * {@link CouldNotCatchUpException}. 
- * - * @param active active NN - * @param standby standby NN which should catch up to active - * @throws IOException if an error occurs rolling the edit log - * @throws CouldNotCatchUpException if the standby doesn't catch up to the - * active in NN_LAG_TIMEOUT milliseconds - */ - static void waitForStandbyToCatchUp(NameNode active, - NameNode standby) throws InterruptedException, IOException, CouldNotCatchUpException { - - long activeTxId = active.getNamesystem().getFSImage().getEditLog() - .getLastWrittenTxId(); - - active.getRpcServer().rollEditLog(); - - long start = System.currentTimeMillis(); - while (System.currentTimeMillis() - start < NN_LAG_TIMEOUT) { - long nn2HighestTxId = standby.getNamesystem().getFSImage() - .getLastAppliedTxId(); - if (nn2HighestTxId >= activeTxId) { - return; - } - Thread.sleep(SLEEP_TIME); - } - throw new CouldNotCatchUpException("Standby did not catch up to txid " + - activeTxId + " (currently at " + - standby.getNamesystem().getFSImage().getLastAppliedTxId() + ")"); - } - - public static class CouldNotCatchUpException extends IOException { - - public CouldNotCatchUpException(String message) { - super(message); - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java index b1105517548..96a62960984 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java @@ -41,13 +41,12 @@ import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.hdfs.TestDFSClientFailover; import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream; import org.apache.hadoop.hdfs.server.namenode.FSEditLog; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; -import org.apache.hadoop.hdfs.server.namenode.ha.TestEditLogTailer.CouldNotCatchUpException; +import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil.CouldNotCatchUpException; import org.junit.Test; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -84,9 +83,9 @@ public class TestFailureToReadEdits { nn2.getNamesystem().getEditLogTailer().interrupt(); nn2.getNamesystem().getEditLogTailer().setRuntime(mockRuntime); - FileSystem fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); fs.mkdirs(new Path(TEST_DIR1)); - TestEditLogTailer.waitForStandbyToCatchUp(nn1, nn2); + HATestUtil.waitForStandbyToCatchUp(nn1, nn2); // If these two ops are applied twice, the first op will throw an // exception the second time its replayed. 
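The test being modified here (TestFailureToReadEdits) stubs the standby's edit log with a Mockito spy whose Answer can be toggled — answer.setThrowExceptionOnRead(false) appears in the next hunk — but the Answer class itself lies outside the quoted hunks. A minimal sketch of that kind of toggleable Answer, with an invented class name and message, might look like this:

    import java.io.IOException;

    import org.mockito.invocation.InvocationOnMock;
    import org.mockito.stubbing.Answer;

    // Illustrative only; the real implementation in the patch is not shown here.
    class ThrowOnReadAnswer implements Answer<Object> {
      private volatile boolean throwExceptionOnRead = true;

      void setThrowExceptionOnRead(boolean t) {
        this.throwExceptionOnRead = t;
      }

      @Override
      public Object answer(InvocationOnMock invocation) throws Throwable {
        if (throwExceptionOnRead) {
          // Simulate a failure while the standby tails edits.
          throw new IOException("Injected fault: cannot read edit log");
        }
        // Otherwise behave like the real (spied) edit log.
        return invocation.callRealMethod();
      }
    }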
@@ -107,9 +106,9 @@ public class TestFailureToReadEdits { nn2.getNamesystem().getEditLogTailer().setEditLog(spyEditLog); try { - TestEditLogTailer.waitForStandbyToCatchUp(nn1, nn2); + HATestUtil.waitForStandbyToCatchUp(nn1, nn2); fail("Standby fully caught up, but should not have been able to"); - } catch (CouldNotCatchUpException e) { + } catch (HATestUtil.CouldNotCatchUpException e) { verify(mockRuntime, times(0)).exit(anyInt()); } @@ -125,7 +124,7 @@ public class TestFailureToReadEdits { // Now let the standby read ALL the edits. answer.setThrowExceptionOnRead(false); - TestEditLogTailer.waitForStandbyToCatchUp(nn1, nn2); + HATestUtil.waitForStandbyToCatchUp(nn1, nn2); // Null because it was deleted. assertNull(NameNodeAdapter.getFileInfo(nn2, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index d168bc8cbce..a23f38e97f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -34,7 +34,6 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.hdfs.TestDFSClientFailover; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; @@ -68,7 +67,7 @@ public class TestHASafeMode { nn0 = cluster.getNameNode(0); nn1 = cluster.getNameNode(1); - fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + fs = HATestUtil.configureFailoverFs(cluster, conf); nn0.getNamesystem().getEditLogTailer().setRuntime(mockRuntime); @@ -126,7 +125,7 @@ public class TestHASafeMode { "The reported blocks 0 needs additional 3 blocks to reach")); banner("Waiting for standby to catch up to active namespace"); - TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); status = nn1.getNamesystem().getSafemode(); assertTrue("Bad safemode status: '" + status + "'", @@ -167,7 +166,7 @@ public class TestHASafeMode { banner("Waiting for standby to catch up to active namespace"); - TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); status = nn1.getNamesystem().getSafemode(); assertTrue("Bad safemode status: '" + status + "'", @@ -221,7 +220,7 @@ public class TestHASafeMode { "The reported blocks 0 needs additional 5 blocks to reach")); banner("Waiting for standby to catch up to active namespace"); - TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); status = nn1.getNamesystem().getSafemode(); assertTrue("Bad safemode status: '" + status + "'", status.startsWith( @@ -265,7 +264,7 @@ public class TestHASafeMode { banner("Triggering deletions on DNs and Deletion Reports"); cluster.triggerHeartbeats(); - TestDNFencing.waitForDNDeletions(cluster); + HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); status = nn1.getNamesystem().getSafemode(); @@ -275,7 +274,7 @@ public class TestHASafeMode { "The reported blocks 0 needs additional 10 blocks")); banner("Waiting for standby to catch up to active namespace"); - 
TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); status = nn1.getNamesystem().getSafemode(); assertTrue("Bad safemode status: '" + status + "'", diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index 6eac5756b61..071a2985e8c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -29,7 +29,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.hdfs.TestDFSClientFailover; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; @@ -110,7 +109,7 @@ public class TestHAStateTransitions { cluster.transitionToActive(0); LOG.info("Starting with NN 0 active"); - FileSystem fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); fs.mkdirs(TEST_DIR); LOG.info("Failing over to NN 1"); @@ -161,7 +160,7 @@ public class TestHAStateTransitions { Mockito.doAnswer(new GenericTestUtils.SleepAnswer(50)) .when(spyLock).writeLock(); - final FileSystem fs = TestDFSClientFailover.configureFailoverFs( + final FileSystem fs = HATestUtil.configureFailoverFs( cluster, conf); TestContext ctx = new TestContext(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index 905dd03c60d..336c427fea9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -30,7 +30,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.hdfs.TestDFSClientFailover; import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; @@ -72,7 +71,7 @@ public class TestStandbyCheckpoints { nn0 = cluster.getNameNode(0); nn1 = cluster.getNameNode(1); - fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + fs = HATestUtil.configureFailoverFs(cluster, conf); nn1.getNamesystem().getEditLogTailer().setSleepTime(250); nn1.getNamesystem().getEditLogTailer().interrupt(); @@ -91,7 +90,7 @@ public class TestStandbyCheckpoints { public void testSBNCheckpoints() throws Exception { doEdits(0, 10); - TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); // Once the standby catches up, it should notice that it needs to // do a checkpoint and save one to its local directories. 
waitForCheckpoint(1, ImmutableList.of(0, 12)); @@ -162,7 +161,7 @@ public class TestStandbyCheckpoints { .saveNamespace((FSNamesystem) Mockito.anyObject()); // Roll the primary and wait for the standby to catch up - TestEditLogTailer.waitForStandbyToCatchUp(nn0, nn1); + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); Thread.sleep(2000); // We should make exactly one checkpoint at this new txid. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java index a9d09ca7a7a..ff87ebcc6fa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -37,7 +37,6 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.hdfs.TestDFSClientFailover; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; @@ -91,7 +90,7 @@ public class TestStandbyIsHot { nn2.getNamesystem().getEditLogTailer().setSleepTime(250); nn2.getNamesystem().getEditLogTailer().interrupt(); - FileSystem fs = TestDFSClientFailover.configureFailoverFs(cluster, conf); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); Thread.sleep(1000); System.err.println("=================================="); From 526efb48a6d3a44a753ee9fcb6333eba046193ca Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 10 Jan 2012 03:29:02 +0000 Subject: [PATCH 074/177] HDFS-2762. Fix TestCheckpoint timing out on HA branch. Contributed by Uma Maheswara Rao G. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1229464 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index f57d2d35fdf..165d09578ff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -89,3 +89,5 @@ HDFS-2291. Allow the StandbyNode to make checkpoints in an HA setup. (todd) HDFS-2709. Appropriately handle error conditions in EditLogTailer (atm via todd) HDFS-2730. Refactor shared HA-related test code into HATestUtil class (todd) + +HDFS-2762. Fix TestCheckpoint timing out on HA branch. 
(Uma Maheswara Rao G via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 6b800a9637f..da9724ef1f7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -632,7 +632,7 @@ public class MiniDFSCluster { nnCounterForFormat++; if (formatThisOne) { - NameNode.format(conf); + DFSTestUtil.formatNameNode(conf); } prevNNDirs = FSNamesystem.getNamespaceDirs(conf); } From 44d070cd01d267aa853ba92fba01e0116f53270b Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 10 Jan 2012 03:46:05 +0000 Subject: [PATCH 075/177] HDFS-2724. NN web UI can throw NPE after startup, before standby state is entered. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1229466 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java | 4 +++- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../java/org/apache/hadoop/hdfs/server/namenode/NameNode.java | 3 +++ .../hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp | 2 +- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java index 86f5f675ce3..ffb2f1d39ae 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java @@ -36,9 +36,11 @@ public interface HAServiceProtocol extends VersionedProtocol { public static final long versionID = 1L; /** - * An HA service may be in active or standby state. + * An HA service may be in active or standby state. During + * startup, it is in an unknown INITIALIZING state. */ public enum HAServiceState { + INITIALIZING("initializing"), ACTIVE("active"), STANDBY("standby"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 165d09578ff..018dfeaa550 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -91,3 +91,5 @@ HDFS-2709. Appropriately handle error conditions in EditLogTailer (atm via todd) HDFS-2730. Refactor shared HA-related test code into HATestUtil class (todd) HDFS-2762. Fix TestCheckpoint timing out on HA branch. (Uma Maheswara Rao G via todd) + +HDFS-2724. NN web UI can throw NPE after startup, before standby state is entered. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index c9af0ba05bf..9b3375fca4d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -926,6 +926,9 @@ public class NameNode { } synchronized HAServiceState getServiceState() { + if (state == null) { + return HAServiceState.INITIALIZING; + } return state.getServiceState(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp index b6d96a0ff3e..bc158a26f0c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp @@ -31,7 +31,7 @@ NameNode nn = NameNodeHttpServer.getNameNodeFromContext(application); FSNamesystem fsn = nn.getNamesystem(); String namenodeRole = nn.getRole().toString(); - String namenodeState = HAServiceState.ACTIVE.equals(nn.getServiceState()) ? "active" : "standby"; + String namenodeState = nn.getServiceState().toString(); String namenodeLabel = nn.getNameNodeAddress().getHostName() + ":" + nn.getNameNodeAddress().getPort(); %> From 190dc1c91b0ae0f3f128cc6603e354a3ec83288a Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 11 Jan 2012 05:55:32 +0000 Subject: [PATCH 076/177] HDFS-2753. Fix standby getting stuck in safemode when blocks are written while SBN is down. Contributed by Hari Mankude and Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1229898 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../server/blockmanagement/BlockManager.java | 2 +- .../blockmanagement/DatanodeDescriptor.java | 9 +++++ .../server/namenode/ha/TestHASafeMode.java | 35 ++++++++++++++++++- 4 files changed, 46 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 018dfeaa550..fae2f313044 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -93,3 +93,5 @@ HDFS-2730. Refactor shared HA-related test code into HATestUtil class (todd) HDFS-2762. Fix TestCheckpoint timing out on HA branch. (Uma Maheswara Rao G via todd) HDFS-2724. NN web UI can throw NPE after startup, before standby state is entered. (todd) + +HDFS-2753. Fix standby getting stuck in safemode when blocks are written while SBN is down. (Hari Mankude and todd via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index ec978f6ea10..ce01502972b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -1361,7 +1361,7 @@ public class BlockManager { // To minimize startup time, we discard any second (or later) block reports // that we receive while still in startup phase. 
- if (namesystem.isInStartupSafeMode() && node.numBlocks() > 0) { + if (namesystem.isInStartupSafeMode() && !node.isFirstBlockReport()) { NameNode.stateChangeLog.info("BLOCK* processReport: " + "discarded non-initial block report from " + nodeID.getName() + " because namenode still in startup phase"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index 807213ed17d..984456f142d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -151,6 +151,10 @@ public class DatanodeDescriptor extends DatanodeInfo { private long lastBlocksScheduledRollTime = 0; private static final int BLOCKS_SCHEDULED_ROLL_INTERVAL = 600*1000; //10min private int volumeFailures = 0; + + /** Set to false after processing first block report */ + private boolean firstBlockReport = true; + /** * When set to true, the node is not in include list and is not allowed * to communicate with the namenode @@ -608,6 +612,11 @@ public class DatanodeDescriptor extends DatanodeInfo { if (heartbeatedSinceFailover) { blockContentsStale = false; } + firstBlockReport = false; + } + + boolean isFirstBlockReport() { + return firstBlockReport; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index a23f38e97f2..a76470f1c41 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -192,7 +192,7 @@ public class TestHASafeMode { * knows there should only be 90 blocks, but it's still in safemode. * 8. NN2 doesn't ever recheck whether it should leave safemode. * - * This is essentially the inverse of {@link #testBlocksAddedWhileStandbyShutdown()} + * This is essentially the inverse of {@link #testBlocksAddedBeforeStandbyRestart()} */ @Test public void testBlocksRemovedBeforeStandbyRestart() throws Exception { @@ -328,6 +328,39 @@ public class TestHASafeMode { "total blocks 5. Safe mode will be turned off automatically")); } + /** + * Regression test for HDFS-2753. In this bug, the following sequence was + * observed: + * - Some blocks are written to DNs while the SBN was down. This causes + * the blockReceived messages to get queued in the BPServiceActor on the + * DN. + * - When the SBN returns, the DN re-registers with the SBN, and then + * flushes its blockReceived queue to the SBN before it sends its + * first block report. This caused the first block report to be + * incorrect ignored. + * - The SBN would become stuck in safemode. 
+ */ + @Test + public void testBlocksAddedWhileStandbyIsDown() throws Exception { + DFSTestUtil.createFile(fs, new Path("/test"), 3*BLOCK_SIZE, (short) 3, 1L); + + banner("Stopping standby"); + cluster.shutdownNameNode(1); + + DFSTestUtil.createFile(fs, new Path("/test2"), 3*BLOCK_SIZE, (short) 3, 1L); + + banner("Rolling edit log so standby gets all edits on restart"); + nn0.getRpcServer().rollEditLog(); + + restartStandby(); + String status = nn1.getNamesystem().getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks 6 has reached the threshold 0.9990 of " + + "total blocks 6. Safe mode will be turned off automatically")); + } + /** * Print a big banner in the test log to make debug easier. */ From 298e867673b1bbf33dbde828896ea3a332f4ec7c Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 11 Jan 2012 06:08:13 +0000 Subject: [PATCH 077/177] HDFS-2773. Reading edit logs from an earlier version should not leave blocks in under-construction state. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1229900 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/FSEditLogLoader.java | 14 +++- .../namenode/INodeFileUnderConstruction.java | 21 +++++ .../apache/hadoop/hdfs/TestPersistBlocks.java | 73 ++++++++++++++++++ .../resources/hadoop-1.0-multiblock-file.tgz | Bin 0 -> 2811 bytes 5 files changed, 108 insertions(+), 2 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1.0-multiblock-file.tgz diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index fae2f313044..0b01c22d541 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -95,3 +95,5 @@ HDFS-2762. Fix TestCheckpoint timing out on HA branch. (Uma Maheswara Rao G via HDFS-2724. NN web UI can throw NPE after startup, before standby state is entered. (todd) HDFS-2753. Fix standby getting stuck in safemode when blocks are written while SBN is down. (Hari Mankude and todd via todd) + +HDFS-2773. Reading edit logs from an earlier version should not leave blocks in under-construction state. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index e1394e630bf..56d2fcb5887 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -467,7 +467,7 @@ public class FSEditLogLoader { BlockInfo oldBlock = oldBlocks[i]; Block newBlock = addCloseOp.blocks[i]; - boolean isLastBlock = i == oldBlocks.length - 1; + boolean isLastBlock = i == addCloseOp.blocks.length - 1; if (oldBlock.getBlockId() != newBlock.getBlockId() || (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() && !(isGenStampUpdate && isLastBlock))) { @@ -504,7 +504,17 @@ public class FSEditLogLoader { // We're adding blocks for (int i = oldBlocks.length; i < addCloseOp.blocks.length; i++) { Block newBlock = addCloseOp.blocks[i]; - BlockInfo newBI = new BlockInfoUnderConstruction(newBlock, file.getReplication()); + BlockInfo newBI; + if (addCloseOp.opCode == FSEditLogOpCodes.OP_ADD){ + newBI = new BlockInfoUnderConstruction( + newBlock, file.getReplication()); + } else { + // OP_CLOSE should add finalized blocks. This code path + // is only executed when loading edits written by prior + // versions of Hadoop. Current versions always log + // OP_ADD operations as each block is allocated. + newBI = new BlockInfo(newBlock, file.getReplication()); + } fsNamesys.getBlockManager().addINode(newBI, file); file.addBlock(newBI); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java index 2440c4dd122..d3f918bf3d7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeFileUnderConstruction.java @@ -26,6 +26,8 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; +import com.google.common.base.Joiner; + /** * I-node for file being written. */ @@ -94,6 +96,9 @@ public class INodeFileUnderConstruction extends INodeFile { // use the modification time as the access time // INodeFile convertToInodeFile() { + assert allBlocksComplete() : + "Can't finalize inode " + this + " since it contains " + + "non-complete blocks! Blocks are: " + blocksAsString(); INodeFile obj = new INodeFile(getPermissionStatus(), getBlocks(), getReplication(), @@ -103,6 +108,18 @@ public class INodeFileUnderConstruction extends INodeFile { return obj; } + + /** + * @return true if all of the blocks in this file are marked as completed. + */ + private boolean allBlocksComplete() { + for (BlockInfo b : blocks) { + if (!b.isComplete()) { + return false; + } + } + return true; + } /** * Remove a block from the block list. 
This block should be @@ -141,4 +158,8 @@ public class INodeFileUnderConstruction extends INodeFile { setBlock(numBlocks()-1, ucBlock); return ucBlock; } + + private String blocksAsString() { + return Joiner.on(",").join(this.blocks); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java index dd1ff016a8a..cb989298faa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java @@ -23,22 +23,34 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; +import org.apache.hadoop.hdfs.server.namenode.FSEditLog; import org.apache.hadoop.hdfs.server.namenode.FSImage; +import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.INodeFileUnderConstruction; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; +import java.io.File; import java.io.IOException; +import java.net.URI; +import java.util.Collection; +import java.util.List; import java.util.Random; import static org.junit.Assert.*; import org.junit.Test; +import com.google.common.collect.Lists; + /** * A JUnit test for checking if restarting DFS preserves the * blocks that are part of an unclosed file. @@ -57,6 +69,9 @@ public class TestPersistBlocks { static final byte[] DATA_BEFORE_RESTART = new byte[BLOCK_SIZE * NUM_BLOCKS]; static final byte[] DATA_AFTER_RESTART = new byte[BLOCK_SIZE * NUM_BLOCKS]; + + private static final String HADOOP_1_0_MULTIBLOCK_TGZ = + "hadoop-1.0-multiblock-file.tgz"; static { Random rand = new Random(); rand.nextBytes(DATA_BEFORE_RESTART); @@ -277,4 +292,62 @@ public class TestPersistBlocks { if (cluster != null) { cluster.shutdown(); } } } + + /** + * Earlier versions of HDFS didn't persist block allocation to the edit log. + * This makes sure that we can still load an edit log when the OP_CLOSE + * is the opcode which adds all of the blocks. This is a regression + * test for HDFS-2773. + * This test uses a tarred pseudo-distributed cluster from Hadoop 1.0 + * which has a multi-block file. This is similar to the tests in + * {@link TestDFSUpgradeFromImage} but none of those images include + * a multi-block file. 
+ */ + @Test + public void testEarlierVersionEditLog() throws Exception { + final Configuration conf = new HdfsConfiguration(); + + String tarFile = System.getProperty("test.cache.data", "build/test/cache") + + "/" + HADOOP_1_0_MULTIBLOCK_TGZ; + String testDir = System.getProperty("test.build.data", "build/test/data"); + File dfsDir = new File(testDir, "image-1.0"); + if (dfsDir.exists() && !FileUtil.fullyDelete(dfsDir)) { + throw new IOException("Could not delete dfs directory '" + dfsDir + "'"); + } + FileUtil.unTar(new File(tarFile), new File(testDir)); + + File nameDir = new File(dfsDir, "name"); + GenericTestUtils.assertExists(nameDir); + File dataDir = new File(dfsDir, "data"); + GenericTestUtils.assertExists(dataDir); + + conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, nameDir.getAbsolutePath()); + conf.set(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, dataDir.getAbsolutePath()); + + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0) + .format(false) + .manageDataDfsDirs(false) + .manageNameDfsDirs(false) + .numDataNodes(1) + .startupOption(StartupOption.UPGRADE) + .build(); + try { + FileSystem fs = cluster.getFileSystem(); + Path testPath = new Path("/user/todd/4blocks"); + // Read it without caring about the actual data within - we just need + // to make sure that the block states and locations are OK. + DFSTestUtil.readFile(fs, testPath); + + // Ensure that we can append to it - if the blocks were in some funny + // state we'd get some kind of issue here. + FSDataOutputStream stm = fs.append(testPath); + try { + stm.write(1); + } finally { + IOUtils.closeStream(stm); + } + } finally { + cluster.shutdown(); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1.0-multiblock-file.tgz b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-1.0-multiblock-file.tgz new file mode 100644 index 0000000000000000000000000000000000000000..8e327c2f3c82dca39dadf5d2d70f982e7ac9c1e0 GIT binary patch literal 2811 zcmeH}`#;qA9>=xYp_V9WUE66-9YhSWZZUIeV^)P!JIt7J+iA#cT{0Ma+sGWZ)FQdg zF3VJQlFPUhO_^N6m_>F9!(mLWLo;JAnfZRs+JEEp{_*|(<@I>}@P0iJ*}GIciqySS zJ{R%5!VPf;RRXB-T46t>-%vOII`^sWo$qWQmw4CTEJ~lmF+F@DOlMBbI*q)#7Ub95 zc(yB$JK+%Rc2ftVNpSeOp5X9ek0_!m)N$>_PXe=?{DP)P9s5i{$ihbTLa(4W@i^Qu zRa$H>aHr7Qa03|i=^$ZedbM-E8{3rINvC>Usx;LSIG-7|eVFdO|5oU~dMzUhZ!c?R za8mbP%HZeb=GT<2!Mc~WAn~d#IV=0&PI@L(c27{%TJ~=RDexG~;HR?YHRyg?o;u)& zfe`xnR{bWo(^?E!y@i6Eh&i7T&W;2N5JaYlI4=xG=tirL*Z}szeyG968KzJuH0)O_ z>FeqKwzQXpT;qX8dxO*_^6cWg^r2=v#T9Ct;)^&g&;J;`dgk_PKrtt;OtyD8mD?D< zhY|-i!eKhD?b#i0{KI=jq)9@{yWO2`dxgCGCWUT0M7A*1dO5TBz*8fMgN=?&JZcuK zT@#IsU@*y9)lZ!C;yqX}25D=kf+Y z)sGOM^@3u(gTVhN)yJ&PVns76RcKDsxaR%mchVz|dFHXxQp~#Ah|dPye*zG$1l6$F zH_U;765xelm{?@ZM$0n>HLZ^j)AWm*#h`%zSyvNSWmf{)x(r%6{l02Lmybamfng^LKFU z7KaOLP_Pz&M>i%n8Kfx+gg0LG?T& zDuhf{Q)e(LWlwk!-p;cma(K)CJaQjUOH*bJUgMgE1dCK z5;8alroT>rsfm_=G|lnjdwz&))Je}h)K_P+AZy*X-2?w&n?An1`05v4_yPN(qRM0a zowkFI7|eCg`u$inb|mX}=diq_uHnU8Kl7#9mLi`=;~Wg1KiyG@4k4d7D|IJyEPKrE zL^1*5-VQWIKVO2*>COWZhfE;||GNkJ|J%U3*)P_X`-eAC)1(E~)_}d%ri2pD90H^O zuu1dz(-&T4IYW8eM$ajbfBG&=D^3VhaQOo1@9rX~><5gPJ7WWf;<2BLfQC^M7-cfn z4<-CDX?ZgyA@+I%@jAm<$P!Od(7Pv}r_2rJP5S%cG*$Tlt)oV&2&1xzJ&*WmFSSYy z`H{$Z**HJdYFTyqRKdmJYG)2^cwR$jFY)mmnNmw`i|I9Z^{rdiog9*UJWO z*5S>~HH1B$Ou)s6k+mm1{`LWO`Q+O8jgcPoW7iq9POzB8;zzg`C9^`vcF;tW7S)yX z#pv`U5oDVMVbw2+X@BWLOZ=8H$bhGy#edv13ANA#I0s|cupAqe!m{>7=B~}F?)oYlIlK*uuUd zGZx^la6LbEEHh_smfZo~Kld<`@KL{o`r3UK?XsIx1RicdSHFc>!=AlBo`mg}S`Ew` ziJJ9xQC|`l8{9ZP3sv-jcXvQt)5t+UW8~yZ(MczP7e@(g|DcO8eel%r!srZ`b{Zi6 
zTD*ue8Ue8@B-kZlSP4aIyU3K>kTnH$K#I63kk5hY{0czcoM6Sgk?`dY?q*a`n@=<4 zrh~ej5|a|HTSt312CcDC*H91(6L07%h3f~Ue;!A4a#}{y?`Nbm^!4f6w-R2Z<|8@fmrn7+R U_E|**pka0Ryx#?t*1anK11!+myZ`_I literal 0 HcmV?d00001 From a4f4becf52b3fb9199812dfa06e6acb461ee47d6 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 11 Jan 2012 06:14:13 +0000 Subject: [PATCH 078/177] HDFS-2775. Fix TestStandbyCheckpoints.testBothNodesInStandbyState failing intermittently. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1229901 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../java/org/apache/hadoop/hdfs/server/namenode/FSImage.java | 3 +++ .../hdfs/server/namenode/ha/TestStandbyCheckpoints.java | 4 ++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 0b01c22d541..55939fe34d4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -97,3 +97,5 @@ HDFS-2724. NN web UI can throw NPE after startup, before standby state is entere HDFS-2753. Fix standby getting stuck in safemode when blocks are written while SBN is down. (Hari Mankude and todd via todd) HDFS-2773. Reading edit logs from an earlier version should not leave blocks in under-construction state. (todd) + +HDFS-2775. Fix TestStandbyCheckpoints.testBothNodesInStandbyState failing intermittently. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 8eb4dede34c..ce1abe82bbb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -1134,4 +1134,7 @@ public class FSImage implements Closeable { this.lastAppliedTxId = editLog.getLastWrittenTxId(); } + public synchronized long getMostRecentCheckpointTxId() { + return storage.getMostRecentCheckpointTxId(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index 336c427fea9..b02ac5cdac2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -121,9 +121,9 @@ public class TestStandbyCheckpoints { waitForCheckpoint(1, ImmutableList.of(0, 12)); waitForCheckpoint(0, ImmutableList.of(0, 12)); - assertEquals(12, nn0.getNamesystem().getFSImage().getStorage() + assertEquals(12, nn0.getNamesystem().getFSImage() .getMostRecentCheckpointTxId()); - assertEquals(12, nn1.getNamesystem().getFSImage().getStorage() + assertEquals(12, nn1.getNamesystem().getFSImage() .getMostRecentCheckpointTxId()); List dirs = Lists.newArrayList(); From a339836bbc747324807b9690c6cb5bb13b1fdc0b Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 11 Jan 2012 08:26:18 +0000 Subject: [PATCH 079/177] HDFS-2766. Test for case where standby partially reads log and then performs checkpoint. Contributed by Aaron T. 
Myers git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1229929 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../server/namenode/FileJournalManager.java | 8 +- .../hdfs/server/namenode/FSImageTestUtil.java | 6 + .../hdfs/server/namenode/ha/HATestUtil.java | 19 ++ .../namenode/ha/TestFailureToReadEdits.java | 247 ++++++++++++------ .../namenode/ha/TestStandbyCheckpoints.java | 25 +- 6 files changed, 207 insertions(+), 100 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 55939fe34d4..9441e52958d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -99,3 +99,5 @@ HDFS-2753. Fix standby getting stuck in safemode when blocks are written while S HDFS-2773. Reading edit logs from an earlier version should not leave blocks in under-construction state. (todd) HDFS-2775. Fix TestStandbyCheckpoints.testBothNodesInStandbyState failing intermittently. (todd) + +HDFS-2766. Test for case where standby partially reads log and then performs checkpoint. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index 06b8eff3fa9..3c6bec6cd5a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -204,7 +204,13 @@ class FileJournalManager implements JournalManager { } EditLogFileInputStream elfis = new EditLogFileInputStream(elf.getFile(), elf.getFirstTxId(), elf.getLastTxId(), elf.isInProgress()); - elfis.skipTransactions(fromTxId - elf.getFirstTxId()); + long transactionsToSkip = fromTxId - elf.getFirstTxId(); + if (transactionsToSkip > 0) { + LOG.info(String.format("Log begins at txid %d, but requested start " + + "txid is %d. Skipping %d edits.", elf.getFirstTxId(), fromTxId, + transactionsToSkip)); + elfis.skipTransactions(transactionsToSkip); + } return elfis; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java index 2e4e932b386..f0b8a6d2b30 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java @@ -33,7 +33,9 @@ import java.util.Map.Entry; import java.util.Properties; import java.util.Set; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -64,6 +66,8 @@ import static org.mockito.Mockito.mock; */ public abstract class FSImageTestUtil { + public static final Log LOG = LogFactory.getLog(FSImageTestUtil.class.getName()); + /** * The position in the fsimage header where the txid is * written. 
@@ -410,6 +414,8 @@ public abstract class FSImageTestUtil { for (File nameDir : getNameNodeCurrentDirs(cluster, nnIdx)) { // Should have fsimage_N for the three checkpoints + LOG.info("Examining storage dir " + nameDir + " with contents: " + + StringUtils.join(nameDir.listFiles(), ", ")); for (long checkpointTxId : txids) { File image = new File(nameDir, NNStorage.getImageFileName(checkpointTxId)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java index 1b7b62dade7..ba05da82414 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -25,6 +25,7 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; +import java.util.List; import java.util.concurrent.TimeoutException; import org.apache.commons.logging.Log; @@ -36,6 +37,7 @@ import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; +import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.test.GenericTestUtils; @@ -159,4 +161,21 @@ public abstract class HATestUtil { public static String getLogicalHostname(MiniDFSCluster cluster) { return String.format(LOGICAL_HOSTNAME, cluster.getInstanceId()); } + + public static void waitForCheckpoint(MiniDFSCluster cluster, int nnIdx, + List txids) throws InterruptedException { + long start = System.currentTimeMillis(); + while (true) { + try { + FSImageTestUtil.assertNNHasCheckpoints(cluster, nnIdx, txids); + return; + } catch (AssertionError err) { + if (System.currentTimeMillis() - start > 10000) { + throw err; + } else { + Thread.sleep(300); + } + } + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java index 96a62960984..ca51b4eb9ba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java @@ -30,14 +30,14 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import java.io.IOException; -import java.net.URISyntaxException; +import java.net.URI; import java.util.Collection; import java.util.LinkedList; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; @@ -46,100 +46,191 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLog; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; -import 
org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil.CouldNotCatchUpException; +import org.junit.After; +import org.junit.Before; import org.junit.Test; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; +import com.google.common.collect.ImmutableList; + public class TestFailureToReadEdits { private static final String TEST_DIR1 = "/test1"; private static final String TEST_DIR2 = "/test2"; private static final String TEST_DIR3 = "/test3"; + + private Configuration conf; + private Runtime mockRuntime = mock(Runtime.class); + private MiniDFSCluster cluster; + private NameNode nn0; + private NameNode nn1; + private FileSystem fs; + + @Before + public void setUpCluster() throws Exception { + conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 10); + HAUtil.setAllowStandbyReads(conf, true); + + MiniDFSNNTopology topology = new MiniDFSNNTopology() + .addNameservice(new MiniDFSNNTopology.NSConf(null) + .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10001)) + .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10002))); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(topology) + .numDataNodes(0) + .build(); + + cluster.waitActive(); + + nn0 = cluster.getNameNode(0); + nn1 = cluster.getNameNode(1); + nn1.getNamesystem().getEditLogTailer().setSleepTime(250); + nn1.getNamesystem().getEditLogTailer().interrupt(); + nn1.getNamesystem().getEditLogTailer().setRuntime(mockRuntime); + + cluster.transitionToActive(0); + fs = HATestUtil.configureFailoverFs(cluster, conf); + } + + @After + public void tearDownCluster() throws Exception { + if (fs != null) { + fs.close(); + } + + if (cluster != null) { + cluster.shutdown(); + } + } /** * Test that the standby NN won't double-replay earlier edits if it encounters * a failure to read a later edit. */ @Test - public void testFailuretoReadEdits() throws IOException, - ServiceFailedException, URISyntaxException, InterruptedException { - Configuration conf = new Configuration(); - HAUtil.setAllowStandbyReads(conf, true); + public void testFailuretoReadEdits() throws Exception { + assertTrue(fs.mkdirs(new Path(TEST_DIR1))); + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) - .nnTopology(MiniDFSNNTopology.simpleHATopology()) - .numDataNodes(0) - .build(); + // If these two ops are applied twice, the first op will throw an + // exception the second time its replayed. + fs.setOwner(new Path(TEST_DIR1), "foo", "bar"); + assertTrue(fs.delete(new Path(TEST_DIR1), true)); + + // This op should get applied just fine. + assertTrue(fs.mkdirs(new Path(TEST_DIR2))); + + // This is the op the mocking will cause to fail to be read. 
+ assertTrue(fs.mkdirs(new Path(TEST_DIR3))); + + LimitedEditLogAnswer answer = causeFailureOnEditLogRead(); try { - cluster.waitActive(); - cluster.transitionToActive(0); - - Runtime mockRuntime = mock(Runtime.class); - - NameNode nn1 = cluster.getNameNode(0); - NameNode nn2 = cluster.getNameNode(1); - nn2.getNamesystem().getEditLogTailer().setSleepTime(250); - nn2.getNamesystem().getEditLogTailer().interrupt(); - nn2.getNamesystem().getEditLogTailer().setRuntime(mockRuntime); - - FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); - fs.mkdirs(new Path(TEST_DIR1)); - HATestUtil.waitForStandbyToCatchUp(nn1, nn2); - - // If these two ops are applied twice, the first op will throw an - // exception the second time its replayed. - fs.setOwner(new Path(TEST_DIR1), "foo", "bar"); - fs.delete(new Path(TEST_DIR1), true); - - // This op should get applied just fine. - fs.mkdirs(new Path(TEST_DIR2)); - - // This is the op the mocking will cause to fail to be read. - fs.mkdirs(new Path(TEST_DIR3)); - - FSEditLog spyEditLog = spy(nn2.getNamesystem().getEditLogTailer() - .getEditLog()); - LimitedEditLogAnswer answer = new LimitedEditLogAnswer(); - doAnswer(answer).when(spyEditLog).selectInputStreams( - anyLong(), anyLong(), anyBoolean()); - nn2.getNamesystem().getEditLogTailer().setEditLog(spyEditLog); - - try { - HATestUtil.waitForStandbyToCatchUp(nn1, nn2); - fail("Standby fully caught up, but should not have been able to"); - } catch (HATestUtil.CouldNotCatchUpException e) { - verify(mockRuntime, times(0)).exit(anyInt()); - } - - // Null because it was deleted. - assertNull(NameNodeAdapter.getFileInfo(nn2, - TEST_DIR1, false)); - // Should have been successfully created. - assertTrue(NameNodeAdapter.getFileInfo(nn2, - TEST_DIR2, false).isDir()); - // Null because it hasn't been created yet. - assertNull(NameNodeAdapter.getFileInfo(nn2, - TEST_DIR3, false)); - - // Now let the standby read ALL the edits. - answer.setThrowExceptionOnRead(false); - HATestUtil.waitForStandbyToCatchUp(nn1, nn2); - - // Null because it was deleted. - assertNull(NameNodeAdapter.getFileInfo(nn2, - TEST_DIR1, false)); - // Should have been successfully created. - assertTrue(NameNodeAdapter.getFileInfo(nn2, - TEST_DIR2, false).isDir()); - // Should now have been successfully created. - assertTrue(NameNodeAdapter.getFileInfo(nn2, - TEST_DIR3, false).isDir()); - } finally { - if (cluster != null) { - cluster.shutdown(); - } + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + fail("Standby fully caught up, but should not have been able to"); + } catch (HATestUtil.CouldNotCatchUpException e) { + verify(mockRuntime, times(0)).exit(anyInt()); } + + // Null because it was deleted. + assertNull(NameNodeAdapter.getFileInfo(nn1, + TEST_DIR1, false)); + // Should have been successfully created. + assertTrue(NameNodeAdapter.getFileInfo(nn1, + TEST_DIR2, false).isDir()); + // Null because it hasn't been created yet. + assertNull(NameNodeAdapter.getFileInfo(nn1, + TEST_DIR3, false)); + + // Now let the standby read ALL the edits. + answer.setThrowExceptionOnRead(false); + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + + // Null because it was deleted. + assertNull(NameNodeAdapter.getFileInfo(nn1, + TEST_DIR1, false)); + // Should have been successfully created. + assertTrue(NameNodeAdapter.getFileInfo(nn1, + TEST_DIR2, false).isDir()); + // Should now have been successfully created. + assertTrue(NameNodeAdapter.getFileInfo(nn1, + TEST_DIR3, false).isDir()); + } + + /** + * Test the following case: + * 1. 
SBN is reading a finalized edits file when NFS disappears halfway + * through (or some intermittent error happens) + * 2. SBN performs a checkpoint and uploads it to the NN + * 3. NN receives a checkpoint that doesn't correspond to the end of any log + * segment + * 4. Both NN and SBN should be able to restart at this point. + * + * This is a regression test for HDFS-2766. + */ + @Test + public void testCheckpointStartingMidEditsFile() throws Exception { + assertTrue(fs.mkdirs(new Path(TEST_DIR1))); + + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + + // Once the standby catches up, it should notice that it needs to + // do a checkpoint and save one to its local directories. + HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 3)); + + // It should also upload it back to the active. + HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 3)); + + causeFailureOnEditLogRead(); + + assertTrue(fs.mkdirs(new Path(TEST_DIR2))); + assertTrue(fs.mkdirs(new Path(TEST_DIR3))); + + try { + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + fail("Standby fully caught up, but should not have been able to"); + } catch (HATestUtil.CouldNotCatchUpException e) { + verify(mockRuntime, times(0)).exit(anyInt()); + } + + // 5 because we should get OP_START_LOG_SEGMENT and one successful OP_MKDIR + HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 3, 5)); + + // It should also upload it back to the active. + HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 3, 5)); + + // Restart the active NN + cluster.restartNameNode(0); + + HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 3, 5)); + + FileSystem fs0 = null; + try { + // Make sure that when the active restarts, it loads all the edits. + fs0 = FileSystem.get(NameNode.getUri(nn0.getNameNodeAddress()), + conf); + + assertTrue(fs0.exists(new Path(TEST_DIR1))); + assertTrue(fs0.exists(new Path(TEST_DIR2))); + assertTrue(fs0.exists(new Path(TEST_DIR3))); + } finally { + if (fs0 != null) + fs0.close(); + } + } + + private LimitedEditLogAnswer causeFailureOnEditLogRead() throws IOException { + FSEditLog spyEditLog = spy(nn1.getNamesystem().getEditLogTailer() + .getEditLog()); + LimitedEditLogAnswer answer = new LimitedEditLogAnswer(); + doAnswer(answer).when(spyEditLog).selectInputStreams( + anyLong(), anyLong(), anyBoolean()); + nn1.getNamesystem().getEditLogTailer().setEditLog(spyEditLog); + + return answer; } private static class LimitedEditLogAnswer diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index b02ac5cdac2..83f077c55d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -93,10 +93,10 @@ public class TestStandbyCheckpoints { HATestUtil.waitForStandbyToCatchUp(nn0, nn1); // Once the standby catches up, it should notice that it needs to // do a checkpoint and save one to its local directories. - waitForCheckpoint(1, ImmutableList.of(0, 12)); + HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 12)); // It should also upload it back to the active. 
- waitForCheckpoint(0, ImmutableList.of(0, 12)); + HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 12)); } /** @@ -118,8 +118,8 @@ public class TestStandbyCheckpoints { // so the standby will catch up. Then, both will be in standby mode // with enough uncheckpointed txns to cause a checkpoint, and they // will each try to take a checkpoint and upload to each other. - waitForCheckpoint(1, ImmutableList.of(0, 12)); - waitForCheckpoint(0, ImmutableList.of(0, 12)); + HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 12)); + HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 12)); assertEquals(12, nn0.getNamesystem().getFSImage() .getMostRecentCheckpointTxId()); @@ -211,7 +211,6 @@ public class TestStandbyCheckpoints { assertTrue(StandbyCheckpointer.getCanceledCount() > 0); } - private void doEdits(int start, int stop) throws IOException { for (int i = start; i < stop; i++) { @@ -220,20 +219,4 @@ public class TestStandbyCheckpoints { } } - private void waitForCheckpoint(int nnIdx, List txids) - throws InterruptedException { - long start = System.currentTimeMillis(); - while (true) { - try { - FSImageTestUtil.assertNNHasCheckpoints(cluster, nnIdx, txids); - return; - } catch (AssertionError err) { - if (System.currentTimeMillis() - start > 10000) { - throw err; - } else { - Thread.sleep(300); - } - } - } - } } From 4f1bf2fe23e53ff4b8550882d19f2cf1dd477926 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 11 Jan 2012 08:32:10 +0000 Subject: [PATCH 080/177] HDFS-2738. FSEditLog.selectinputStreams is reading through in-progress streams even when non-in-progress are requested. Contributed by Aaron T. Myers git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1229931 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../bkjournal/BookKeeperJournalManager.java | 8 ++- .../TestBookKeeperJournalManager.java | 14 ++-- .../server/namenode/BackupJournalManager.java | 5 +- .../hdfs/server/namenode/FSEditLog.java | 16 ++--- .../hadoop/hdfs/server/namenode/FSImage.java | 7 +- .../server/namenode/FileJournalManager.java | 24 +++++-- .../hdfs/server/namenode/JournalManager.java | 7 +- .../hdfs/server/namenode/JournalSet.java | 14 ++-- .../hdfs/server/namenode/TestEditLog.java | 28 ++++++-- .../namenode/TestFileJournalManager.java | 65 ++++++++++++++----- .../namenode/TestGenericJournalConf.java | 4 +- 12 files changed, 136 insertions(+), 58 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 9441e52958d..74bdec7d88a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -101,3 +101,5 @@ HDFS-2773. Reading edit logs from an earlier version should not leave blocks in HDFS-2775. Fix TestStandbyCheckpoints.testBothNodesInStandbyState failing intermittently. (todd) HDFS-2766. Test for case where standby partially reads log and then performs checkpoint. (atm) + +HDFS-2738. FSEditLog.selectinputStreams is reading through in-progress streams even when non-in-progress are requested. 
(atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java index 7fa90269ecd..047efd51f4b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/java/org/apache/hadoop/contrib/bkjournal/BookKeeperJournalManager.java @@ -312,8 +312,10 @@ public class BookKeeperJournalManager implements JournalManager { } } + // TODO(HA): Handle inProgressOk @Override - public EditLogInputStream getInputStream(long fromTxnId) throws IOException { + public EditLogInputStream getInputStream(long fromTxnId, boolean inProgressOk) + throws IOException { for (EditLogLedgerMetadata l : getLedgerList()) { if (l.getFirstTxId() == fromTxnId) { try { @@ -329,8 +331,10 @@ public class BookKeeperJournalManager implements JournalManager { throw new IOException("No ledger for fromTxnId " + fromTxnId + " found."); } + // TODO(HA): Handle inProgressOk @Override - public long getNumberOfTransactions(long fromTxnId) throws IOException { + public long getNumberOfTransactions(long fromTxnId, boolean inProgressOk) + throws IOException { long count = 0; long expectedStart = 0; for (EditLogLedgerMetadata l : getLedgerList()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java index b949bc200ea..5937fa82958 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/test/java/org/apache/hadoop/contrib/bkjournal/TestBookKeeperJournalManager.java @@ -195,7 +195,7 @@ public class TestBookKeeperJournalManager { out.close(); bkjm.finalizeLogSegment(1, 100); - long numTrans = bkjm.getNumberOfTransactions(1); + long numTrans = bkjm.getNumberOfTransactions(1, true); assertEquals(100, numTrans); } @@ -218,17 +218,17 @@ public class TestBookKeeperJournalManager { } zkc.delete(bkjm.finalizedLedgerZNode(DEFAULT_SEGMENT_SIZE+1, DEFAULT_SEGMENT_SIZE*2), -1); - long numTrans = bkjm.getNumberOfTransactions(1); + long numTrans = bkjm.getNumberOfTransactions(1, true); assertEquals(DEFAULT_SEGMENT_SIZE, numTrans); try { - numTrans = bkjm.getNumberOfTransactions(DEFAULT_SEGMENT_SIZE+1); + numTrans = bkjm.getNumberOfTransactions(DEFAULT_SEGMENT_SIZE+1, true); fail("Should have thrown corruption exception by this point"); } catch (JournalManager.CorruptionException ce) { // if we get here, everything is going good } - numTrans = bkjm.getNumberOfTransactions((DEFAULT_SEGMENT_SIZE*2)+1); + numTrans = bkjm.getNumberOfTransactions((DEFAULT_SEGMENT_SIZE*2)+1, true); assertEquals(DEFAULT_SEGMENT_SIZE, numTrans); } @@ -262,7 +262,7 @@ public class TestBookKeeperJournalManager { out.abort(); out.close(); - long numTrans = bkjm.getNumberOfTransactions(1); + long numTrans = bkjm.getNumberOfTransactions(1, true); assertEquals((txid-1), numTrans); } @@ -357,7 +357,7 @@ public class TestBookKeeperJournalManager { bkjm.finalizeLogSegment(1, numTransactions); - EditLogInputStream in = 
bkjm.getInputStream(1); + EditLogInputStream in = bkjm.getInputStream(1, true); try { assertEquals(numTransactions, FSEditLogTestUtil.countTransactionsInStream(in)); @@ -392,4 +392,4 @@ public class TestBookKeeperJournalManager { assertNotNull(zkc.exists(bkjm.finalizedLedgerZNode(1, 100), false)); assertNull(zkc.exists(bkjm.inprogressZNode(), false)); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java index c655ee75bbf..de75b769345 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupJournalManager.java @@ -58,7 +58,7 @@ class BackupJournalManager implements JournalManager { } @Override - public long getNumberOfTransactions(long fromTxnId) + public long getNumberOfTransactions(long fromTxnId, boolean inProgressOk) throws IOException, CorruptionException { // This JournalManager is never used for input. Therefore it cannot // return any transactions @@ -66,7 +66,8 @@ class BackupJournalManager implements JournalManager { } @Override - public EditLogInputStream getInputStream(long fromTxnId) throws IOException { + public EditLogInputStream getInputStream(long fromTxnId, boolean inProgressOk) + throws IOException { // This JournalManager is never used for input. Therefore it cannot // return any transactions throw new IOException("Unsupported operation"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 19f9f5117aa..cd7ff5b0c8f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -252,7 +252,7 @@ public class FSEditLog { long segmentTxId = getLastWrittenTxId() + 1; // Safety check: we should never start a segment if there are // newer txids readable. - EditLogInputStream s = journalSet.getInputStream(segmentTxId); + EditLogInputStream s = journalSet.getInputStream(segmentTxId, true); try { Preconditions.checkState(s == null, "Cannot start writing at txid %s when there is a stream " + @@ -1071,19 +1071,19 @@ public class FSEditLog { public Collection selectInputStreams(long fromTxId, long toAtLeastTxId, boolean inProgressOk) throws IOException { List streams = new ArrayList(); - EditLogInputStream stream = journalSet.getInputStream(fromTxId); + EditLogInputStream stream = journalSet.getInputStream(fromTxId, inProgressOk); while (stream != null) { - if (inProgressOk || !stream.isInProgress()) { - streams.add(stream); - } + streams.add(stream); // We're now looking for a higher range, so reset the fromTxId fromTxId = stream.getLastTxId() + 1; - stream = journalSet.getInputStream(fromTxId); + stream = journalSet.getInputStream(fromTxId, inProgressOk); } + if (fromTxId <= toAtLeastTxId) { closeAllStreams(streams); - throw new IOException("No non-corrupt logs for txid " - + fromTxId); + throw new IOException(String.format("Gap in transactions. 
Expected to " + + "be able to read up until at least txid %d but unable to find any " + + "edit logs containing txid %d", toAtLeastTxId, fromTxId)); } return streams; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index ce1abe82bbb..d72523d29f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -585,9 +585,12 @@ public class FSImage implements Closeable { if (LayoutVersion.supports(Feature.TXID_BASED_LAYOUT, getLayoutVersion())) { + // If we're open for write, we're either non-HA or we're the active NN, so + // we better be able to load all the edits. If we're the standby NN, it's + // OK to not be able to read all of edits right now. + long toAtLeastTxId = editLog.isOpenForWrite() ? inspector.getMaxSeenTxId() : 0; editStreams = editLog.selectInputStreams(imageFile.getCheckpointTxId() + 1, - inspector.getMaxSeenTxId(), - false); + toAtLeastTxId, false); } else { editStreams = FSImagePreTransactionalStorageInspector .getEditLogStreams(storage); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index 3c6bec6cd5a..2380e93f0f7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -31,6 +31,7 @@ import java.util.regex.Pattern; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; +import org.apache.hadoop.hdfs.server.namenode.JournalManager.CorruptionException; import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StoragePurger; import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.EditLogValidation; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; @@ -192,10 +193,13 @@ class FileJournalManager implements JournalManager { } @Override - synchronized public EditLogInputStream getInputStream(long fromTxId) - throws IOException { + synchronized public EditLogInputStream getInputStream(long fromTxId, + boolean inProgressOk) throws IOException { for (EditLogFile elf : getLogFiles(fromTxId)) { if (elf.containsTxId(fromTxId)) { + if (!inProgressOk && elf.isInProgress()) { + continue; + } if (elf.isInProgress()) { elf.validateLog(); } @@ -219,7 +223,7 @@ class FileJournalManager implements JournalManager { } @Override - public long getNumberOfTransactions(long fromTxId) + public long getNumberOfTransactions(long fromTxId, boolean inProgressOk) throws IOException, CorruptionException { long numTxns = 0L; @@ -232,6 +236,10 @@ class FileJournalManager implements JournalManager { + fromTxId + " - " + (elf.getFirstTxId() - 1)); break; } else if (elf.containsTxId(fromTxId)) { + if (!inProgressOk && elf.isInProgress()) { + break; + } + if (elf.isInProgress()) { elf.validateLog(); } @@ -253,7 +261,7 @@ class FileJournalManager implements JournalManager { + " txns from " + fromTxId); } - long max = findMaxTransaction(); + long max = findMaxTransaction(inProgressOk); // 
fromTxId should be greater than max, as it points to the next // transaction we should expect to find. If it is less than or equal @@ -276,7 +284,7 @@ class FileJournalManager implements JournalManager { // make sure journal is aware of max seen transaction before moving corrupt // files aside - findMaxTransaction(); + findMaxTransaction(true); for (EditLogFile elf : allLogFiles) { if (elf.getFile().equals(currentInProgress)) { @@ -318,9 +326,13 @@ class FileJournalManager implements JournalManager { * tranaction id in the case that it was the maximum transaction in * the journal. */ - private long findMaxTransaction() + private long findMaxTransaction(boolean inProgressOk) throws IOException { for (EditLogFile elf : getLogFiles(0)) { + if (elf.isInProgress() && !inProgressOk) { + continue; + } + if (elf.isInProgress()) { maxSeenTransaction = Math.max(elf.getFirstTxId(), maxSeenTransaction); elf.validateLog(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java index d45de18e92d..f9c622dc387 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalManager.java @@ -48,20 +48,23 @@ public interface JournalManager extends Closeable { /** * Get the input stream starting with fromTxnId from this journal manager * @param fromTxnId the first transaction id we want to read + * @param inProgressOk whether or not in-progress streams should be returned * @return the stream starting with transaction fromTxnId * @throws IOException if a stream cannot be found. */ - EditLogInputStream getInputStream(long fromTxnId) throws IOException; + EditLogInputStream getInputStream(long fromTxnId, boolean inProgressOk) + throws IOException; /** * Get the number of transaction contiguously available from fromTxnId. * * @param fromTxnId Transaction id to count from + * @param inProgressOk whether or not in-progress streams should be counted * @return The number of transactions available from fromTxnId * @throws IOException if the journal cannot be read. * @throws CorruptionException if there is a gap in the journal at fromTxnId. 
*/ - long getNumberOfTransactions(long fromTxnId) + long getNumberOfTransactions(long fromTxnId, boolean inProgressOk) throws IOException, CorruptionException; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java index 7af0b51b909..c00236fd094 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java @@ -198,7 +198,8 @@ public class JournalSet implements JournalManager { * or null if no more exist */ @Override - public EditLogInputStream getInputStream(long fromTxnId) throws IOException { + public EditLogInputStream getInputStream(long fromTxnId, boolean inProgressOk) + throws IOException { JournalManager bestjm = null; long bestjmNumTxns = 0; CorruptionException corruption = null; @@ -209,7 +210,8 @@ public class JournalSet implements JournalManager { JournalManager candidate = jas.getManager(); long candidateNumTxns = 0; try { - candidateNumTxns = candidate.getNumberOfTransactions(fromTxnId); + candidateNumTxns = candidate.getNumberOfTransactions(fromTxnId, + inProgressOk); } catch (CorruptionException ce) { corruption = ce; } catch (IOException ioe) { @@ -232,18 +234,20 @@ public class JournalSet implements JournalManager { return null; } } - return bestjm.getInputStream(fromTxnId); + return bestjm.getInputStream(fromTxnId, inProgressOk); } @Override - public long getNumberOfTransactions(long fromTxnId) throws IOException { + public long getNumberOfTransactions(long fromTxnId, boolean inProgressOk) + throws IOException { long num = 0; for (JournalAndStream jas: journals) { if (jas.isDisabled()) { LOG.info("Skipping jas " + jas + " since it's disabled"); continue; } else { - long newNum = jas.getManager().getNumberOfTransactions(fromTxnId); + long newNum = jas.getManager().getNumberOfTransactions(fromTxnId, + inProgressOk); if (newNum > num) { num = newNum; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index f36b5d20516..f95a876eed9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -936,11 +936,11 @@ public class TestEditLog extends TestCase { * * @param editUris directories to create edit logs in * @param numrolls number of times to roll the edit log during setup + * @param closeOnFinish whether to close the edit log after setup * @param abortAtRolls Specifications for when to fail, see AbortSpec */ - public static NNStorage setupEdits(List editUris, int numrolls, - AbortSpec... abortAtRolls) - throws IOException { + public static NNStorage setupEdits(List editUris, int numrolls, + boolean closeOnFinish, AbortSpec... 
abortAtRolls) throws IOException { List aborts = new ArrayList(Arrays.asList(abortAtRolls)); NNStorage storage = new NNStorage(new Configuration(), Collections.emptyList(), @@ -979,16 +979,34 @@ public class TestEditLog extends TestCase { } editlog.logSync(); } - editlog.close(); + + if (closeOnFinish) { + editlog.close(); + } FSImageTestUtil.logStorageContents(LOG, storage); return storage; } + + /** + * Set up directories for tests. + * + * Each rolled file is 10 txns long. + * A failed file is 2 txns long. + * + * @param editUris directories to create edit logs in + * @param numrolls number of times to roll the edit log during setup + * @param abortAtRolls Specifications for when to fail, see AbortSpec + */ + public static NNStorage setupEdits(List editUris, int numrolls, + AbortSpec... abortAtRolls) throws IOException { + return setupEdits(editUris, numrolls, true, abortAtRolls); + } /** * Test loading an editlog which has had both its storage fail * on alternating rolls. Two edit log directories are created. - * The first on fails on odd rolls, the second on even. Test + * The first one fails on odd rolls, the second on even. Test * that we are able to load the entire editlog regardless. */ @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java index 275c3fa38ae..300080a5c96 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java @@ -60,7 +60,7 @@ public class TestFileJournalManager { long numJournals = 0; for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.EDITS)) { FileJournalManager jm = new FileJournalManager(sd); - assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1)); + assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true)); numJournals++; } assertEquals(3, numJournals); @@ -81,7 +81,7 @@ public class TestFileJournalManager { FileJournalManager jm = new FileJournalManager(sd); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, - jm.getNumberOfTransactions(1)); + jm.getNumberOfTransactions(1, true)); } /** @@ -103,15 +103,16 @@ public class TestFileJournalManager { Iterator dirs = storage.dirIterator(NameNodeDirType.EDITS); StorageDirectory sd = dirs.next(); FileJournalManager jm = new FileJournalManager(sd); - assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1)); + assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true)); sd = dirs.next(); jm = new FileJournalManager(sd); - assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1)); + assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, + true)); sd = dirs.next(); jm = new FileJournalManager(sd); - assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1)); + assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true)); } /** @@ -135,15 +136,18 @@ public class TestFileJournalManager { Iterator dirs = storage.dirIterator(NameNodeDirType.EDITS); StorageDirectory sd = dirs.next(); FileJournalManager jm = new FileJournalManager(sd); - assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1)); + assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, + true)); sd = dirs.next(); jm = new FileJournalManager(sd); - 
assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1)); + assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, + true)); sd = dirs.next(); jm = new FileJournalManager(sd); - assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1)); + assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, + true)); } /** @@ -174,15 +178,15 @@ public class TestFileJournalManager { FileJournalManager jm = new FileJournalManager(sd); long expectedTotalTxnCount = TXNS_PER_ROLL*10 + TXNS_PER_FAIL; - assertEquals(expectedTotalTxnCount, jm.getNumberOfTransactions(1)); + assertEquals(expectedTotalTxnCount, jm.getNumberOfTransactions(1, true)); long skippedTxns = (3*TXNS_PER_ROLL); // skip first 3 files long startingTxId = skippedTxns + 1; - long numTransactionsToLoad = jm.getNumberOfTransactions(startingTxId); + long numTransactionsToLoad = jm.getNumberOfTransactions(startingTxId, true); long numLoaded = 0; while (numLoaded < numTransactionsToLoad) { - EditLogInputStream editIn = jm.getInputStream(startingTxId); + EditLogInputStream editIn = jm.getInputStream(startingTxId, true); FSEditLogLoader.EditLogValidation val = FSEditLogLoader.validateEditLog(editIn); long count = val.getNumTransactions(); @@ -212,7 +216,8 @@ public class TestFileJournalManager { // 10 rolls, so 11 rolled files, 110 txids total. final int TOTAL_TXIDS = 10 * 11; for (int txid = 1; txid <= TOTAL_TXIDS; txid++) { - assertEquals((TOTAL_TXIDS - txid) + 1, jm.getNumberOfTransactions(txid)); + assertEquals((TOTAL_TXIDS - txid) + 1, jm.getNumberOfTransactions(txid, + true)); } } @@ -244,10 +249,10 @@ public class TestFileJournalManager { assertTrue(files[0].delete()); FileJournalManager jm = new FileJournalManager(sd); - assertEquals(startGapTxId-1, jm.getNumberOfTransactions(1)); + assertEquals(startGapTxId-1, jm.getNumberOfTransactions(1, true)); try { - jm.getNumberOfTransactions(startGapTxId); + jm.getNumberOfTransactions(startGapTxId, true); fail("Should have thrown an exception by now"); } catch (IOException ioe) { assertTrue(true); @@ -255,7 +260,7 @@ public class TestFileJournalManager { // rolled 10 times so there should be 11 files. assertEquals(11*TXNS_PER_ROLL - endGapTxId, - jm.getNumberOfTransactions(endGapTxId+1)); + jm.getNumberOfTransactions(endGapTxId + 1, true)); } /** @@ -282,7 +287,7 @@ public class TestFileJournalManager { FileJournalManager jm = new FileJournalManager(sd); assertEquals(10*TXNS_PER_ROLL+1, - jm.getNumberOfTransactions(1)); + jm.getNumberOfTransactions(1, true)); } @Test @@ -323,11 +328,37 @@ public class TestFileJournalManager { FileJournalManager jm = new FileJournalManager(sd); - EditLogInputStream elis = jm.getInputStream(5); + EditLogInputStream elis = jm.getInputStream(5, true); FSEditLogOp op = elis.readOp(); assertEquals("read unexpected op", op.getTransactionId(), 5); } + /** + * Make sure that in-progress streams aren't counted if we don't ask for + * them. + */ + @Test + public void testExcludeInProgressStreams() throws CorruptionException, + IOException { + File f = new File(TestEditLog.TEST_DIR + "/filejournaltest2"); + + // Don't close the edit log once the files have been set up. + NNStorage storage = setupEdits(Collections.singletonList(f.toURI()), + 10, false); + StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); + + FileJournalManager jm = new FileJournalManager(sd); + + // If we exclude the in-progess stream, we should only have 100 tx. 
+ assertEquals(100, jm.getNumberOfTransactions(1, false)); + + EditLogInputStream elis = jm.getInputStream(90, false); + FSEditLogOp lastReadOp = null; + while ((lastReadOp = elis.readOp()) != null) { + assertTrue(lastReadOp.getTransactionId() <= 100); + } + } + private static String getLogsAsString( FileJournalManager fjm, long firstTxId) throws IOException { return Joiner.on(",").join(fjm.getRemoteEditLogs(firstTxId)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java index 00fe43f404c..51e49a92375 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestGenericJournalConf.java @@ -144,13 +144,13 @@ public class TestGenericJournalConf { } @Override - public EditLogInputStream getInputStream(long fromTxnId) + public EditLogInputStream getInputStream(long fromTxnId, boolean inProgressOk) throws IOException { return null; } @Override - public long getNumberOfTransactions(long fromTxnId) + public long getNumberOfTransactions(long fromTxnId, boolean inProgressOk) throws IOException { return 0; } From 3e76f00baa6a5edb87761d69bbb8320d245c0621 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 11 Jan 2012 21:14:30 +0000 Subject: [PATCH 081/177] Fix expected error text in assertion. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1230254 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java index b07bad252ed..282ad68a37f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java @@ -248,7 +248,7 @@ public class TestDFSRollback extends TestCase { baseDirs = UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous"); deleteMatchingFiles(baseDirs, "edits.*"); startNameNodeShouldFail(StartupOption.ROLLBACK, - "No non-corrupt logs for txid "); + "Gap in transactions. Expected to be able to read up until at least txid "); UpgradeUtilities.createEmptyDirs(nameNodeDirs); log("NameNode rollback with no image file", numDirs); From 09e5af76f333f2924b5034bca8adfdcc99723a96 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Thu, 12 Jan 2012 01:39:30 +0000 Subject: [PATCH 082/177] HADOOP-7970. HAServiceProtocol methods must throw IOException.Contributed by Hari Mankude. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1230351 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 3 +++ .../apache/hadoop/ha/FailoverController.java | 7 +++++- .../apache/hadoop/ha/HAServiceProtocol.java | 22 +++++++++++++++---- .../hadoop/ha/HealthCheckFailedException.java | 4 +++- .../hadoop/ha/ServiceFailedException.java | 4 +++- .../hadoop/hdfs/server/namenode/NameNode.java | 3 --- 6 files changed, 33 insertions(+), 10 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 571fbcd7658..0b86369a2de 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -28,3 +28,6 @@ HADOOP-7932. Make client connection retries on socket time outs configurable. HADOOP-7924. 
FailoverController for client-based configuration (eli) HADOOP-7961. Move HA fencing to common. (eli) + +HADOOP-7970. HAServiceProtocol methods must throw IOException. +(Hari Mankude via suresh). diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java index cc60de66a39..6ab1a99ae55 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.ha; +import java.io.IOException; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -51,7 +53,7 @@ public class FailoverController { HAServiceState toSvcState; try { toSvcState = toSvc.getServiceState(); - } catch (Exception e) { + } catch (IOException e) { String msg = "Unable to get service state for " + toSvcName; LOG.error(msg, e); throw new FailoverFailedException(msg, e); @@ -65,6 +67,9 @@ public class FailoverController { } catch (HealthCheckFailedException hce) { throw new FailoverFailedException( "Can't failover to an unhealthy service", hce); + } catch (IOException e) { + throw new FailoverFailedException( + "Got an io exception", e); } // TODO(HA): ask toSvc if it's capable. Eg not in SM. } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java index ffb2f1d39ae..2243ba130b6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java @@ -21,6 +21,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.ipc.VersionedProtocol; +import java.io.IOException; + /** * Protocol interface that provides High Availability related primitives to * monitor and fail-over the service. @@ -69,8 +71,11 @@ public interface HAServiceProtocol extends VersionedProtocol { * * @throws HealthCheckFailedException * if the health check of a service fails. + * @throws IOException + * if other errors happen */ - public void monitorHealth() throws HealthCheckFailedException; + public void monitorHealth() throws HealthCheckFailedException, + IOException; /** * Request service to transition to active state. No operation, if the @@ -78,8 +83,11 @@ public interface HAServiceProtocol extends VersionedProtocol { * * @throws ServiceFailedException * if transition from standby to active fails. + * @throws IOException + * if other errors happen */ - public void transitionToActive() throws ServiceFailedException; + public void transitionToActive() throws ServiceFailedException, + IOException; /** * Request service to transition to standby state. No operation, if the @@ -87,11 +95,17 @@ public interface HAServiceProtocol extends VersionedProtocol { * * @throws ServiceFailedException * if transition from active to standby fails. + * @throws IOException + * if other errors happen */ - public void transitionToStandby() throws ServiceFailedException; + public void transitionToStandby() throws ServiceFailedException, + IOException; /** * Return the current state of the service. 
+ * + * @throws IOException + * if other errors happen */ - public HAServiceState getServiceState(); + public HAServiceState getServiceState() throws IOException; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java index 4d888be480e..e636adff3e2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HealthCheckFailedException.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.ha; +import java.io.IOException; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -25,7 +27,7 @@ import org.apache.hadoop.classification.InterfaceStability; */ @InterfaceAudience.Public @InterfaceStability.Evolving -public class HealthCheckFailedException extends Exception { +public class HealthCheckFailedException extends IOException { private static final long serialVersionUID = 1L; public HealthCheckFailedException(final String message) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java index 788a8430732..6f3e444b396 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ServiceFailedException.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.ha; +import java.io.IOException; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -27,7 +29,7 @@ import org.apache.hadoop.classification.InterfaceStability; */ @InterfaceAudience.Public @InterfaceStability.Evolving -public class ServiceFailedException extends Exception { +public class ServiceFailedException extends IOException { private static final long serialVersionUID = 1L; public ServiceFailedException(final String message) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 9b3375fca4d..bf31695eae8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -551,9 +551,6 @@ public class NameNode { } catch (HadoopIllegalArgumentException e) { this.stop(); throw e; - } catch (ServiceFailedException e) { - this.stop(); - throw new IOException("Service failed to start", e); } } From 72207596be4b293a44607003fa68c4e413e1baaf Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Fri, 13 Jan 2012 02:30:29 +0000 Subject: [PATCH 083/177] HADOOP-7972. HAServiceProtocol exceptions need to be unwrapped. Contributed by Hari Mankude. 
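Because these methods go over RPC, an exception raised on the server side
reaches the client wrapped in a RemoteException, so a caller that wants to
catch HealthCheckFailedException or ServiceFailedException by type has to
unwrap it first; the HAServiceProtocolHelper added in this patch centralizes
that. A rough sketch of the unwrapping idiom (the class and method names here
are illustrative; the real helper is in the diff that follows):

import java.io.IOException;

import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.ipc.RemoteException;

class UnwrapSketch {
  // Illustrative: convert a wrapped server-side HealthCheckFailedException
  // back into its declared type so callers can catch it directly.
  static void monitorHealth(HAServiceProtocol svc) throws IOException {
    try {
      svc.monitorHealth();
    } catch (RemoteException re) {
      // Rethrows as HealthCheckFailedException when that is the wrapped
      // class; otherwise the RemoteException itself is rethrown.
      throw re.unwrapRemoteException(HealthCheckFailedException.class);
    }
  }
}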
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1230861 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/ha/FailoverController.java | 8 +-- .../java/org/apache/hadoop/ha/HAAdmin.java | 6 +- .../hadoop/ha/HAServiceProtocolHelper.java | 64 +++++++++++++++++++ .../apache/hadoop/hdfs/MiniDFSCluster.java | 5 +- 4 files changed, 74 insertions(+), 9 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java index 6ab1a99ae55..0060567ebbd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java @@ -63,7 +63,7 @@ public class FailoverController { "Can't failover to an active service"); } try { - toSvc.monitorHealth(); + HAServiceProtocolHelper.monitorHealth(toSvc); } catch (HealthCheckFailedException hce) { throw new FailoverFailedException( "Can't failover to an unhealthy service", hce); @@ -91,7 +91,7 @@ public class FailoverController { // Try to make fromSvc standby try { - fromSvc.transitionToStandby(); + HAServiceProtocolHelper.transitionToStandby(fromSvc); } catch (ServiceFailedException sfe) { LOG.warn("Unable to make " + fromSvcName + " standby (" + sfe.getMessage() + ")"); @@ -105,7 +105,7 @@ public class FailoverController { boolean failed = false; Throwable cause = null; try { - toSvc.transitionToActive(); + HAServiceProtocolHelper.transitionToActive(toSvc); } catch (ServiceFailedException sfe) { LOG.error("Unable to make " + toSvcName + " active (" + sfe.getMessage() + "). 
Failing back"); @@ -122,7 +122,7 @@ public class FailoverController { if (failed) { String msg = "Unable to failover to " + toSvcName; try { - fromSvc.transitionToActive(); + HAServiceProtocolHelper.transitionToActive(fromSvc); } catch (ServiceFailedException sfe) { msg = "Failback to " + fromSvcName + " failed (" + sfe.getMessage() + ")"; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index 2dc5c1f39a3..7dbc17ed6e7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -92,7 +92,7 @@ public class HAAdmin extends Configured implements Tool { } HAServiceProtocol proto = getProtocol(argv[1]); - proto.transitionToActive(); + HAServiceProtocolHelper.transitionToActive(proto); return 0; } @@ -105,7 +105,7 @@ public class HAAdmin extends Configured implements Tool { } HAServiceProtocol proto = getProtocol(argv[1]); - proto.transitionToStandby(); + HAServiceProtocolHelper.transitionToStandby(proto); return 0; } @@ -139,7 +139,7 @@ public class HAAdmin extends Configured implements Tool { HAServiceProtocol proto = getProtocol(argv[1]); try { - proto.monitorHealth(); + HAServiceProtocolHelper.monitorHealth(proto); } catch (HealthCheckFailedException e) { errOut.println("Health check failed: " + e.getLocalizedMessage()); return 1; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java new file mode 100644 index 00000000000..c8de74269e3 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ha; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.ipc.RemoteException; + +/** + * Helper for making {@link HAServiceProtocol} RPC calls. This helper + * unwraps the {@link RemoteException} to specific exceptions. 
+ * + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class HAServiceProtocolHelper { + public static void monitorHealth(HAServiceProtocol svc) + throws IOException { + try { + svc.monitorHealth(); + } catch (RemoteException e) { + throw e.unwrapRemoteException(HealthCheckFailedException.class); + } + } + + public static void transitionToActive(HAServiceProtocol svc) + throws IOException { + try { + svc.transitionToActive(); + } catch (RemoteException e) { + throw e.unwrapRemoteException(ServiceFailedException.class); + } + } + + public static void transitionToStandby(HAServiceProtocol svc) + throws IOException { + try { + svc.transitionToStandby(); + } catch (RemoteException e) { + throw e.unwrapRemoteException(ServiceFailedException.class); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index da9724ef1f7..0357c5d714f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -47,6 +47,7 @@ import org.apache.hadoop.fs.Path; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import org.apache.hadoop.ha.HAServiceProtocol; +import org.apache.hadoop.ha.HAServiceProtocolHelper; import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf; import org.apache.hadoop.hdfs.protocol.Block; @@ -1590,12 +1591,12 @@ public class MiniDFSCluster { public void transitionToActive(int nnIndex) throws IOException, ServiceFailedException { - getHaServiceClient(nnIndex).transitionToActive(); + HAServiceProtocolHelper.transitionToActive(getHaServiceClient(nnIndex)); } public void transitionToStandby(int nnIndex) throws IOException, ServiceFailedException { - getHaServiceClient(nnIndex).transitionToStandby(); + HAServiceProtocolHelper.transitionToStandby(getHaServiceClient(nnIndex)); } From 1c24ae0cd82f1cf583a691a6fcd285ed806cc08e Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Sat, 14 Jan 2012 20:19:55 +0000 Subject: [PATCH 084/177] HDFS-2789. TestHAAdmin.testFailover is failing. 
Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1231577 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/test/java/org/apache/hadoop/ha/TestHAAdmin.java | 2 ++ hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ 2 files changed, 4 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java index ca3d9eccaf3..9bea4849947 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java @@ -26,6 +26,7 @@ import java.io.PrintStream; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.Log; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.junit.Before; import org.junit.Test; @@ -102,6 +103,7 @@ public class TestHAAdmin { @Test public void testFailover() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); assertEquals(0, runTool("-failover", "xxx", "yyy")); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 74bdec7d88a..f472704e7ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -103,3 +103,5 @@ HDFS-2775. Fix TestStandbyCheckpoints.testBothNodesInStandbyState failing interm HDFS-2766. Test for case where standby partially reads log and then performs checkpoint. (atm) HDFS-2738. FSEditLog.selectinputStreams is reading through in-progress streams even when non-in-progress are requested. (atm) + +HDFS-2789. TestHAAdmin.testFailover is failing (eli) From 2f26475a39f94e756ef4d15ff8c863b3f692a29e Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 16 Jan 2012 21:46:05 +0000 Subject: [PATCH 085/177] HDFS-2747. Entering safe mode after starting SBN can NPE. Contributed by Uma Maheswara Rao G. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232176 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/FSNamesystem.java | 37 ++++++----- .../server/namenode/ha/TestHASafeMode.java | 63 +++++++++++++++++++ 3 files changed, 87 insertions(+), 15 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index f472704e7ad..fc245ed3e69 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -105,3 +105,5 @@ HDFS-2766. Test for case where standby partially reads log and then performs che HDFS-2738. FSEditLog.selectinputStreams is reading through in-progress streams even when non-in-progress are requested. (atm) HDFS-2789. TestHAAdmin.testFailover is failing (eli) + +HDFS-2747. Entering safe mode after starting SBN can NPE. 
(Uma Maheswara Rao G via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index b4f522e00b3..f1664f7e62d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -3774,21 +3774,28 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void enterSafeMode(boolean resourcesLow) throws IOException { writeLock(); try { - // Ensure that any concurrent operations have been fully synced - // before entering safe mode. This ensures that the FSImage - // is entirely stable on disk as soon as we're in safe mode. - getEditLog().logSyncAll(); - if (!isInSafeMode()) { - safeMode = new SafeModeInfo(resourcesLow); - return; - } - if (resourcesLow) { - safeMode.setResourcesLow(); - } - safeMode.setManual(); - getEditLog().logSyncAll(); - NameNode.stateChangeLog.info("STATE* Safe mode is ON. " - + safeMode.getTurnOffTip()); + // Ensure that any concurrent operations have been fully synced + // before entering safe mode. This ensures that the FSImage + // is entirely stable on disk as soon as we're in safe mode. + boolean isEditlogOpenForWrite = getEditLog().isOpenForWrite(); + // Before Editlog is in OpenForWrite mode, editLogStream will be null. So, + // logSyncAll call can be called only when Edlitlog is in OpenForWrite mode + if (isEditlogOpenForWrite) { + getEditLog().logSyncAll(); + } + if (!isInSafeMode()) { + safeMode = new SafeModeInfo(resourcesLow); + return; + } + if (resourcesLow) { + safeMode.setResourcesLow(); + } + safeMode.setManual(); + if (isEditlogOpenForWrite) { + getEditLog().logSyncAll(); + } + NameNode.stateChangeLog.info("STATE* Safe mode is ON. " + + safeMode.getTurnOffTip()); } finally { writeUnlock(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index a76470f1c41..af7985e21d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.junit.After; @@ -95,6 +96,68 @@ public class TestHASafeMode { nn1.getNamesystem().getEditLogTailer().interrupt(); } + /** + * Test case for enter safemode in active namenode, when it is already in startup safemode. + * It is a regression test for HDFS-2747. 
+ */ + @Test + public void testEnterSafeModeInANNShouldNotThrowNPE() throws Exception { + banner("Restarting active"); + restartActive(); + FSNamesystem namesystem = nn0.getNamesystem(); + String status = namesystem.getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", status + .startsWith("Safe mode is ON.")); + NameNodeAdapter.enterSafeMode(nn0, false); + assertTrue("Failed to enter into safemode in active", namesystem + .isInSafeMode()); + NameNodeAdapter.enterSafeMode(nn0, false); + assertTrue("Failed to enter into safemode in active", namesystem + .isInSafeMode()); + } + + /** + * Test case for enter safemode in standby namenode, when it is already in startup safemode. + * It is a regression test for HDFS-2747. + */ + @Test + public void testEnterSafeModeInSBNShouldNotThrowNPE() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil + .createFile(fs, new Path("/test"), 3 * BLOCK_SIZE, (short) 3, 1L); + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup and enter safemode. + nn0.getRpcServer().rollEditLog(); + banner("Creating some blocks that won't be in the edit log"); + DFSTestUtil.createFile(fs, new Path("/test2"), 5 * BLOCK_SIZE, (short) 3, + 1L); + banner("Deleting the original blocks"); + fs.delete(new Path("/test"), true); + banner("Restarting standby"); + restartStandby(); + FSNamesystem namesystem = nn1.getNamesystem(); + String status = namesystem.getSafemode(); + assertTrue("Bad safemode status: '" + status + "'", status + .startsWith("Safe mode is ON.")); + NameNodeAdapter.enterSafeMode(nn1, false); + assertTrue("Failed to enter into safemode in standby", namesystem + .isInSafeMode()); + NameNodeAdapter.enterSafeMode(nn1, false); + assertTrue("Failed to enter into safemode in standby", namesystem + .isInSafeMode()); + } + + private void restartActive() throws IOException { + cluster.shutdownNameNode(0); + // Set the safemode extension to be lengthy, so that the tests + // can check the safemode message after the safemode conditions + // have been achieved, without being racy. + cluster.getConfiguration(0).setInt( + DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 30000); + cluster.restartNameNode(0); + nn0 = cluster.getNameNode(0); + } + /** * Tests the case where, while a standby is down, more blocks are * added to the namespace, but not rolled. So, when it starts up, From d880c7cc784cf636b2590fd98ea7c8ee67065a30 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Mon, 16 Jan 2012 22:16:15 +0000 Subject: [PATCH 086/177] HDFS-2772. On transition to active, standby should not swallow ELIE. Contributed by Aaron T. Myers git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232197 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../server/namenode/ha/EditLogTailer.java | 16 +++--- .../namenode/ha/TestFailureToReadEdits.java | 49 ++++++++++++++++++- 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index fc245ed3e69..e694bed695d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -107,3 +107,5 @@ HDFS-2738. FSEditLog.selectinputStreams is reading through in-progress streams e HDFS-2789. TestHAAdmin.testFailover is failing (eli) HDFS-2747. Entering safe mode after starting SBN can NPE. 
(Uma Maheswara Rao G via todd) + +HDFS-2772. On transition to active, standby should not swallow ELIE. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index 097332b1404..264e3a72e61 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -144,11 +144,13 @@ public class EditLogTailer { try { editsLoaded = image.loadEdits(streams, namesystem); } catch (EditLogInputException elie) { - LOG.warn("Error while reading edits from disk. Will try again.", elie); editsLoaded = elie.getNumEditsLoaded(); - } - if (LOG.isDebugEnabled()) { - LOG.debug("editsLoaded: " + editsLoaded); + throw elie; + } finally { + if (editsLoaded > 0) { + LOG.info(String.format("Loaded %d edits starting from txid %d ", + editsLoaded, lastTxnId)); + } } } finally { namesystem.writeUnlock(); @@ -180,12 +182,14 @@ public class EditLogTailer { while (shouldRun) { try { doTailEdits(); + } catch (EditLogInputException elie) { + LOG.warn("Error while reading edits from disk. Will try again.", elie); } catch (InterruptedException ie) { // interrupter should have already set shouldRun to false continue; } catch (Throwable t) { - LOG.error("Error encountered while tailing edits. Shutting down " + - "standby NN.", t); + LOG.error("Unknown error encountered while tailing edits. " + + "Shutting down standby NN.", t); runtime.exit(1); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java index ca51b4eb9ba..24b2c0866e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java @@ -30,17 +30,20 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import java.io.IOException; -import java.net.URI; import java.util.Collection; import java.util.LinkedList; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.EditLogInputException; import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream; import org.apache.hadoop.hdfs.server.namenode.FSEditLog; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; @@ -55,6 +58,9 @@ import org.mockito.stubbing.Answer; import com.google.common.collect.ImmutableList; public class TestFailureToReadEdits { + + private static final Log LOG = LogFactory.getLog(TestFailureToReadEdits.class); + private static final String TEST_DIR1 = "/test1"; private static final String TEST_DIR2 = "/test2"; private static final String TEST_DIR3 = "/test3"; @@ -221,6 +227,47 @@ public class TestFailureToReadEdits { fs0.close(); 
} } + + /** + * Ensure that the standby fails to become active if it cannot read all + * available edits in the shared edits dir when it is transitioning to active + * state. + */ + @Test + public void testFailureToReadEditsOnTransitionToActive() throws Exception { + assertTrue(fs.mkdirs(new Path(TEST_DIR1))); + + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + + // It should also upload it back to the active. + HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 3)); + + causeFailureOnEditLogRead(); + + assertTrue(fs.mkdirs(new Path(TEST_DIR2))); + assertTrue(fs.mkdirs(new Path(TEST_DIR3))); + + try { + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + fail("Standby fully caught up, but should not have been able to"); + } catch (HATestUtil.CouldNotCatchUpException e) { + verify(mockRuntime, times(0)).exit(anyInt()); + } + + // Shutdown the active NN. + cluster.shutdownNameNode(0); + + try { + // Transition the standby to active. + cluster.transitionToActive(1); + fail("Standby transitioned to active, but should not have been able to"); + } catch (ServiceFailedException sfe) { + LOG.info("got expected exception: " + sfe.toString(), sfe); + assertTrue("Standby failed to catch up for some reason other than " + + "failure to read logs", sfe.toString().contains( + EditLogInputException.class.getName())); + } + } private LimitedEditLogAnswer causeFailureOnEditLogRead() throws IOException { FSEditLog spyEditLog = spy(nn1.getNamesystem().getEditLogTailer() From 212678f036f4f96493bc14a584e758f97cf65573 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 17 Jan 2012 03:10:25 +0000 Subject: [PATCH 087/177] HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. Contributed by Uma Maheswara Rao G. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232284 13f79535-47bb-0310-9956-ffa450edef68 --- .../retry/DefaultFailoverProxyProvider.java | 14 +-- .../io/retry/FailoverProxyProvider.java | 8 +- .../hadoop/io/retry/TestFailoverProxy.java | 19 ++-- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSClient.java | 55 +---------- .../java/org/apache/hadoop/hdfs/DFSUtil.java | 33 +++++++ .../java/org/apache/hadoop/hdfs/HAUtil.java | 93 ++++++++++++++++++- .../server/balancer/NameNodeConnector.java | 32 +------ .../ha/ConfiguredFailoverProxyProvider.java | 53 ++++++++--- 9 files changed, 191 insertions(+), 118 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/DefaultFailoverProxyProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/DefaultFailoverProxyProvider.java index 812a46e02bd..ae37d0bed4a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/DefaultFailoverProxyProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/DefaultFailoverProxyProvider.java @@ -27,28 +27,28 @@ import org.apache.hadoop.ipc.RPC; * event of failover, and always returns the same proxy object. 
*/ @InterfaceStability.Evolving -public class DefaultFailoverProxyProvider implements FailoverProxyProvider { +public class DefaultFailoverProxyProvider implements FailoverProxyProvider { - private Object proxy; - private Class iface; + private T proxy; + private Class iface; - public DefaultFailoverProxyProvider(Class iface, Object proxy) { + public DefaultFailoverProxyProvider(Class iface, T proxy) { this.proxy = proxy; this.iface = iface; } @Override - public Class getInterface() { + public Class getInterface() { return iface; } @Override - public Object getProxy() { + public T getProxy() { return proxy; } @Override - public void performFailover(Object currentProxy) { + public void performFailover(T currentProxy) { // Nothing to do. } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/FailoverProxyProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/FailoverProxyProvider.java index 707a40d8888..ba7d29f0d52 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/FailoverProxyProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/FailoverProxyProvider.java @@ -29,7 +29,7 @@ import org.apache.hadoop.classification.InterfaceStability; * {@link RetryPolicy}. */ @InterfaceStability.Evolving -public interface FailoverProxyProvider extends Closeable { +public interface FailoverProxyProvider extends Closeable { /** * Get the proxy object which should be used until the next failover event @@ -37,7 +37,7 @@ public interface FailoverProxyProvider extends Closeable { * * @return the proxy object to invoke methods upon */ - public Object getProxy(); + public T getProxy(); /** * Called whenever the associated {@link RetryPolicy} determines that an error @@ -46,7 +46,7 @@ public interface FailoverProxyProvider extends Closeable { * @param currentProxy the proxy object which was being used before this * failover event */ - public void performFailover(Object currentProxy); + public void performFailover(T currentProxy); /** * Return a reference to the interface this provider's proxy objects actually @@ -58,5 +58,5 @@ public interface FailoverProxyProvider extends Closeable { * @return the interface implemented by the proxy objects returned by * {@link FailoverProxyProvider#getProxy()} */ - public Class getInterface(); + public Class getInterface(); } \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java index 0a2963f7be5..2a6dc2622fd 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java @@ -28,19 +28,20 @@ import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.util.ThreadUtil; import org.junit.Test; +@SuppressWarnings("unchecked") public class TestFailoverProxy { - public static class FlipFlopProxyProvider implements FailoverProxyProvider { + public static class FlipFlopProxyProvider implements FailoverProxyProvider { - private Class iface; - private Object currentlyActive; - private Object impl1; - private Object impl2; + private Class iface; + private T currentlyActive; + private T impl1; + private T impl2; private int failoversOccurred = 0; - public FlipFlopProxyProvider(Class iface, 
Object activeImpl, - Object standbyImpl) { + public FlipFlopProxyProvider(Class iface, T activeImpl, + T standbyImpl) { this.iface = iface; this.impl1 = activeImpl; this.impl2 = standbyImpl; @@ -48,7 +49,7 @@ public class TestFailoverProxy { } @Override - public Object getProxy() { + public T getProxy() { return currentlyActive; } @@ -59,7 +60,7 @@ public class TestFailoverProxy { } @Override - public Class getInterface() { + public Class getInterface() { return iface; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index e694bed695d..e5f5ede8d90 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -109,3 +109,5 @@ HDFS-2789. TestHAAdmin.testFailover is failing (eli) HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G via todd) HDFS-2772. On transition to active, standby should not swallow ELIE. (atm) + +HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. (Uma Maheswara Rao G via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 46fa863f3b1..71014118a23 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -94,9 +94,6 @@ import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.retry.FailoverProxyProvider; -import org.apache.hadoop.io.retry.RetryPolicies; -import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.ipc.Client; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RemoteException; @@ -109,7 +106,6 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenRenewer; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.ReflectionUtils; /******************************************************** * DFSClient can connect to a Hadoop Filesystem and @@ -312,20 +308,10 @@ public class DFSClient implements java.io.Closeable { this.clientName = leaserenewer.getClientName(dfsClientConf.taskId); this.socketCache = new SocketCache(dfsClientConf.socketCacheCapacity); - - Class failoverProxyProviderClass = getFailoverProxyProviderClass( - nameNodeUri, conf); - - if (nameNodeUri != null && failoverProxyProviderClass != null) { - FailoverProxyProvider failoverProxyProvider = (FailoverProxyProvider) - ReflectionUtils.newInstance(failoverProxyProviderClass, conf); - this.namenode = (ClientProtocol)RetryProxy.create(ClientProtocol.class, - failoverProxyProvider, - RetryPolicies.failoverOnNetworkException( - RetryPolicies.TRY_ONCE_THEN_FAIL, - dfsClientConf.maxFailoverAttempts, - dfsClientConf.failoverSleepBaseMillis, - dfsClientConf.failoverSleepMaxMillis)); + ClientProtocol failoverNNProxy = (ClientProtocol) HAUtil + .createFailoverProxy(conf, nameNodeUri, ClientProtocol.class); + if (nameNodeUri != null && failoverNNProxy != null) { + this.namenode = failoverNNProxy; nnAddress = null; } else if (nameNodeUri != null && rpcNamenode == null) { this.namenode = DFSUtil.createNamenode(NameNode.getAddress(nameNodeUri), conf); @@ -353,39 +339,6 @@ public class DFSClient implements 
java.io.Closeable { LOG.debug("Short circuit read is " + shortCircuitLocalReads); } } - - private Class getFailoverProxyProviderClass(URI nameNodeUri, Configuration conf) - throws IOException { - if (nameNodeUri == null) { - return null; - } - String host = nameNodeUri.getHost(); - - String configKey = DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + host; - try { - Class ret = conf.getClass(configKey, null); - if (ret != null) { - // If we found a proxy provider, then this URI should be a logical NN. - // Given that, it shouldn't have a non-default port number. - int port = nameNodeUri.getPort(); - if (port > 0 && port != NameNode.DEFAULT_PORT) { - throw new IOException( - "Port " + port + " specified in URI " + nameNodeUri + - " but host '" + host + "' is a logical (HA) namenode" + - " and does not use port information."); - } - } - return ret; - } catch (RuntimeException e) { - if (e.getCause() instanceof ClassNotFoundException) { - throw new IOException("Could not load failover proxy provider class " - + conf.get(configKey) + " which is configured for authority " + nameNodeUri, - e); - } else { - throw e; - } - } - } /** * Return the number of times the client should go back to the namenode diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 29cb3b3339f..d5dc5b30c54 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -28,10 +28,12 @@ import java.security.SecureRandom; import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; import java.util.StringTokenizer; +import java.util.concurrent.TimeUnit; import javax.net.SocketFactory; @@ -47,7 +49,12 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolTranslatorPB; +import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; @@ -809,6 +816,32 @@ public class DFSUtil { return new ClientDatanodeProtocolTranslatorPB(addr, ticket, conf, factory); } + /** + * Build a NamenodeProtocol connection to the namenode and set up the retry + * policy + */ + public static NamenodeProtocolTranslatorPB createNNProxyWithNamenodeProtocol( + InetSocketAddress address, Configuration conf, UserGroupInformation ugi) + throws IOException { + RetryPolicy timeoutPolicy = RetryPolicies.exponentialBackoffRetry(5, 200, + TimeUnit.MILLISECONDS); + Map, RetryPolicy> exceptionToPolicyMap + = new HashMap, RetryPolicy>(); + RetryPolicy methodPolicy = RetryPolicies.retryByException(timeoutPolicy, + exceptionToPolicyMap); + Map methodNameToPolicyMap = new HashMap(); + methodNameToPolicyMap.put("getBlocks", methodPolicy); + methodNameToPolicyMap.put("getAccessKeys", methodPolicy); + RPC.setProtocolEngine(conf, 
NamenodeProtocolPB.class, + ProtobufRpcEngine.class); + NamenodeProtocolPB proxy = RPC.getProxy(NamenodeProtocolPB.class, RPC + .getProtocolVersion(NamenodeProtocolPB.class), address, ugi, conf, + NetUtils.getDefaultSocketFactory(conf)); + NamenodeProtocolPB retryProxy = (NamenodeProtocolPB) RetryProxy.create( + NamenodeProtocolPB.class, proxy, methodNameToPolicyMap); + return new NamenodeProtocolTranslatorPB(retryProxy); + } + /** * Get nameservice Id for the {@link NameNode} based on namenode RPC address * matching the local node address. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 6a619712c48..1dc2bf67581 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -18,13 +18,23 @@ package org.apache.hadoop.hdfs; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; - +import java.io.IOException; +import java.lang.reflect.Constructor; import java.net.InetSocketAddress; -import java.util.Collection; +import java.net.URI; import java.util.Map; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSClient.Conf; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; +import org.apache.hadoop.io.retry.FailoverProxyProvider; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryProxy; +import org.apache.hadoop.util.ReflectionUtils; + +import com.google.common.base.Preconditions; public class HAUtil { private HAUtil() { /* Hidden constructor */ } @@ -110,5 +120,84 @@ public class HAUtil { public static void setAllowStandbyReads(Configuration conf, boolean val) { conf.setBoolean("dfs.ha.allow.stale.reads", val); } + + /** Creates the Failover proxy provider instance*/ + @SuppressWarnings("unchecked") + public static FailoverProxyProvider createFailoverProxyProvider( + Configuration conf, Class> failoverProxyProviderClass, + Class xface) throws IOException { + Preconditions.checkArgument( + xface.isAssignableFrom(NamenodeProtocols.class), + "Interface %s is not a NameNode protocol", xface); + try { + Constructor> ctor = failoverProxyProviderClass + .getConstructor(Class.class); + FailoverProxyProvider provider = ctor.newInstance(xface); + ReflectionUtils.setConf(provider, conf); + return (FailoverProxyProvider) provider; + } catch (Exception e) { + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } else { + throw new IOException( + "Couldn't create proxy provider " + failoverProxyProviderClass, e); + } + } + } + /** Gets the configured Failover proxy provider's class */ + public static Class> getFailoverProxyProviderClass( + Configuration conf, URI nameNodeUri, Class xface) throws IOException { + if (nameNodeUri == null) { + return null; + } + String host = nameNodeUri.getHost(); + + String configKey = DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + + host; + try { + @SuppressWarnings("unchecked") + Class> ret = (Class>) conf + .getClass(configKey, null, FailoverProxyProvider.class); + if (ret != null) { + // If we found a proxy provider, then this URI should be a logical NN. + // Given that, it shouldn't have a non-default port number. 
+ int port = nameNodeUri.getPort(); + if (port > 0 && port != NameNode.DEFAULT_PORT) { + throw new IOException("Port " + port + " specified in URI " + + nameNodeUri + " but host '" + host + + "' is a logical (HA) namenode" + + " and does not use port information."); + } + } + return ret; + } catch (RuntimeException e) { + if (e.getCause() instanceof ClassNotFoundException) { + throw new IOException("Could not load failover proxy provider class " + + conf.get(configKey) + " which is configured for authority " + + nameNodeUri, e); + } else { + throw e; + } + } + } + + /** Creates the namenode proxy with the passed Protocol */ + @SuppressWarnings("unchecked") + public static Object createFailoverProxy(Configuration conf, URI nameNodeUri, + Class xface) throws IOException { + Class> failoverProxyProviderClass = HAUtil + .getFailoverProxyProviderClass(conf, nameNodeUri, xface); + if (failoverProxyProviderClass != null) { + FailoverProxyProvider failoverProxyProvider = HAUtil + .createFailoverProxyProvider(conf, failoverProxyProviderClass, xface); + Conf config = new Conf(conf); + return RetryProxy.create(xface, failoverProxyProvider, RetryPolicies + .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL, + config.maxFailoverAttempts, config.failoverSleepBaseMillis, + config.failoverSleepMaxMillis)); + } + return null; + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java index 227ad722b98..939105871cb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetAddress; import java.net.InetSocketAddress; -import java.util.ArrayList; import java.util.Collection; import java.util.EnumSet; import java.util.HashMap; @@ -58,7 +57,6 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Daemon; -import com.google.common.collect.Collections2; import com.google.common.collect.Lists; /** @@ -88,7 +86,8 @@ class NameNodeConnector { InetSocketAddress nn = Lists.newArrayList(haNNs).get(0); // TODO(HA): need to deal with connecting to HA NN pair here this.namenodeAddress = nn; - this.namenode = createNamenode(nn, conf); + this.namenode = DFSUtil.createNNProxyWithNamenodeProtocol(nn, conf, + UserGroupInformation.getCurrentUser()); this.client = DFSUtil.createNamenode(conf); this.fs = FileSystem.get(NameNode.getUri(nn), conf); @@ -196,33 +195,6 @@ class NameNodeConnector { + "]"; } - /** Build a NamenodeProtocol connection to the namenode and - * set up the retry policy - */ - private static NamenodeProtocol createNamenode(InetSocketAddress address, - Configuration conf) throws IOException { - RetryPolicy timeoutPolicy = RetryPolicies.exponentialBackoffRetry( - 5, 200, TimeUnit.MILLISECONDS); - Map,RetryPolicy> exceptionToPolicyMap = - new HashMap, RetryPolicy>(); - RetryPolicy methodPolicy = RetryPolicies.retryByException( - timeoutPolicy, exceptionToPolicyMap); - Map methodNameToPolicyMap = - new HashMap(); - methodNameToPolicyMap.put("getBlocks", methodPolicy); - methodNameToPolicyMap.put("getAccessKeys", methodPolicy); - - RPC.setProtocolEngine(conf, 
NamenodeProtocolPB.class, - ProtobufRpcEngine.class); - NamenodeProtocolPB proxy = RPC.getProxy(NamenodeProtocolPB.class, - RPC.getProtocolVersion(NamenodeProtocolPB.class), address, - UserGroupInformation.getCurrentUser(), conf, - NetUtils.getDefaultSocketFactory(conf)); - NamenodeProtocolPB retryProxy = (NamenodeProtocolPB) RetryProxy.create( - NamenodeProtocolPB.class, proxy, methodNameToPolicyMap); - return new NamenodeProtocolTranslatorPB(retryProxy); - } - /** * Periodically updates access keys. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index 65e4655b52a..c44c1c1d74b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -32,52 +32,75 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; +import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.io.retry.FailoverProxyProvider; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.security.UserGroupInformation; +import com.google.common.base.Preconditions; + /** * A FailoverProxyProvider implementation which allows one to configure two URIs * to connect to during fail-over. The first configured address is tried first, * and on a fail-over event the other address is tried. */ -public class ConfiguredFailoverProxyProvider implements FailoverProxyProvider, - Configurable { +public class ConfiguredFailoverProxyProvider implements + FailoverProxyProvider, Configurable { private static final Log LOG = LogFactory.getLog(ConfiguredFailoverProxyProvider.class); private Configuration conf; private int currentProxyIndex = 0; - private List proxies = new ArrayList(); + private List> proxies = new ArrayList>(); private UserGroupInformation ugi; + private final Class xface; + public ConfiguredFailoverProxyProvider(Class xface) { + Preconditions.checkArgument( + xface.isAssignableFrom(NamenodeProtocols.class), + "Interface class %s is not a valid NameNode protocol!"); + this.xface = xface; + } + @Override - public Class getInterface() { - return ClientProtocol.class; + public Class getInterface() { + return xface; } /** * Lazily initialize the RPC proxy object. */ + @SuppressWarnings("unchecked") @Override - public synchronized Object getProxy() { + public synchronized T getProxy() { AddressRpcProxyPair current = proxies.get(currentProxyIndex); if (current.namenode == null) { try { - // TODO(HA): This will create a NN proxy with an underlying retry - // proxy. We don't want this. - current.namenode = DFSUtil.createNamenode(current.address, conf, ugi); + if (NamenodeProtocol.class.equals(xface)) { + current.namenode = DFSUtil.createNNProxyWithNamenodeProtocol( + current.address, conf, ugi); + } else if (ClientProtocol.class.equals(xface)) { + // TODO(HA): This will create a NN proxy with an underlying retry + // proxy. We don't want this. 
+ current.namenode = DFSUtil.createNamenode(current.address, conf, ugi); + } else { + throw new IllegalStateException( + "Upsupported protocol found when creating the proxy conection to NameNode. " + + ((xface != null) ? xface.getClass().getName() : xface) + + " is not supported by " + this.getClass().getName()); + } } catch (IOException e) { LOG.error("Failed to create RPC proxy to NameNode", e); throw new RuntimeException(e); } } - return current.namenode; + return (T)current.namenode; } @Override - public synchronized void performFailover(Object currentProxy) { + public synchronized void performFailover(T currentProxy) { currentProxyIndex = (currentProxyIndex + 1) % proxies.size(); } @@ -113,7 +136,7 @@ public class ConfiguredFailoverProxyProvider implements FailoverProxyProvider, Map addressesInNN = map.get(nsId); for (InetSocketAddress address : addressesInNN.values()) { - proxies.add(new AddressRpcProxyPair(address)); + proxies.add(new AddressRpcProxyPair(address)); } } catch (IOException e) { throw new RuntimeException(e); @@ -124,9 +147,9 @@ public class ConfiguredFailoverProxyProvider implements FailoverProxyProvider, * A little pair object to store the address and connected RPC proxy object to * an NN. Note that {@link AddressRpcProxyPair#namenode} may be null. */ - private static class AddressRpcProxyPair { + private static class AddressRpcProxyPair { public InetSocketAddress address; - public ClientProtocol namenode; + public T namenode; public AddressRpcProxyPair(InetSocketAddress address) { this.address = address; @@ -139,7 +162,7 @@ public class ConfiguredFailoverProxyProvider implements FailoverProxyProvider, */ @Override public synchronized void close() throws IOException { - for (AddressRpcProxyPair proxy : proxies) { + for (AddressRpcProxyPair proxy : proxies) { if (proxy.namenode != null) { if (proxy.namenode instanceof Closeable) { ((Closeable)proxy.namenode).close(); From 0c1450ca5d922b5bf713bb8bb17459dc11a97330 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 17 Jan 2012 03:21:08 +0000 Subject: [PATCH 088/177] HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232285 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../server/blockmanagement/BlockManager.java | 3 + .../blockmanagement/BlockManagerTestUtil.java | 33 ++++++++ .../server/blockmanagement/TestNodeCount.java | 23 +----- .../server/namenode/ha/TestStandbyIsHot.java | 78 +++++++++++++++++++ 5 files changed, 120 insertions(+), 19 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index e5f5ede8d90..605e7e37d2f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -111,3 +111,5 @@ HDFS-2747. Entering safe mode after starting SBN can NPE. (Uma Maheswara Rao G v HDFS-2772. On transition to active, standby should not swallow ELIE. (atm) HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. (Uma Maheswara Rao G via todd) + +HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index ce01502972b..551bcd13012 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2502,6 +2502,9 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block final int curReplicasDelta, int expectedReplicasDelta) { namesystem.writeLock(); try { + if (!namesystem.isPopulatingReplQueues()) { + return; + } NumberReplicas repl = countNodes(block); int curExpectedReplicas = getReplication(block); if (isNeededReplication(block, curExpectedReplicas, repl.liveReplicas())) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java index 38de3deba81..66c10ceb253 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManagerTestUtil.java @@ -24,8 +24,11 @@ import java.util.Iterator; import java.util.Set; import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.util.Daemon; +import org.junit.Assert; public class BlockManagerTestUtil { public static void setNodeReplicationLimit(final BlockManager blockManager, @@ -144,4 +147,34 @@ public class BlockManagerTestUtil { work += bm.computeReplicationWork(Integer.MAX_VALUE); return work; } + + /** + * Ensure that the given NameNode marks the specified DataNode as + * entirely dead/expired. 
+ * @param nn the NameNode to manipulate + * @param dnName the name of the DataNode + */ + public static void noticeDeadDatanode(NameNode nn, String dnName) { + FSNamesystem namesystem = nn.getNamesystem(); + namesystem.writeLock(); + try { + DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager(); + HeartbeatManager hbm = dnm.getHeartbeatManager(); + DatanodeDescriptor[] dnds = hbm.getDatanodes(); + DatanodeDescriptor theDND = null; + for (DatanodeDescriptor dnd : dnds) { + if (dnd.getName().equals(dnName)) { + theDND = dnd; + } + } + Assert.assertNotNull("Could not find DN with name: " + dnName, theDND); + + synchronized (hbm) { + theDND.setLastUpdate(0); + hbm.heartbeatCheck(); + } + } finally { + namesystem.writeUnlock(); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java index 986ca13ed1f..d47f1103446 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestNodeCount.java @@ -81,15 +81,8 @@ public class TestNodeCount extends TestCase { DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName()); // make sure that NN detects that the datanode is down - try { - namesystem.writeLock(); - synchronized (hm) { - datanode.setLastUpdate(0); // mark it dead - hm.heartbeatCheck(); - } - } finally { - namesystem.writeUnlock(); - } + BlockManagerTestUtil.noticeDeadDatanode( + cluster.getNameNode(), datanode.getName()); // the block will be replicated DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR); @@ -121,16 +114,8 @@ public class TestNodeCount extends TestCase { // bring down non excessive datanode dnprop = cluster.stopDataNode(nonExcessDN.getName()); // make sure that NN detects that the datanode is down - - try { - namesystem.writeLock(); - synchronized(hm) { - nonExcessDN.setLastUpdate(0); // mark it dead - hm.heartbeatCheck(); - } - } finally { - namesystem.writeUnlock(); - } + BlockManagerTestUtil.noticeDeadDatanode( + cluster.getNameNode(), nonExcessDN.getName()); // The block should be replicated initializeTimeout(TIMEOUT); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java index ff87ebcc6fa..7bb8d814d22 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -33,13 +33,16 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.AppendTestUtil; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; +import 
org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; @@ -131,6 +134,81 @@ public class TestStandbyIsHot { cluster.shutdown(); } } + + /** + * Regression test for HDFS-2795: + * - Start an HA cluster with a DN. + * - Write several blocks to the FS with replication 1. + * - Shutdown the DN + * - Wait for the NNs to declare the DN dead. All blocks will be under-replicated. + * - Restart the DN. + * In the bug, the standby node would only very slowly notice the blocks returning + * to the cluster. + */ + @Test + public void testDatanodeRestarts() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024); + // We read from the standby to watch block locations + HAUtil.setAllowStandbyReads(conf, true); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .build(); + try { + NameNode nn0 = cluster.getNameNode(0); + NameNode nn1 = cluster.getNameNode(1); + nn1.getNamesystem().getEditLogTailer().setSleepTime(250); + nn1.getNamesystem().getEditLogTailer().interrupt(); + + cluster.transitionToActive(0); + + // Create 5 blocks. + DFSTestUtil.createFile(cluster.getFileSystem(0), + TEST_FILE_PATH, 5*1024, (short)1, 1L); + + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + + // Stop the DN. + DataNode dn = cluster.getDataNodes().get(0); + String dnName = dn.getDatanodeId().getName(); + DataNodeProperties dnProps = cluster.stopDataNode(0); + + // Make sure both NNs register it as dead. + BlockManagerTestUtil.noticeDeadDatanode(nn0, dnName); + BlockManagerTestUtil.noticeDeadDatanode(nn1, dnName); + + BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager()); + BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager()); + assertEquals(5, nn0.getNamesystem().getUnderReplicatedBlocks()); + + // The SBN will not have any blocks in its neededReplication queue + // since the SBN doesn't process replication. + assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks()); + + LocatedBlocks locs = nn1.getRpcServer().getBlockLocations( + TEST_FILE, 0, 1); + assertEquals("Standby should have registered that the block has no replicas", + 0, locs.get(0).getLocations().length); + + cluster.restartDataNode(dnProps); + // Wait for both NNs to re-register the DN. + cluster.waitActive(0); + cluster.waitActive(1); + + BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager()); + BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager()); + assertEquals(0, nn0.getNamesystem().getUnderReplicatedBlocks()); + assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks()); + + locs = nn1.getRpcServer().getBlockLocations( + TEST_FILE, 0, 1); + assertEquals("Standby should have registered that the block has replicas again", + 1, locs.get(0).getLocations().length); + } finally { + cluster.shutdown(); + } + } static void waitForBlockLocations(final MiniDFSCluster cluster, final NameNode nn, From 4c7a6c6c3f5879a2f79080753074f078943f8392 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 17 Jan 2012 17:57:36 +0000 Subject: [PATCH 089/177] Amend HDFS-2795. 
Fix PersistBlocks failure due to an NPE in isPopulatingReplQueues() git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232510 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 8edfb7fcbd2..258cb53186a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -3681,7 +3681,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, @Override public boolean isPopulatingReplQueues() { - if (!haContext.getState().shouldPopulateReplQueues()) { + if (haContext != null && // null during startup! + !haContext.getState().shouldPopulateReplQueues()) { return false; } // safeMode is volatile, and may be set to null at any time From a380dc8732a17a88b9adc69368eb96ab54d31de8 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 17 Jan 2012 18:39:09 +0000 Subject: [PATCH 090/177] HDFS-2592. Balancer support for HA namenodes. Contributed by Uma Maheswara Rao G. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1232531 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../server/balancer/NameNodeConnector.java | 38 ++++--- .../server/namenode/NameNodeRpcServer.java | 2 +- .../hdfs/server/balancer/TestBalancer.java | 42 +++---- .../balancer/TestBalancerWithHANameNodes.java | 105 ++++++++++++++++++ .../hdfs/server/namenode/ha/HATestUtil.java | 22 ++-- 6 files changed, 163 insertions(+), 48 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 605e7e37d2f..0f42b77cb6f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -113,3 +113,5 @@ HDFS-2772. On transition to active, standby should not swallow ELIE. (atm) HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. (Uma Maheswara Rao G via todd) HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. (todd) + +HDFS-2592. Balancer support for HA namenodes. 
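[Editor's note, illustration only] The balancer change that follows uses a "prefer an HA failover proxy, otherwise fall back to a plain single-NN proxy" pattern when building its NameNode connections. A hedged sketch of that selection logic, with hypothetical factory interfaces standing in for the Hadoop proxy factories:

    import java.net.URI;

    /**
     * Illustration only: prefer an HA-aware proxy when one can be built for the URI,
     * otherwise fall back to a direct proxy. The factory interfaces are hypothetical
     * stand-ins, not Hadoop APIs.
     */
    final class ProxySelection {
      interface HaProxyFactory<T> { T createFailoverProxy(URI nameNodeUri); }
      interface DirectProxyFactory<T> { T createDirectProxy(URI nameNodeUri); }

      static <T> T connect(URI nameNodeUri, HaProxyFactory<T> ha, DirectProxyFactory<T> direct) {
        T failoverProxy = ha.createFailoverProxy(nameNodeUri);
        if (failoverProxy != null) {
          // HA is configured for this URI: route every call through the failover proxy.
          return failoverProxy;
        }
        // Non-HA cluster: talk to the single NameNode directly.
        return direct.createDirectProxy(nameNodeUri);
      }

      private ProxySelection() {}
    }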
(Uma Maheswara Rao G via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java index 939105871cb..cdeeb23e6ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java @@ -22,11 +22,9 @@ import java.io.IOException; import java.io.OutputStream; import java.net.InetAddress; import java.net.InetSocketAddress; +import java.net.URI; import java.util.Collection; import java.util.EnumSet; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.hadoop.classification.InterfaceAudience; @@ -34,11 +32,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; -import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; @@ -46,13 +43,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.io.retry.RetryPolicies; -import org.apache.hadoop.io.retry.RetryPolicy; -import org.apache.hadoop.io.retry.RetryProxy; -import org.apache.hadoop.ipc.ProtobufRpcEngine; -import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RemoteException; -import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Daemon; @@ -83,13 +74,24 @@ class NameNodeConnector { NameNodeConnector(Collection haNNs, Configuration conf) throws IOException { - InetSocketAddress nn = Lists.newArrayList(haNNs).get(0); - // TODO(HA): need to deal with connecting to HA NN pair here - this.namenodeAddress = nn; - this.namenode = DFSUtil.createNNProxyWithNamenodeProtocol(nn, conf, - UserGroupInformation.getCurrentUser()); - this.client = DFSUtil.createNamenode(conf); - this.fs = FileSystem.get(NameNode.getUri(nn), conf); + this.namenodeAddress = Lists.newArrayList(haNNs).get(0); + URI nameNodeUri = NameNode.getUri(this.namenodeAddress); + NamenodeProtocol failoverNamenode = (NamenodeProtocol) HAUtil + .createFailoverProxy(conf, nameNodeUri, NamenodeProtocol.class); + if (null != failoverNamenode) { + this.namenode = failoverNamenode; + } else { + this.namenode = DFSUtil.createNNProxyWithNamenodeProtocol( + this.namenodeAddress, conf, UserGroupInformation.getCurrentUser()); + } + ClientProtocol failOverClient = (ClientProtocol) HAUtil + .createFailoverProxy(conf, nameNodeUri, ClientProtocol.class); + if (null != failOverClient) { + this.client = failOverClient; + } else { + this.client = 
DFSUtil.createNamenode(conf); + } + this.fs = FileSystem.get(nameNodeUri, conf); final NamespaceInfo namespaceinfo = namenode.versionRequest(); this.blockpoolID = namespaceinfo.getBlockPoolID(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 8bbcc3f60b8..a1bc504fe02 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -329,7 +329,7 @@ class NameNodeRpcServer implements NamenodeProtocols { throw new IllegalArgumentException( "Unexpected not positive size: "+size); } - + namesystem.checkOperation(OperationCategory.READ); return namesystem.getBlockManager().getBlocks(datanode, size); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java index 84235112aa5..63b061001b5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java @@ -42,24 +42,23 @@ import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; -import org.apache.hadoop.hdfs.server.namenode.NameNode; /** * This class tests if a balancer schedules tasks correctly. 
*/ public class TestBalancer extends TestCase { private static final Log LOG = LogFactory.getLog( - "org.apache.hadoop.hdfs.TestReplication"); + "org.apache.hadoop.hdfs.TestBalancer"); - final private static long CAPACITY = 500L; - final private static String RACK0 = "/rack0"; - final private static String RACK1 = "/rack1"; - final private static String RACK2 = "/rack2"; - final static private String fileName = "/tmp.txt"; - final static private Path filePath = new Path(fileName); + final static long CAPACITY = 500L; + final static String RACK0 = "/rack0"; + final static String RACK1 = "/rack1"; + final static String RACK2 = "/rack2"; + final private static String fileName = "/tmp.txt"; + final static Path filePath = new Path(fileName); private MiniDFSCluster cluster; ClientProtocol client; @@ -83,9 +82,10 @@ public class TestBalancer extends TestCase { } /* create a file with a length of fileLen */ - private void createFile(long fileLen, short replicationFactor) + static void createFile(MiniDFSCluster cluster, Path filePath, long fileLen, + short replicationFactor, int nnIndex) throws IOException { - FileSystem fs = cluster.getFileSystem(); + FileSystem fs = cluster.getFileSystem(nnIndex); DFSTestUtil.createFile(fs, filePath, fileLen, replicationFactor, r.nextLong()); DFSTestUtil.waitReplication(fs, filePath, replicationFactor); @@ -104,7 +104,7 @@ public class TestBalancer extends TestCase { short replicationFactor = (short)(numNodes-1); long fileLen = size/replicationFactor; - createFile(fileLen, replicationFactor); + createFile(cluster , filePath, fileLen, replicationFactor, 0); List locatedBlocks = client. getBlockLocations(fileName, 0, fileLen).getLocatedBlocks(); @@ -212,7 +212,8 @@ public class TestBalancer extends TestCase { * @throws IOException - if getStats() fails * @throws TimeoutException */ - private void waitForHeartBeat(long expectedUsedSpace, long expectedTotalSpace) + static void waitForHeartBeat(long expectedUsedSpace, + long expectedTotalSpace, ClientProtocol client, MiniDFSCluster cluster) throws IOException, TimeoutException { long timeout = TIMEOUT; long failtime = (timeout <= 0L) ? Long.MAX_VALUE @@ -249,7 +250,8 @@ public class TestBalancer extends TestCase { * @throws IOException * @throws TimeoutException */ - private void waitForBalancer(long totalUsedSpace, long totalCapacity) + static void waitForBalancer(long totalUsedSpace, long totalCapacity, + ClientProtocol client, MiniDFSCluster cluster) throws IOException, TimeoutException { long timeout = TIMEOUT; long failtime = (timeout <= 0L) ? 
Long.MAX_VALUE @@ -312,7 +314,8 @@ public class TestBalancer extends TestCase { // fill up the cluster to be 30% full long totalUsedSpace = totalCapacity*3/10; - createFile(totalUsedSpace/numOfDatanodes, (short)numOfDatanodes); + createFile(cluster, filePath, totalUsedSpace / numOfDatanodes, + (short) numOfDatanodes, 0); // start up an empty node with the same capacity and on the same rack cluster.startDataNodes(conf, 1, true, null, new String[]{newRack}, new long[]{newCapacity}); @@ -328,7 +331,7 @@ public class TestBalancer extends TestCase { private void runBalancer(Configuration conf, long totalUsedSpace, long totalCapacity) throws Exception { - waitForHeartBeat(totalUsedSpace, totalCapacity); + waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); // start rebalancing Map> namenodes = @@ -336,9 +339,9 @@ public class TestBalancer extends TestCase { final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf); assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); - waitForHeartBeat(totalUsedSpace, totalCapacity); + waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); LOG.info("Rebalancing with default ctor."); - waitForBalancer(totalUsedSpace, totalCapacity); + waitForBalancer(totalUsedSpace, totalCapacity, client, cluster); } /** one-node cluster test*/ @@ -403,7 +406,8 @@ public class TestBalancer extends TestCase { // fill up the cluster to be 30% full long totalUsedSpace = totalCapacity * 3 / 10; - createFile(totalUsedSpace / numOfDatanodes, (short) numOfDatanodes); + createFile(cluster, filePath, totalUsedSpace / numOfDatanodes, + (short) numOfDatanodes, 0); // start up an empty node with the same capacity and on the same rack cluster.startDataNodes(conf, 1, true, null, new String[] { newRack }, new long[] { newCapacity }); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java new file mode 100644 index 00000000000..6764213e12d --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.balancer; + +import static org.junit.Assert.assertEquals; + +import java.net.InetSocketAddress; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; +import org.junit.Test; + +/** + * Test balancer with HA NameNodes + */ +public class TestBalancerWithHANameNodes { + private MiniDFSCluster cluster; + ClientProtocol client; + + static { + Balancer.setBlockMoveWaitTime(1000L); + } + + /** + * Test a cluster with even distribution, then a new empty node is added to + * the cluster. Test start a cluster with specified number of nodes, and fills + * it to be 30% full (with a single file replicated identically to all + * datanodes); It then adds one new empty node and starts balancing. + */ + @Test(timeout = 60000) + public void testBalancerWithHANameNodes() throws Exception { + Configuration conf = new HdfsConfiguration(); + TestBalancer.initConf(conf); + long newNodeCapacity = TestBalancer.CAPACITY; // new node's capacity + String newNodeRack = TestBalancer.RACK2; // new node's rack + // array of racks for original nodes in cluster + String[] racks = new String[] { TestBalancer.RACK0, TestBalancer.RACK1 }; + // array of capacities of original nodes in cluster + long[] capacities = new long[] { TestBalancer.CAPACITY, + TestBalancer.CAPACITY }; + assertEquals(capacities.length, racks.length); + int numOfDatanodes = capacities.length; + NNConf nn1Conf = new MiniDFSNNTopology.NNConf("nn1"); + nn1Conf.setIpcPort(NameNode.DEFAULT_PORT); + MiniDFSNNTopology simpleHATopology = new MiniDFSNNTopology() + .addNameservice(new MiniDFSNNTopology.NSConf(null).addNN(nn1Conf) + .addNN(new MiniDFSNNTopology.NNConf("nn2"))); + cluster = new MiniDFSCluster.Builder(conf).nnTopology(simpleHATopology) + .numDataNodes(capacities.length).racks(racks).simulatedCapacities( + capacities).build(); + try { + cluster.waitActive(); + cluster.transitionToActive(1); + Thread.sleep(500); + client = DFSUtil.createNamenode(cluster.getNameNode(1) + .getNameNodeAddress(), conf); + long totalCapacity = TestBalancer.sum(capacities); + // fill up the cluster to be 30% full + long totalUsedSpace = totalCapacity * 3 / 10; + TestBalancer.createFile(cluster, TestBalancer.filePath, totalUsedSpace + / numOfDatanodes, (short) numOfDatanodes, 1); + + // start up an empty node with the same capacity and on the same rack + cluster.startDataNodes(conf, 1, true, null, new String[] { newNodeRack }, + new long[] { newNodeCapacity }); + + HATestUtil.setFailoverConfigurations(cluster, conf, NameNode.getUri( + cluster.getNameNode(0).getNameNodeAddress()).getHost()); + totalCapacity += newNodeCapacity; + TestBalancer.waitForHeartBeat(totalUsedSpace, totalCapacity, client, + cluster); + Map> namenodes = DFSUtil + .getNNServiceRpcAddresses(conf); + final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf); + assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); + TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client, + cluster); + } finally { + cluster.shutdown(); + } + } +} diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java index ba05da82414..cee846d7620 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -127,34 +127,36 @@ public abstract class HATestUtil { super(message); } } - + + /** Gets the filesystem instance by setting the failover configurations */ public static FileSystem configureFailoverFs(MiniDFSCluster cluster, Configuration conf) throws IOException, URISyntaxException { + conf = new Configuration(conf); + String logicalName = getLogicalHostname(cluster); + setFailoverConfigurations(cluster, conf, logicalName); + FileSystem fs = FileSystem.get(new URI("hdfs://" + logicalName), conf); + return fs; + } + + /** Sets the required configurations for performing failover */ + public static void setFailoverConfigurations(MiniDFSCluster cluster, + Configuration conf, String logicalName) { InetSocketAddress nnAddr1 = cluster.getNameNode(0).getNameNodeAddress(); InetSocketAddress nnAddr2 = cluster.getNameNode(1).getNameNodeAddress(); - String nsId = "nameserviceId1"; - String nameNodeId1 = "nn1"; String nameNodeId2 = "nn2"; - String logicalName = getLogicalHostname(cluster); - - conf = new Configuration(conf); String address1 = "hdfs://" + nnAddr1.getHostName() + ":" + nnAddr1.getPort(); String address2 = "hdfs://" + nnAddr2.getHostName() + ":" + nnAddr2.getPort(); conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, nsId, nameNodeId1), address1); conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, nsId, nameNodeId2), address2); - conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, nsId); conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, nsId), nameNodeId1 + "," + nameNodeId2); conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + logicalName, ConfiguredFailoverProxyProvider.class.getName()); - - FileSystem fs = FileSystem.get(new URI("hdfs://" + logicalName), conf); - return fs; } From 02919e61f6935813bc3dbe23cc89e00e0cb02918 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 19 Jan 2012 19:41:48 +0000 Subject: [PATCH 091/177] HDFS-2367. Enable the configuration of multiple HA cluster addresses. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1233549 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/HAUtil.java | 11 +-- .../ha/ConfiguredFailoverProxyProvider.java | 89 +++++++++---------- .../hadoop/hdfs/TestDFSClientFailover.java | 77 +++++++++------- .../hdfs/server/namenode/ha/HATestUtil.java | 12 +-- 5 files changed, 105 insertions(+), 86 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 0f42b77cb6f..9b8b3beff9d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -115,3 +115,5 @@ HDFS-2767. ConfiguredFailoverProxyProvider should support NameNodeProtocol. (Uma HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. (todd) HDFS-2592. Balancer support for HA namenodes. (Uma Maheswara Rao G via todd) + +HDFS-2367. Enable the configuration of multiple HA cluster addresses. 
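[Editor's note, illustration only] For readers following the configuration keys touched in the test util above, this is roughly what a client-side HA configuration looks like on this branch. The logical name "mycluster" and the host:port pairs are examples; the key strings match the constants visible in these diffs (dfs.federation.nameservices was later renamed dfs.nameservices upstream), and the exact constants live in DFSConfigKeys.

    import org.apache.hadoop.conf.Configuration;

    // Example values only; key strings follow the pattern used by the test util above.
    final class HaClientConfExample {
      static Configuration create() {
        Configuration conf = new Configuration();
        conf.set("dfs.federation.nameservices", "mycluster");
        conf.set("dfs.ha.namenodes.mycluster", "nn1,nn2");
        conf.set("dfs.namenode.rpc-address.mycluster.nn1", "host1.example.com:8020");
        conf.set("dfs.namenode.rpc-address.mycluster.nn2", "host2.example.com:8020");
        conf.set("dfs.client.failover.proxy.provider.mycluster",
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
        return conf;
      }

      private HaClientConfExample() {}
    }

Clients then address the cluster by its logical name, e.g. hdfs://mycluster/path, and the proxy provider resolves the two physical addresses from the keys above.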
(atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 1dc2bf67581..ad2f8f67f67 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -125,15 +125,15 @@ public class HAUtil { @SuppressWarnings("unchecked") public static FailoverProxyProvider createFailoverProxyProvider( Configuration conf, Class> failoverProxyProviderClass, - Class xface) throws IOException { + Class xface, URI nameNodeUri) throws IOException { Preconditions.checkArgument( xface.isAssignableFrom(NamenodeProtocols.class), "Interface %s is not a NameNode protocol", xface); try { Constructor> ctor = failoverProxyProviderClass - .getConstructor(Class.class); - FailoverProxyProvider provider = ctor.newInstance(xface); - ReflectionUtils.setConf(provider, conf); + .getConstructor(Configuration.class, URI.class, Class.class); + FailoverProxyProvider provider = ctor.newInstance(conf, nameNodeUri, + xface); return (FailoverProxyProvider) provider; } catch (Exception e) { if (e.getCause() instanceof IOException) { @@ -190,7 +190,8 @@ public class HAUtil { .getFailoverProxyProviderClass(conf, nameNodeUri, xface); if (failoverProxyProviderClass != null) { FailoverProxyProvider failoverProxyProvider = HAUtil - .createFailoverProxyProvider(conf, failoverProxyProviderClass, xface); + .createFailoverProxyProvider(conf, failoverProxyProviderClass, xface, + nameNodeUri); Conf config = new Conf(conf); return RetryProxy.create(xface, failoverProxyProvider, RetryPolicies .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index c44c1c1d74b..d2d0c00b557 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import java.io.Closeable; import java.io.IOException; import java.net.InetSocketAddress; +import java.net.URI; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -46,22 +47,59 @@ import com.google.common.base.Preconditions; * and on a fail-over event the other address is tried. 
*/ public class ConfiguredFailoverProxyProvider implements - FailoverProxyProvider, Configurable { + FailoverProxyProvider { private static final Log LOG = LogFactory.getLog(ConfiguredFailoverProxyProvider.class); - private Configuration conf; - private int currentProxyIndex = 0; - private List> proxies = new ArrayList>(); - private UserGroupInformation ugi; + private final Configuration conf; + private final List> proxies = + new ArrayList>(); + private final UserGroupInformation ugi; private final Class xface; + + private int currentProxyIndex = 0; - public ConfiguredFailoverProxyProvider(Class xface) { + public ConfiguredFailoverProxyProvider(Configuration conf, URI uri, + Class xface) { Preconditions.checkArgument( xface.isAssignableFrom(NamenodeProtocols.class), "Interface class %s is not a valid NameNode protocol!"); this.xface = xface; + + this.conf = new Configuration(conf); + int maxRetries = this.conf.getInt( + DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_KEY, + DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_DEFAULT); + this.conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, + maxRetries); + + int maxRetriesOnSocketTimeouts = this.conf.getInt( + DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT); + this.conf.setInt( + CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + maxRetriesOnSocketTimeouts); + + try { + ugi = UserGroupInformation.getCurrentUser(); + + Map> map = DFSUtil.getHaNnRpcAddresses( + conf); + Map addressesInNN = map.get(uri.getHost()); + + if (addressesInNN == null || addressesInNN.size() == 0) { + throw new RuntimeException("Could not find any configured addresses " + + "for URI " + uri); + } + + for (InetSocketAddress address : addressesInNN.values()) { + proxies.add(new AddressRpcProxyPair(address)); + } + } catch (IOException e) { + throw new RuntimeException(e); + } } @Override @@ -104,45 +142,6 @@ public class ConfiguredFailoverProxyProvider implements currentProxyIndex = (currentProxyIndex + 1) % proxies.size(); } - @Override - public synchronized Configuration getConf() { - return conf; - } - - @Override - public synchronized void setConf(Configuration conf) { - this.conf = new Configuration(conf); - int maxRetries = this.conf.getInt( - DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_KEY, - DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_DEFAULT); - this.conf.setInt( - CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, - maxRetries); - - int maxRetriesOnSocketTimeouts = this.conf.getInt( - DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY, - DFSConfigKeys.DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT); - this.conf.setInt( - CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, - maxRetriesOnSocketTimeouts); - try { - ugi = UserGroupInformation.getCurrentUser(); - - Map> map = DFSUtil.getHaNnRpcAddresses( - conf); - // TODO(HA): currently hardcoding the nameservice used by MiniDFSCluster. - // We need to somehow communicate this into the proxy provider. 
- String nsId = "nameserviceId1"; - Map addressesInNN = map.get(nsId); - - for (InetSocketAddress address : addressesInNN.values()) { - proxies.add(new AddressRpcProxyPair(address)); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - /** * A little pair object to store the address and connected RPC proxy object to * an NN. Note that {@link AddressRpcProxyPair#namenode} may be null. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java index d06a606e54b..a88e8a74edf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientFailover.java @@ -18,28 +18,32 @@ package org.apache.hadoop.hdfs; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX; - -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.IOException; -import java.io.OutputStream; -import java.net.InetSocketAddress; +import java.net.URI; import java.net.URISyntaxException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider; import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.StringUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; public class TestDFSClientFailover { + private static final Log LOG = LogFactory.getLog(TestDFSClientFailover.class); + private static final Path TEST_FILE = new Path("/tmp/failover-test-file"); private static final int FILE_LENGTH_TO_VERIFY = 100; @@ -49,8 +53,9 @@ public class TestDFSClientFailover { @Before public void setUpCluster() throws IOException { cluster = new MiniDFSCluster.Builder(conf) - .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) .build(); + cluster.transitionToActive(0); cluster.waitActive(); } @@ -58,34 +63,22 @@ public class TestDFSClientFailover { public void tearDownCluster() throws IOException { cluster.shutdown(); } - - // TODO(HA): This test should probably be made to fail if a client fails over - // to talk to an NN with a different block pool id. Once failover between - // active/standy in a single block pool is implemented, this test should be - // changed to exercise that. + + /** + * Make sure that client failover works when an active NN dies and the standby + * takes over. 
+ */ @Test public void testDfsClientFailover() throws IOException, URISyntaxException { - InetSocketAddress nnAddr1 = cluster.getNameNode(0).getNameNodeAddress(); - InetSocketAddress nnAddr2 = cluster.getNameNode(1).getNameNodeAddress(); - - ClientProtocol nn1 = DFSUtil.createNamenode(nnAddr1, conf); - ClientProtocol nn2 = DFSUtil.createNamenode(nnAddr2, conf); - - DFSClient dfsClient1 = new DFSClient(null, nn1, conf, null); - DFSClient dfsClient2 = new DFSClient(null, nn2, conf, null); - - OutputStream out1 = dfsClient1.create(TEST_FILE.toString(), false); - OutputStream out2 = dfsClient2.create(TEST_FILE.toString(), false); - AppendTestUtil.write(out1, 0, FILE_LENGTH_TO_VERIFY); - AppendTestUtil.write(out2, 0, FILE_LENGTH_TO_VERIFY); - out1.close(); - out2.close(); - FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); - AppendTestUtil.check(fs, TEST_FILE, FILE_LENGTH_TO_VERIFY); - cluster.getNameNode(0).stop(); - AppendTestUtil.check(fs, TEST_FILE, FILE_LENGTH_TO_VERIFY); + DFSTestUtil.createFile(fs, TEST_FILE, + FILE_LENGTH_TO_VERIFY, (short)1, 1L); + + assertEquals(fs.getFileStatus(TEST_FILE).getLen(), FILE_LENGTH_TO_VERIFY); + cluster.shutdownNameNode(0); + cluster.transitionToActive(1); + assertEquals(fs.getFileStatus(TEST_FILE).getLen(), FILE_LENGTH_TO_VERIFY); // Check that it functions even if the URL becomes canonicalized // to include a port number. @@ -115,4 +108,28 @@ public class TestDFSClientFailover { "does not use port information", ioe); } } + + /** + * Make sure that a helpful error message is shown if a proxy provider is + * configured for a given URI, but no actual addresses are configured for that + * URI. + */ + @Test + public void testFailureWithMisconfiguredHaNNs() throws Exception { + String logicalHost = "misconfigured-ha-uri"; + Configuration conf = new Configuration(); + conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." 
+ logicalHost, + ConfiguredFailoverProxyProvider.class.getName()); + + URI uri = new URI("hdfs://" + logicalHost + "/test"); + try { + FileSystem.get(uri, conf).exists(new Path("/test")); + fail("Successfully got proxy provider for misconfigured FS"); + } catch (IOException ioe) { + LOG.info("got expected exception", ioe); + assertTrue("expected exception did not contain helpful message", + StringUtils.stringifyException(ioe).contains( + "Could not find any configured addresses for URI " + uri)); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java index cee846d7620..5536ba37b59 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -130,7 +130,7 @@ public abstract class HATestUtil { /** Gets the filesystem instance by setting the failover configurations */ public static FileSystem configureFailoverFs(MiniDFSCluster cluster, Configuration conf) - throws IOException, URISyntaxException { + throws IOException, URISyntaxException { conf = new Configuration(conf); String logicalName = getLogicalHostname(cluster); setFailoverConfigurations(cluster, conf, logicalName); @@ -143,17 +143,17 @@ public abstract class HATestUtil { Configuration conf, String logicalName) { InetSocketAddress nnAddr1 = cluster.getNameNode(0).getNameNodeAddress(); InetSocketAddress nnAddr2 = cluster.getNameNode(1).getNameNodeAddress(); - String nsId = "nameserviceId1"; String nameNodeId1 = "nn1"; String nameNodeId2 = "nn2"; String address1 = "hdfs://" + nnAddr1.getHostName() + ":" + nnAddr1.getPort(); String address2 = "hdfs://" + nnAddr2.getHostName() + ":" + nnAddr2.getPort(); conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, - nsId, nameNodeId1), address1); + logicalName, nameNodeId1), address1); conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, - nsId, nameNodeId2), address2); - conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, nsId); - conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, nsId), + logicalName, nameNodeId2), address2); + + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, logicalName); + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, logicalName), nameNodeId1 + "," + nameNodeId2); conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + logicalName, ConfiguredFailoverProxyProvider.class.getName()); From 1aed1296dd4a3bc471393dd0bc9b35e8afcd7e4c Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 19 Jan 2012 22:35:04 +0000 Subject: [PATCH 092/177] HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. Contributed by Todd Lipcon. 
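[Editor's note, illustration only] The lease fix described in this commit is easiest to see in miniature. A toy model of the idea (the Lease class here is a simplified stand-in, not the HDFS LeaseManager): on the standby, lease timestamps go stale because clients renew only against the active NN, so the first thing the new active does is stamp every open lease with the current time.

    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;

    /** Toy model of the HDFS-2812 idea; not the real LeaseManager. */
    final class LeaseTable {
      static final class Lease {
        volatile long lastRenewedMs;
        Lease(long nowMs) { this.lastRenewedMs = nowMs; }
      }

      private final Map<String, Lease> leasesByHolder = new ConcurrentHashMap<String, Lease>();

      void open(String holder) {
        leasesByHolder.put(holder, new Lease(System.currentTimeMillis()));
      }

      /** Called once during the standby-to-active transition, before expiry checks start. */
      synchronized void renewAll() {
        long now = System.currentTimeMillis();
        for (Lease lease : leasesByHolder.values()) {
          lease.lastRenewedMs = now;
        }
      }
    }

Without this step the new active could treat every writer's lease as already expired the moment its lease monitor starts.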
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1233612 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/FSNamesystem.java | 13 +++- .../hdfs/server/namenode/LeaseManager.java | 14 +++++ .../hdfs/server/namenode/NameNodeAdapter.java | 14 +++++ .../namenode/ha/TestHAStateTransitions.java | 60 +++++++++++++++++++ 5 files changed, 101 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 9b8b3beff9d..dcb198d6bf1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -117,3 +117,5 @@ HDFS-2795. Standby NN takes a long time to recover from a dead DN starting up. ( HDFS-2592. Balancer support for HA namenodes. (Uma Maheswara Rao G via todd) HDFS-2367. Enable the configuration of multiple HA cluster addresses. (atm) + +HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 258cb53186a..54d6ebe3fcd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -337,6 +337,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, */ private HAContext haContext; + private boolean haEnabled; + private final Configuration conf; PendingDataNodeMessages getPendingDataNodeMessages() { @@ -545,6 +547,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (UserGroupInformation.isSecurityEnabled()) { startSecretManager(); } + if (haEnabled) { + // Renew all of the leases before becoming active. + // This is because, while we were in standby mode, + // the leases weren't getting renewed on this NN. + // Give them all a fresh start here. + leaseManager.renewAllLeases(); + } leaseManager.startMonitor(); } finally { writeUnlock(); @@ -737,8 +746,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // block allocation has to be persisted in HA using a shared edits directory // so that the standby has up-to-date namespace information String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); - this.persistBlocks |= HAUtil.isHAEnabled(conf, nameserviceId) && - HAUtil.usesSharedEditsDir(conf); + this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId); + this.persistBlocks |= haEnabled && HAUtil.usesSharedEditsDir(conf); short filePermission = (short)conf.getInt(DFS_NAMENODE_UPGRADE_PERMISSION_KEY, DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java index 323dac06a32..71e6cbb1e26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java @@ -200,6 +200,15 @@ public class LeaseManager { } } + /** + * Renew all of the currently open leases. 
+ */ + synchronized void renewAllLeases() { + for (Lease l : leases.values()) { + renewLease(l); + } + } + /************************************************************ * A Lease governs all the locks held by a single client. * For each client there's a corresponding lease, whose @@ -306,6 +315,11 @@ public class LeaseManager { paths.remove(oldpath); paths.add(newpath); } + + @VisibleForTesting + long getLastUpdate() { + return lastUpdate; + } } synchronized void changeLease(String src, String dst, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index 800cb542c60..53a38404297 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp; +import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.ipc.Server; @@ -126,6 +127,19 @@ public class NameNodeAdapter { return namenode.getNamesystem().leaseManager.getLeaseByPath(path).getHolder(); } + /** + * @return the timestamp of the last renewal of the given lease, + * or -1 in the case that the lease doesn't exist. + */ + public static long getLeaseRenewalTime(NameNode nn, String path) { + LeaseManager lm = nn.getNamesystem().leaseManager; + Lease l = lm.getLeaseByPath(path); + if (l == null) { + return -1; + } + return l.getLastUpdate(); + } + /** * Return the datanode descriptor for the given datanode. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index 071a2985e8c..52e21c8602e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -24,15 +24,19 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; +import org.apache.tools.ant.taskdefs.WaitFor; import org.junit.Test; import org.mockito.Mockito; @@ -45,6 +49,7 @@ public class TestHAStateTransitions { TestStandbyIsHot.class); private static final Path TEST_DIR = new Path("/test"); private static final Path TEST_FILE_PATH = new Path(TEST_DIR, "foo"); + private static final String TEST_FILE_STR = TEST_FILE_PATH.toUri().getPath(); private static final String TEST_FILE_DATA = "Hello state transitioning world"; @@ -191,4 +196,59 @@ public class TestHAStateTransitions { cluster.shutdown(); } } + + /** + * Test for HDFS-2812. Since lease renewals go from the client + * only to the active NN, the SBN will have out-of-date lease + * info when it becomes active. We need to make sure we don't + * accidentally mark the leases as expired when the failover + * proceeds. + */ + @Test(timeout=120000) + public void testLeasesRenewedOnTransition() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .build(); + FSDataOutputStream stm = null; + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); + NameNode nn0 = cluster.getNameNode(0); + NameNode nn1 = cluster.getNameNode(1); + nn1.getNamesystem().getEditLogTailer().setSleepTime(250); + nn1.getNamesystem().getEditLogTailer().interrupt(); + + try { + cluster.waitActive(); + cluster.transitionToActive(0); + + LOG.info("Starting with NN 0 active"); + + stm = fs.create(TEST_FILE_PATH); + long nn0t0 = NameNodeAdapter.getLeaseRenewalTime(nn0, TEST_FILE_STR); + assertTrue(nn0t0 > 0); + long nn1t0 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR); + assertEquals("Lease should not yet exist on nn1", + -1, nn1t0); + + Thread.sleep(5); // make sure time advances! + + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + long nn1t1 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR); + assertTrue("Lease should have been created on standby. Time was: " + + nn1t1, nn1t1 > nn0t0); + + Thread.sleep(5); // make sure time advances! 
+ + LOG.info("Failing over to NN 1"); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + long nn1t2 = NameNodeAdapter.getLeaseRenewalTime(nn1, TEST_FILE_STR); + assertTrue("Lease should have been renewed by failover process", + nn1t2 > nn1t1); + } finally { + IOUtils.closeStream(stm); + cluster.shutdown(); + } + } } From c3e62de9ce952aa8572b3cae6a8497b8fdef40aa Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Sat, 21 Jan 2012 03:17:26 +0000 Subject: [PATCH 093/177] HDFS-2737. Automatically trigger log rolls periodically on the active NN. Contributed by Todd Lipcon and Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1234256 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 4 + .../java/org/apache/hadoop/hdfs/HAUtil.java | 39 ++++- .../NamenodeProtocolTranslatorPB.java | 1 - .../NamenodeProtocolTranslatorR23.java | 1 - .../NamenodeWireProtocol.java | 3 - .../hdfs/server/namenode/FSNamesystem.java | 5 + .../server/namenode/NameNodeRpcServer.java | 1 - .../server/namenode/ha/EditLogTailer.java | 146 ++++++++++++++++-- .../namenode/ha/StandbyCheckpointer.java | 39 +---- .../server/protocol/NamenodeProtocol.java | 3 - .../apache/hadoop/hdfs/MiniDFSCluster.java | 6 + .../apache/hadoop/hdfs/MiniDFSNNTopology.java | 15 ++ .../server/namenode/ha/TestDNFencing.java | 3 +- .../ha/TestDNFencingWithReplication.java | 4 +- .../server/namenode/ha/TestEditLogTailer.java | 63 +++++++- .../namenode/ha/TestFailureToReadEdits.java | 3 +- .../server/namenode/ha/TestHASafeMode.java | 5 +- .../namenode/ha/TestHAStateTransitions.java | 4 +- .../namenode/ha/TestStandbyCheckpoints.java | 10 +- .../server/namenode/ha/TestStandbyIsHot.java | 12 +- 21 files changed, 283 insertions(+), 86 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index dcb198d6bf1..e079202641b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -119,3 +119,5 @@ HDFS-2592. Balancer support for HA namenodes. (Uma Maheswara Rao G via todd) HDFS-2367. Enable the configuration of multiple HA cluster addresses. (atm) HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. (todd) + +HDFS-2737. Automatically trigger log rolls periodically on the active NN. 
(todd and atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index b3fee6fc511..55d1ccd1ce0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -333,4 +333,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_HA_NAMENODE_ID_KEY = "dfs.ha.namenode.id"; public static final String DFS_HA_STANDBY_CHECKPOINTS_KEY = "dfs.ha.standby.checkpoints"; public static final boolean DFS_HA_STANDBY_CHECKPOINTS_DEFAULT = true; + public static final String DFS_HA_LOGROLL_PERIOD_KEY = "dfs.ha.log-roll.period"; + public static final int DFS_HA_LOGROLL_PERIOD_DEFAULT = 2 * 60; // 2m + public static final String DFS_HA_TAILEDITS_PERIOD_KEY = "dfs.ha.tail-edits.period"; + public static final int DFS_HA_TAILEDITS_PERIOD_DEFAULT = 60; // 1m } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index ad2f8f67f67..a260c0e4fa8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.lang.reflect.Constructor; import java.net.InetSocketAddress; import java.net.URI; +import java.util.ArrayList; +import java.util.Collection; import java.util.Map; import org.apache.hadoop.HadoopIllegalArgumentException; @@ -32,9 +34,9 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.io.retry.FailoverProxyProvider; import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryProxy; -import org.apache.hadoop.util.ReflectionUtils; - +import com.google.common.base.Joiner; import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; public class HAUtil { private HAUtil() { /* Hidden constructor */ } @@ -109,6 +111,39 @@ public class HAUtil { return null; } + /** + * Given the configuration for this node, return a Configuration object for + * the other node in an HA setup. + * + * @param myConf the configuration of this node + * @return the configuration of the other node in an HA setup + */ + public static Configuration getConfForOtherNode( + Configuration myConf) { + + String nsId = DFSUtil.getNamenodeNameServiceId(myConf); + Collection nnIds = DFSUtil.getNameNodeIds(myConf, nsId); + String myNNId = myConf.get(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY); + Preconditions.checkArgument(nnIds != null, + "Could not determine namenode ids in namespace '%s'", + nsId); + Preconditions.checkArgument(nnIds.size() == 2, + "Expected exactly 2 NameNodes in this namespace. Instead, got: '%s'", + Joiner.on("','").join(nnIds)); + Preconditions.checkState(myNNId != null && !myNNId.isEmpty(), + "Could not determine own NN ID"); + + ArrayList nnSet = Lists.newArrayList(nnIds); + nnSet.remove(myNNId); + assert nnSet.size() == 1; + String activeNN = nnSet.get(0); + + // Look up the address of the active NN. + Configuration confForOtherNode = new Configuration(myConf); + NameNode.initializeGenericKeys(confForOtherNode, nsId, activeNN); + return confForOtherNode; + } + /** * This is used only by tests at the moment. 
* @return true if the NN should allow read operations while in standby mode. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java index 51a74746b2f..22a6d8a8eba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java @@ -157,7 +157,6 @@ public class NamenodeProtocolTranslatorPB implements NamenodeProtocol, } @Override - @SuppressWarnings("deprecation") public CheckpointSignature rollEditLog() throws IOException { try { return PBHelper.convert(rpcProxy.rollEditLog(NULL_CONTROLLER, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/NamenodeProtocolTranslatorR23.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/NamenodeProtocolTranslatorR23.java index 11589756af4..e7ea9ec7bc4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/NamenodeProtocolTranslatorR23.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/NamenodeProtocolTranslatorR23.java @@ -134,7 +134,6 @@ public class NamenodeProtocolTranslatorR23 implements } @Override - @SuppressWarnings("deprecation") public CheckpointSignature rollEditLog() throws IOException { return rpcProxy.rollEditLog().convert(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/NamenodeWireProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/NamenodeWireProtocol.java index 6eaa224d43e..2bfba10387f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/NamenodeWireProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolR23Compatible/NamenodeWireProtocol.java @@ -84,10 +84,7 @@ public interface NamenodeWireProtocol extends VersionedProtocol { * call fails if the file system is in SafeMode. * @throws IOException * @return a unique token to identify this transaction. 
- * @deprecated - * See {@link org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode} */ - @Deprecated public CheckpointSignatureWritable rollEditLog() throws IOException; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 54d6ebe3fcd..e92ae020011 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -690,6 +690,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, DFS_NAMENODE_SHARED_EDITS_DIR_KEY); return Util.stringCollectionAsURIs(dirNames); } + + public Configuration getConf() { + return conf; + } @Override public void readLock() { @@ -3846,6 +3850,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, CheckpointSignature rollEditLog() throws IOException { writeLock(); try { + checkOperation(OperationCategory.JOURNAL); if (isInSafeMode()) { throw new SafeModeException("Log not rolled", safeMode); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index a1bc504fe02..a0d7e14897e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -738,7 +738,6 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // NamenodeProtocol public CheckpointSignature rollEditLog() throws IOException { - // TODO:HA decide on OperationCategory for this return namesystem.rollEditLog(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index 264e3a72e61..160c16ed5e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -19,21 +19,34 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import java.io.IOException; +import java.net.InetSocketAddress; import java.util.Collection; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.HAUtil; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.server.namenode.EditLogInputException; import org.apache.hadoop.hdfs.server.namenode.EditLogInputStream; import org.apache.hadoop.hdfs.server.namenode.FSEditLog; import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import 
org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; +import org.apache.hadoop.ipc.RPC; + +import static org.apache.hadoop.hdfs.server.common.Util.now; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; + /** * EditLogTailer represents a thread which periodically reads from edits * journals and applies the transactions contained within to a given @@ -50,13 +63,87 @@ public class EditLogTailer { private FSEditLog editLog; private volatile Runtime runtime = Runtime.getRuntime(); + + private InetSocketAddress activeAddr; + private NamenodeProtocol cachedActiveProxy = null; + + /** + * The last transaction ID at which an edit log roll was initiated. + */ + private long lastRollTriggerTxId = HdfsConstants.INVALID_TXID; + + /** + * The highest transaction ID loaded by the Standby. + */ + private long lastLoadedTxnId = HdfsConstants.INVALID_TXID; + + /** + * The last time we successfully loaded a non-zero number of edits from the + * shared directory. + */ + private long lastLoadTimestamp; + + /** + * How often the Standby should roll edit logs. Since the Standby only reads + * from finalized log segments, the Standby will only be as up-to-date as how + * often the logs are rolled. + */ + private long logRollPeriodMs; + + /** + * How often the Standby should check if there are new finalized segment(s) + * available to be read from. + */ + private long sleepTimeMs; public EditLogTailer(FSNamesystem namesystem) { this.tailerThread = new EditLogTailerThread(); this.namesystem = namesystem; this.editLog = namesystem.getEditLog(); + + + Configuration conf = namesystem.getConf(); + lastLoadTimestamp = now(); + + logRollPeriodMs = conf.getInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, + DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_DEFAULT) * 1000; + if (logRollPeriodMs >= 0) { + this.activeAddr = getActiveNodeAddress(); + Preconditions.checkArgument(activeAddr.getPort() > 0, + "Active NameNode must have an IPC port configured. 
" + + "Got address '%s'", activeAddr); + LOG.info("Will roll logs on active node at " + activeAddr + " every " + + (logRollPeriodMs / 1000) + " seconds."); + } else { + LOG.info("Not going to trigger log rolls on active node because " + + DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY + " is negative."); + } + + sleepTimeMs = conf.getInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, + DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_DEFAULT) * 1000; + + LOG.debug("logRollPeriodMs=" + logRollPeriodMs + + " sleepTime=" + sleepTimeMs); } + private InetSocketAddress getActiveNodeAddress() { + Configuration conf = namesystem.getConf(); + Configuration activeConf = HAUtil.getConfForOtherNode(conf); + return NameNode.getServiceAddress(activeConf, true); + } + + private NamenodeProtocol getActiveNodeProxy() throws IOException { + if (cachedActiveProxy == null) { + Configuration conf = namesystem.getConf(); + NamenodeProtocolPB proxy = + RPC.waitForProxy(NamenodeProtocolPB.class, + RPC.getProtocolVersion(NamenodeProtocolPB.class), activeAddr, conf); + cachedActiveProxy = new NamenodeProtocolTranslatorPB(proxy); + } + assert cachedActiveProxy != null; + return cachedActiveProxy; + } + public void start() { tailerThread.start(); } @@ -71,16 +158,6 @@ public class EditLogTailer { throw new IOException(e); } } - - @VisibleForTesting - public void setSleepTime(long sleepTime) { - tailerThread.setSleepTime(sleepTime); - } - - @VisibleForTesting - public void interrupt() { - tailerThread.interrupt(); - } @VisibleForTesting FSEditLog getEditLog() { @@ -152,18 +229,43 @@ public class EditLogTailer { editsLoaded, lastTxnId)); } } + + if (editsLoaded > 0) { + lastLoadTimestamp = now(); + } + lastLoadedTxnId = image.getLastAppliedTxId(); } finally { namesystem.writeUnlock(); } } + /** + * @return true if the configured log roll period has elapsed. + */ + private boolean tooLongSinceLastLoad() { + return logRollPeriodMs >= 0 && + (now() - lastLoadTimestamp) > logRollPeriodMs ; + } + + /** + * Trigger the active node to roll its logs. + */ + private void triggerActiveLogRoll() { + LOG.info("Triggering log roll on remote NameNode " + activeAddr); + try { + getActiveNodeProxy().rollEditLog(); + lastRollTriggerTxId = lastLoadedTxnId; + } catch (IOException ioe) { + LOG.warn("Unable to trigger a roll of the active NN", ioe); + } + } + /** * The thread which does the actual work of tailing edits journals and * applying the transactions to the FSNS. */ private class EditLogTailerThread extends Thread { private volatile boolean shouldRun = true; - private long sleepTime = 60 * 1000; private EditLogTailerThread() { super("Edit log tailer"); @@ -173,14 +275,26 @@ public class EditLogTailer { this.shouldRun = shouldRun; } - private void setSleepTime(long sleepTime) { - this.sleepTime = sleepTime; - } - @Override public void run() { while (shouldRun) { try { + // There's no point in triggering a log roll if the Standby hasn't + // read any more transactions since the last time a roll was + // triggered. + if (tooLongSinceLastLoad() && + lastRollTriggerTxId < lastLoadedTxnId) { + triggerActiveLogRoll(); + } + /** + * Check again in case someone calls {@link EditLogTailer#stop} while + * we're triggering an edit log roll, since ipc.Client catches and + * ignores {@link InterruptedException} in a few places. This fixes + * the bug described in HDFS-2823. + */ + if (!shouldRun) { + break; + } doTailEdits(); } catch (EditLogInputException elie) { LOG.warn("Error while reading edits from disk. 
Will try again.", elie); @@ -194,7 +308,7 @@ public class EditLogTailer { } try { - Thread.sleep(sleepTime); + Thread.sleep(sleepTimeMs); } catch (InterruptedException e) { LOG.warn("Edit log tailer interrupted", e); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java index ee7921db4f8..83e85f7709c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java @@ -20,20 +20,17 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import java.io.IOException; import java.net.InetSocketAddress; import java.security.PrivilegedAction; -import java.util.ArrayList; -import java.util.Collection; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.ServiceFailedException; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.server.namenode.CheckpointConf; import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.SaveNamespaceCancelledException; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage; import org.apache.hadoop.net.NetUtils; @@ -41,9 +38,7 @@ import org.apache.hadoop.security.UserGroupInformation; import static org.apache.hadoop.hdfs.server.common.Util.now; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; /** * Thread which runs inside the NN when it's in Standby state, @@ -79,37 +74,19 @@ public class StandbyCheckpointer { * as well as our own HTTP address from the configuration. */ private void setNameNodeAddresses(Configuration conf) { - String nsId = DFSUtil.getNamenodeNameServiceId(conf); - Collection nnIds = DFSUtil.getNameNodeIds(conf, nsId); - String myNNId = conf.get(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY); - Preconditions.checkArgument(nnIds != null, - "Could not determine namenode ids in namespace '%s'", - nsId); - Preconditions.checkArgument(nnIds.size() == 2, - "Expected exactly 2 NameNodes in this namespace. Instead, got: '%s'", - Joiner.on("','").join(nnIds)); - Preconditions.checkState(myNNId != null && !myNNId.isEmpty(), - "Could not determine own NN ID"); - - ArrayList nnSet = Lists.newArrayList(nnIds); - nnSet.remove(myNNId); - assert nnSet.size() == 1; - String activeNN = nnSet.get(0); - - // Look up the address of the active NN. - Configuration confForActive = new Configuration(conf); - NameNode.initializeGenericKeys(confForActive, nsId, activeNN); - activeNNAddress = DFSUtil.getInfoServer(null, confForActive, true); - // Look up our own address. String myAddrString = DFSUtil.getInfoServer(null, conf, true); + + // Look up the active node's address + Configuration confForActive = HAUtil.getConfForOtherNode(conf); + activeNNAddress = DFSUtil.getInfoServer(null, confForActive, true); + // Sanity-check. 
Preconditions.checkArgument(checkAddress(activeNNAddress), "Bad address for active NN: %s", activeNNAddress); - Preconditions.checkArgument(checkAddress(activeNNAddress), - "Bad address for standby NN: %s", myNNAddress); - + Preconditions.checkArgument(checkAddress(myAddrString), + "Bad address for standby NN: %s", myAddrString); myNNAddress = NetUtils.createSocketAddr(myAddrString); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java index 48de14c657f..de04b33cb6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NamenodeProtocol.java @@ -100,10 +100,7 @@ public interface NamenodeProtocol extends VersionedProtocol { * call fails if the file system is in SafeMode. * @throws IOException * @return a unique token to identify this transaction. - * @deprecated - * See {@link org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode} */ - @Deprecated public CheckpointSignature rollEditLog() throws IOException; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 0357c5d714f..977ee956cb5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -548,6 +548,12 @@ public class MiniDFSCluster { "since no HTTP ports have been specified."); conf.setBoolean(DFS_HA_STANDBY_CHECKPOINTS_KEY, false); } + if (!nnTopology.allIpcPortsSpecified() && + nnTopology.isHA()) { + LOG.info("MiniDFSCluster disabling log-roll triggering in the " + + "Standby node since no IPC ports have been specified."); + conf.setInt(DFS_HA_LOGROLL_PERIOD_KEY, -1); + } federation = nnTopology.isFederated(); createNameNodesAndSetConf( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java index fc9bb64f9ed..c8e22e3b454 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java @@ -134,6 +134,21 @@ public class MiniDFSNNTopology { } return true; } + + /** + * @return true if all of the NNs in the cluster have their IPC + * port specified to be non-ephemeral. 
+ */ + public boolean allIpcPortsSpecified() { + for (NSConf ns : nameservices) { + for (NNConf nn : ns.getNNs()) { + if (nn.getIpcPort() == 0) { + return false; + } + } + } + return true; + } public List getNameservices() { return nameservices; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java index 9a2149a281c..1f43e057f7b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java @@ -94,6 +94,7 @@ public class TestDNFencing { // See RandomDeleterPolicy javadoc. conf.setClass("dfs.block.replicator.classname", RandomDeleterPolicy.class, BlockPlacementPolicy.class); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(3) @@ -106,8 +107,6 @@ public class TestDNFencing { // Trigger block reports so that the first NN trusts all // of the DNs, and will issue deletions cluster.triggerBlockReports(); - nn2.getNamesystem().getEditLogTailer().setSleepTime(250); - nn2.getNamesystem().getEditLogTailer().interrupt(); fs = HATestUtil.configureFailoverFs(cluster, conf); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java index 8fc9d49eb05..44bc01d1cdc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java @@ -115,9 +115,11 @@ public class TestDNFencingWithReplication { conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000); conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); // Increase max streams so that we re-replicate quickly. 
conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 1000); + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(3) @@ -128,8 +130,6 @@ public class TestDNFencingWithReplication { final NameNode nn1 = cluster.getNameNode(0); final NameNode nn2 = cluster.getNameNode(1); - nn2.getNamesystem().getEditLogTailer().setSleepTime(250); - nn2.getNamesystem().getEditLogTailer().interrupt(); FileSystem fs = HATestUtil.configureFailoverFs( cluster, conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java index 7c3e38b18a5..1f5822ee575 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java @@ -19,23 +19,32 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.junit.Assert.assertTrue; +import java.io.File; import java.io.IOException; +import java.net.URI; +import java.util.List; +import java.util.concurrent.TimeoutException; import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.namenode.FSImage; +import org.apache.hadoop.hdfs.server.namenode.NNStorage; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Test; +import com.google.common.base.Supplier; + public class TestEditLogTailer { private static final String DIR_PREFIX = "/dir"; @@ -52,6 +61,8 @@ public class TestEditLogTailer { public void testTailer() throws IOException, InterruptedException, ServiceFailedException { Configuration conf = new HdfsConfiguration(); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + HAUtil.setAllowStandbyReads(conf, true); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) @@ -64,8 +75,6 @@ public class TestEditLogTailer { NameNode nn1 = cluster.getNameNode(0); NameNode nn2 = cluster.getNameNode(1); - nn2.getNamesystem().getEditLogTailer().setSleepTime(250); - nn2.getNamesystem().getEditLogTailer().interrupt(); try { for (int i = 0; i < DIRS_TO_MAKE / 2; i++) { NameNodeAdapter.mkdirs(nn1, getDirPath(i), @@ -97,7 +106,57 @@ public class TestEditLogTailer { } } + @Test + public void testNN0TriggersLogRolls() throws Exception { + testStandbyTriggersLogRolls(0); + } + + @Test + public void testNN1TriggersLogRolls() throws Exception { + testStandbyTriggersLogRolls(1); + } + + private static void testStandbyTriggersLogRolls(int activeIndex) + throws Exception { + Configuration conf = new Configuration(); + // Roll every 1s + conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + + // Have to specify IPC ports so the NNs can talk to each other. 
+ MiniDFSNNTopology topology = new MiniDFSNNTopology() + .addNameservice(new MiniDFSNNTopology.NSConf(null) + .addNN(new MiniDFSNNTopology.NNConf("nn1").setIpcPort(10001)) + .addNN(new MiniDFSNNTopology.NNConf("nn2").setIpcPort(10002))); + + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(topology) + .numDataNodes(0) + .build(); + try { + cluster.transitionToActive(activeIndex); + waitForLogRollInSharedDir(cluster, 3); + } finally { + cluster.shutdown(); + } + } + private static String getDirPath(int suffix) { return DIR_PREFIX + suffix; } + + private static void waitForLogRollInSharedDir(MiniDFSCluster cluster, + long startTxId) throws Exception { + URI sharedUri = cluster.getSharedEditsDir(0, 1); + File sharedDir = new File(sharedUri.getPath(), "current"); + final File expectedLog = new File(sharedDir, + NNStorage.getInProgressEditsFileName(startTxId)); + + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return expectedLog.exists(); + } + }, 100, 10000); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java index 24b2c0866e3..e41a7a6e51b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java @@ -78,6 +78,7 @@ public class TestFailureToReadEdits { conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1); conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 1); conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 10); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); HAUtil.setAllowStandbyReads(conf, true); MiniDFSNNTopology topology = new MiniDFSNNTopology() @@ -93,8 +94,6 @@ public class TestFailureToReadEdits { nn0 = cluster.getNameNode(0); nn1 = cluster.getNameNode(1); - nn1.getNamesystem().getEditLogTailer().setSleepTime(250); - nn1.getNamesystem().getEditLogTailer().interrupt(); nn1.getNamesystem().getEditLogTailer().setRuntime(mockRuntime); cluster.transitionToActive(0); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index af7985e21d3..0703f8c8a02 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -90,10 +90,11 @@ public class TestHASafeMode { // have been achieved, without being racy. 
cluster.getConfiguration(1).setInt( DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 30000); + cluster.getConfiguration(1).setInt( + DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + cluster.restartNameNode(1); nn1 = cluster.getNameNode(1); - nn1.getNamesystem().getEditLogTailer().setSleepTime(250); - nn1.getNamesystem().getEditLogTailer().interrupt(); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index 52e21c8602e..fbeaa30a938 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -27,6 +27,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; @@ -207,6 +208,7 @@ public class TestHAStateTransitions { @Test(timeout=120000) public void testLeasesRenewedOnTransition() throws Exception { Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(1) @@ -215,8 +217,6 @@ public class TestHAStateTransitions { FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); NameNode nn0 = cluster.getNameNode(0); NameNode nn1 = cluster.getNameNode(1); - nn1.getNamesystem().getEditLogTailer().setSleepTime(250); - nn1.getNamesystem().getEditLogTailer().interrupt(); try { cluster.waitActive(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index 83f077c55d6..2c0c81947c8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -57,7 +57,8 @@ public class TestStandbyCheckpoints { Configuration conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1); conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5); - + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + MiniDFSNNTopology topology = new MiniDFSNNTopology() .addNameservice(new MiniDFSNNTopology.NSConf(null) .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10001)) @@ -73,9 +74,6 @@ public class TestStandbyCheckpoints { nn1 = cluster.getNameNode(1); fs = HATestUtil.configureFailoverFs(cluster, conf); - nn1.getNamesystem().getEditLogTailer().setSleepTime(250); - nn1.getNamesystem().getEditLogTailer().interrupt(); - cluster.transitionToActive(0); } @@ -150,8 +148,6 @@ public class TestStandbyCheckpoints { DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0); cluster.restartNameNode(1); nn1 = cluster.getNameNode(1); - nn1.getNamesystem().getEditLogTailer().setSleepTime(250); - nn1.getNamesystem().getEditLogTailer().interrupt(); FSImage 
spyImage1 = NameNodeAdapter.spyOnFsImage(nn1); @@ -195,8 +191,6 @@ public class TestStandbyCheckpoints { DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0); cluster.restartNameNode(1); nn1 = cluster.getNameNode(1); - nn1.getNamesystem().getEditLogTailer().setSleepTime(250); - nn1.getNamesystem().getEditLogTailer().interrupt(); cluster.transitionToActive(0); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java index 7bb8d814d22..ce5814b0dd0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyIsHot.java @@ -17,14 +17,13 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; import static org.mockito.Matchers.anyInt; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import java.io.IOException; -import java.util.concurrent.TimeoutException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -32,13 +31,12 @@ import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.AppendTestUtil; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; @@ -77,6 +75,7 @@ public class TestStandbyIsHot { Configuration conf = new Configuration(); // We read from the standby to watch block locations HAUtil.setAllowStandbyReads(conf, true); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(3) @@ -90,8 +89,6 @@ public class TestStandbyIsHot { NameNode nn2 = cluster.getNameNode(1); nn2.getNamesystem().getEditLogTailer().setRuntime(mockRuntime); - nn2.getNamesystem().getEditLogTailer().setSleepTime(250); - nn2.getNamesystem().getEditLogTailer().interrupt(); FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); @@ -151,6 +148,7 @@ public class TestStandbyIsHot { conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024); // We read from the standby to watch block locations HAUtil.setAllowStandbyReads(conf, true); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(1) @@ -158,8 +156,6 @@ public class TestStandbyIsHot { try { NameNode nn0 = cluster.getNameNode(0); NameNode nn1 = cluster.getNameNode(1); - nn1.getNamesystem().getEditLogTailer().setSleepTime(250); - nn1.getNamesystem().getEditLogTailer().interrupt(); cluster.transitionToActive(0); From c10853a6b949806a77c2ce610c56a1ad77068a6f Mon Sep 17 00:00:00 2001 
From: Todd Lipcon Date: Mon, 23 Jan 2012 21:49:43 +0000 Subject: [PATCH 094/177] HDFS-2820. Add a simple sanity check for HA config. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1235013 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../src/main/java/org/apache/hadoop/hdfs/DFSUtil.java | 6 ++++++ .../hadoop/hdfs/server/namenode/FSNamesystem.java | 11 +++++++++++ 3 files changed, 19 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index e079202641b..eb3f3e91a5a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -121,3 +121,5 @@ HDFS-2367. Enable the configuration of multiple HA cluster addresses. (atm) HDFS-2812. When becoming active, the NN should treat all leases as freshly renewed. (todd) HDFS-2737. Automatically trigger log rolls periodically on the active NN. (todd and atm) + +HDFS-2820. Add a simple sanity check for HA config (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index d5dc5b30c54..94605b932f7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -569,6 +569,12 @@ public class DFSUtil { } return b.toString(); } + + public static String nnAddressesAsString(Configuration conf) { + Map> addresses = + getHaNnRpcAddresses(conf); + return addressMapToString(addresses); + } /** * Represent one of the NameNodes configured in the cluster. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index e92ae020011..3aa9e60bed3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -752,6 +752,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats, String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); this.haEnabled = HAUtil.isHAEnabled(conf, nameserviceId); this.persistBlocks |= haEnabled && HAUtil.usesSharedEditsDir(conf); + + // Sanity check the HA-related config. + if (nameserviceId != null) { + LOG.info("Determined nameservice ID: " + nameserviceId); + } + LOG.info("HA Enabled: " + haEnabled); + if (!haEnabled && HAUtil.usesSharedEditsDir(conf)) { + LOG.warn("Configured NNs:\n" + DFSUtil.nnAddressesAsString(conf)); + throw new IOException("Invalid configuration: a shared edits dir " + + "must not be specified if HA is not enabled."); + } short filePermission = (short)conf.getInt(DFS_NAMENODE_UPGRADE_PERMISSION_KEY, DFS_NAMENODE_UPGRADE_PERMISSION_DEFAULT); From 8d1c99624207ea9a26413abfceba05d7faef5397 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 23 Jan 2012 21:57:00 +0000 Subject: [PATCH 095/177] HDFS-2688. Add tests for quota tracking in an HA cluster. Contributed by Todd Lipcon. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1235017 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../server/namenode/ha/TestQuotasWithHA.java | 133 ++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestQuotasWithHA.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index eb3f3e91a5a..b002f411614 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -123,3 +123,5 @@ HDFS-2812. When becoming active, the NN should treat all leases as freshly renew HDFS-2737. Automatically trigger log rolls periodically on the active NN. (todd and atm) HDFS-2820. Add a simple sanity check for HA config (todd) + +HDFS-2688. Add tests for quota tracking in an HA cluster. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestQuotasWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestQuotasWithHA.java new file mode 100644 index 00000000000..5800d3a351d --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestQuotasWithHA.java @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.HAUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.io.IOUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestQuotasWithHA { + private static final Path TEST_DIR = new Path("/test"); + private static final Path TEST_FILE = new Path(TEST_DIR, "file"); + private static final String TEST_DIR_STR = TEST_DIR.toUri().getPath(); + + private static final long NS_QUOTA = 10000; + private static final long DS_QUOTA = 10000; + private static final long BLOCK_SIZE = 1024; // 1KB blocks + + private MiniDFSCluster cluster; + private NameNode nn0; + private NameNode nn1; + private FileSystem fs; + + @Before + public void setupCluster() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + HAUtil.setAllowStandbyReads(conf, true); + + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .waitSafeMode(false) + .build(); + cluster.waitActive(); + + nn0 = cluster.getNameNode(0); + nn1 = cluster.getNameNode(1); + fs = HATestUtil.configureFailoverFs(cluster, conf); + + cluster.transitionToActive(0); + } + + @After + public void shutdownCluster() throws IOException { + if (cluster != null) { + cluster.shutdown(); + } + } + + /** + * Test that quotas are properly tracked by the standby through + * create, append, delete. + */ + @Test(timeout=60000) + public void testQuotasTrackedOnStandby() throws Exception { + fs.mkdirs(TEST_DIR); + DistributedFileSystem dfs = (DistributedFileSystem)fs; + dfs.setQuota(TEST_DIR, NS_QUOTA, DS_QUOTA); + long expectedSize = 3 * BLOCK_SIZE + BLOCK_SIZE/2; + DFSTestUtil.createFile(fs, TEST_FILE, expectedSize, (short)1, 1L); + + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + ContentSummary cs = nn1.getRpcServer().getContentSummary(TEST_DIR_STR); + assertEquals(NS_QUOTA, cs.getQuota()); + assertEquals(DS_QUOTA, cs.getSpaceQuota()); + assertEquals(expectedSize, cs.getSpaceConsumed()); + assertEquals(1, cs.getDirectoryCount()); + assertEquals(1, cs.getFileCount()); + + // Append to the file and make sure quota is updated correctly. 
+ FSDataOutputStream stm = fs.append(TEST_FILE); + try { + byte[] data = new byte[(int) (BLOCK_SIZE * 3 / 2)]; + stm.write(data); + expectedSize += data.length; + } finally { + IOUtils.closeStream(stm); + } + + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + cs = nn1.getRpcServer().getContentSummary(TEST_DIR_STR); + assertEquals(NS_QUOTA, cs.getQuota()); + assertEquals(DS_QUOTA, cs.getSpaceQuota()); + assertEquals(expectedSize, cs.getSpaceConsumed()); + assertEquals(1, cs.getDirectoryCount()); + assertEquals(1, cs.getFileCount()); + + + fs.delete(TEST_FILE, true); + expectedSize = 0; + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + cs = nn1.getRpcServer().getContentSummary(TEST_DIR_STR); + assertEquals(NS_QUOTA, cs.getQuota()); + assertEquals(DS_QUOTA, cs.getSpaceQuota()); + assertEquals(expectedSize, cs.getSpaceConsumed()); + assertEquals(1, cs.getDirectoryCount()); + assertEquals(0, cs.getFileCount()); + } +} From 465663afbf0463f8e91d978993f7d46ea9c48dc2 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 23 Jan 2012 22:22:46 +0000 Subject: [PATCH 096/177] HDFS-2804. Should not mark blocks under-replicated when exiting safemode. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1235033 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/FSNamesystem.java | 21 +++++++-- .../server/namenode/ha/TestHASafeMode.java | 46 +++++++++++++++++++ 3 files changed, 64 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index b002f411614..b908da82b43 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -125,3 +125,5 @@ HDFS-2737. Automatically trigger log rolls periodically on the active NN. (todd HDFS-2820. Add a simple sanity check for HA config (todd) HDFS-2688. Add tests for quota tracking in an HA cluster. (todd) + +HDFS-2804. Should not mark blocks under-replicated when exiting safemode (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 3aa9e60bed3..80b05d66c7f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -848,6 +848,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return fsRunning; } + private boolean isInStandbyState() { + if (haContext == null || haContext.getState() == null) { + // We're still starting up. In this case, if HA is + // on for the cluster, we always start in standby. Otherwise + // start in active. + return haEnabled; + } + + return haContext.getState() instanceof StandbyState; + } + /** * Dump all metadata into specified file */ @@ -3345,8 +3356,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return; } } - // if not done yet, initialize replication queues - if (!isPopulatingReplQueues()) { + // if not done yet, initialize replication queues. 
+ // In the standby, do not populate repl queues + if (!isPopulatingReplQueues() && !isInStandbyState()) { initializeReplQueues(); } long timeInSafemode = now() - systemStart; @@ -3389,7 +3401,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * initializing replication queues. */ private synchronized boolean canInitializeReplQueues() { - return blockSafe >= blockReplQueueThreshold; + return !isInStandbyState() && blockSafe >= blockReplQueueThreshold; } /** @@ -3705,8 +3717,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, @Override public boolean isPopulatingReplQueues() { - if (haContext != null && // null during startup! - !haContext.getState().shouldPopulateReplQueues()) { + if (isInStandbyState()) { return false; } // safeMode is volatile, and may be set to null at any time diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index 0703f8c8a02..d423ce26617 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; +import static org.junit.Assert.*; import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.anyInt; import static org.mockito.Mockito.mock; @@ -32,16 +33,21 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.test.GenericTestUtils; import org.junit.After; import org.junit.Before; import org.junit.Test; +import com.google.common.base.Supplier; + /** * Tests that exercise safemode in an HA cluster. */ @@ -59,6 +65,8 @@ public class TestHASafeMode { Configuration conf = new Configuration(); conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(3) @@ -425,6 +433,44 @@ public class TestHASafeMode { "total blocks 6. Safe mode will be turned off automatically")); } + /** + * Regression test for HDFS-2804: standby should not populate replication + * queues when exiting safe mode. 
+ */ + @Test + public void testNoPopulatingReplQueuesWhenExitingSafemode() throws Exception { + DFSTestUtil.createFile(fs, new Path("/test"), 15*BLOCK_SIZE, (short)3, 1L); + + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + + // get some blocks in the SBN's image + nn1.getRpcServer().setSafeMode(SafeModeAction.SAFEMODE_ENTER); + NameNodeAdapter.saveNamespace(nn1); + nn1.getRpcServer().setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + + // and some blocks in the edit logs + DFSTestUtil.createFile(fs, new Path("/test2"), 15*BLOCK_SIZE, (short)3, 1L); + nn0.getRpcServer().rollEditLog(); + + cluster.stopDataNode(1); + cluster.shutdownNameNode(1); + + //Configuration sbConf = cluster.getConfiguration(1); + //sbConf.setInt(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 1); + cluster.restartNameNode(1, false); + nn1 = cluster.getNameNode(1); + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return !nn1.isInSafeMode(); + } + }, 100, 10000); + + BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager()); + assertEquals(0L, nn1.getNamesystem().getUnderReplicatedBlocks()); + assertEquals(0L, nn1.getNamesystem().getPendingReplicationBlocks()); + } + /** * Print a big banner in the test log to make debug easier. */ From 0b2245a0f30d6745f86c9908576d8b50d9348545 Mon Sep 17 00:00:00 2001 From: Jitendra Nath Pandey Date: Tue, 24 Jan 2012 19:31:49 +0000 Subject: [PATCH 097/177] HDFS-2807. Service level authorization for HAServiceProtocol. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1235431 13f79535-47bb-0310-9956-ffa450edef68 --- .../documentation/content/xdocs/service_level_auth.xml | 6 ++++++ .../org/apache/hadoop/fs/CommonConfigurationKeys.java | 3 ++- .../java/org/apache/hadoop/ha/HAServiceProtocol.java | 4 ++++ .../src/main/packages/templates/conf/hadoop-policy.xml | 7 +++++++ hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../java/org/apache/hadoop/hdfs/HDFSPolicyProvider.java | 3 +++ .../hadoop-hdfs/src/test/resources/hadoop-policy.xml | 9 ++++++++- 7 files changed, 32 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/service_level_auth.xml b/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/service_level_auth.xml index b8f5f511d3f..771ac052b33 100644 --- a/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/service_level_auth.xml +++ b/hadoop-common-project/hadoop-common/src/main/docs/src/documentation/content/xdocs/service_level_auth.xml @@ -138,6 +138,12 @@ dfsadmin and mradmin commands to refresh the security policy in-effect. + + security.ha.service.protocol.acl + ACL for HAService protocol used by HAAdmin to manage the + active and stand-by states of namenode.
+ + diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java index f0ca72b00ed..c2a6479dd20 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java @@ -114,11 +114,12 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { public static final String HADOOP_SECURITY_SERVICE_AUTHORIZATION_REFRESH_USER_MAPPINGS = "security.refresh.user.mappings.protocol.acl"; + public static final String + SECURITY_HA_SERVICE_PROTOCOL_ACL = "security.ha.service.protocol.acl"; public static final String HADOOP_SECURITY_TOKEN_SERVICE_USE_IP = "hadoop.security.token.service.use_ip"; public static final boolean HADOOP_SECURITY_TOKEN_SERVICE_USE_IP_DEFAULT = true; - } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java index 2243ba130b6..672c6d6fba3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java @@ -19,7 +19,9 @@ package org.apache.hadoop.ha; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.ipc.VersionedProtocol; +import org.apache.hadoop.security.KerberosInfo; import java.io.IOException; @@ -29,6 +31,8 @@ import java.io.IOException; * * This interface could be used by HA frameworks to manage the service. */ +@KerberosInfo( + serverPrincipal=CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_USER_NAME_KEY) @InterfaceAudience.Public @InterfaceStability.Evolving public interface HAServiceProtocol extends VersionedProtocol { diff --git a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-policy.xml b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-policy.xml index b3e12d14e26..2fd9f8d2a98 100644 --- a/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-policy.xml +++ b/hadoop-common-project/hadoop-common/src/main/packages/templates/conf/hadoop-policy.xml @@ -216,6 +216,13 @@ group list is separated by a blank. For e.g. "alice,bob users,wheel". A special value of "*" means all users are allowed. + + + security.ha.service.protocol.acl + * + ACL for HAService protocol used by HAAdmin to manage the + active and stand-by states of namenode. + security.mrhs.client.protocol.acl diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index b908da82b43..508860c57bc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -127,3 +127,5 @@ HDFS-2820. Add a simple sanity check for HA config (todd) HDFS-2688. Add tests for quota tracking in an HA cluster. (todd) HDFS-2804. Should not mark blocks under-replicated when exiting safemode (todd) + +HDFS-2807. Service level authorization for HAServiceProtocol.
(jitendra) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HDFSPolicyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HDFSPolicyProvider.java index 27702b5795b..6e212458d07 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HDFSPolicyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HDFSPolicyProvider.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; @@ -44,6 +45,8 @@ public class HDFSPolicyProvider extends PolicyProvider { new Service("security.inter.datanode.protocol.acl", InterDatanodeProtocol.class), new Service("security.namenode.protocol.acl", NamenodeProtocol.class), + new Service(CommonConfigurationKeys.SECURITY_HA_SERVICE_PROTOCOL_ACL, + HAServiceProtocol.class), new Service( CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_AUTHORIZATION_REFRESH_POLICY, RefreshAuthorizationPolicyProtocol.class), diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-policy.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-policy.xml index 0f5310c76f0..eb3f4bd7447 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-policy.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/hadoop-policy.xml @@ -109,5 +109,12 @@ group list is separated by a blank. For e.g. "alice,bob users,wheel". A special value of "*" means all users are allowed. - + + + security.ha.service.protocol.acl + * + ACL for HAService protocol used by HAAdmin to manage the + active and stand-by states of namenode. + + From dea3164e00080316200ff3c6412bb7f809901fd8 Mon Sep 17 00:00:00 2001 From: Jitendra Nath Pandey Date: Tue, 24 Jan 2012 21:06:27 +0000 Subject: [PATCH 098/177] HDFS-2809. Add test to verify that delegation tokens are honored after failover. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1235495 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../namenode/ha/TestHAStateTransitions.java | 45 ++++++++++++++++++- 2 files changed, 46 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 508860c57bc..bf685c91cf2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -129,3 +129,5 @@ HDFS-2688. Add tests for quota tracking in an HA cluster. (todd) HDFS-2804. Should not mark blocks under-replicated when exiting safemode (todd) HDFS-2807. Service level authorization for HAServiceProtocol. (jitendra) + +HDFS-2809. Add test to verify that delegation tokens are honored after failover.
(jitendra and atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index fbeaa30a938..5197c6e7647 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.junit.Assert.*; +import java.io.IOException; +import java.net.URISyntaxException; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; @@ -31,13 +33,17 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; -import org.apache.tools.ant.taskdefs.WaitFor; +import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; @@ -251,4 +257,41 @@ public class TestHAStateTransitions { cluster.shutdown(); } } + + /** + * Test that delegation tokens continue to work after the failover. + */ + @Test + public void testDelegationTokensAfterFailover() throws IOException, + URISyntaxException { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + NameNode nn1 = cluster.getNameNode(0); + NameNode nn2 = cluster.getNameNode(1); + NameNodeAdapter.getDtSecretManager(nn1.getNamesystem()).startThreads(); + + String renewer = UserGroupInformation.getLoginUser().getUserName(); + Token token = nn1.getRpcServer() + .getDelegationToken(new Text(renewer)); + + LOG.info("Failing over to NN 1"); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + // Need to explicitly start threads because security is not enabled. + NameNodeAdapter.getDtSecretManager(nn2.getNamesystem()).startThreads(); + + nn2.getRpcServer().renewDelegationToken(token); + nn2.getRpcServer().cancelDelegationToken(token); + token = nn2.getRpcServer().getDelegationToken(new Text(renewer)); + Assert.assertTrue(token != null); + } finally { + cluster.shutdown(); + } + } } From cbfe8fea0ee9753b1f79dd8bb9a2972d0539e9ec Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Wed, 25 Jan 2012 17:26:20 +0000 Subject: [PATCH 099/177] HADOOP-7992. Add ZKClient library to facilitate leader election. Contributed by Bikas Saha. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1235841 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 13 +- hadoop-common-project/hadoop-common/pom.xml | 28 + .../hadoop/ha/ActiveStandbyElector.java | 593 ++++++++++++++++++ .../hadoop/ha/TestActiveStandbyElector.java | 527 ++++++++++++++++ .../ha/TestActiveStandbyElectorRealZK.java | 223 +++++++ 5 files changed, 1379 insertions(+), 5 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 0b86369a2de..b6bd6ed918b 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -9,21 +9,21 @@ HADOOP-7455. HA: Introduce HA Service Protocol Interface. (suresh) HADOOP-7774. HA: Administrative CLI to control HA daemons. (todd) HADOOP-7896. HA: if both NNs are in Standby mode, client needs to try failing - back and forth several times with sleeps. (atm) +back and forth several times with sleeps. (atm) HADOOP-7922. Improve some logging for client IPC failovers and - StandbyExceptions (todd) +StandbyExceptions (todd) HADOOP-7921. StandbyException should extend IOException (todd) HADOOP-7928. HA: Client failover policy is incorrectly trying to fail over all - IOExceptions (atm) +IOExceptions (atm) HADOOP-7925. Add interface and update CLI to query current state to - HAServiceProtocol (eli via todd) +HAServiceProtocol (eli via todd) HADOOP-7932. Make client connection retries on socket time outs configurable. - (Uma Maheswara Rao G via todd) +(Uma Maheswara Rao G via todd) HADOOP-7924. 
FailoverController for client-based configuration (eli) @@ -31,3 +31,6 @@ HADOOP-7961. Move HA fencing to common. (eli) HADOOP-7970. HAServiceProtocol methods must throw IOException. (Hari Mankude via suresh). + +HADOOP-7992. Add ZKClient library to facilitate leader election. +(Bikas Saha via suresh). diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 23d61f825b3..497af85aa2e 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -268,6 +268,34 @@ com.jcraft jsch + + + org.apache.zookeeper + zookeeper + 3.4.2 + + + + junit + junit + + + com.sun.jdmk + jmxtools + + + com.sun.jmx + jmxri + + + + + org.apache.zookeeper + zookeeper + 3.4.2 + test-jar + test + diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java new file mode 100644 index 00000000000..e91c4ce9926 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -0,0 +1,593 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ha; + +import java.io.IOException; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.Watcher; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.AsyncCallback.*; +import org.apache.zookeeper.data.Stat; +import org.apache.zookeeper.KeeperException.Code; + +import com.google.common.annotations.VisibleForTesting; + +/** + * + * This class implements a simple library to perform leader election on top of + * Apache Zookeeper. Using Zookeeper as a coordination service, leader election + * can be performed by atomically creating an ephemeral lock file (znode) on + * Zookeeper. The service instance that successfully creates the znode becomes + * active and the rest become standbys.
+ * This election mechanism is only efficient for a small number of election
+ * candidates (order of 10's) because contention on a single znode by a large
+ * number of candidates can result in Zookeeper overload.
        + * The elector does not guarantee fencing (protection of shared resources) among + * service instances. After it has notified an instance about becoming a leader, + * then that instance must ensure that it meets the service consistency + * requirements. If it cannot do so, then it is recommended to quit the + * election. The application implements the {@link ActiveStandbyElectorCallback} + * to interact with the elector + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class ActiveStandbyElector implements Watcher, StringCallback, + StatCallback { + + /** + * Callback interface to interact with the ActiveStandbyElector object.
+ * The application will be notified with a callback only on state changes
+ * (i.e. there will never be successive calls to becomeActive without an
+ * intermediate call to enterNeutralMode).
        + * The callbacks will be running on Zookeeper client library threads. The + * application should return from these callbacks quickly so as not to impede + * Zookeeper client library performance and notifications. The app will + * typically remember the state change and return from the callback. It will + * then proceed with implementing actions around that state change. It is + * possible to be called back again while these actions are in flight and the + * app should handle this scenario. + */ + public interface ActiveStandbyElectorCallback { + /** + * This method is called when the app becomes the active leader + */ + void becomeActive(); + + /** + * This method is called when the app becomes a standby + */ + void becomeStandby(); + + /** + * If the elector gets disconnected from Zookeeper and does not know about + * the lock state, then it will notify the service via the enterNeutralMode + * interface. The service may choose to ignore this or stop doing state + * changing operations. Upon reconnection, the elector verifies the leader + * status and calls back on the becomeActive and becomeStandby app + * interfaces.
        + * Zookeeper disconnects can happen due to network issues or loss of + * Zookeeper quorum. Thus enterNeutralMode can be used to guard against + * split-brain issues. In such situations it might be prudent to call + * becomeStandby too. However, such state change operations might be + * expensive and enterNeutralMode can help guard against doing that for + * transient issues. + */ + void enterNeutralMode(); + + /** + * If there is any fatal error (e.g. wrong ACL's, unexpected Zookeeper + * errors or Zookeeper persistent unavailability) then notifyFatalError is + * called to notify the app about it. + */ + void notifyFatalError(String errorMessage); + } + + /** + * Name of the lock znode used by the library. Protected for access in test + * classes + */ + @VisibleForTesting + protected static final String LOCKFILENAME = "ActiveStandbyElectorLock"; + + public static final Log LOG = LogFactory.getLog(ActiveStandbyElector.class); + + private static final int NUM_RETRIES = 3; + + private enum ConnectionState { + DISCONNECTED, CONNECTED, TERMINATED + }; + + private enum State { + INIT, ACTIVE, STANDBY, NEUTRAL + }; + + private State state = State.INIT; + private int createRetryCount = 0; + private int statRetryCount = 0; + private ZooKeeper zkClient; + private ConnectionState zkConnectionState = ConnectionState.TERMINATED; + + private final ActiveStandbyElectorCallback appClient; + private final String zkHostPort; + private final int zkSessionTimeout; + private final List zkAcl; + private byte[] appData; + private final String zkLockFilePath; + private final String znodeWorkingDir; + + /** + * Create a new ActiveStandbyElector object
+ * The elector is created by providing to it the Zookeeper configuration, the
+ * parent znode under which to create the znode and a reference to the
+ * callback interface.
+ * The parent znode name must be the same for all service instances and
+ * different across services.
        + * After the leader has been lost, a new leader will be elected after the + * session timeout expires. Hence, the app must set this parameter based on + * its needs for failure response time. The session timeout must be greater + * than the Zookeeper disconnect timeout and is recommended to be 3X that + * value to enable Zookeeper to retry transient disconnections. Setting a very + * short session timeout may result in frequent transitions between active and + * standby states during issues like network outages/GS pauses. + * + * @param zookeeperHostPorts + * ZooKeeper hostPort for all ZooKeeper servers + * @param zookeeperSessionTimeout + * ZooKeeper session timeout + * @param parentZnodeName + * znode under which to create the lock + * @param acl + * ZooKeeper ACL's + * @param app + * reference to callback interface object + * @throws IOException + * @throws HadoopIllegalArgumentException + */ + public ActiveStandbyElector(String zookeeperHostPorts, + int zookeeperSessionTimeout, String parentZnodeName, List acl, + ActiveStandbyElectorCallback app) throws IOException, + HadoopIllegalArgumentException { + if (app == null || acl == null || parentZnodeName == null + || zookeeperHostPorts == null || zookeeperSessionTimeout <= 0) { + throw new HadoopIllegalArgumentException("Invalid argument"); + } + zkHostPort = zookeeperHostPorts; + zkSessionTimeout = zookeeperSessionTimeout; + zkAcl = acl; + appClient = app; + znodeWorkingDir = parentZnodeName; + zkLockFilePath = znodeWorkingDir + "/" + LOCKFILENAME; + + // createConnection for future API calls + createConnection(); + } + + /** + * To participate in election, the app will call joinElection. The result will + * be notified by a callback on either the becomeActive or becomeStandby app + * interfaces.
+ * After this the elector will automatically monitor the leader status and
+ * perform re-election if necessary.
        + * The app could potentially start off in standby mode and ignore the + * becomeStandby call. + * + * @param data + * to be set by the app. non-null data must be set. + * @throws HadoopIllegalArgumentException + * if valid data is not supplied + */ + public synchronized void joinElection(byte[] data) + throws HadoopIllegalArgumentException { + LOG.debug("Attempting active election"); + + if (data == null) { + throw new HadoopIllegalArgumentException("data cannot be null"); + } + + appData = new byte[data.length]; + System.arraycopy(data, 0, appData, 0, data.length); + + joinElectionInternal(); + } + + /** + * Any service instance can drop out of the election by calling quitElection. + *
+ * This will lose any leader status, if held, and stop monitoring of the lock
+ * node.
+ * If the instance wants to participate in election again, then it needs to
+ * call joinElection().
        + * This allows service instances to take themselves out of rotation for known + * impending unavailable states (e.g. long GC pause or software upgrade). + */ + public synchronized void quitElection() { + LOG.debug("Yielding from election"); + reset(); + } + + /** + * Exception thrown when there is no active leader + */ + public class ActiveNotFoundException extends Exception { + private static final long serialVersionUID = 3505396722342846462L; + } + + /** + * get data set by the active leader + * + * @return data set by the active instance + * @throws ActiveNotFoundException + * when there is no active leader + * @throws KeeperException + * other zookeeper operation errors + * @throws InterruptedException + * @throws IOException + * when ZooKeeper connection could not be established + */ + public synchronized byte[] getActiveData() throws ActiveNotFoundException, + KeeperException, InterruptedException, IOException { + try { + if (zkClient == null) { + createConnection(); + } + Stat stat = new Stat(); + return zkClient.getData(zkLockFilePath, false, stat); + } catch(KeeperException e) { + Code code = e.code(); + if (operationNodeDoesNotExist(code)) { + // handle the commonly expected cases that make sense for us + throw new ActiveNotFoundException(); + } else { + throw e; + } + } + } + + /** + * interface implementation of Zookeeper callback for create + */ + @Override + public synchronized void processResult(int rc, String path, Object ctx, + String name) { + LOG.debug("CreateNode result: " + rc + " for path: " + path + + " connectionState: " + zkConnectionState); + if (zkClient == null) { + // zkClient is nulled before closing the connection + // this is the callback with session expired after we closed the session + return; + } + + Code code = Code.get(rc); + if (operationSuccess(code)) { + // we successfully created the znode. we are the leader. start monitoring + becomeActive(); + monitorActiveStatus(); + return; + } + + if (operationNodeExists(code)) { + if (createRetryCount == 0) { + // znode exists and we did not retry the operation. so a different + // instance has created it. become standby and monitor lock. + becomeStandby(); + } + // if we had retried then the znode could have been created by our first + // attempt to the server (that we lost) and this node exists response is + // for the second attempt. verify this case via ephemeral node owner. this + // will happen on the callback for monitoring the lock. + monitorActiveStatus(); + return; + } + + String errorMessage = "Received create error from Zookeeper. code:" + + code.toString(); + LOG.debug(errorMessage); + + if (operationRetry(code)) { + if (createRetryCount < NUM_RETRIES) { + LOG.debug("Retrying createNode createRetryCount: " + createRetryCount); + ++createRetryCount; + createNode(); + return; + } + errorMessage = errorMessage + + ". 
Not retrying further znode create connection errors."; + } + + fatalError(errorMessage); + } + + /** + * interface implementation of Zookeeper callback for monitor (exists) + */ + @Override + public synchronized void processResult(int rc, String path, Object ctx, + Stat stat) { + LOG.debug("StatNode result: " + rc + " for path: " + path + + " connectionState: " + zkConnectionState); + if (zkClient == null) { + // zkClient is nulled before closing the connection + // this is the callback with session expired after we closed the session + return; + } + + Code code = Code.get(rc); + if (operationSuccess(code)) { + // the following owner check completes verification in case the lock znode + // creation was retried + if (stat.getEphemeralOwner() == zkClient.getSessionId()) { + // we own the lock znode. so we are the leader + becomeActive(); + } else { + // we dont own the lock znode. so we are a standby. + becomeStandby(); + } + // the watch set by us will notify about changes + return; + } + + if (operationNodeDoesNotExist(code)) { + // the lock znode disappeared before we started monitoring it + enterNeutralMode(); + joinElectionInternal(); + return; + } + + String errorMessage = "Received stat error from Zookeeper. code:" + + code.toString(); + LOG.debug(errorMessage); + + if (operationRetry(code)) { + if (statRetryCount < NUM_RETRIES) { + ++statRetryCount; + monitorNode(); + return; + } + errorMessage = errorMessage + + ". Not retrying further znode monitoring connection errors."; + } + + fatalError(errorMessage); + } + + /** + * interface implementation of Zookeeper watch events (connection and node) + */ + @Override + public synchronized void process(WatchedEvent event) { + Event.EventType eventType = event.getType(); + LOG.debug("Watcher event type: " + eventType + " with state:" + + event.getState() + " for path:" + event.getPath() + + " connectionState: " + zkConnectionState); + if (zkClient == null) { + // zkClient is nulled before closing the connection + // this is the callback with session expired after we closed the session + return; + } + + if (eventType == Event.EventType.None) { + // the connection state has changed + switch (event.getState()) { + case SyncConnected: + // if the listener was asked to move to safe state then it needs to + // be undone + ConnectionState prevConnectionState = zkConnectionState; + zkConnectionState = ConnectionState.CONNECTED; + if (prevConnectionState == ConnectionState.DISCONNECTED) { + monitorActiveStatus(); + } + break; + case Disconnected: + // ask the app to move to safe state because zookeeper connection + // is not active and we dont know our state + zkConnectionState = ConnectionState.DISCONNECTED; + enterNeutralMode(); + break; + case Expired: + // the connection got terminated because of session timeout + // call listener to reconnect + enterNeutralMode(); + reJoinElection(); + break; + default: + fatalError("Unexpected Zookeeper watch event state: " + + event.getState()); + break; + } + + return; + } + + // a watch on lock path in zookeeper has fired. so something has changed on + // the lock. 
ideally we should check that the path is the same as the lock + // path but trusting zookeeper for now + String path = event.getPath(); + if (path != null) { + switch (eventType) { + case NodeDeleted: + if (state == State.ACTIVE) { + enterNeutralMode(); + } + joinElectionInternal(); + break; + case NodeDataChanged: + monitorActiveStatus(); + break; + default: + LOG.debug("Unexpected node event: " + eventType + " for path: " + path); + monitorActiveStatus(); + } + + return; + } + + // some unexpected error has occurred + fatalError("Unexpected watch error from Zookeeper"); + } + + /** + * Get a new zookeeper client instance. protected so that test class can + * inherit and pass in a mock object for zookeeper + * + * @return new zookeeper client instance + * @throws IOException + */ + protected synchronized ZooKeeper getNewZooKeeper() throws IOException { + return new ZooKeeper(zkHostPort, zkSessionTimeout, this); + } + + private void fatalError(String errorMessage) { + reset(); + appClient.notifyFatalError(errorMessage); + } + + private void monitorActiveStatus() { + LOG.debug("Monitoring active leader"); + statRetryCount = 0; + monitorNode(); + } + + private void joinElectionInternal() { + if (zkClient == null) { + if (!reEstablishSession()) { + fatalError("Failed to reEstablish connection with ZooKeeper"); + return; + } + } + + createRetryCount = 0; + createNode(); + } + + private void reJoinElection() { + LOG.debug("Trying to re-establish ZK session"); + terminateConnection(); + joinElectionInternal(); + } + + private boolean reEstablishSession() { + int connectionRetryCount = 0; + boolean success = false; + while(!success && connectionRetryCount < NUM_RETRIES) { + LOG.debug("Establishing zookeeper connection"); + try { + createConnection(); + success = true; + } catch(IOException e) { + LOG.warn(e); + try { + Thread.sleep(5000); + } catch(InterruptedException e1) { + LOG.warn(e1); + } + } + ++connectionRetryCount; + } + return success; + } + + private void createConnection() throws IOException { + zkClient = getNewZooKeeper(); + } + + private void terminateConnection() { + if (zkClient == null) { + return; + } + LOG.debug("Terminating ZK connection"); + ZooKeeper tempZk = zkClient; + zkClient = null; + try { + tempZk.close(); + } catch(InterruptedException e) { + LOG.warn(e); + } + zkConnectionState = ConnectionState.TERMINATED; + } + + private void reset() { + state = State.INIT; + terminateConnection(); + } + + private void becomeActive() { + if (state != State.ACTIVE) { + LOG.debug("Becoming active"); + state = State.ACTIVE; + appClient.becomeActive(); + } + } + + private void becomeStandby() { + if (state != State.STANDBY) { + LOG.debug("Becoming standby"); + state = State.STANDBY; + appClient.becomeStandby(); + } + } + + private void enterNeutralMode() { + if (state != State.NEUTRAL) { + LOG.debug("Entering neutral mode"); + state = State.NEUTRAL; + appClient.enterNeutralMode(); + } + } + + private void createNode() { + zkClient.create(zkLockFilePath, appData, zkAcl, CreateMode.EPHEMERAL, this, + null); + } + + private void monitorNode() { + zkClient.exists(zkLockFilePath, true, this, null); + } + + private boolean operationSuccess(Code code) { + return (code == Code.OK); + } + + private boolean operationNodeExists(Code code) { + return (code == Code.NODEEXISTS); + } + + private boolean operationNodeDoesNotExist(Code code) { + return (code == Code.NONODE); + } + + private boolean operationRetry(Code code) { + switch (code) { + case CONNECTIONLOSS: + case OPERATIONTIMEOUT: + return 
true; + } + return false; + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java new file mode 100644 index 00000000000..fec350d3bc0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java @@ -0,0 +1,527 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ha; + +import java.io.IOException; +import java.util.List; + +import org.apache.zookeeper.AsyncCallback; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.Code; +import org.apache.zookeeper.WatchedEvent; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.Watcher.Event; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.data.Stat; +import org.apache.zookeeper.ZooDefs.Ids; +import org.junit.Before; +import org.junit.Test; +import org.junit.Assert; +import org.mockito.Mockito; + +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.ha.ActiveStandbyElector.ActiveStandbyElectorCallback; +import org.apache.hadoop.ha.ActiveStandbyElector.ActiveNotFoundException; + +public class TestActiveStandbyElector { + + static ZooKeeper mockZK; + static int count; + static ActiveStandbyElectorCallback mockApp; + static final byte[] data = new byte[8]; + + ActiveStandbyElectorTester elector; + + class ActiveStandbyElectorTester extends ActiveStandbyElector { + ActiveStandbyElectorTester(String hostPort, int timeout, String parent, + List acl, ActiveStandbyElectorCallback app) throws IOException { + super(hostPort, timeout, parent, acl, app); + } + + @Override + public ZooKeeper getNewZooKeeper() { + ++TestActiveStandbyElector.count; + return TestActiveStandbyElector.mockZK; + } + + } + + private static final String zkParentName = "/zookeeper"; + private static final String zkLockPathName = "/zookeeper/" + + ActiveStandbyElector.LOCKFILENAME; + + @Before + public void init() throws IOException { + count = 0; + mockZK = Mockito.mock(ZooKeeper.class); + mockApp = Mockito.mock(ActiveStandbyElectorCallback.class); + elector = new ActiveStandbyElectorTester("hostPort", 1000, zkParentName, + Ids.OPEN_ACL_UNSAFE, mockApp); + } + + /** + * verify that joinElection checks for null data + */ + @Test(expected = HadoopIllegalArgumentException.class) + public void testJoinElectionException() { + elector.joinElection(null); + } + + /** + * verify that joinElection tries to create ephemeral lock znode + */ + @Test + public void testJoinElection() { + elector.joinElection(data); + Mockito.verify(mockZK, 
Mockito.times(1)).create(zkLockPathName, data, + Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, null); + } + + /** + * verify that successful znode create result becomes active and monitoring is + * started + */ + @Test + public void testCreateNodeResultBecomeActive() { + elector.joinElection(data); + elector.processResult(Code.OK.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockApp, Mockito.times(1)).becomeActive(); + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + + // monitor callback verifies the leader is ephemeral owner of lock but does + // not call becomeActive since its already active + Stat stat = new Stat(); + stat.setEphemeralOwner(1L); + Mockito.when(mockZK.getSessionId()).thenReturn(1L); + elector.processResult(Code.OK.intValue(), zkLockPathName, null, stat); + // should not call neutral mode/standby/active + Mockito.verify(mockApp, Mockito.times(0)).enterNeutralMode(); + Mockito.verify(mockApp, Mockito.times(0)).becomeStandby(); + Mockito.verify(mockApp, Mockito.times(1)).becomeActive(); + // another joinElection not called. + Mockito.verify(mockZK, Mockito.times(1)).create(zkLockPathName, data, + Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, null); + // no new monitor called + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + } + + /** + * verify that znode create for existing node and no retry becomes standby and + * monitoring is started + */ + @Test + public void testCreateNodeResultBecomeStandby() { + elector.joinElection(data); + + elector.processResult(Code.NODEEXISTS.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockApp, Mockito.times(1)).becomeStandby(); + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + } + + /** + * verify that znode create error result in fatal error + */ + @Test + public void testCreateNodeResultError() { + elector.joinElection(data); + + elector.processResult(Code.APIERROR.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockApp, Mockito.times(1)).notifyFatalError( + "Received create error from Zookeeper. code:APIERROR"); + } + + /** + * verify that retry of network errors verifies master by session id and + * becomes active if they match. monitoring is started. + */ + @Test + public void testCreateNodeResultRetryBecomeActive() { + elector.joinElection(data); + + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + zkLockPathName); + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + zkLockPathName); + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + zkLockPathName); + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + zkLockPathName); + // 4 errors results in fatalError + Mockito + .verify(mockApp, Mockito.times(1)) + .notifyFatalError( + "Received create error from Zookeeper. code:CONNECTIONLOSS. 
"+ + "Not retrying further znode create connection errors."); + + elector.joinElection(data); + // recreate connection via getNewZooKeeper + Assert.assertEquals(2, TestActiveStandbyElector.count); + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + zkLockPathName); + elector.processResult(Code.NODEEXISTS.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + + Stat stat = new Stat(); + stat.setEphemeralOwner(1L); + Mockito.when(mockZK.getSessionId()).thenReturn(1L); + elector.processResult(Code.OK.intValue(), zkLockPathName, null, stat); + Mockito.verify(mockApp, Mockito.times(1)).becomeActive(); + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + Mockito.verify(mockZK, Mockito.times(6)).create(zkLockPathName, data, + Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, null); + } + + /** + * verify that retry of network errors verifies active by session id and + * becomes standby if they dont match. monitoring is started. + */ + @Test + public void testCreateNodeResultRetryBecomeStandby() { + elector.joinElection(data); + + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + zkLockPathName); + elector.processResult(Code.NODEEXISTS.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + + Stat stat = new Stat(); + stat.setEphemeralOwner(0); + Mockito.when(mockZK.getSessionId()).thenReturn(1L); + elector.processResult(Code.OK.intValue(), zkLockPathName, null, stat); + Mockito.verify(mockApp, Mockito.times(1)).becomeStandby(); + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + } + + /** + * verify that if create znode results in nodeexists and that znode is deleted + * before exists() watch is set then the return of the exists() method results + * in attempt to re-create the znode and become active + */ + @Test + public void testCreateNodeResultRetryNoNode() { + elector.joinElection(data); + + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + zkLockPathName); + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + zkLockPathName); + elector.processResult(Code.NODEEXISTS.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + + elector.processResult(Code.NONODE.intValue(), zkLockPathName, null, + (Stat) null); + Mockito.verify(mockApp, Mockito.times(1)).enterNeutralMode(); + Mockito.verify(mockZK, Mockito.times(4)).create(zkLockPathName, data, + Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, null); + } + + /** + * verify that more than 3 network error retries result fatalError + */ + @Test + public void testStatNodeRetry() { + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + (Stat) null); + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + (Stat) null); + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + (Stat) null); + elector.processResult(Code.CONNECTIONLOSS.intValue(), zkLockPathName, null, + (Stat) null); + Mockito + .verify(mockApp, Mockito.times(1)) + .notifyFatalError( + "Received stat error from Zookeeper. code:CONNECTIONLOSS. 
"+ + "Not retrying further znode monitoring connection errors."); + } + + /** + * verify error in exists() callback results in fatal error + */ + @Test + public void testStatNodeError() { + elector.processResult(Code.RUNTIMEINCONSISTENCY.intValue(), zkLockPathName, + null, (Stat) null); + Mockito.verify(mockApp, Mockito.times(0)).enterNeutralMode(); + Mockito.verify(mockApp, Mockito.times(1)).notifyFatalError( + "Received stat error from Zookeeper. code:RUNTIMEINCONSISTENCY"); + } + + /** + * verify behavior of watcher.process callback with non-node event + */ + @Test + public void testProcessCallbackEventNone() { + elector.joinElection(data); + + WatchedEvent mockEvent = Mockito.mock(WatchedEvent.class); + Mockito.when(mockEvent.getType()).thenReturn(Event.EventType.None); + + // first SyncConnected should not do anything + Mockito.when(mockEvent.getState()).thenReturn( + Event.KeeperState.SyncConnected); + elector.process(mockEvent); + Mockito.verify(mockZK, Mockito.times(0)).exists(Mockito.anyString(), + Mockito.anyBoolean(), Mockito. anyObject(), + Mockito. anyObject()); + + // disconnection should enter safe mode + Mockito.when(mockEvent.getState()).thenReturn( + Event.KeeperState.Disconnected); + elector.process(mockEvent); + Mockito.verify(mockApp, Mockito.times(1)).enterNeutralMode(); + + // re-connection should monitor master status + Mockito.when(mockEvent.getState()).thenReturn( + Event.KeeperState.SyncConnected); + elector.process(mockEvent); + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + + // session expired should enter safe mode and initiate re-election + // re-election checked via checking re-creation of new zookeeper and + // call to create lock znode + Mockito.when(mockEvent.getState()).thenReturn(Event.KeeperState.Expired); + elector.process(mockEvent); + // already in safe mode above. should not enter safe mode again + Mockito.verify(mockApp, Mockito.times(1)).enterNeutralMode(); + // called getNewZooKeeper to create new session. first call was in + // constructor + Assert.assertEquals(2, TestActiveStandbyElector.count); + // once in initial joinElection and one now + Mockito.verify(mockZK, Mockito.times(2)).create(zkLockPathName, data, + Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, null); + + // create znode success. 
become master and monitor + elector.processResult(Code.OK.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockApp, Mockito.times(1)).becomeActive(); + Mockito.verify(mockZK, Mockito.times(2)).exists(zkLockPathName, true, + elector, null); + + // error event results in fatal error + Mockito.when(mockEvent.getState()).thenReturn(Event.KeeperState.AuthFailed); + elector.process(mockEvent); + Mockito.verify(mockApp, Mockito.times(1)).notifyFatalError( + "Unexpected Zookeeper watch event state: AuthFailed"); + // only 1 state change callback is called at a time + Mockito.verify(mockApp, Mockito.times(1)).enterNeutralMode(); + } + + /** + * verify behavior of watcher.process with node event + */ + @Test + public void testProcessCallbackEventNode() { + elector.joinElection(data); + + // make the object go into the monitoring state + elector.processResult(Code.NODEEXISTS.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockApp, Mockito.times(1)).becomeStandby(); + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + + WatchedEvent mockEvent = Mockito.mock(WatchedEvent.class); + Mockito.when(mockEvent.getPath()).thenReturn(zkLockPathName); + + // monitoring should be setup again after event is received + Mockito.when(mockEvent.getType()).thenReturn( + Event.EventType.NodeDataChanged); + elector.process(mockEvent); + Mockito.verify(mockZK, Mockito.times(2)).exists(zkLockPathName, true, + elector, null); + + // monitoring should be setup again after event is received + Mockito.when(mockEvent.getType()).thenReturn( + Event.EventType.NodeChildrenChanged); + elector.process(mockEvent); + Mockito.verify(mockZK, Mockito.times(3)).exists(zkLockPathName, true, + elector, null); + + // lock node deletion when in standby mode should create znode again + // successful znode creation enters active state and sets monitor + Mockito.when(mockEvent.getType()).thenReturn(Event.EventType.NodeDeleted); + elector.process(mockEvent); + // enterNeutralMode not called when app is standby and leader is lost + Mockito.verify(mockApp, Mockito.times(0)).enterNeutralMode(); + // once in initial joinElection() and one now + Mockito.verify(mockZK, Mockito.times(2)).create(zkLockPathName, data, + Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, null); + elector.processResult(Code.OK.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockApp, Mockito.times(1)).becomeActive(); + Mockito.verify(mockZK, Mockito.times(4)).exists(zkLockPathName, true, + elector, null); + + // lock node deletion in active mode should enter neutral mode and create + // znode again successful znode creation enters active state and sets + // monitor + Mockito.when(mockEvent.getType()).thenReturn(Event.EventType.NodeDeleted); + elector.process(mockEvent); + Mockito.verify(mockApp, Mockito.times(1)).enterNeutralMode(); + // another joinElection called + Mockito.verify(mockZK, Mockito.times(3)).create(zkLockPathName, data, + Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, null); + elector.processResult(Code.OK.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockApp, Mockito.times(2)).becomeActive(); + Mockito.verify(mockZK, Mockito.times(5)).exists(zkLockPathName, true, + elector, null); + + // bad path name results in fatal error + Mockito.when(mockEvent.getPath()).thenReturn(null); + elector.process(mockEvent); + Mockito.verify(mockApp, Mockito.times(1)).notifyFatalError( + "Unexpected watch error from Zookeeper"); + // fatal error 
means no new connection other than one from constructor + Assert.assertEquals(1, TestActiveStandbyElector.count); + // no new watches after fatal error + Mockito.verify(mockZK, Mockito.times(5)).exists(zkLockPathName, true, + elector, null); + + } + + /** + * verify becomeStandby is not called if already in standby + */ + @Test + public void testSuccessiveStandbyCalls() { + elector.joinElection(data); + + // make the object go into the monitoring standby state + elector.processResult(Code.NODEEXISTS.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockApp, Mockito.times(1)).becomeStandby(); + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + + WatchedEvent mockEvent = Mockito.mock(WatchedEvent.class); + Mockito.when(mockEvent.getPath()).thenReturn(zkLockPathName); + + // notify node deletion + // monitoring should be setup again after event is received + Mockito.when(mockEvent.getType()).thenReturn(Event.EventType.NodeDeleted); + elector.process(mockEvent); + // is standby. no need to notify anything now + Mockito.verify(mockApp, Mockito.times(0)).enterNeutralMode(); + // another joinElection called. + Mockito.verify(mockZK, Mockito.times(2)).create(zkLockPathName, data, + Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL, elector, null); + // lost election + elector.processResult(Code.NODEEXISTS.intValue(), zkLockPathName, null, + zkLockPathName); + // still standby. so no need to notify again + Mockito.verify(mockApp, Mockito.times(1)).becomeStandby(); + // monitor is set again + Mockito.verify(mockZK, Mockito.times(2)).exists(zkLockPathName, true, + elector, null); + } + + /** + * verify quit election terminates connection and there are no new watches. + * next call to joinElection creates new connection and performs election + */ + @Test + public void testQuitElection() throws InterruptedException { + elector.quitElection(); + Mockito.verify(mockZK, Mockito.times(1)).close(); + // no watches added + Mockito.verify(mockZK, Mockito.times(0)).exists(zkLockPathName, true, + elector, null); + + byte[] data = new byte[8]; + elector.joinElection(data); + // getNewZooKeeper called 2 times. once in constructor and once now + Assert.assertEquals(2, TestActiveStandbyElector.count); + elector.processResult(Code.NODEEXISTS.intValue(), zkLockPathName, null, + zkLockPathName); + Mockito.verify(mockApp, Mockito.times(1)).becomeStandby(); + Mockito.verify(mockZK, Mockito.times(1)).exists(zkLockPathName, true, + elector, null); + + } + + /** + * verify that receiveActiveData gives data when active exists, tells that + * active does not exist and reports error in getting active information + * + * @throws IOException + * @throws InterruptedException + * @throws KeeperException + * @throws ActiveNotFoundException + */ + @Test + public void testGetActiveData() throws ActiveNotFoundException, + KeeperException, InterruptedException, IOException { + // get valid active data + byte[] data = new byte[8]; + Mockito.when( + mockZK.getData(Mockito.eq(zkLockPathName), Mockito.eq(false), + Mockito. anyObject())).thenReturn(data); + Assert.assertEquals(data, elector.getActiveData()); + Mockito.verify(mockZK, Mockito.times(1)).getData( + Mockito.eq(zkLockPathName), Mockito.eq(false), + Mockito. anyObject()); + + // active does not exist + Mockito.when( + mockZK.getData(Mockito.eq(zkLockPathName), Mockito.eq(false), + Mockito. 
anyObject())).thenThrow( + new KeeperException.NoNodeException()); + try { + elector.getActiveData(); + Assert.fail("ActiveNotFoundException expected"); + } catch(ActiveNotFoundException e) { + Mockito.verify(mockZK, Mockito.times(2)).getData( + Mockito.eq(zkLockPathName), Mockito.eq(false), + Mockito. anyObject()); + } + + // error getting active data rethrows keeperexception + try { + Mockito.when( + mockZK.getData(Mockito.eq(zkLockPathName), Mockito.eq(false), + Mockito. anyObject())).thenThrow( + new KeeperException.AuthFailedException()); + elector.getActiveData(); + Assert.fail("KeeperException.AuthFailedException expected"); + } catch(KeeperException.AuthFailedException ke) { + Mockito.verify(mockZK, Mockito.times(3)).getData( + Mockito.eq(zkLockPathName), Mockito.eq(false), + Mockito. anyObject()); + } + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java new file mode 100644 index 00000000000..85a5f8b682c --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java @@ -0,0 +1,223 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.ha; + +import java.io.IOException; +import java.util.List; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.hadoop.ha.ActiveStandbyElector.ActiveStandbyElectorCallback; +import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooKeeper; +import org.apache.zookeeper.ZooDefs.Ids; +import org.apache.zookeeper.data.ACL; +import org.apache.zookeeper.test.ClientBase; + +/** + * Test for {@link ActiveStandbyElector} using real zookeeper. + */ +public class TestActiveStandbyElectorRealZK extends ClientBase { + static final int NUM_ELECTORS = 2; + static ZooKeeper[] zkClient = new ZooKeeper[NUM_ELECTORS]; + static int currentClientIndex = 0; + + class ActiveStandbyElectorTesterRealZK extends ActiveStandbyElector { + ActiveStandbyElectorTesterRealZK(String hostPort, int timeout, + String parent, List acl, ActiveStandbyElectorCallback app) + throws IOException { + super(hostPort, timeout, parent, acl, app); + } + + @Override + public ZooKeeper getNewZooKeeper() { + return TestActiveStandbyElectorRealZK.zkClient[ + TestActiveStandbyElectorRealZK.currentClientIndex]; + } + } + + /** + * The class object runs on a thread and waits for a signal to start from the + * test object. On getting the signal it joins the election and thus by doing + * this on multiple threads we can test simultaneous attempts at leader lock + * creation. 
after joining the election, the object waits on a signal to exit. + * this signal comes when the object's elector has become a leader or there is + * an unexpected fatal error. this lets another thread object to become a + * leader. + */ + class ThreadRunner implements Runnable, ActiveStandbyElectorCallback { + int index; + TestActiveStandbyElectorRealZK test; + boolean wait = true; + + ThreadRunner(int i, TestActiveStandbyElectorRealZK s) { + index = i; + test = s; + } + + @Override + public void run() { + LOG.info("starting " + index); + while(true) { + synchronized (test) { + // wait for test start signal to come + if (!test.start) { + try { + test.wait(); + } catch(InterruptedException e) { + Assert.fail(e.getMessage()); + } + } else { + break; + } + } + } + // join election + byte[] data = new byte[8]; + ActiveStandbyElector elector = test.elector[index]; + LOG.info("joining " + index); + elector.joinElection(data); + try { + while(true) { + synchronized (this) { + // wait for elector to become active/fatal error + if (wait) { + // wait to become active + // wait capped at 30s to prevent hung test + wait(30000); + } else { + break; + } + } + } + Thread.sleep(1000); + // quit election to allow other elector to become active + elector.quitElection(); + } catch(InterruptedException e) { + Assert.fail(e.getMessage()); + } + LOG.info("ending " + index); + } + + @Override + public synchronized void becomeActive() { + test.reportActive(index); + LOG.info("active " + index); + wait = false; + notifyAll(); + } + + @Override + public synchronized void becomeStandby() { + test.reportStandby(index); + LOG.info("standby " + index); + } + + @Override + public synchronized void enterNeutralMode() { + LOG.info("neutral " + index); + } + + @Override + public synchronized void notifyFatalError(String errorMessage) { + LOG.info("fatal " + index + " .Error message:" + errorMessage); + wait = false; + notifyAll(); + } + } + + boolean start = false; + int activeIndex = -1; + int standbyIndex = -1; + String parentDir = "/" + java.util.UUID.randomUUID().toString(); + + ActiveStandbyElector[] elector = new ActiveStandbyElector[NUM_ELECTORS]; + ThreadRunner[] threadRunner = new ThreadRunner[NUM_ELECTORS]; + Thread[] thread = new Thread[NUM_ELECTORS]; + + synchronized void reportActive(int index) { + if (activeIndex == -1) { + activeIndex = index; + } else { + // standby should become active + Assert.assertEquals(standbyIndex, index); + // old active should not become active + Assert.assertFalse(activeIndex == index); + } + activeIndex = index; + } + + synchronized void reportStandby(int index) { + // only 1 standby should be reported and it should not be the same as active + Assert.assertEquals(-1, standbyIndex); + standbyIndex = index; + Assert.assertFalse(activeIndex == standbyIndex); + } + + /** + * the test creates 2 electors which try to become active using a real + * zookeeper server. It verifies that 1 becomes active and 1 becomes standby. + * Upon becoming active the leader quits election and the test verifies that + * the standby now becomes active. 
these electors run on different threads and + * callback to the test class to report active and standby where the outcome + * is verified + * + * @throws IOException + * @throws InterruptedException + * @throws KeeperException + */ + @Test + public void testActiveStandbyTransition() throws IOException, + InterruptedException, KeeperException { + LOG.info("starting test with parentDir:" + parentDir); + start = false; + byte[] data = new byte[8]; + // create random working directory + createClient().create(parentDir, data, Ids.OPEN_ACL_UNSAFE, + CreateMode.PERSISTENT); + + for(currentClientIndex = 0; + currentClientIndex < NUM_ELECTORS; + ++currentClientIndex) { + LOG.info("creating " + currentClientIndex); + zkClient[currentClientIndex] = createClient(); + threadRunner[currentClientIndex] = new ThreadRunner(currentClientIndex, + this); + elector[currentClientIndex] = new ActiveStandbyElectorTesterRealZK( + "hostPort", 1000, parentDir, Ids.OPEN_ACL_UNSAFE, + threadRunner[currentClientIndex]); + zkClient[currentClientIndex].register(elector[currentClientIndex]); + thread[currentClientIndex] = new Thread(threadRunner[currentClientIndex]); + thread[currentClientIndex].start(); + } + + synchronized (this) { + // signal threads to start + LOG.info("signaling threads"); + start = true; + notifyAll(); + } + + for(int i = 0; i < thread.length; i++) { + thread[i].join(); + } + } +} From fdf7b182475050aaf67765eb53aaf342ebaebe8b Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Thu, 26 Jan 2012 23:48:08 +0000 Subject: [PATCH 100/177] HDFS-2838. NPE in FSNamesystem when in safe mode. Contributed by Gregory Chanan git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1236450 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hdfs/server/namenode/FSNamesystem.java | 3 +-- .../hadoop/hdfs/TestMiniDFSCluster.java | 20 ++++++++++++++++++- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index bf685c91cf2..c8a760336a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -131,3 +131,5 @@ HDFS-2804. Should not mark blocks under-replicated when exiting safemode (todd) HDFS-2807. Service level authorizartion for HAServiceProtocol. (jitendra) HDFS-2809. Add test to verify that delegation tokens are honored after failover. (jitendra and atm) + +HDFS-2838. NPE in FSNamesystem when in safe mode. 
(Gregory Chanan via eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 85ae12d4bd0..b3b3dbdaf31 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -3623,11 +3623,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, assert assertsOn = true; // set to true if asserts are on if (!assertsOn) return; - - int activeBlocks = blockManager.getActiveBlockCount(); if (blockTotal == -1 && blockSafe == -1) { return; // manual safe mode } + int activeBlocks = blockManager.getActiveBlockCount(); if ((blockTotal != activeBlocks) && !(blockSafe >= 0 && blockSafe <= blockTotal)) { throw new AssertionError( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java index 5a3524495b4..0eec0d18774 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs; import junit.framework.Assert; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.protocol.FSConstants; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -37,6 +38,7 @@ public class TestMiniDFSCluster { private static final String CLUSTER_1 = "cluster1"; private static final String CLUSTER_2 = "cluster2"; private static final String CLUSTER_3 = "cluster3"; + private static final String CLUSTER_4 = "cluster4"; protected String testDataPath; protected File testDataDir; @Before @@ -104,5 +106,21 @@ public class TestMiniDFSCluster { } } - + @Test(timeout=100000) + public void testIsClusterUpAfterShutdown() throws Throwable { + Configuration conf = new HdfsConfiguration(); + File testDataCluster4 = new File(testDataPath, CLUSTER_4); + String c4Path = testDataCluster4.getAbsolutePath(); + conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, c4Path); + MiniDFSCluster cluster4 = new MiniDFSCluster.Builder(conf).build(); + try { + DistributedFileSystem dfs = (DistributedFileSystem) cluster4.getFileSystem(); + dfs.setSafeMode(FSConstants.SafeModeAction.SAFEMODE_ENTER); + cluster4.shutdown(); + } finally { + while(cluster4.isClusterUp()){ + Thread.sleep(1000); + } + } + } } From e7775e0b3bc3e42e8b01d7823aedc14f7dfb6672 Mon Sep 17 00:00:00 2001 From: Jitendra Nath Pandey Date: Fri, 27 Jan 2012 00:34:37 +0000 Subject: [PATCH 101/177] HDFS-2805. Add a test for a federated cluster with HA NNs. Contributed by Brandon Li. 
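For context, the pattern the new test exercises looks roughly like the fragment below, sketched as a test-method body (imports, the surrounding JUnit class and assertions are omitted; the directory name is illustrative -- the real checks live in TestHAStateTransitions.testManualFailoverFailbackFederationHA further down):

    // Illustrative sketch only -- see the actual test in the diff below.
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHAFederatedTopology(2)) // 2 nameservices, 2 NNs each
        .numDataNodes(1)
        .build();
    try {
      cluster.waitActive();
      // NameNodes are numbered 2*nsIndex and 2*nsIndex + 1, so nameservice 1 owns NNs 2 and 3.
      cluster.transitionToActive(2);
      FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf, 1);
      fs.mkdirs(new Path("/test-dir"));
      cluster.transitionToStandby(2);   // fail over within nameservice 1
      cluster.transitionToActive(3);
      assertTrue(fs.exists(new Path("/test-dir")));   // state is visible on the new active NN
    } finally {
      cluster.shutdown();
    }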
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1236471 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../apache/hadoop/hdfs/MiniDFSCluster.java | 20 ++-- .../apache/hadoop/hdfs/MiniDFSNNTopology.java | 16 ++++ .../hdfs/server/namenode/ha/HATestUtil.java | 28 +++++- .../namenode/ha/TestHAStateTransitions.java | 94 +++++++++++++------ 5 files changed, 119 insertions(+), 41 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index c8a760336a9..044181097f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -133,3 +133,5 @@ HDFS-2807. Service level authorizartion for HAServiceProtocol. (jitendra) HDFS-2809. Add test to verify that delegation tokens are honored after failover. (jitendra and atm) HDFS-2838. NPE in FSNamesystem when in safe mode. (Gregory Chanan via eli) + +HDFS-2805. Add a test for a federated cluster with HA NNs. (Brandon Li via jitendra) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 977ee956cb5..bf3af609d24 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -586,11 +586,19 @@ public class MiniDFSCluster { conf.set(FS_DEFAULT_NAME_KEY, "127.0.0.1:" + onlyNN.getIpcPort()); } + // If we have more than one nameservice, need to enumerate them in the + // config. + if (federation) { + List allNsIds = Lists.newArrayList(); + for (MiniDFSNNTopology.NSConf nameservice : nnTopology.getNameservices()) { + allNsIds.add(nameservice.getId()); + } + conf.set(DFS_FEDERATION_NAMESERVICES, Joiner.on(",").join(allNsIds)); + } + int nnCounter = 0; - List nsIds = Lists.newArrayList(); for (MiniDFSNNTopology.NSConf nameservice : nnTopology.getNameservices()) { String nsId = nameservice.getId(); - nsIds.add(nameservice.getId()); Preconditions.checkArgument( !federation || nsId != null, @@ -643,6 +651,7 @@ public class MiniDFSCluster { } prevNNDirs = FSNamesystem.getNamespaceDirs(conf); } + // Start all Namenodes for (NNConf nn : nameservice.getNNs()) { initNameNodeConf(conf, nsId, nn.getNnId(), manageNameDfsDirs, nnCounter); @@ -651,12 +660,7 @@ public class MiniDFSCluster { } } - if (federation) { - // If we have more than one nameservice, need to enumerate them in the - // config. - conf.set(DFS_FEDERATION_NAMESERVICES, Joiner.on(",").join(nsIds)); - } - + } public URI getSharedEditsDir(int minNN, int maxNN) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java index c8e22e3b454..311e687526b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java @@ -77,6 +77,22 @@ public class MiniDFSNNTopology { return topology; } + /** + * Set up federated cluster with the given number of nameservices, each + * of which has two NameNodes. 
+ */ + public static MiniDFSNNTopology simpleHAFederatedTopology( + int numNameservices) { + MiniDFSNNTopology topology = new MiniDFSNNTopology(); + for (int i = 0; i < numNameservices; i++) { + topology.addNameservice(new MiniDFSNNTopology.NSConf("ns" + i) + .addNN(new MiniDFSNNTopology.NNConf("nn0")) + .addNN(new MiniDFSNNTopology.NNConf("nn1"))); + } + topology.setFederation(true); + return topology; + } + public MiniDFSNNTopology setFederation(boolean federation) { this.federation = federation; return this; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java index 5536ba37b59..5439d15b814 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -131,18 +131,36 @@ public abstract class HATestUtil { /** Gets the filesystem instance by setting the failover configurations */ public static FileSystem configureFailoverFs(MiniDFSCluster cluster, Configuration conf) throws IOException, URISyntaxException { + return configureFailoverFs(cluster, conf, 0); + } + + /** + * Gets the filesystem instance by setting the failover configurations + * @param cluster the single process DFS cluster + * @param conf cluster configuration + * @param nsIndex namespace index starting with zero + * @throws IOException if an error occurs rolling the edit log + */ + public static FileSystem configureFailoverFs(MiniDFSCluster cluster, Configuration conf, + int nsIndex) throws IOException, URISyntaxException { conf = new Configuration(conf); String logicalName = getLogicalHostname(cluster); - setFailoverConfigurations(cluster, conf, logicalName); + setFailoverConfigurations(cluster, conf, logicalName, nsIndex); FileSystem fs = FileSystem.get(new URI("hdfs://" + logicalName), conf); return fs; } - - /** Sets the required configurations for performing failover */ + + /** Sets the required configurations for performing failover of default namespace. */ public static void setFailoverConfigurations(MiniDFSCluster cluster, Configuration conf, String logicalName) { - InetSocketAddress nnAddr1 = cluster.getNameNode(0).getNameNodeAddress(); - InetSocketAddress nnAddr2 = cluster.getNameNode(1).getNameNodeAddress(); + setFailoverConfigurations(cluster, conf, logicalName, 0); + } + + /** Sets the required configurations for performing failover. 
*/ + public static void setFailoverConfigurations(MiniDFSCluster cluster, + Configuration conf, String logicalName, int nsIndex) { + InetSocketAddress nnAddr1 = cluster.getNameNode(2 * nsIndex).getNameNodeAddress(); + InetSocketAddress nnAddr2 = cluster.getNameNode(2 * nsIndex + 1).getNameNodeAddress(); String nameNodeId1 = "nn1"; String nameNodeId2 = "nn2"; String address1 = "hdfs://" + nnAddr1.getHostName() + ":" + nnAddr1.getPort(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index 5197c6e7647..3b31445f68a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -106,6 +106,46 @@ public class TestHAStateTransitions { } } + /** + * Test manual failover failback for one namespace + * @param cluster single process test cluster + * @param conf cluster configuration + * @param nsIndex namespace index starting from zero + * @throws Exception + */ + private void testManualFailoverFailback(MiniDFSCluster cluster, + Configuration conf, int nsIndex) throws Exception { + int nn0 = 2 * nsIndex, nn1 = 2 * nsIndex + 1; + + cluster.transitionToActive(nn0); + + LOG.info("Starting with NN 0 active in namespace " + nsIndex); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); + fs.mkdirs(TEST_DIR); + + LOG.info("Failing over to NN 1 in namespace " + nsIndex); + cluster.transitionToStandby(nn0); + cluster.transitionToActive(nn1); + assertTrue(fs.exists(TEST_DIR)); + DFSTestUtil.writeFile(fs, TEST_FILE_PATH, TEST_FILE_DATA); + + LOG.info("Failing over to NN 0 in namespace " + nsIndex); + cluster.transitionToStandby(nn1); + cluster.transitionToActive(nn0); + assertTrue(fs.exists(TEST_DIR)); + assertEquals(TEST_FILE_DATA, + DFSTestUtil.readFile(fs, TEST_FILE_PATH)); + + LOG.info("Removing test file"); + fs.delete(TEST_DIR, true); + assertFalse(fs.exists(TEST_DIR)); + + LOG.info("Failing over to NN 1 in namespace " + nsIndex); + cluster.transitionToStandby(nn0); + cluster.transitionToActive(nn1); + assertFalse(fs.exists(TEST_DIR)); + } + /** * Tests manual failover back and forth between two NameNodes. 
*/ @@ -118,34 +158,8 @@ public class TestHAStateTransitions { .build(); try { cluster.waitActive(); - cluster.transitionToActive(0); - - LOG.info("Starting with NN 0 active"); - FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); - fs.mkdirs(TEST_DIR); - - LOG.info("Failing over to NN 1"); - cluster.transitionToStandby(0); - cluster.transitionToActive(1); - assertTrue(fs.exists(TEST_DIR)); - DFSTestUtil.writeFile(fs, TEST_FILE_PATH, TEST_FILE_DATA); - - LOG.info("Failing over to NN 0"); - cluster.transitionToStandby(1); - cluster.transitionToActive(0); - assertTrue(fs.exists(TEST_DIR)); - assertEquals(TEST_FILE_DATA, - DFSTestUtil.readFile(fs, TEST_FILE_PATH)); - - LOG.info("Removing test file"); - fs.delete(TEST_DIR, true); - assertFalse(fs.exists(TEST_DIR)); - - LOG.info("Failing over to NN 1"); - cluster.transitionToStandby(0); - cluster.transitionToActive(1); - assertFalse(fs.exists(TEST_DIR)); - + // test the only namespace + testManualFailoverFailback(cluster, conf, 0); } finally { cluster.shutdown(); } @@ -294,4 +308,28 @@ public class TestHAStateTransitions { cluster.shutdown(); } } + + /** + * Tests manual failover back and forth between two NameNodes + * for federation cluster with two namespaces. + */ + @Test + public void testManualFailoverFailbackFederationHA() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHAFederatedTopology(2)) + .numDataNodes(1) + .build(); + try { + cluster.waitActive(); + + // test for namespace 0 + testManualFailoverFailback(cluster, conf, 0); + + // test for namespace 1 + testManualFailoverFailback(cluster, conf, 1); + } finally { + cluster.shutdown(); + } + } } From 6122357da51bc447391a464a8f7b4de1bae2d8cf Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Mon, 30 Jan 2012 02:52:27 +0000 Subject: [PATCH 102/177] HDFS-2841. HAAdmin does not work if security is enabled. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1237534 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/src/main/bin/hadoop | 5 ----- .../main/java/org/apache/hadoop/ha/HAAdmin.java | 9 +-------- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs | 5 +++++ .../main/java/org/apache/hadoop/hdfs/HAUtil.java | 14 ++++++++++++-- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/bin/hadoop b/hadoop-common-project/hadoop-common/src/main/bin/hadoop index a121f3c268d..89dd17c43e4 100755 --- a/hadoop-common-project/hadoop-common/src/main/bin/hadoop +++ b/hadoop-common-project/hadoop-common/src/main/bin/hadoop @@ -36,7 +36,6 @@ function print_usage(){ echo " classpath prints the class path needed to get the" echo " Hadoop jar and the required libraries" echo " daemonlog get/set the log level for each daemon" - echo " haadmin run a HA admin client" echo " or" echo " CLASSNAME run the class named CLASSNAME" echo "" @@ -112,10 +111,6 @@ case $COMMAND in CLASS=org.apache.hadoop.tools.HadoopArchives CLASSPATH=${CLASSPATH}:${TOOL_PATH} HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" - elif [ "$COMMAND" = "haadmin" ] ; then - CLASS=org.apache.hadoop.ha.HAAdmin - CLASSPATH=${CLASSPATH}:${TOOL_PATH} - HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" elif [[ "$COMMAND" = -* ]] ; then # class and package names cannot begin with a - echo "Error: No command named \`$COMMAND' was found. 
Perhaps you meant \`hadoop ${COMMAND#-}'" diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index 7dbc17ed6e7..79edd11fe0f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -37,7 +37,7 @@ import com.google.common.collect.ImmutableMap; * mode, or to trigger a health-check. */ @InterfaceAudience.Private -public class HAAdmin extends Configured implements Tool { +public abstract class HAAdmin extends Configured implements Tool { private static Map USAGE = ImmutableMap.builder() @@ -171,7 +171,6 @@ public class HAAdmin extends Configured implements Tool { addr, getConf()); } - @Override public int run(String[] argv) throws Exception { if (argv.length < 1) { @@ -226,12 +225,6 @@ public class HAAdmin extends Configured implements Tool { errOut.println(cmd + " [" + usageInfo.args + "]: " + usageInfo.help); return 1; } - - public static void main(String[] argv) throws Exception { - int res = ToolRunner.run(new HAAdmin(), argv); - System.exit(res); - } - private static class UsageInfo { private final String args; diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 044181097f2..2ae00c763fb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -135,3 +135,5 @@ HDFS-2809. Add test to verify that delegation tokens are honored after failover. HDFS-2838. NPE in FSNamesystem when in safe mode. (Gregory Chanan via eli) HDFS-2805. Add a test for a federated cluster with HA NNs. (Brandon Li via jitendra) + +HDFS-2841. HAAdmin does not work if security is enabled. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs index 314fac8fd82..32ed159e219 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/bin/hdfs @@ -31,6 +31,7 @@ function print_usage(){ echo " namenode run the DFS namenode" echo " datanode run a DFS datanode" echo " dfsadmin run a DFS admin client" + echo " haadmin run a DFS HA admin client" echo " fsck run a DFS filesystem checking utility" echo " balancer run a cluster balancing utility" echo " jmxget get JMX exported values from NameNode or DataNode." 
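The hunk that follows wires up the new sub-command: "hdfs haadmin" now dispatches to org.apache.hadoop.hdfs.tools.DFSHAAdmin, replacing the generic "hadoop haadmin" entry this patch removes from bin/hadoop above (the DFSHAAdmin class itself lands in a follow-up commit in this series). A rough programmatic sketch of what that dispatch amounts to, with placeholder arguments rather than anything taken from this patch:

    // Illustrative only: mirrors the ToolRunner-based main() that DFSHAAdmin adds later.
    import org.apache.hadoop.hdfs.tools.DFSHAAdmin;
    import org.apache.hadoop.util.ToolRunner;

    public class HaAdminDispatchSketch {
      public static void main(String[] args) throws Exception {
        // "-getServiceState" and "nn1" are placeholder arguments for illustration.
        int rc = ToolRunner.run(new DFSHAAdmin(),
            new String[] { "-getServiceState", "nn1" });
        System.exit(rc);
      }
    }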
@@ -85,6 +86,10 @@ elif [ "$COMMAND" = "dfs" ] ; then elif [ "$COMMAND" = "dfsadmin" ] ; then CLASS=org.apache.hadoop.hdfs.tools.DFSAdmin HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" +elif [ "$COMMAND" = "haadmin" ] ; then + CLASS=org.apache.hadoop.hdfs.tools.DFSHAAdmin + CLASSPATH=${CLASSPATH}:${TOOL_PATH} + HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" elif [ "$COMMAND" = "fsck" ] ; then CLASS=org.apache.hadoop.hdfs.tools.DFSck HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS" diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index a260c0e4fa8..f12cd5e0bce 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -26,6 +26,8 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSClient.Conf; @@ -34,11 +36,16 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.io.retry.FailoverProxyProvider; import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryProxy; + import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; public class HAUtil { + + private static final Log LOG = + LogFactory.getLog(HAUtil.class); + private HAUtil() { /* Hidden constructor */ } /** @@ -171,11 +178,14 @@ public class HAUtil { xface); return (FailoverProxyProvider) provider; } catch (Exception e) { + String message = "Couldn't create proxy provider " + failoverProxyProviderClass; + if (LOG.isDebugEnabled()) { + LOG.debug(message, e); + } if (e.getCause() instanceof IOException) { throw (IOException) e.getCause(); } else { - throw new IOException( - "Couldn't create proxy provider " + failoverProxyProviderClass, e); + throw new IOException(message, e); } } } From 846f97312c6db7b84b7401174acd0fc943baa093 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 30 Jan 2012 19:16:15 +0000 Subject: [PATCH 103/177] HDFS-2691. Fixes for pipeline recovery in an HA cluster: report RBW replicas immediately upon pipeline creation. Contributed by Todd Lipcon. 
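The core of HDFS-2691 is that each entry in the incremental block report now carries an explicit status instead of overloading the delete-hint string. A minimal sketch using the constructors this patch introduces; the class and enum names are taken from the diff below, while the Block and delete-hint values are placeholders:

    // Sketch only: mirrors the ReceivedDeletedBlockInfo usage in BPOfferService below.
    import org.apache.hadoop.hdfs.protocol.Block;
    import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo;
    import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo.BlockStatus;

    final class IncrementalReportSketch {
      static ReceivedDeletedBlockInfo[] exampleEntries(Block blk, String delHint) {
        return new ReceivedDeletedBlockInfo[] {
            // pipeline opened: the RBW replica is reported immediately
            new ReceivedDeletedBlockInfo(blk, BlockStatus.RECEIVING_BLOCK, null),
            // block finalized: the optional delete-hint still rides along
            new ReceivedDeletedBlockInfo(blk, BlockStatus.RECEIVED_BLOCK, delHint),
            // block deleted: no hint needed, the status says it all
            new ReceivedDeletedBlockInfo(blk, BlockStatus.DELETED_BLOCK, null),
        };
      }
    }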
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1237935 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hadoop/hdfs/protocolPB/PBHelper.java | 34 ++- .../server/blockmanagement/BlockManager.java | 63 +++-- .../hdfs/server/datanode/BPOfferService.java | 25 +- .../hdfs/server/datanode/BPServiceActor.java | 8 +- .../hdfs/server/datanode/BlockReceiver.java | 3 + .../hadoop/hdfs/server/datanode/DataNode.java | 15 +- .../hdfs/server/namenode/FSNamesystem.java | 2 +- .../server/namenode/NameNodeRpcServer.java | 2 +- .../protocol/ReceivedDeletedBlockInfo.java | 56 ++++- .../ReceivedDeletedBlockInfoWritable.java | 31 ++- .../src/main/proto/DatanodeProtocol.proto | 14 +- .../apache/hadoop/hdfs/AppendTestUtil.java | 10 +- .../namenode/NNThroughputBenchmark.java | 6 +- .../server/namenode/TestDeadDatanode.java | 4 +- .../namenode/ha/TestPipelinesFailover.java | 237 ++++++++++++++++++ 16 files changed, 456 insertions(+), 56 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 2ae00c763fb..dd4fa42ffd0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -137,3 +137,5 @@ HDFS-2838. NPE in FSNamesystem when in safe mode. (Gregory Chanan via eli) HDFS-2805. Add a test for a federated cluster with HA NNs. (Brandon Li via jitendra) HDFS-2841. HAAdmin does not work if security is enabled. (atm) + +HDFS-2691. Fixes for pipeline recovery in an HA cluster: report RBW replicas immediately upon pipeline creation. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java index af224f34cb9..2b2d0000fae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -116,6 +116,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; +import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo.BlockStatus; import org.apache.hadoop.hdfs.server.protocol.RegisterCommand; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; @@ -813,6 +814,23 @@ public class PBHelper { ReceivedDeletedBlockInfoProto.Builder builder = ReceivedDeletedBlockInfoProto.newBuilder(); + ReceivedDeletedBlockInfoProto.BlockStatus status; + switch (receivedDeletedBlockInfo.getStatus()) { + case RECEIVING_BLOCK: + status = ReceivedDeletedBlockInfoProto.BlockStatus.RECEIVING; + break; + case RECEIVED_BLOCK: + status = ReceivedDeletedBlockInfoProto.BlockStatus.RECEIVED; + break; + case DELETED_BLOCK: + status = ReceivedDeletedBlockInfoProto.BlockStatus.DELETED; + break; + default: + throw new IllegalArgumentException("Bad status: " + + receivedDeletedBlockInfo.getStatus()); + } + builder.setStatus(status); + if (receivedDeletedBlockInfo.getDelHints() != null) { builder.setDeleteHint(receivedDeletedBlockInfo.getDelHints()); } @@ -844,7 +862,21 @@ public class PBHelper { public static ReceivedDeletedBlockInfo convert( ReceivedDeletedBlockInfoProto proto) { - return new ReceivedDeletedBlockInfo(PBHelper.convert(proto.getBlock()), + ReceivedDeletedBlockInfo.BlockStatus status = null; + switch (proto.getStatus()) { + case RECEIVING: + status = BlockStatus.RECEIVING_BLOCK; + break; + case RECEIVED: + status = BlockStatus.RECEIVED_BLOCK; + break; + case DELETED: + status = BlockStatus.DELETED_BLOCK; + break; + } + return new ReceivedDeletedBlockInfo( + PBHelper.convert(proto.getBlock()), + status, proto.hasDeleteHint() ? proto.getDeleteHint() : null); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 59cbeab4394..9f2dfba55ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2256,13 +2256,19 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block // Modify the blocks->datanode map and node's map. 
// pendingReplications.remove(block); - + processAndHandleReportedBlock(node, block, ReplicaState.FINALIZED, + delHintNode); + } + + private void processAndHandleReportedBlock(DatanodeDescriptor node, Block block, + ReplicaState reportedState, DatanodeDescriptor delHintNode) + throws IOException { // blockReceived reports a finalized block Collection toAdd = new LinkedList(); Collection toInvalidate = new LinkedList(); Collection toCorrupt = new LinkedList(); Collection toUC = new LinkedList(); - processReportedBlock(node, block, ReplicaState.FINALIZED, + processReportedBlock(node, block, reportedState, toAdd, toInvalidate, toCorrupt, toUC); // the block is only in one of the to-do lists // if it is in none then data-node already has it @@ -2286,47 +2292,66 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block } } - /** The given node is reporting that it received/deleted certain blocks. */ - public void blockReceivedAndDeleted(final DatanodeID nodeID, + /** + * The given node is reporting incremental information about some blocks. + * This includes blocks that are starting to be received, completed being + * received, or deleted. + */ + public void processIncrementalBlockReport(final DatanodeID nodeID, final String poolId, - final ReceivedDeletedBlockInfo receivedAndDeletedBlocks[] + final ReceivedDeletedBlockInfo blockInfos[] ) throws IOException { namesystem.writeLock(); int received = 0; int deleted = 0; + int receiving = 0; try { final DatanodeDescriptor node = datanodeManager.getDatanode(nodeID); if (node == null || !node.isAlive) { NameNode.stateChangeLog - .warn("BLOCK* blockReceivedDeleted" + .warn("BLOCK* processIncrementalBlockReport" + " is received from dead or unregistered node " + nodeID.getName()); throw new IOException( - "Got blockReceivedDeleted message from unregistered or dead node"); + "Got incremental block report from unregistered or dead node"); } - for (int i = 0; i < receivedAndDeletedBlocks.length; i++) { - if (receivedAndDeletedBlocks[i].isDeletedBlock()) { - removeStoredBlock( - receivedAndDeletedBlocks[i].getBlock(), node); + for (ReceivedDeletedBlockInfo rdbi : blockInfos) { + switch (rdbi.getStatus()) { + case DELETED_BLOCK: + removeStoredBlock(rdbi.getBlock(), node); deleted++; - } else { - addBlock(node, receivedAndDeletedBlocks[i].getBlock(), - receivedAndDeletedBlocks[i].getDelHints()); + break; + case RECEIVED_BLOCK: + addBlock(node, rdbi.getBlock(), rdbi.getDelHints()); received++; + break; + case RECEIVING_BLOCK: + receiving++; + processAndHandleReportedBlock(node, rdbi.getBlock(), + ReplicaState.RBW, null); + break; + default: + String msg = + "Unknown block status code reported by " + nodeID.getName() + + ": " + rdbi; + NameNode.stateChangeLog.warn(msg); + assert false : msg; // if assertions are enabled, throw. + break; } if (NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("BLOCK* block" - + (receivedAndDeletedBlocks[i].isDeletedBlock() ? 
"Deleted" - : "Received") + ": " + receivedAndDeletedBlocks[i].getBlock() + NameNode.stateChangeLog.debug("BLOCK* block " + + (rdbi.getStatus()) + ": " + rdbi.getBlock() + " is received from " + nodeID.getName()); } } } finally { namesystem.writeUnlock(); NameNode.stateChangeLog - .debug("*BLOCK* NameNode.blockReceivedAndDeleted: " + "from " - + nodeID.getName() + " received: " + received + ", " + .debug("*BLOCK* NameNode.processIncrementalBlockReport: " + "from " + + nodeID.getName() + + " receiving: " + receiving + ", " + + " received: " + received + ", " + " deleted: " + deleted); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index 34c123cee73..27df1f2de15 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -47,6 +47,7 @@ import org.apache.hadoop.hdfs.server.protocol.KeyUpdateCommand; import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; +import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo.BlockStatus; import org.apache.hadoop.hdfs.server.protocol.UpgradeCommand; import com.google.common.annotations.VisibleForTesting; @@ -202,10 +203,13 @@ class BPOfferService { void notifyNamenodeReceivedBlock(ExtendedBlock block, String delHint) { checkBlock(block); checkDelHint(delHint); - ReceivedDeletedBlockInfo bInfo = - new ReceivedDeletedBlockInfo(block.getLocalBlock(), delHint); + ReceivedDeletedBlockInfo bInfo = new ReceivedDeletedBlockInfo( + block.getLocalBlock(), + ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, + delHint); + for (BPServiceActor actor : bpServices) { - actor.notifyNamenodeReceivedBlock(bInfo); + actor.notifyNamenodeBlockImmediately(bInfo); } } @@ -224,13 +228,24 @@ class BPOfferService { void notifyNamenodeDeletedBlock(ExtendedBlock block) { checkBlock(block); - ReceivedDeletedBlockInfo bInfo = new ReceivedDeletedBlockInfo(block - .getLocalBlock(), ReceivedDeletedBlockInfo.TODELETE_HINT); + ReceivedDeletedBlockInfo bInfo = new ReceivedDeletedBlockInfo( + block.getLocalBlock(), BlockStatus.DELETED_BLOCK, null); for (BPServiceActor actor : bpServices) { actor.notifyNamenodeDeletedBlock(bInfo); } } + + void notifyNamenodeReceivingBlock(ExtendedBlock block) { + checkBlock(block); + ReceivedDeletedBlockInfo bInfo = new ReceivedDeletedBlockInfo( + block.getLocalBlock(), BlockStatus.RECEIVING_BLOCK, null); + + for (BPServiceActor actor : bpServices) { + actor.notifyNamenodeBlockImmediately(bInfo); + } + } + //This must be called only by blockPoolManager void start() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index ead3e143a59..e5916763365 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -267,7 +267,7 @@ class BPServiceActor implements Runnable { * till namenode is informed before responding with 
success to the * client? For now we don't. */ - void notifyNamenodeReceivedBlock(ReceivedDeletedBlockInfo bInfo) { + void notifyNamenodeBlockImmediately(ReceivedDeletedBlockInfo bInfo) { synchronized (receivedAndDeletedBlockList) { receivedAndDeletedBlockList.add(bInfo); pendingReceivedRequests++; @@ -341,6 +341,12 @@ class BPServiceActor implements Runnable { long startTime = now(); if (startTime - lastBlockReport > dnConf.blockReportInterval) { + // Flush any block information that precedes the block report. Otherwise + // we have a chance that we will miss the delHint information + // or we will report an RBW replica after the BlockReport already reports + // a FINALIZED one. + reportReceivedDeletedBlocks(); + // Create block report long brCreateStartTime = now(); BlockListAsLongs bReport = dn.getFSDataset().getBlockReport( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java index c8aac296a7a..09706cab858 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java @@ -153,6 +153,7 @@ class BlockReceiver implements Closeable { switch (stage) { case PIPELINE_SETUP_CREATE: replicaInfo = datanode.data.createRbw(block); + datanode.notifyNamenodeReceivingBlock(block); break; case PIPELINE_SETUP_STREAMING_RECOVERY: replicaInfo = datanode.data.recoverRbw( @@ -166,6 +167,7 @@ class BlockReceiver implements Closeable { block.getLocalBlock()); } block.setGenerationStamp(newGs); + datanode.notifyNamenodeReceivingBlock(block); break; case PIPELINE_SETUP_APPEND_RECOVERY: replicaInfo = datanode.data.recoverAppend(block, newGs, minBytesRcvd); @@ -174,6 +176,7 @@ class BlockReceiver implements Closeable { block.getLocalBlock()); } block.setGenerationStamp(newGs); + datanode.notifyNamenodeReceivingBlock(block); break; case TRANSFER_RBW: case TRANSFER_FINALIZED: diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 3ac89aec6cb..726010e6536 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -522,7 +522,18 @@ public class DataNode extends Configured if(bpos != null) { bpos.notifyNamenodeReceivedBlock(block, delHint); } else { - LOG.warn("Cannot find BPOfferService for reporting block received for bpid=" + LOG.error("Cannot find BPOfferService for reporting block received for bpid=" + + block.getBlockPoolId()); + } + } + + // calls specific to BP + protected void notifyNamenodeReceivingBlock(ExtendedBlock block) { + BPOfferService bpos = blockPoolManager.get(block.getBlockPoolId()); + if(bpos != null) { + bpos.notifyNamenodeReceivingBlock(block); + } else { + LOG.error("Cannot find BPOfferService for reporting block receiving for bpid=" + block.getBlockPoolId()); } } @@ -533,7 +544,7 @@ public class DataNode extends Configured if (bpos != null) { bpos.notifyNamenodeDeletedBlock(block); } else { - LOG.warn("Cannot find BPOfferService for reporting block deleted for bpid=" + LOG.error("Cannot find BPOfferService for reporting block deleted for bpid=" 
+ block.getBlockPoolId()); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index b3b3dbdaf31..aef137c3650 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4933,7 +4933,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, + m.getNodeReg().getName() + " " + m.getReceivedAndDeletedBlocks().length + " blocks."); } - this.getBlockManager().blockReceivedAndDeleted(m.getNodeReg(), + this.getBlockManager().processIncrementalBlockReport(m.getNodeReg(), m.getPoolId(), m.getReceivedAndDeletedBlocks()); break; case BLOCK_REPORT: diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index a0d7e14897e..5920762ac83 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -928,7 +928,7 @@ class NameNodeRpcServer implements NamenodeProtocols { +"from "+nodeReg.getName()+" "+receivedAndDeletedBlocks.length +" blocks."); } - namesystem.getBlockManager().blockReceivedAndDeleted( + namesystem.getBlockManager().processIncrementalBlockReport( nodeReg, poolId, receivedAndDeletedBlocks); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/ReceivedDeletedBlockInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/ReceivedDeletedBlockInfo.java index 45014add97b..bde5a5e2d75 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/ReceivedDeletedBlockInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/ReceivedDeletedBlockInfo.java @@ -25,22 +25,47 @@ import java.io.IOException; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; /** - * A data structure to store Block and delHints together, used to send - * received/deleted ACKs. + * A data structure to store the blocks in an incremental block report. 
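The BlockStatus enum defined just below assigns each state an integer code that the Writable path serializes as a vint (the protobuf mapping in PBHelper above carries the same three states). A small illustrative check of the getCode()/fromCode() round trip, not part of the patch:

    import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo.BlockStatus;

    final class BlockStatusRoundTrip {
      public static void main(String[] args) {
        // RECEIVING_BLOCK=1, RECEIVED_BLOCK=2, DELETED_BLOCK=3 per the enum below.
        for (BlockStatus s : BlockStatus.values()) {
          if (BlockStatus.fromCode(s.getCode()) != s) {
            throw new AssertionError("code round trip failed for " + s);
          }
        }
      }
    }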
*/ public class ReceivedDeletedBlockInfo implements Writable { Block block; + BlockStatus status; String delHints; - public final static String TODELETE_HINT = "-"; + public static enum BlockStatus { + RECEIVING_BLOCK(1), + RECEIVED_BLOCK(2), + DELETED_BLOCK(3); + + private final int code; + BlockStatus(int code) { + this.code = code; + } + + public int getCode() { + return code; + } + + public static BlockStatus fromCode(int code) { + for (BlockStatus bs : BlockStatus.values()) { + if (bs.code == code) { + return bs; + } + } + return null; + } + } public ReceivedDeletedBlockInfo() { } - public ReceivedDeletedBlockInfo(Block blk, String delHints) { + public ReceivedDeletedBlockInfo( + Block blk, BlockStatus status, String delHints) { this.block = blk; + this.status = status; this.delHints = delHints; } @@ -60,13 +85,19 @@ public class ReceivedDeletedBlockInfo implements Writable { this.delHints = hints; } + public BlockStatus getStatus() { + return status; + } + public boolean equals(Object o) { if (!(o instanceof ReceivedDeletedBlockInfo)) { return false; } ReceivedDeletedBlockInfo other = (ReceivedDeletedBlockInfo) o; return this.block.equals(other.getBlock()) - && this.delHints.equals(other.delHints); + && this.status == other.status + && (this.delHints == other.delHints || + this.delHints != null && this.delHints.equals(other.delHints)); } public int hashCode() { @@ -79,23 +110,30 @@ public class ReceivedDeletedBlockInfo implements Writable { } public boolean isDeletedBlock() { - return delHints.equals(TODELETE_HINT); + return status == BlockStatus.DELETED_BLOCK; } @Override public void write(DataOutput out) throws IOException { this.block.write(out); - Text.writeString(out, this.delHints); + WritableUtils.writeVInt(out, this.status.code); + if (this.status == BlockStatus.DELETED_BLOCK) { + Text.writeString(out, this.delHints); + } } @Override public void readFields(DataInput in) throws IOException { this.block = new Block(); this.block.readFields(in); - this.delHints = Text.readString(in); + this.status = BlockStatus.fromCode(WritableUtils.readVInt(in)); + if (this.status == BlockStatus.DELETED_BLOCK) { + this.delHints = Text.readString(in); + } } public String toString() { - return block.toString() + ", delHint: " + delHints; + return block.toString() + ", status: " + status + + ", delHint: " + delHints; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/ReceivedDeletedBlockInfoWritable.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/ReceivedDeletedBlockInfoWritable.java index 5d37890c7fa..02bf84c72a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/ReceivedDeletedBlockInfoWritable.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocolR23Compatible/ReceivedDeletedBlockInfoWritable.java @@ -24,8 +24,10 @@ import java.io.IOException; import org.apache.hadoop.hdfs.protocolR23Compatible.BlockWritable; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; +import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo.BlockStatus; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; /** * A data structure to store Block and delHints together, used to send @@ -33,33 +35,43 @@ import org.apache.hadoop.io.Writable; */ public class ReceivedDeletedBlockInfoWritable implements Writable { 
BlockWritable block; + int statusCode; String delHints; - public final static String TODELETE_HINT = "-"; public ReceivedDeletedBlockInfoWritable() { } - public ReceivedDeletedBlockInfoWritable(BlockWritable blk, String delHints) { + public ReceivedDeletedBlockInfoWritable( + BlockWritable blk, int statusCode, String delHints) { this.block = blk; + this.statusCode = statusCode; this.delHints = delHints; } + @Override public void write(DataOutput out) throws IOException { this.block.write(out); - Text.writeString(out, this.delHints); + WritableUtils.writeVInt(out, this.statusCode); + if (this.statusCode == BlockStatus.DELETED_BLOCK.getCode()) { + Text.writeString(out, this.delHints); + } } @Override public void readFields(DataInput in) throws IOException { this.block = new BlockWritable(); this.block.readFields(in); - this.delHints = Text.readString(in); + this.statusCode = WritableUtils.readVInt(in); + if (this.statusCode == BlockStatus.DELETED_BLOCK.getCode()) { + this.delHints = Text.readString(in); + } } public String toString() { - return block.toString() + ", delHint: " + delHints; + return block.toString() + ", statusCode: " + statusCode + + ", delHint: " + delHints; } public static ReceivedDeletedBlockInfo[] convert( @@ -83,13 +95,16 @@ public class ReceivedDeletedBlockInfoWritable implements Writable { } public ReceivedDeletedBlockInfo convert() { - return new ReceivedDeletedBlockInfo(block.convert(), delHints); + return new ReceivedDeletedBlockInfo(block.convert(), + BlockStatus.fromCode(statusCode), delHints); } public static ReceivedDeletedBlockInfoWritable convert( ReceivedDeletedBlockInfo b) { if (b == null) return null; - return new ReceivedDeletedBlockInfoWritable(BlockWritable.convert(b - .getBlock()), b.getDelHints()); + return new ReceivedDeletedBlockInfoWritable( + BlockWritable.convert(b.getBlock()), + b.getStatus().getCode(), + b.getDelHints()); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto index 6426de95ba7..124bb5514e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto @@ -213,12 +213,16 @@ message BlockReportResponseProto { /** * Data structure to send received or deleted block information * from datanode to namenode. - * - * deleteHint set to "-" indicates block deletion. - * other deleteHint indicates block addition. */ message ReceivedDeletedBlockInfoProto { + enum BlockStatus { + RECEIVING = 1; // block being created + RECEIVED = 2; // block creation complete + DELETED = 3; + } + required BlockProto block = 1; + required BlockStatus status = 3; optional string deleteHint = 2; } @@ -329,7 +333,9 @@ service DatanodeProtocolService { rpc blockReport(BlockReportRequestProto) returns(BlockReportResponseProto); /** - * Report from datanode about recently received or deleted block + * Incremental block report from the DN. This contains info about recently + * received and deleted blocks, as well as when blocks start being + * received. 
*/ rpc blockReceivedAndDeleted(BlockReceivedAndDeletedRequestProto) returns(BlockReceivedAndDeletedResponseProto); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java index 478c7909410..f28648189dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/AppendTestUtil.java @@ -113,8 +113,14 @@ public class AppendTestUtil { int i = -1; try { final FileStatus status = fs.getFileStatus(p); - TestCase.assertEquals(length, status.getLen()); - InputStream in = fs.open(p); + FSDataInputStream in = fs.open(p); + if (in.getWrappedStream() instanceof DFSInputStream) { + long len = ((DFSInputStream)in.getWrappedStream()).getFileLength(); + TestCase.assertEquals(length, len); + } else { + TestCase.assertEquals(length, status.getLen()); + } + for(i++; i < length; i++) { TestCase.assertEquals((byte)i, (byte)in.read()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java index 7d15900756a..ae7e80676e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java @@ -884,7 +884,8 @@ public class NNThroughputBenchmark { nameNodeProto.blockReceivedAndDeleted(receivedDNReg, nameNode .getNamesystem().getBlockPoolId(), new ReceivedDeletedBlockInfo[] { new ReceivedDeletedBlockInfo( - blocks[i], DataNode.EMPTY_DEL_HINT) }); + blocks[i], ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, + null) }); } } return blocks.length; @@ -999,7 +1000,8 @@ public class NNThroughputBenchmark { nameNodeProto.blockReceivedAndDeleted(datanodes[dnIdx].dnRegistration, loc .getBlock().getBlockPoolId(), new ReceivedDeletedBlockInfo[] { new ReceivedDeletedBlockInfo(loc - .getBlock().getLocalBlock(), "") }); + .getBlock().getLocalBlock(), + ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, null) }); } } return prevBlock; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java index 33a71294571..54df2c77659 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java @@ -107,7 +107,9 @@ public class TestDeadDatanode { DatanodeProtocol dnp = cluster.getNameNodeRpc(); ReceivedDeletedBlockInfo[] blocks = { new ReceivedDeletedBlockInfo( - new Block(0), "") }; + new Block(0), + ReceivedDeletedBlockInfo.BlockStatus.RECEIVED_BLOCK, + null) }; // Ensure blockReceived call from dead datanode is rejected with IOException try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java new file mode 100644 index 00000000000..ce7347cdf06 --- /dev/null +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java @@ -0,0 +1,237 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; +import static org.junit.Assert.assertTrue; + +import java.security.PrivilegedExceptionAction; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.AppendTestUtil; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.log4j.Level; +import org.junit.Ignore; +import org.junit.Test; + +/** + * Test cases regarding pipeline recovery during NN failover. + */ +public class TestPipelinesFailover { + static { + ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); + ((Log4JLogger)LogFactory.getLog(BlockManager.class)).getLogger().setLevel(Level.ALL); + ((Log4JLogger)LogFactory.getLog( + "org.apache.hadoop.io.retry.RetryInvocationHandler")).getLogger().setLevel(Level.ALL); + + ((Log4JLogger)NameNode.stateChangeLog).getLogger().setLevel(Level.ALL); + } + + protected static final Log LOG = LogFactory.getLog( + TestPipelinesFailover.class); + private static final Path TEST_PATH = + new Path("/test-file"); + private static final int BLOCK_SIZE = 4096; + private static final int BLOCK_AND_A_HALF = BLOCK_SIZE * 3 / 2; + + /** + * Tests continuing a write pipeline over a failover. + */ + @Test(timeout=30000) + public void testWriteOverFailover() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + // Don't check replication periodically. 
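+    // (dfs.namenode.replication.interval is in seconds, so 1000 keeps the
+    // replication monitor quiet for the length of this 30-second test)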
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000); + + FSDataOutputStream stm = null; + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(3) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + Thread.sleep(500); + + LOG.info("Starting with NN 0 active"); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); + stm = fs.create(TEST_PATH); + + // write a block and a half + AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF); + + // Make sure all of the blocks are written out before failover. + stm.hflush(); + + LOG.info("Failing over to NN 1"); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + assertTrue(fs.exists(TEST_PATH)); + FSNamesystem ns1 = cluster.getNameNode(1).getNamesystem(); + BlockManagerTestUtil.updateState(ns1.getBlockManager()); + assertEquals(0, ns1.getPendingReplicationBlocks()); + assertEquals(0, ns1.getCorruptReplicaBlocks()); + assertEquals(0, ns1.getMissingBlocksCount()); + + // write another block and a half + AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF); + + stm.close(); + stm = null; + + AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE * 3); + } finally { + IOUtils.closeStream(stm); + cluster.shutdown(); + } + } + + /** + * Tests continuing a write pipeline over a failover when a DN fails + * after the failover - ensures that updating the pipeline succeeds + * even when the pipeline was constructed on a different NN. + */ + @Test(timeout=30000) + public void testWriteOverFailoverWithDnFail() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + + FSDataOutputStream stm = null; + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(5) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + Thread.sleep(500); + + LOG.info("Starting with NN 0 active"); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); + stm = fs.create(TEST_PATH); + + // write a block and a half + AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF); + + // Make sure all the blocks are written before failover + stm.hflush(); + + LOG.info("Failing over to NN 1"); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + assertTrue(fs.exists(TEST_PATH)); + + cluster.stopDataNode(0); + + // write another block and a half + AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF); + stm.hflush(); // TODO: see above + + LOG.info("Failing back to NN 0"); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + cluster.stopDataNode(1); + + AppendTestUtil.write(stm, BLOCK_AND_A_HALF*2, BLOCK_AND_A_HALF); + stm.hflush(); // TODO: see above + + + stm.close(); + stm = null; + + AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF * 3); + } finally { + IOUtils.closeStream(stm); + cluster.shutdown(); + } + } + + /** + * Tests lease recovery if a client crashes. This approximates the + * use case of HBase WALs being recovered after a NN failover. + */ + @Test(timeout=30000) + public void testLeaseRecoveryAfterFailover() throws Exception { + final Configuration conf = new Configuration(); + // Disable permissions so that another user can recover the lease. 
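+    // (otherwise the "otheruser" client created below would be denied
+    // permission to recoverLease a file owned by the test user)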
+ conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + + FSDataOutputStream stm = null; + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(3) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + Thread.sleep(500); + + LOG.info("Starting with NN 0 active"); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); + stm = fs.create(TEST_PATH); + + // write a block and a half + AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF); + stm.hflush(); + + LOG.info("Failing over to NN 1"); + + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + assertTrue(fs.exists(TEST_PATH)); + + FileSystem fsOtherUser = UserGroupInformation.createUserForTesting( + "otheruser", new String[] { "othergroup"}) + .doAs(new PrivilegedExceptionAction() { + @Override + public FileSystem run() throws Exception { + return HATestUtil.configureFailoverFs(cluster, conf); + } + }); + ((DistributedFileSystem)fsOtherUser).recoverLease(TEST_PATH); + + AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF); + } finally { + IOUtils.closeStream(stm); + cluster.shutdown(); + } + } + +} From 9a8f119741debce684d8d5e0aef158f6e43f5e56 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Mon, 30 Jan 2012 20:32:35 +0000 Subject: [PATCH 104/177] Amend HDFS-2841 to include new file which was omitted from original commit. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1237971 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/hdfs/tools/DFSHAAdmin.java | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java new file mode 100644 index 00000000000..aae99de8aff --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.tools; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.ha.HAAdmin; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.util.ToolRunner; + +/** + * Class to extend HAAdmin to do a little bit of HDFS-specific configuration. 
+ */ +public class DFSHAAdmin extends HAAdmin { + + private static final Log LOG = + LogFactory.getLog(DFSHAAdmin.class); + + @Override + public void setConf(Configuration conf) { + if (conf != null) { + // Make a copy so we don't mutate it. Also use an HdfsConfiguration to + // force loading of hdfs-site.xml. + conf = new HdfsConfiguration(conf); + String nameNodePrincipal = conf.get( + DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY, ""); + if (LOG.isDebugEnabled()) { + LOG.debug("Using NN principal: " + nameNodePrincipal); + } + + conf.set(CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_USER_NAME_KEY, + nameNodePrincipal); + } + super.setConf(conf); + } + + public static void main(String[] argv) throws Exception { + int res = ToolRunner.run(new DFSHAAdmin(), argv); + System.exit(res); + } +} From 5c156519dfc1be193a9b7fc2aa450ed1f774b60f Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Mon, 30 Jan 2012 22:27:42 +0000 Subject: [PATCH 105/177] HADOOP-7983. HA: failover should be able to pass args to fencers. Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1238049 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/ha/FenceMethod.java | 6 +- .../java/org/apache/hadoop/ha/NodeFencer.java | 7 +- .../apache/hadoop/ha/ShellCommandFencer.java | 18 ++- .../apache/hadoop/ha/SshFenceByTcpPort.java | 104 +++++++----------- .../org/apache/hadoop/ha/TestNodeFencer.java | 53 +++++++-- .../hadoop/ha/TestShellCommandFencer.java | 45 ++++++-- .../hadoop/ha/TestSshFenceByTcpPort.java | 91 ++++++++------- 8 files changed, 194 insertions(+), 132 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index b6bd6ed918b..f881067d0a8 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -34,3 +34,5 @@ HADOOP-7970. HAServiceProtocol methods must throw IOException. HADOOP-7992. Add ZKClient library to facilitate leader election. (Bikas Saha via suresh). + +HADOOP-7983. HA: failover should be able to pass args to fencers. (eli) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FenceMethod.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FenceMethod.java index c448241a835..d8bda1402fa 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FenceMethod.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FenceMethod.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.ha; +import java.net.InetSocketAddress; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configurable; @@ -52,6 +54,7 @@ public interface FenceMethod { /** * Attempt to fence the target node. 
+ * @param serviceAddr the address (host:ipcport) of the service to fence * @param args the configured arguments, which were checked at startup by * {@link #checkArgs(String)} * @return true if fencing was successful, false if unsuccessful or @@ -59,5 +62,6 @@ public interface FenceMethod { * @throws BadFencingConfigurationException if the configuration was * determined to be invalid only at runtime */ - public boolean tryFence(String args) throws BadFencingConfigurationException; + public boolean tryFence(InetSocketAddress serviceAddr, String args) + throws BadFencingConfigurationException; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java index 61ef950c05b..f4cadb3828e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.ha; +import java.net.InetSocketAddress; import java.util.List; import java.util.Map; import java.util.regex.Matcher; @@ -67,7 +68,7 @@ public class NodeFencer { private static final Log LOG = LogFactory.getLog(NodeFencer.class); /** - * Standard fencing methods included with HDFS. + * Standard fencing methods included with Hadoop. */ private static final Map> STANDARD_METHODS = ImmutableMap.>of( @@ -81,14 +82,14 @@ public class NodeFencer { this.methods = parseMethods(conf); } - public boolean fence() { + public boolean fence(InetSocketAddress serviceAddr) { LOG.info("====== Beginning NameNode Fencing Process... ======"); int i = 0; for (FenceMethodWithArg method : methods) { LOG.info("Trying method " + (++i) + "/" + methods.size() +": " + method); try { - if (method.method.tryFence(method.arg)) { + if (method.method.tryFence(serviceAddr, method.arg)) { LOG.info("====== Fencing successful by method " + method + " ======"); return true; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java index 07d11629a4c..ca81f23a187 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java @@ -19,11 +19,16 @@ package org.apache.hadoop.ha; import java.io.IOException; import java.lang.reflect.Field; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.util.StringUtils; import com.google.common.annotations.VisibleForTesting; @@ -70,9 +75,18 @@ public class ShellCommandFencer } @Override - public boolean tryFence(String cmd) { + public boolean tryFence(InetSocketAddress serviceAddr, String cmd) { + List cmdList = Arrays.asList(cmd.split("\\s+")); + + // Create arg list with service as the first argument + List argList = new ArrayList(); + argList.add(cmdList.get(0)); + argList.add(serviceAddr.getHostName() + ":" + serviceAddr.getPort()); + argList.addAll(cmdList.subList(1, cmdList.size())); + String cmdWithSvc = StringUtils.join(" ", argList); + ProcessBuilder builder = new ProcessBuilder( - "bash", "-e", "-c", cmd); + "bash", "-e", 
"-c", cmdWithSvc); setConfAsEnvVars(builder.environment()); Process p; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java index 48bb59c2943..c9272491ebf 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java @@ -18,8 +18,7 @@ package org.apache.hadoop.ha; import java.io.IOException; -import java.net.InetAddress; -import java.net.UnknownHostException; +import java.net.InetSocketAddress; import java.util.Collection; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -48,14 +47,9 @@ import com.jcraft.jsch.Session; *

        * This fencing mechanism is configured as following in the fencing method * list: - * sshfence([username@]nnhost[:ssh-port], target-port) - * where the first argument specifies the username, host, and port to ssh - * into, and the second argument specifies the port on which the target - * NN process is listening on. - *

        - * For example, sshfence(other-nn, 8020) will SSH into - * other-nn as the current user on the standard SSH port, - * then kill whatever process is listening on port 8020. + * sshfence([[username][:ssh-port]]) + * where the optional argument specifies the username and port to use + * with ssh. *

        * In order to achieve passwordless SSH, the operator must also configure * dfs.namenode.ha.fencing.ssh.private-key-files to point to an @@ -75,25 +69,23 @@ public class SshFenceByTcpPort extends Configured "dfs.namenode.ha.fencing.ssh.private-key-files"; /** - * Verify that the arguments are parseable and that the host - * can be resolved. + * Verify that the argument, if given, in the conf is parseable. */ @Override public void checkArgs(String argStr) throws BadFencingConfigurationException { - Args args = new Args(argStr); - try { - InetAddress.getByName(args.host); - } catch (UnknownHostException e) { - throw new BadFencingConfigurationException( - "Unknown host: " + args.host); + if (argStr != null) { + // Use a dummy service when checking the arguments defined + // in the configuration are parseable. + Args args = new Args(new InetSocketAddress("localhost", 8020), argStr); } } @Override - public boolean tryFence(String argsStr) + public boolean tryFence(InetSocketAddress serviceAddr, String argsStr) throws BadFencingConfigurationException { - Args args = new Args(argsStr); - + + Args args = new Args(serviceAddr, argsStr); + Session session; try { session = createSession(args); @@ -155,11 +147,11 @@ public class SshFenceByTcpPort extends Configured "Verifying whether it is running using nc..."); rc = execCommand(session, "nc -z localhost 8020"); if (rc == 0) { - // the NN is still listening - we are unable to fence - LOG.warn("Unable to fence NN - it is running but we cannot kill it"); + // the service is still listening - we are unable to fence + LOG.warn("Unable to fence - it is running but we cannot kill it"); return false; } else { - LOG.info("Verified that the NN is down."); + LOG.info("Verified that the service is down."); return true; } } else { @@ -189,7 +181,6 @@ public class SshFenceByTcpPort extends Configured exec.setCommand(cmd); exec.setInputStream(null); exec.connect(); - // Pump stdout of the command to our WARN logs StreamPumper outPumper = new StreamPumper(LOG, cmd + " via ssh", @@ -233,50 +224,37 @@ public class SshFenceByTcpPort extends Configured */ @VisibleForTesting static class Args { - private static final Pattern USER_HOST_PORT_RE = Pattern.compile( - "(?:(.+?)@)?([^:]+?)(?:\\:(\\d+))?"); + private static final Pattern USER_PORT_RE = Pattern.compile( + "([^:]+?)?(?:\\:(\\d+))?"); private static final int DEFAULT_SSH_PORT = 22; - final String user; - final String host; - final int sshPort; - final int targetPort; + String host; + int targetPort; + String user; + int sshPort; - public Args(String args) throws BadFencingConfigurationException { - if (args == null) { - throw new BadFencingConfigurationException( - "Must specify args for ssh fencing configuration"); - } - String[] argList = args.split(",\\s*"); - if (argList.length != 2) { - throw new BadFencingConfigurationException( - "Incorrect number of arguments: " + args); - } - - // Parse SSH destination. 
- String sshDestArg = argList[0]; - Matcher m = USER_HOST_PORT_RE.matcher(sshDestArg); - if (!m.matches()) { - throw new BadFencingConfigurationException( - "Unable to parse SSH destination: "+ sshDestArg); - } - if (m.group(1) != null) { - user = m.group(1); - } else { - user = System.getProperty("user.name"); - } - - host = m.group(2); + public Args(InetSocketAddress serviceAddr, String arg) + throws BadFencingConfigurationException { + host = serviceAddr.getHostName(); + targetPort = serviceAddr.getPort(); + user = System.getProperty("user.name"); + sshPort = DEFAULT_SSH_PORT; - if (m.group(3) != null) { - sshPort = parseConfiggedPort(m.group(3)); - } else { - sshPort = DEFAULT_SSH_PORT; + // Parse optional user and ssh port + if (arg != null && !"".equals(arg)) { + Matcher m = USER_PORT_RE.matcher(arg); + if (!m.matches()) { + throw new BadFencingConfigurationException( + "Unable to parse user and SSH port: "+ arg); + } + if (m.group(1) != null) { + user = m.group(1); + } + if (m.group(2) != null) { + sshPort = parseConfiggedPort(m.group(2)); + } } - - // Parse target port. - targetPort = parseConfiggedPort(argList[1]); } private Integer parseConfiggedPort(String portStr) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java index 93baf0dc2f6..4a9ffae9b96 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java @@ -19,6 +19,7 @@ package org.apache.hadoop.ha; import static org.junit.Assert.*; +import java.net.InetSocketAddress; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -42,8 +43,9 @@ public class TestNodeFencer { public void testSingleFencer() throws BadFencingConfigurationException { NodeFencer fencer = setupFencer( AlwaysSucceedFencer.class.getName() + "(foo)"); - assertTrue(fencer.fence()); + assertTrue(fencer.fence(new InetSocketAddress("host", 1234))); assertEquals(1, AlwaysSucceedFencer.fenceCalled); + assertEquals("host:1234", AlwaysSucceedFencer.fencedSvc); assertEquals("foo", AlwaysSucceedFencer.callArgs.get(0)); } @@ -52,7 +54,7 @@ public class TestNodeFencer { NodeFencer fencer = setupFencer( AlwaysSucceedFencer.class.getName() + "(foo)\n" + AlwaysSucceedFencer.class.getName() + "(bar)\n"); - assertTrue(fencer.fence()); + assertTrue(fencer.fence(new InetSocketAddress("host", 1234))); // Only one call, since the first fencer succeeds assertEquals(1, AlwaysSucceedFencer.fenceCalled); assertEquals("foo", AlwaysSucceedFencer.callArgs.get(0)); @@ -66,10 +68,12 @@ public class TestNodeFencer { " # the next one will always fail\n" + " " + AlwaysFailFencer.class.getName() + "(foo) # <- fails\n" + AlwaysSucceedFencer.class.getName() + "(bar) \n"); - assertTrue(fencer.fence()); + assertTrue(fencer.fence(new InetSocketAddress("host", 1234))); // One call to each, since top fencer fails assertEquals(1, AlwaysFailFencer.fenceCalled); + assertEquals("host:1234", AlwaysFailFencer.fencedSvc); assertEquals(1, AlwaysSucceedFencer.fenceCalled); + assertEquals("host:1234", AlwaysSucceedFencer.fencedSvc); assertEquals("foo", AlwaysFailFencer.callArgs.get(0)); assertEquals("bar", AlwaysSucceedFencer.callArgs.get(0)); } @@ -78,18 +82,43 @@ public class TestNodeFencer { public void testArglessFencer() throws BadFencingConfigurationException { NodeFencer fencer = setupFencer( 
AlwaysSucceedFencer.class.getName()); - assertTrue(fencer.fence()); + assertTrue(fencer.fence(new InetSocketAddress("host", 1234))); // One call to each, since top fencer fails assertEquals(1, AlwaysSucceedFencer.fenceCalled); + assertEquals("host:1234", AlwaysSucceedFencer.fencedSvc); assertEquals(null, AlwaysSucceedFencer.callArgs.get(0)); } - + @Test - public void testShortName() throws BadFencingConfigurationException { + public void testShortNameShell() throws BadFencingConfigurationException { NodeFencer fencer = setupFencer("shell(true)"); - assertTrue(fencer.fence()); + assertTrue(fencer.fence(new InetSocketAddress("host", 1234))); } - + + @Test + public void testShortNameSsh() throws BadFencingConfigurationException { + NodeFencer fencer = setupFencer("sshfence"); + assertFalse(fencer.fence(new InetSocketAddress("host", 1234))); + } + + @Test + public void testShortNameSshWithUser() throws BadFencingConfigurationException { + NodeFencer fencer = setupFencer("sshfence(user)"); + assertFalse(fencer.fence(new InetSocketAddress("host", 1234))); + } + + @Test + public void testShortNameSshWithPort() throws BadFencingConfigurationException { + NodeFencer fencer = setupFencer("sshfence(:123)"); + assertFalse(fencer.fence(new InetSocketAddress("host", 1234))); + } + + @Test + public void testShortNameSshWithUserPort() throws BadFencingConfigurationException { + NodeFencer fencer = setupFencer("sshfence(user:123)"); + assertFalse(fencer.fence(new InetSocketAddress("host", 1234))); + } + private NodeFencer setupFencer(String confStr) throws BadFencingConfigurationException { System.err.println("Testing configuration:\n" + confStr); @@ -105,10 +134,12 @@ public class TestNodeFencer { public static class AlwaysSucceedFencer extends Configured implements FenceMethod { static int fenceCalled = 0; + static String fencedSvc; static List callArgs = Lists.newArrayList(); @Override - public boolean tryFence(String args) { + public boolean tryFence(InetSocketAddress serviceAddr, String args) { + fencedSvc = serviceAddr.getHostName() + ":" + serviceAddr.getPort(); callArgs.add(args); fenceCalled++; return true; @@ -125,10 +156,12 @@ public class TestNodeFencer { public static class AlwaysFailFencer extends Configured implements FenceMethod { static int fenceCalled = 0; + static String fencedSvc; static List callArgs = Lists.newArrayList(); @Override - public boolean tryFence(String args) { + public boolean tryFence(InetSocketAddress serviceAddr, String args) { + fencedSvc = serviceAddr.getHostName() + ":" + serviceAddr.getPort(); callArgs.add(args); fenceCalled++; return false; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java index f02aa8a14b0..49bae039ecc 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java @@ -19,6 +19,8 @@ package org.apache.hadoop.ha; import static org.junit.Assert.*; +import java.net.InetSocketAddress; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.StringUtils; import org.junit.Before; @@ -55,14 +57,15 @@ public class TestShellCommandFencer { */ @Test public void testBasicSuccessFailure() { - assertTrue(fencer.tryFence("exit 0")); - assertFalse(fencer.tryFence("exit 1")); + InetSocketAddress addr = new InetSocketAddress("host", 
1234); + assertTrue(fencer.tryFence(addr, "echo")); + assertFalse(fencer.tryFence(addr, "exit 1")); // bad path should also fail - assertFalse(fencer.tryFence("xxxxxxxxxxxx")); + assertFalse(fencer.tryFence(addr, "xxxxxxxxxxxx")); } @Test - public void testCheckArgs() { + public void testCheckNoArgs() { try { Configuration conf = new Configuration(); conf.set(NodeFencer.CONF_METHODS_KEY, "shell"); @@ -74,16 +77,31 @@ public class TestShellCommandFencer { confe.getMessage().contains("No argument passed")); } } - + + @Test + public void testCheckParensNoArgs() { + try { + Configuration conf = new Configuration(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell()"); + new NodeFencer(conf); + fail("Didn't throw when passing no args to shell"); + } catch (BadFencingConfigurationException confe) { + assertTrue( + "Unexpected exception:" + StringUtils.stringifyException(confe), + confe.getMessage().contains("Unable to parse line: 'shell()'")); + } + } + /** * Test that lines on stdout get passed as INFO * level messages */ @Test public void testStdoutLogging() { - assertTrue(fencer.tryFence("echo hello")); + InetSocketAddress addr = new InetSocketAddress("host", 1234); + assertTrue(fencer.tryFence(addr, "echo hello")); Mockito.verify(ShellCommandFencer.LOG).info( - Mockito.endsWith("echo hello: hello")); + Mockito.endsWith("echo hello: host:1234 hello")); } /** @@ -92,9 +110,10 @@ public class TestShellCommandFencer { */ @Test public void testStderrLogging() { - assertTrue(fencer.tryFence("echo hello >&2")); + InetSocketAddress addr = new InetSocketAddress("host", 1234); + assertTrue(fencer.tryFence(addr, "echo hello >&2")); Mockito.verify(ShellCommandFencer.LOG).warn( - Mockito.endsWith("echo hello >&2: hello")); + Mockito.endsWith("echo hello >&2: host:1234 hello")); } /** @@ -103,9 +122,10 @@ public class TestShellCommandFencer { */ @Test public void testConfAsEnvironment() { - fencer.tryFence("echo $in_fencing_tests"); + InetSocketAddress addr = new InetSocketAddress("host", 1234); + fencer.tryFence(addr, "echo $in_fencing_tests"); Mockito.verify(ShellCommandFencer.LOG).info( - Mockito.endsWith("echo $in...ing_tests: yessir")); + Mockito.endsWith("echo $in...ing_tests: host:1234 yessir")); } /** @@ -116,7 +136,8 @@ public class TestShellCommandFencer { */ @Test(timeout=10000) public void testSubprocessInputIsClosed() { - assertFalse(fencer.tryFence("read")); + InetSocketAddress addr = new InetSocketAddress("host", 1234); + assertFalse(fencer.tryFence(addr, "read")); } @Test diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java index d88d892ed85..f89df6a21ff 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestSshFenceByTcpPort.java @@ -19,9 +19,10 @@ package org.apache.hadoop.ha; import static org.junit.Assert.*; +import java.net.InetSocketAddress; + import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.ha.SshFenceByTcpPort.Args; import org.apache.log4j.Level; import org.junit.Assume; @@ -33,8 +34,10 @@ public class TestSshFenceByTcpPort { ((Log4JLogger)SshFenceByTcpPort.LOG).getLogger().setLevel(Level.ALL); } - private String TEST_FENCING_ARG = System.getProperty( - 
"test.TestSshFenceByTcpPort.arg", "localhost"); + private String TEST_FENCING_HOST = System.getProperty( + "test.TestSshFenceByTcpPort.host", "localhost"); + private String TEST_FENCING_PORT = System.getProperty( + "test.TestSshFenceByTcpPort.port", "8020"); private final String TEST_KEYFILE = System.getProperty( "test.TestSshFenceByTcpPort.key"); @@ -43,10 +46,12 @@ public class TestSshFenceByTcpPort { Assume.assumeTrue(isConfigured()); Configuration conf = new Configuration(); conf.set(SshFenceByTcpPort.CONF_IDENTITIES_KEY, TEST_KEYFILE); - FileSystem.setDefaultUri(conf, "localhost:8020"); SshFenceByTcpPort fence = new SshFenceByTcpPort(); fence.setConf(conf); - assertTrue(fence.tryFence(TEST_FENCING_ARG)); + assertTrue(fence.tryFence( + new InetSocketAddress(TEST_FENCING_HOST, + Integer.valueOf(TEST_FENCING_PORT)), + null)); } /** @@ -61,61 +66,65 @@ public class TestSshFenceByTcpPort { SshFenceByTcpPort fence = new SshFenceByTcpPort(); fence.setConf(conf); // Connect to Google's DNS server - not running ssh! - assertFalse(fence.tryFence("8.8.8.8, 1234")); + assertFalse(fence.tryFence(new InetSocketAddress("8.8.8.8", 1234), "")); } @Test public void testArgsParsing() throws BadFencingConfigurationException { - Args args = new SshFenceByTcpPort.Args("foo@bar.com:1234, 5678"); - assertEquals("foo", args.user); - assertEquals("bar.com", args.host); - assertEquals(1234, args.sshPort); - assertEquals(5678, args.targetPort); + InetSocketAddress addr = new InetSocketAddress("bar.com", 1234); - args = new SshFenceByTcpPort.Args("foo@bar.com, 1234"); + Args args = new SshFenceByTcpPort.Args(addr, null); + assertEquals("bar.com", args.host); + assertEquals(1234, args.targetPort); + assertEquals(System.getProperty("user.name"), args.user); + assertEquals(22, args.sshPort); + + args = new SshFenceByTcpPort.Args(addr, ""); + assertEquals("bar.com", args.host); + assertEquals(1234, args.targetPort); + assertEquals(System.getProperty("user.name"), args.user); + assertEquals(22, args.sshPort); + + args = new SshFenceByTcpPort.Args(addr, "12345"); + assertEquals("bar.com", args.host); + assertEquals(1234, args.targetPort); + assertEquals("12345", args.user); + assertEquals(22, args.sshPort); + + args = new SshFenceByTcpPort.Args(addr, ":12345"); + assertEquals("bar.com", args.host); + assertEquals(1234, args.targetPort); + assertEquals(System.getProperty("user.name"), args.user); + assertEquals(12345, args.sshPort); + + args = new SshFenceByTcpPort.Args(addr, "foo:8020"); + assertEquals("bar.com", args.host); + assertEquals(1234, args.targetPort); assertEquals("foo", args.user); - assertEquals("bar.com", args.host); - assertEquals(22, args.sshPort); - assertEquals(1234, args.targetPort); - - args = new SshFenceByTcpPort.Args("bar.com, 1234"); - assertEquals(System.getProperty("user.name"), args.user); - assertEquals("bar.com", args.host); - assertEquals(22, args.sshPort); - assertEquals(1234, args.targetPort); - - args = new SshFenceByTcpPort.Args("bar.com:1234, 12345"); - assertEquals(System.getProperty("user.name"), args.user); - assertEquals("bar.com", args.host); - assertEquals(1234, args.sshPort); - assertEquals(12345, args.targetPort); - - args = new SshFenceByTcpPort.Args("bar, 8020"); - assertEquals(8020, args.targetPort); + assertEquals(8020, args.sshPort); } @Test public void testBadArgsParsing() throws BadFencingConfigurationException { - assertBadArgs(null); - assertBadArgs(""); - assertBadArgs("bar.com:"); - assertBadArgs("bar.com:x"); - assertBadArgs("foo.com, x"); - 
assertBadArgs("foo.com,"); - assertBadArgs("foo.com, "); + assertBadArgs(":"); // No port specified + assertBadArgs("bar.com:"); // " + assertBadArgs(":xx"); // Port does not parse + assertBadArgs("bar.com:xx"); // " } private void assertBadArgs(String argStr) { + InetSocketAddress addr = new InetSocketAddress("bar.com", 1234); try { - new Args(argStr); + new Args(addr, argStr); fail("Did not fail on bad args: " + argStr); } catch (BadFencingConfigurationException e) { - // expected + // Expected } } private boolean isConfigured() { - return (TEST_FENCING_ARG != null && !TEST_FENCING_ARG.isEmpty()) && - (TEST_KEYFILE != null && !TEST_KEYFILE.isEmpty()); + return (TEST_FENCING_HOST != null && !TEST_FENCING_HOST.isEmpty()) && + (TEST_FENCING_PORT != null && !TEST_FENCING_PORT.isEmpty()) && + (TEST_KEYFILE != null && !TEST_KEYFILE.isEmpty()); } } From 68843484448fcf45e2c48bed2426d9844ee8128c Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Mon, 30 Jan 2012 22:42:46 +0000 Subject: [PATCH 106/177] HADOOP-7938. HA: the FailoverController should optionally fence the active during failover. Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1238058 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 3 + .../apache/hadoop/ha/FailoverController.java | 81 +++++--- .../java/org/apache/hadoop/ha/HAAdmin.java | 74 +++++-- .../hadoop/ha/HAServiceProtocolHelper.java | 5 - .../java/org/apache/hadoop/ha/NodeFencer.java | 12 +- .../hadoop/ha/TestFailoverController.java | 186 ++++++++++++++++-- .../org/apache/hadoop/ha/TestHAAdmin.java | 72 ++++++- .../org/apache/hadoop/ha/TestNodeFencer.java | 5 +- .../namenode/ha/TestHAStateTransitions.java | 24 ++- 9 files changed, 383 insertions(+), 79 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index f881067d0a8..f62c7177214 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -36,3 +36,6 @@ HADOOP-7992. Add ZKClient library to facilitate leader election. (Bikas Saha via suresh). HADOOP-7983. HA: failover should be able to pass args to fencers. (eli) + +HADOOP-7938. HA: the FailoverController should optionally fence the +active during failover. 
(eli) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java index 0060567ebbd..711296d342f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ha; import java.io.IOException; +import java.net.InetSocketAddress; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -26,6 +27,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import com.google.common.base.Preconditions; + /** * The FailOverController is responsible for electing an active service * on startup or when the current active is changing (eg due to failure), @@ -48,13 +51,13 @@ public class FailoverController { * @throws FailoverFailedException if we should avoid failover */ private static void preFailoverChecks(HAServiceProtocol toSvc, - String toSvcName) + InetSocketAddress toSvcAddr) throws FailoverFailedException { HAServiceState toSvcState; try { toSvcState = toSvc.getServiceState(); } catch (IOException e) { - String msg = "Unable to get service state for " + toSvcName; + String msg = "Unable to get service state for " + toSvcAddr; LOG.error(msg, e); throw new FailoverFailedException(msg, e); } @@ -69,7 +72,7 @@ public class FailoverController { "Can't failover to an unhealthy service", hce); } catch (IOException e) { throw new FailoverFailedException( - "Got an io exception", e); + "Got an IO exception", e); } // TODO(HA): ask toSvc if it's capable. Eg not in SM. } @@ -79,26 +82,42 @@ public class FailoverController { * then try to failback. * * @param fromSvc currently active service - * @param fromSvcName name of currently active service + * @param fromSvcAddr addr of the currently active service * @param toSvc service to make active - * @param toSvcName name of service to make active + * @param toSvcAddr addr of the service to make active + * @param fencer for fencing fromSvc + * @param forceFence to fence fromSvc even if not strictly necessary * @throws FailoverFailedException if the failover fails */ - public static void failover(HAServiceProtocol fromSvc, String fromSvcName, - HAServiceProtocol toSvc, String toSvcName) + public static void failover(HAServiceProtocol fromSvc, + InetSocketAddress fromSvcAddr, + HAServiceProtocol toSvc, + InetSocketAddress toSvcAddr, + NodeFencer fencer, boolean forceFence) throws FailoverFailedException { - preFailoverChecks(toSvc, toSvcName); + Preconditions.checkArgument(fencer != null, "failover requires a fencer"); + preFailoverChecks(toSvc, toSvcAddr); // Try to make fromSvc standby + boolean tryFence = true; try { HAServiceProtocolHelper.transitionToStandby(fromSvc); + // We should try to fence if we failed or it was forced + tryFence = forceFence ? 
true : false; } catch (ServiceFailedException sfe) { - LOG.warn("Unable to make " + fromSvcName + " standby (" + + LOG.warn("Unable to make " + fromSvcAddr + " standby (" + sfe.getMessage() + ")"); - } catch (Exception e) { - LOG.warn("Unable to make " + fromSvcName + - " standby (unable to connect)", e); - // TODO(HA): fence fromSvc and unfence on failback + } catch (IOException ioe) { + LOG.warn("Unable to make " + fromSvcAddr + + " standby (unable to connect)", ioe); + } + + // Fence fromSvc if it's required or forced by the user + if (tryFence) { + if (!fencer.fence(fromSvcAddr)) { + throw new FailoverFailedException("Unable to fence " + + fromSvcAddr + ". Fencing failed."); + } } // Try to make toSvc active @@ -107,29 +126,31 @@ public class FailoverController { try { HAServiceProtocolHelper.transitionToActive(toSvc); } catch (ServiceFailedException sfe) { - LOG.error("Unable to make " + toSvcName + " active (" + - sfe.getMessage() + "). Failing back"); + LOG.error("Unable to make " + toSvcAddr + " active (" + + sfe.getMessage() + "). Failing back."); failed = true; cause = sfe; - } catch (Exception e) { - LOG.error("Unable to make " + toSvcName + - " active (unable to connect). Failing back", e); + } catch (IOException ioe) { + LOG.error("Unable to make " + toSvcAddr + + " active (unable to connect). Failing back.", ioe); failed = true; - cause = e; + cause = ioe; } - // Try to failback if we failed to make toSvc active + // We failed to make toSvc active if (failed) { - String msg = "Unable to failover to " + toSvcName; - try { - HAServiceProtocolHelper.transitionToActive(fromSvc); - } catch (ServiceFailedException sfe) { - msg = "Failback to " + fromSvcName + " failed (" + - sfe.getMessage() + ")"; - LOG.fatal(msg); - } catch (Exception e) { - msg = "Failback to " + fromSvcName + " failed (unable to connect)"; - LOG.fatal(msg); + String msg = "Unable to failover to " + toSvcAddr; + // Only try to failback if we didn't fence fromSvc + if (!tryFence) { + try { + // Unconditionally fence toSvc in case it is still trying to + // become active, eg we timed out waiting for its response. + failover(toSvc, toSvcAddr, fromSvc, fromSvcAddr, fencer, true); + } catch (FailoverFailedException ffe) { + msg += ". Failback to " + fromSvcAddr + + " failed (" + ffe.getMessage() + ")"; + LOG.fatal(msg); + } } throw new FailoverFailedException(msg, cause); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index 79edd11fe0f..714fe6c110c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -22,6 +22,13 @@ import java.io.PrintStream; import java.net.InetSocketAddress; import java.util.Map; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.CommandLineParser; +import org.apache.commons.cli.GnuParser; +import org.apache.commons.cli.ParseException; + +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.ipc.RPC; @@ -37,8 +44,11 @@ import com.google.common.collect.ImmutableMap; * mode, or to trigger a health-check. 
*/ @InterfaceAudience.Private + public abstract class HAAdmin extends Configured implements Tool { + private static final String FORCEFENCE = "forcefence"; + private static Map USAGE = ImmutableMap.builder() .put("-transitionToActive", @@ -46,8 +56,9 @@ public abstract class HAAdmin extends Configured implements Tool { .put("-transitionToStandby", new UsageInfo("", "Transitions the daemon into Standby state")) .put("-failover", - new UsageInfo(" ", - "Failover from the first daemon to the second")) + new UsageInfo("[--"+FORCEFENCE+"] ", + "Failover from the first daemon to the second.\n" + + "Unconditionally fence services if the "+FORCEFENCE+" option is used.")) .put("-getServiceState", new UsageInfo("", "Returns the state of the daemon")) .put("-checkHealth", @@ -111,20 +122,61 @@ public abstract class HAAdmin extends Configured implements Tool { private int failover(final String[] argv) throws IOException, ServiceFailedException { - if (argv.length != 3) { - errOut.println("failover: incorrect number of arguments"); + Configuration conf = getConf(); + boolean forceFence = false; + + Options failoverOpts = new Options(); + // "-failover" isn't really an option but we need to add + // it to appease CommandLineParser + failoverOpts.addOption("failover", false, "failover"); + failoverOpts.addOption(FORCEFENCE, false, "force fencing"); + + CommandLineParser parser = new GnuParser(); + CommandLine cmd; + + try { + cmd = parser.parse(failoverOpts, argv); + forceFence = cmd.hasOption(FORCEFENCE); + } catch (ParseException pe) { + errOut.println("failover: incorrect arguments"); + printUsage(errOut, "-failover"); + return -1; + } + + int numOpts = cmd.getOptions() == null ? 0 : cmd.getOptions().length; + final String[] args = cmd.getArgs(); + + if (numOpts > 2 || args.length != 2) { + errOut.println("failover: incorrect arguments"); printUsage(errOut, "-failover"); return -1; } - HAServiceProtocol proto1 = getProtocol(argv[1]); - HAServiceProtocol proto2 = getProtocol(argv[2]); + NodeFencer fencer; try { - FailoverController.failover(proto1, argv[1], proto2, argv[2]); - out.println("Failover from "+argv[1]+" to "+argv[2]+" successful"); + fencer = NodeFencer.create(conf); + } catch (BadFencingConfigurationException bfce) { + errOut.println("failover: incorrect fencing configuration: " + + bfce.getLocalizedMessage()); + return -1; + } + if (fencer == null) { + errOut.println("failover: no fencer configured"); + return -1; + } + + InetSocketAddress addr1 = NetUtils.createSocketAddr(args[0]); + InetSocketAddress addr2 = NetUtils.createSocketAddr(args[1]); + HAServiceProtocol proto1 = getProtocol(args[0]); + HAServiceProtocol proto2 = getProtocol(args[1]); + + try { + FailoverController.failover(proto1, addr1, proto2, addr2, + fencer, forceFence); + out.println("Failover from "+args[0]+" to "+args[1]+" successful"); } catch (FailoverFailedException ffe) { errOut.println("Failover failed: " + ffe.getLocalizedMessage()); - return 1; + return -1; } return 0; } @@ -142,7 +194,7 @@ public abstract class HAAdmin extends Configured implements Tool { HAServiceProtocolHelper.monitorHealth(proto); } catch (HealthCheckFailedException e) { errOut.println("Health check failed: " + e.getLocalizedMessage()); - return 1; + return -1; } return 0; } @@ -223,7 +275,7 @@ public abstract class HAAdmin extends Configured implements Tool { } errOut.println(cmd + " [" + usageInfo.args + "]: " + usageInfo.help); - return 1; + return 0; } private static class UsageInfo { diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java index c8de74269e3..b8ee7179519 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocolHelper.java @@ -19,18 +19,13 @@ package org.apache.hadoop.ha; import java.io.IOException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ipc.RemoteException; /** * Helper for making {@link HAServiceProtocol} RPC calls. This helper * unwraps the {@link RemoteException} to specific exceptions. - * */ @InterfaceAudience.Public @InterfaceStability.Evolving diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java index f4cadb3828e..85a5b424852 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java @@ -82,6 +82,15 @@ public class NodeFencer { this.methods = parseMethods(conf); } + public static NodeFencer create(Configuration conf) + throws BadFencingConfigurationException { + String confStr = conf.get(CONF_METHODS_KEY); + if (confStr == null) { + return null; + } + return new NodeFencer(conf); + } + public boolean fence(InetSocketAddress serviceAddr) { LOG.info("====== Beginning NameNode Fencing Process... 
======"); int i = 0; @@ -108,7 +117,7 @@ public class NodeFencer { } private static List parseMethods(Configuration conf) - throws BadFencingConfigurationException { + throws BadFencingConfigurationException { String confStr = conf.get(CONF_METHODS_KEY); String[] lines = confStr.split("\\s*\n\\s*"); @@ -130,7 +139,6 @@ public class NodeFencer { if ((m = CLASS_WITH_ARGUMENT.matcher(line)).matches()) { String className = m.group(1); String arg = m.group(2); - return createFenceMethod(conf, className, arg); } else if ((m = CLASS_WITHOUT_ARGUMENT.matcher(line)).matches()) { String className = m.group(1); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java index f4a6ff2427f..36aead56b95 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java @@ -26,6 +26,9 @@ import static org.mockito.Mockito.verify; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.ha.TestNodeFencer.AlwaysSucceedFencer; +import org.apache.hadoop.ha.TestNodeFencer.AlwaysFailFencer; +import static org.apache.hadoop.ha.TestNodeFencer.setupFencer; import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; @@ -35,6 +38,9 @@ import static org.junit.Assert.*; public class TestFailoverController { + private InetSocketAddress svc1Addr = new InetSocketAddress("svc1", 1234); + private InetSocketAddress svc2Addr = new InetSocketAddress("svc2", 5678); + private class DummyService implements HAServiceProtocol { HAServiceState state; @@ -55,36 +61,41 @@ public class TestFailoverController { } @Override - public void monitorHealth() throws HealthCheckFailedException { + public void monitorHealth() throws HealthCheckFailedException, IOException { // Do nothing } @Override - public void transitionToActive() throws ServiceFailedException { + public void transitionToActive() throws ServiceFailedException, IOException { state = HAServiceState.ACTIVE; } @Override - public void transitionToStandby() throws ServiceFailedException { + public void transitionToStandby() throws ServiceFailedException, IOException { state = HAServiceState.STANDBY; } @Override - public HAServiceState getServiceState() { + public HAServiceState getServiceState() throws IOException { return state; } } - + @Test public void testFailoverAndFailback() throws Exception { DummyService svc1 = new DummyService(HAServiceState.ACTIVE); DummyService svc2 = new DummyService(HAServiceState.STANDBY); + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); - FailoverController.failover(svc1, "svc1", svc2, "svc2"); + AlwaysSucceedFencer.fenceCalled = 0; + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + assertEquals(0, TestNodeFencer.AlwaysSucceedFencer.fenceCalled); assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); - FailoverController.failover(svc2, "svc2", svc1, "svc1"); + AlwaysSucceedFencer.fenceCalled = 0; + FailoverController.failover(svc2, svc2Addr, svc1, svc1Addr, fencer, false); + assertEquals(0, TestNodeFencer.AlwaysSucceedFencer.fenceCalled); 
assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); } @@ -93,8 +104,9 @@ public class TestFailoverController { public void testFailoverFromStandbyToStandby() throws Exception { DummyService svc1 = new DummyService(HAServiceState.STANDBY); DummyService svc2 = new DummyService(HAServiceState.STANDBY); + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); - FailoverController.failover(svc1, "svc1", svc2, "svc2"); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); } @@ -103,9 +115,10 @@ public class TestFailoverController { public void testFailoverFromActiveToActive() throws Exception { DummyService svc1 = new DummyService(HAServiceState.ACTIVE); DummyService svc2 = new DummyService(HAServiceState.ACTIVE); + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, "svc1", svc2, "svc2"); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); fail("Can't failover to an already active service"); } catch (FailoverFailedException ffe) { // Expected @@ -116,7 +129,7 @@ public class TestFailoverController { } @Test - public void testFailoverToUnhealthyServiceFails() throws Exception { + public void testFailoverToUnhealthyServiceFailsAndFailsback() throws Exception { DummyService svc1 = new DummyService(HAServiceState.ACTIVE); DummyService svc2 = new DummyService(HAServiceState.STANDBY) { @Override @@ -124,9 +137,10 @@ public class TestFailoverController { throw new HealthCheckFailedException("Failed!"); } }; + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, "svc1", svc2, "svc2"); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); fail("Failover to unhealthy service"); } catch (FailoverFailedException ffe) { // Expected @@ -144,17 +158,69 @@ public class TestFailoverController { } }; DummyService svc2 = new DummyService(HAServiceState.STANDBY); + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); + AlwaysSucceedFencer.fenceCalled = 0; try { - FailoverController.failover(svc1, "svc1", svc2, "svc2"); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); } catch (FailoverFailedException ffe) { fail("Faulty active prevented failover"); } - // svc1 still thinks they're active, that's OK, we'll fence them + + // svc1 still thinks it's active, that's OK, it was fenced + assertEquals(1, AlwaysSucceedFencer.fenceCalled); + assertEquals("svc1:1234", AlwaysSucceedFencer.fencedSvc); assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); } + @Test + public void testFailoverFromFaultyServiceFencingFailure() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE) { + @Override + public void transitionToStandby() throws ServiceFailedException { + throw new ServiceFailedException("Failed!"); + } + }; + DummyService svc2 = new DummyService(HAServiceState.STANDBY); + NodeFencer fencer = setupFencer(AlwaysFailFencer.class.getName()); + + AlwaysFailFencer.fenceCalled = 0; + try { + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + fail("Failed over even though fencing failed"); + } catch (FailoverFailedException ffe) { + // Expected + } + + assertEquals(1, 
AlwaysFailFencer.fenceCalled); + assertEquals("svc1:1234", AlwaysFailFencer.fencedSvc); + assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); + assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); + } + + @Test + public void testFencingFailureDuringFailover() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE); + DummyService svc2 = new DummyService(HAServiceState.STANDBY); + NodeFencer fencer = setupFencer(AlwaysFailFencer.class.getName()); + + AlwaysFailFencer.fenceCalled = 0; + try { + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, true); + fail("Failed over even though fencing requested and failed"); + } catch (FailoverFailedException ffe) { + // Expected + } + + // If fencing was requested and it failed we don't try to make + // svc2 active anyway, and we don't failback to svc1. + assertEquals(1, AlwaysFailFencer.fenceCalled); + assertEquals("svc1:1234", AlwaysFailFencer.fencedSvc); + assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); + assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); + } + private HAServiceProtocol getProtocol(String target) throws IOException { InetSocketAddress addr = NetUtils.createSocketAddr(target); @@ -166,17 +232,18 @@ public class TestFailoverController { } @Test - public void testFailoverFromNonExistantServiceSucceeds() throws Exception { + public void testFailoverFromNonExistantServiceWithFencer() throws Exception { HAServiceProtocol svc1 = getProtocol("localhost:1234"); DummyService svc2 = new DummyService(HAServiceState.STANDBY); + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, "svc1", svc2, "svc2"); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); } catch (FailoverFailedException ffe) { fail("Non-existant active prevented failover"); } - // Don't check svc1 (we can't reach it, but that's OK, we'll fence) + // Don't check svc1 because we can't reach it, but that's OK, it's been fenced. 
assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); } @@ -184,9 +251,10 @@ public class TestFailoverController { public void testFailoverToNonExistantServiceFails() throws Exception { DummyService svc1 = new DummyService(HAServiceState.ACTIVE); HAServiceProtocol svc2 = getProtocol("localhost:1234"); + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, "svc1", svc2, "svc2"); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); fail("Failed over to a non-existant standby"); } catch (FailoverFailedException ffe) { // Expected @@ -204,9 +272,10 @@ public class TestFailoverController { throw new ServiceFailedException("Failed!"); } }; + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, "svc1", svc2, "svc2"); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); fail("Failover to already active service"); } catch (FailoverFailedException ffe) { // Expected @@ -219,6 +288,84 @@ public class TestFailoverController { assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); } + @Test + public void testWeDontFailbackIfActiveWasFenced() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE); + DummyService svc2 = new DummyService(HAServiceState.STANDBY) { + @Override + public void transitionToActive() throws ServiceFailedException { + throw new ServiceFailedException("Failed!"); + } + }; + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); + + try { + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, true); + fail("Failed over to service that won't transition to active"); + } catch (FailoverFailedException ffe) { + // Expected + } + + // We failed to failover and did not failback because we fenced + // svc1 (we forced it), therefore svc1 and svc2 should be standby. + assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); + assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); + } + + @Test + public void testWeFenceOnFailbackIfTransitionToActiveFails() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE); + DummyService svc2 = new DummyService(HAServiceState.STANDBY) { + @Override + public void transitionToActive() throws ServiceFailedException, IOException { + throw new IOException("Failed!"); + } + }; + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); + AlwaysSucceedFencer.fenceCalled = 0; + + try { + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + fail("Failed over to service that won't transition to active"); + } catch (FailoverFailedException ffe) { + // Expected + } + + // We failed to failover. We did not fence svc1 because it cooperated + // and we didn't force it, so we failed back to svc1 and fenced svc2. + // Note svc2 still thinks it's active, that's OK, we fenced it. 
+ assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); + assertEquals(1, AlwaysSucceedFencer.fenceCalled); + assertEquals("svc2:5678", AlwaysSucceedFencer.fencedSvc); + } + + @Test + public void testFailureToFenceOnFailbackFailsTheFailback() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE); + DummyService svc2 = new DummyService(HAServiceState.STANDBY) { + @Override + public void transitionToActive() throws ServiceFailedException, IOException { + throw new IOException("Failed!"); + } + }; + NodeFencer fencer = setupFencer(AlwaysFailFencer.class.getName()); + AlwaysFailFencer.fenceCalled = 0; + + try { + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + fail("Failed over to service that won't transition to active"); + } catch (FailoverFailedException ffe) { + // Expected + } + + // We did not fence svc1 because it cooperated and we didn't force it, + // we failed to failover so we fenced svc2, we failed to fence svc2 + // so we did not failback to svc1, ie it's still standby. + assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); + assertEquals(1, AlwaysFailFencer.fenceCalled); + assertEquals("svc2:5678", AlwaysFailFencer.fencedSvc); + } + @Test public void testFailbackToFaultyServiceFails() throws Exception { DummyService svc1 = new DummyService(HAServiceState.ACTIVE) { @@ -233,9 +380,10 @@ public class TestFailoverController { throw new ServiceFailedException("Failed!"); } }; + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, "svc1", svc2, "svc2"); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); fail("Failover to already active service"); } catch (FailoverFailedException ffe) { // Expected diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java index 9bea4849947..02e7fffff32 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java @@ -80,47 +80,103 @@ public class TestHAAdmin { assertOutputContains("transitionToActive: incorrect number of arguments"); assertEquals(-1, runTool("-transitionToActive", "x", "y")); assertOutputContains("transitionToActive: incorrect number of arguments"); + assertEquals(-1, runTool("-failover")); + assertOutputContains("failover: incorrect arguments"); + assertOutputContains("failover: incorrect arguments"); + assertEquals(-1, runTool("-failover", "foo:1234")); + assertOutputContains("failover: incorrect arguments"); } @Test public void testHelp() throws Exception { assertEquals(-1, runTool("-help")); - assertEquals(1, runTool("-help", "transitionToActive")); + assertEquals(0, runTool("-help", "transitionToActive")); assertOutputContains("Transitions the daemon into Active"); } @Test public void testTransitionToActive() throws Exception { - assertEquals(0, runTool("-transitionToActive", "xxx")); + assertEquals(0, runTool("-transitionToActive", "foo:1234")); Mockito.verify(mockProtocol).transitionToActive(); } @Test public void testTransitionToStandby() throws Exception { - assertEquals(0, runTool("-transitionToStandby", "xxx")); + assertEquals(0, runTool("-transitionToStandby", "foo:1234")); Mockito.verify(mockProtocol).transitionToStandby(); } @Test - public void testFailover() throws Exception { + public void 
testFailoverWithNoFencerConfigured() throws Exception { Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); - assertEquals(0, runTool("-failover", "xxx", "yyy")); + assertEquals(-1, runTool("-failover", "foo:1234", "bar:5678")); + } + + @Test + public void testFailoverWithFencerConfigured() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + Configuration conf = new Configuration(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(0, runTool("-failover", "foo:1234", "bar:5678")); + } + + @Test + public void testFailoverWithFencerConfiguredAndForce() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + Configuration conf = new Configuration(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(0, runTool("-failover", "foo:1234", "bar:5678", "--forcefence")); + } + + @Test + public void testFailoverWithInvalidFenceArg() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + Configuration conf = new Configuration(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(-1, runTool("-failover", "foo:1234", "bar:5678", "notforcefence")); + } + + @Test + public void testFailoverWithFenceButNoFencer() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + assertEquals(-1, runTool("-failover", "foo:1234", "bar:5678", "--forcefence")); + } + + @Test + public void testFailoverWithFenceAndBadFencer() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + Configuration conf = new Configuration(); + conf.set(NodeFencer.CONF_METHODS_KEY, "foobar!"); + tool.setConf(conf); + assertEquals(-1, runTool("-failover", "foo:1234", "bar:5678", "--forcefence")); + } + + @Test + public void testForceFenceOptionListedBeforeArgs() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + Configuration conf = new Configuration(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(0, runTool("-failover", "--forcefence", "foo:1234", "bar:5678")); } @Test public void testGetServiceState() throws Exception { - assertEquals(0, runTool("-getServiceState", "xxx")); + assertEquals(0, runTool("-getServiceState", "foo:1234")); Mockito.verify(mockProtocol).getServiceState(); } @Test public void testCheckHealth() throws Exception { - assertEquals(0, runTool("-checkHealth", "xxx")); + assertEquals(0, runTool("-checkHealth", "foo:1234")); Mockito.verify(mockProtocol).monitorHealth(); Mockito.doThrow(new HealthCheckFailedException("fake health check failure")) .when(mockProtocol).monitorHealth(); - assertEquals(1, runTool("-checkHealth", "xxx")); + assertEquals(-1, runTool("-checkHealth", "foo:1234")); assertOutputContains("Health check failed: fake health check failure"); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java index 4a9ffae9b96..5508547c0a5 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestNodeFencer.java @@ -119,12 +119,11 @@ public class TestNodeFencer { 
assertFalse(fencer.fence(new InetSocketAddress("host", 1234))); } - private NodeFencer setupFencer(String confStr) + public static NodeFencer setupFencer(String confStr) throws BadFencingConfigurationException { System.err.println("Testing configuration:\n" + confStr); Configuration conf = new Configuration(); - conf.set(NodeFencer.CONF_METHODS_KEY, - confStr); + conf.set(NodeFencer.CONF_METHODS_KEY, confStr); return new NodeFencer(conf); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index 3b31445f68a..5e059b44795 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -105,7 +105,29 @@ public class TestHAStateTransitions { cluster.shutdown(); } } - + + /** + * Test that transitioning a service to the state that it is already + * in is a nop, specifically, an exception is not thrown. + */ + @Test + public void testTransitionToCurrentStateIsANop() throws Exception { + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + cluster.transitionToActive(0); + cluster.transitionToStandby(0); + cluster.transitionToStandby(0); + } finally { + cluster.shutdown(); + } + } + /** * Test manual failover failback for one namespace * @param cluster single process test cluster From 641f79a325bad571b11b5700a42efb844eabc5af Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Mon, 30 Jan 2012 23:05:18 +0000 Subject: [PATCH 107/177] HDFS-2824. Fix failover when prior NN died just after creating an edit log segment. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1238069 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../namenode/EditLogFileInputStream.java | 6 +- .../namenode/EditLogFileOutputStream.java | 19 +++- .../hdfs/server/namenode/FSEditLogLoader.java | 16 ++-- .../server/namenode/FileJournalManager.java | 88 ++++++++++++------- .../server/namenode/ha/EditLogTailer.java | 2 +- .../hdfs/server/namenode/NameNodeAdapter.java | 7 +- .../hdfs/server/namenode/TestEditLog.java | 58 +++++++----- .../server/namenode/TestFSEditLogLoader.java | 4 +- .../namenode/TestFileJournalManager.java | 3 +- .../namenode/ha/TestHAStateTransitions.java | 71 ++++++++++++++- .../apache/hadoop/test/GenericTestUtils.java | 4 +- 12 files changed, 207 insertions(+), 73 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index dd4fa42ffd0..a426f0926ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -139,3 +139,5 @@ HDFS-2805. Add a test for a federated cluster with HA NNs. (Brandon Li via jiten HDFS-2841. HAAdmin does not work if security is enabled. (atm) HDFS-2691. Fixes for pipeline recovery in an HA cluster: report RBW replicas immediately upon pipeline creation. (todd) + +HDFS-2824. Fix failover when prior NN died just after creating an edit log segment. 
(atm via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java index a27fa9490e7..22c1297dac2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileInputStream.java @@ -168,11 +168,11 @@ class EditLogFileInputStream extends EditLogInputStream { try { in = new EditLogFileInputStream(file); } catch (LogHeaderCorruptException corrupt) { - // If it's missing its header, this is equivalent to no transactions + // If the header is malformed or the wrong value, this indicates a corruption FSImage.LOG.warn("Log at " + file + " has no valid header", corrupt); - return new FSEditLogLoader.EditLogValidation(0, HdfsConstants.INVALID_TXID, - HdfsConstants.INVALID_TXID); + return new FSEditLogLoader.EditLogValidation(0, + HdfsConstants.INVALID_TXID, HdfsConstants.INVALID_TXID, true); } try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java index bdc0bd2a56c..938c3e085f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode; +import java.io.DataOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -27,6 +28,7 @@ import java.nio.channels.FileChannel; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.io.IOUtils; @@ -36,7 +38,8 @@ import com.google.common.annotations.VisibleForTesting; * An implementation of the abstract class {@link EditLogOutputStream}, which * stores edits in a local file. */ -class EditLogFileOutputStream extends EditLogOutputStream { +@InterfaceAudience.Private +public class EditLogFileOutputStream extends EditLogOutputStream { private static Log LOG = LogFactory.getLog(EditLogFileOutputStream.class); private File file; @@ -96,11 +99,23 @@ class EditLogFileOutputStream extends EditLogOutputStream { public void create() throws IOException { fc.truncate(0); fc.position(0); - doubleBuf.getCurrentBuf().writeInt(HdfsConstants.LAYOUT_VERSION); + writeHeader(doubleBuf.getCurrentBuf()); setReadyToFlush(); flush(); } + /** + * Write header information for this EditLogFileOutputStream to the provided + * DataOutputSream. + * + * @param out the output stream to write the header to. + * @throws IOException in the event of error writing to the stream. 
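   *
   * A minimal caller sketch (the stream construction and file name are
   * illustrative only):
   *
   *   DataOutputStream out = new DataOutputStream(new FileOutputStream(editsFile));
   *   EditLogFileOutputStream.writeHeader(out);  // writes LAYOUT_VERSION
   *   out.close();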
+ */ + @VisibleForTesting + public static void writeHeader(DataOutputStream out) throws IOException { + out.writeInt(HdfsConstants.LAYOUT_VERSION); + } + @Override public void close() throws IOException { if (fp == null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 56d2fcb5887..8c664d0695e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -605,19 +605,21 @@ public class FSEditLogLoader { FSImage.LOG.debug("Caught exception after reading " + numValid + " ops from " + in + " while determining its valid length.", t); } - return new EditLogValidation(lastPos, firstTxId, lastTxId); + return new EditLogValidation(lastPos, firstTxId, lastTxId, false); } static class EditLogValidation { - private long validLength; - private long startTxId; - private long endTxId; + private final long validLength; + private final long startTxId; + private final long endTxId; + private final boolean corruptionDetected; - EditLogValidation(long validLength, - long startTxId, long endTxId) { + EditLogValidation(long validLength, long startTxId, long endTxId, + boolean corruptionDetected) { this.validLength = validLength; this.startTxId = startTxId; this.endTxId = endTxId; + this.corruptionDetected = corruptionDetected; } long getValidLength() { return validLength; } @@ -633,6 +635,8 @@ public class FSEditLogLoader { } return (endTxId - startTxId) + 1; } + + boolean hasCorruptHeader() { return corruptionDetected; } } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index 2380e93f0f7..90bf1a77def 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -31,7 +31,6 @@ import java.util.regex.Pattern; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; -import org.apache.hadoop.hdfs.server.namenode.JournalManager.CorruptionException; import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StoragePurger; import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.EditLogValidation; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeFile; @@ -61,7 +60,6 @@ class FileJournalManager implements JournalManager { NameNodeFile.EDITS_INPROGRESS.getName() + "_(\\d+)"); private File currentInProgress = null; - private long maxSeenTransaction = 0L; @VisibleForTesting StoragePurger purger @@ -143,7 +141,7 @@ class FileJournalManager implements JournalManager { allLogFiles.size()); for (EditLogFile elf : allLogFiles) { - if (elf.isCorrupt() || elf.isInProgress()) continue; + if (elf.hasCorruptHeader() || elf.isInProgress()) continue; if (elf.getFirstTxId() >= firstTxId) { ret.add(new RemoteEditLog(elf.firstTxId, elf.lastTxId)); } else if ((firstTxId > elf.getFirstTxId()) && @@ -244,7 +242,7 @@ class FileJournalManager implements JournalManager { elf.validateLog(); } - 
if (elf.isCorrupt()) { + if (elf.hasCorruptHeader()) { break; } numTxns += elf.getLastTxId() + 1 - fromTxId; @@ -281,22 +279,38 @@ class FileJournalManager implements JournalManager { File currentDir = sd.getCurrentDir(); LOG.info("Recovering unfinalized segments in " + currentDir); List allLogFiles = matchEditLogs(currentDir.listFiles()); - - // make sure journal is aware of max seen transaction before moving corrupt - // files aside - findMaxTransaction(true); for (EditLogFile elf : allLogFiles) { if (elf.getFile().equals(currentInProgress)) { continue; } if (elf.isInProgress()) { - elf.validateLog(); - - if (elf.isCorrupt()) { - elf.moveAsideCorruptFile(); + // If the file is zero-length, we likely just crashed after opening the + // file, but before writing anything to it. Safe to delete it. + if (elf.getFile().length() == 0) { + LOG.info("Deleting zero-length edit log file " + elf); + elf.getFile().delete(); continue; } + + elf.validateLog(); + + if (elf.hasCorruptHeader()) { + elf.moveAsideCorruptFile(); + throw new CorruptionException("In-progress edit log file is corrupt: " + + elf); + } + + // If the file has a valid header (isn't corrupt) but contains no + // transactions, we likely just crashed after opening the file and + // writing the header, but before syncing any transactions. Safe to + // delete the file. + if (elf.getNumTransactions() == 0) { + LOG.info("Deleting edit log file with zero transactions " + elf); + elf.getFile().delete(); + continue; + } + finalizeLogSegment(elf.getFirstTxId(), elf.getLastTxId()); } } @@ -321,15 +335,21 @@ class FileJournalManager implements JournalManager { /** * Find the maximum transaction in the journal. - * This gets stored in a member variable, as corrupt edit logs - * will be moved aside, but we still need to remember their first - * tranaction id in the case that it was the maximum transaction in - * the journal. */ private long findMaxTransaction(boolean inProgressOk) throws IOException { + boolean considerSeenTxId = true; + long seenTxId = NNStorage.readTransactionIdFile(sd); + long maxSeenTransaction = 0; for (EditLogFile elf : getLogFiles(0)) { if (elf.isInProgress() && !inProgressOk) { + if (elf.getFirstTxId() != HdfsConstants.INVALID_TXID && + elf.getFirstTxId() <= seenTxId) { + // don't look at the seen_txid file if in-progress logs are not to be + // examined, and the value in seen_txid falls within the in-progress + // segment. 
+ considerSeenTxId = false; + } continue; } @@ -339,7 +359,11 @@ class FileJournalManager implements JournalManager { } maxSeenTransaction = Math.max(elf.getLastTxId(), maxSeenTransaction); } - return maxSeenTransaction; + if (considerSeenTxId) { + return Math.max(maxSeenTransaction, seenTxId); + } else { + return maxSeenTransaction; + } } @Override @@ -354,8 +378,9 @@ class FileJournalManager implements JournalManager { private File file; private final long firstTxId; private long lastTxId; + private long numTx = -1; - private boolean isCorrupt = false; + private boolean hasCorruptHeader = false; private final boolean isInProgress; final static Comparator COMPARE_BY_START_TXID @@ -407,11 +432,13 @@ class FileJournalManager implements JournalManager { */ void validateLog() throws IOException { EditLogValidation val = EditLogFileInputStream.validateEditLog(file); - if (val.getNumTransactions() == 0) { - markCorrupt(); - } else { - this.lastTxId = val.getEndTxId(); - } + this.numTx = val.getNumTransactions(); + this.lastTxId = val.getEndTxId(); + this.hasCorruptHeader = val.hasCorruptHeader(); + } + + long getNumTransactions() { + return numTx; } boolean isInProgress() { @@ -422,16 +449,12 @@ class FileJournalManager implements JournalManager { return file; } - void markCorrupt() { - isCorrupt = true; - } - - boolean isCorrupt() { - return isCorrupt; + boolean hasCorruptHeader() { + return hasCorruptHeader; } void moveAsideCorruptFile() throws IOException { - assert isCorrupt; + assert hasCorruptHeader; File src = file; File dst = new File(src.getParent(), src.getName() + ".corrupt"); @@ -446,8 +469,9 @@ class FileJournalManager implements JournalManager { @Override public String toString() { return String.format("EditLogFile(file=%s,first=%019d,last=%019d," - +"inProgress=%b,corrupt=%b)", file.toString(), - firstTxId, lastTxId, isInProgress(), isCorrupt); + +"inProgress=%b,hasCorruptHeader=%b,numTx=%d)", + file.toString(), firstTxId, lastTxId, + isInProgress(), hasCorruptHeader, numTx); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index 160c16ed5e7..8a2312e08aa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -224,7 +224,7 @@ public class EditLogTailer { editsLoaded = elie.getNumEditsLoaded(); throw elie; } finally { - if (editsLoaded > 0) { + if (editsLoaded > 0 || LOG.isDebugEnabled()) { LOG.info(String.format("Loaded %d edits starting from txid %d ", editsLoaded, lastTxnId)); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index 84d5537b1f2..181de70f336 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import java.io.File; import java.io.IOException; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -27,6 +28,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import 
org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo; @@ -35,7 +37,6 @@ import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.security.AccessControlException; -import org.apache.hadoop.test.GenericTestUtils; import org.mockito.Mockito; /** @@ -204,4 +205,8 @@ public class NameNodeAdapter { } return smi.initializedReplQueues; } + + public static File getInProgressEditsFile(StorageDirectory sd, long startTxId) { + return NNStorage.getInProgressEditsFile(sd, startTxId); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index f95a876eed9..f2f4d930dbe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -629,22 +629,26 @@ public class TestEditLog extends TestCase { } } + // should succeed - only one corrupt log dir public void testCrashRecoveryEmptyLogOneDir() throws Exception { - doTestCrashRecoveryEmptyLog(false, true); + doTestCrashRecoveryEmptyLog(false, true, true); } + // should fail - seen_txid updated to 3, but no log dir contains txid 3 public void testCrashRecoveryEmptyLogBothDirs() throws Exception { - doTestCrashRecoveryEmptyLog(true, true); + doTestCrashRecoveryEmptyLog(true, true, false); } + // should succeed - only one corrupt log dir public void testCrashRecoveryEmptyLogOneDirNoUpdateSeenTxId() throws Exception { - doTestCrashRecoveryEmptyLog(false, false); + doTestCrashRecoveryEmptyLog(false, false, true); } + // should succeed - both log dirs corrupt, but seen_txid never updated public void testCrashRecoveryEmptyLogBothDirsNoUpdateSeenTxId() throws Exception { - doTestCrashRecoveryEmptyLog(true, false); + doTestCrashRecoveryEmptyLog(true, false, true); } /** @@ -660,12 +664,13 @@ public class TestEditLog extends TestCase { * NN should fail to start up, because it's aware that txid 3 * was reached, but unable to find a non-corrupt log starting there. * @param updateTransactionIdFile if true update the seen_txid file. - * If false, the it will not be updated. This will simulate a case - * where the NN crashed between creating the new segment and updating - * seen_txid. + * If false, it will not be updated. This will simulate a case where + * the NN crashed between creating the new segment and updating the + * seen_txid file. + * @param shouldSucceed true if the test is expected to succeed. 
*/ private void doTestCrashRecoveryEmptyLog(boolean inBothDirs, - boolean updateTransactionIdFile) + boolean updateTransactionIdFile, boolean shouldSucceed) throws Exception { // start a cluster Configuration conf = new HdfsConfiguration(); @@ -684,29 +689,40 @@ public class TestEditLog extends TestCase { // Make a truncated edits_3_inprogress File log = new File(currentDir, NNStorage.getInProgressEditsFileName(3)); - NNStorage storage = new NNStorage(conf, - Collections.emptyList(), - Lists.newArrayList(uri)); - if (updateTransactionIdFile) { - storage.writeTransactionIdFileToStorage(3); - } - storage.close(); new EditLogFileOutputStream(log, 1024).create(); if (!inBothDirs) { break; } + + NNStorage storage = new NNStorage(conf, + Collections.emptyList(), + Lists.newArrayList(uri)); + + if (updateTransactionIdFile) { + storage.writeTransactionIdFileToStorage(3); + } + storage.close(); } try { cluster = new MiniDFSCluster.Builder(conf) .numDataNodes(NUM_DATA_NODES).format(false).build(); - fail("Did not fail to start with all-corrupt logs"); + if (!shouldSucceed) { + fail("Should not have succeeded in startin cluster"); + } } catch (IOException ioe) { - GenericTestUtils.assertExceptionContains( - "No non-corrupt logs for txid 3", ioe); + if (shouldSucceed) { + LOG.info("Should have succeeded in starting cluster, but failed", ioe); + throw ioe; + } else { + GenericTestUtils.assertExceptionContains( + "No non-corrupt logs for txid 3", + ioe); + } + } finally { + cluster.shutdown(); } - cluster.shutdown(); } @@ -1082,9 +1098,7 @@ public class TestEditLog extends TestCase { editlog.initJournalsForWrite(); long startTxId = 1; try { - Iterable editStreams - = editlog.selectInputStreams(startTxId, 4*TXNS_PER_ROLL); - + editlog.selectInputStreams(startTxId, 4*TXNS_PER_ROLL); fail("Should have thrown exception"); } catch (IOException ioe) { GenericTestUtils.assertExceptionContains( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java index e100b7013ae..160ffb60049 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java @@ -243,7 +243,9 @@ public class TestFSEditLogLoader { Files.copy(logFileBak, logFile); corruptByteInFile(logFile, offset); EditLogValidation val = EditLogFileInputStream.validateEditLog(logFile); - assertTrue(val.getNumTransactions() >= prevNumValid); + assertTrue(String.format("%d should have been >= %d", + val.getNumTransactions(), prevNumValid), + val.getNumTransactions() >= prevNumValid); prevNumValid = val.getNumTransactions(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java index 300080a5c96..b862727b0e6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java @@ -255,7 +255,8 @@ public class TestFileJournalManager { jm.getNumberOfTransactions(startGapTxId, true); fail("Should have thrown an exception by now"); } catch (IOException ioe) { - 
assertTrue(true); + GenericTestUtils.assertExceptionContains( + "Gap in transactions, max txnid is 110, 0 txns from 31", ioe); } // rolled 10 times so there should be 11 files. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index 5e059b44795..97a88d1e739 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -17,14 +17,22 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.net.URI; import java.net.URISyntaxException; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -33,7 +41,10 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; +import org.apache.hadoop.hdfs.server.namenode.EditLogFileOutputStream; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.io.IOUtils; @@ -41,8 +52,9 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; +import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; +import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; import org.mockito.Mockito; @@ -59,6 +71,10 @@ public class TestHAStateTransitions { private static final String TEST_FILE_STR = TEST_FILE_PATH.toUri().getPath(); private static final String TEST_FILE_DATA = "Hello state transitioning world"; + + static { + ((Log4JLogger)EditLogTailer.LOG).getLogger().setLevel(Level.ALL); + } /** * Test which takes a single node and flip flops between @@ -354,4 +370,55 @@ public class TestHAStateTransitions { cluster.shutdown(); } } + + @Test + public void testFailoverWithEmptyInProgressEditLog() throws Exception { + testFailoverAfterCrashDuringLogRoll(false); + } + + @Test + public void testFailoverWithEmptyInProgressEditLogWithHeader() + throws Exception { + testFailoverAfterCrashDuringLogRoll(true); + } + + private static void testFailoverAfterCrashDuringLogRoll(boolean writeHeader) + throws Exception { + Configuration conf = new Configuration(); + 
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, Integer.MAX_VALUE); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); + try { + cluster.transitionToActive(0); + NameNode nn0 = cluster.getNameNode(0); + nn0.getRpcServer().rollEditLog(); + cluster.shutdownNameNode(0); + createEmptyInProgressEditLog(cluster, nn0, writeHeader); + cluster.transitionToActive(1); + } finally { + IOUtils.cleanup(LOG, fs); + cluster.shutdown(); + } + } + + private static void createEmptyInProgressEditLog(MiniDFSCluster cluster, + NameNode nn, boolean writeHeader) throws IOException { + long txid = nn.getNamesystem().getEditLog().getLastWrittenTxId(); + URI sharedEditsUri = cluster.getSharedEditsDir(0, 1); + File sharedEditsDir = new File(sharedEditsUri.getPath()); + StorageDirectory storageDir = new StorageDirectory(sharedEditsDir); + File inProgressFile = NameNodeAdapter.getInProgressEditsFile(storageDir, + txid + 1); + assertTrue("Failed to create in-progress edits file", + inProgressFile.createNewFile()); + + if (writeHeader) { + DataOutputStream out = new DataOutputStream(new FileOutputStream( + inProgressFile)); + EditLogFileOutputStream.writeHeader(out); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index ea2b11e2b17..f723a85bf45 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -80,8 +80,8 @@ public abstract class GenericTestUtils { public static void assertExceptionContains(String string, Throwable t) { String msg = t.getMessage(); Assert.assertTrue( - "Unexpected exception:" + StringUtils.stringifyException(t), - msg.contains(string)); + "Expected to find '" + string + "' but got unexpected exception:" + + StringUtils.stringifyException(t), msg.contains(string)); } public static void waitFor(Supplier check, From 6be13332db5342465c2f279a5984b4b8a33420fc Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Tue, 31 Jan 2012 02:17:53 +0000 Subject: [PATCH 108/177] HDFS-2853. HA: NN fails to start if the shared edits dir is marked required. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1238134 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/FSEditLog.java | 2 +- .../namenode/NameNodeResourcePolicy.java | 9 -- .../apache/hadoop/hdfs/MiniDFSCluster.java | 7 +- .../hdfs/server/namenode/TestEditLog.java | 2 +- .../namenode/TestNameNodeResourcePolicy.java | 8 +- .../namenode/ha/TestFailureOfSharedDir.java | 93 +++++++++++++++++++ 7 files changed, 104 insertions(+), 19 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index a426f0926ee..fe88dbbe97f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -141,3 +141,5 @@ HDFS-2841. HAAdmin does not work if security is enabled. (atm) HDFS-2691. 
Fixes for pipeline recovery in an HA cluster: report RBW replicas immediately upon pipeline creation. (todd) HDFS-2824. Fix failover when prior NN died just after creating an edit log segment. (atm via todd) + +HDFS-2853. HA: NN fails to start if the shared edits dir is marked required (atm via eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index cd7ff5b0c8f..a78039f2c2d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -865,7 +865,7 @@ public class FSEditLog { editLogStream = journalSet.startLogSegment(segmentTxId); } catch (IOException ex) { throw new IOException("Unable to start log segment " + - segmentTxId + ": no journals successfully started."); + segmentTxId + ": too few journals successfully started.", ex); } curSegmentTxId = segmentTxId; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourcePolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourcePolicy.java index 53cd867fbc1..3896165ff39 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourcePolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourcePolicy.java @@ -37,9 +37,6 @@ final class NameNodeResourcePolicy { * required to continue operation. * @return true if and only if there are sufficient NN resources to * continue logging edits. - * @throws RuntimeException if the number of configured - * redundant resources is fewer than the minimum number of available - * redundant resources. */ static boolean areResourcesAvailable( Collection resources, @@ -63,12 +60,6 @@ final class NameNodeResourcePolicy { } } - if (redundantResourceCount < minimumRedundantResources) { - throw new RuntimeException("Need a minimum of " + minimumRedundantResources - + " for NN to operate but only " + redundantResourceCount - + " are configured."); - } - if (redundantResourceCount == 0) { // If there are no redundant resources, return true if there are any // required resources available. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index bf3af609d24..dc3074aeb51 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -664,7 +664,12 @@ public class MiniDFSCluster { } public URI getSharedEditsDir(int minNN, int maxNN) throws IOException { - return fileAsURI(new File(base_dir, "shared-edits-" + + return formatSharedEditsDir(base_dir, minNN, maxNN); + } + + public static URI formatSharedEditsDir(File baseDir, int minNN, int maxNN) + throws IOException { + return fileAsURI(new File(baseDir, "shared-edits-" + minNN + "-through-" + maxNN)); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index f2f4d930dbe..9281eb2f04a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -807,7 +807,7 @@ public class TestEditLog extends TestCase { fail("Did no throw exception on only having a bad dir"); } catch (IOException ioe) { GenericTestUtils.assertExceptionContains( - "no journals successfully started", ioe); + "too few journals successfully started", ioe); } finally { logDir.setWritable(true); log.close(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourcePolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourcePolicy.java index 559d1657269..49a96e9b66d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourcePolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeResourcePolicy.java @@ -50,13 +50,7 @@ public class TestNameNodeResourcePolicy { assertFalse(testResourceScenario(4, 0, 3, 0, 2)); assertTrue(testResourceScenario(4, 0, 3, 0, 1)); assertFalse(testResourceScenario(4, 0, 4, 0, 1)); - try { - testResourceScenario(1, 0, 0, 0, 2); - fail("Should fail if there are more minimum redundant resources than " + - "total redundant resources"); - } catch (RuntimeException rte) { - assertTrue(rte.getMessage().startsWith("Need a minimum")); - } + assertFalse(testResourceScenario(1, 0, 0, 0, 2)); } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java new file mode 100644 index 00000000000..20c93b7e734 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Test; + +public class TestFailureOfSharedDir { + + private static final Log LOG = LogFactory.getLog(TestFailureOfSharedDir.class); + + /** + * Test that marking the shared edits dir as being "required" causes the NN to + * fail if that dir can't be accessed. + */ + @Test + public void testFailureOfSharedDir() throws Exception { + Configuration conf = new Configuration(); + URI sharedEditsUri = MiniDFSCluster.formatSharedEditsDir( + new File(MiniDFSCluster.getBaseDirectory()), 0, 1); + // Mark the shared edits dir required. + conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY, + sharedEditsUri.toString()); + + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + + assertEquals(sharedEditsUri, cluster.getSharedEditsDir(0, 1)); + + cluster.waitActive(); + cluster.transitionToActive(0); + + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); + + assertTrue(fs.mkdirs(new Path("/test1"))); + + // Blow away the shared edits dir. + FileUtil.fullyDelete(new File(sharedEditsUri)); + + NameNode nn0 = cluster.getNameNode(0); + try { + // Make sure that subsequent operations on the NN fail. + nn0.getRpcServer().rollEditLog(); + fail("Succeeded in rolling edit log despite shared dir being deleted"); + } catch (IOException ioe) { + GenericTestUtils.assertExceptionContains( + "Unable to start log segment 4: too few journals successfully started", + ioe); + LOG.info("Got expected exception", ioe); + } + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } +} From 43679fcccd3ed35cf1bf15fe42001106170761a0 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 1 Feb 2012 01:29:06 +0000 Subject: [PATCH 109/177] HDFS-2845. SBN should not allow browsing of the file system via web UI. Contributed by Bikas Saha. 
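The JSP changes that follow all hinge on a single guard: fetch the NameNode's HA service state and render the "Browse the filesystem" link only when the node is ACTIVE, since a standby cannot serve an up-to-date view of the namespace. A minimal sketch of that guard (illustration only, not part of the patch), using the nn.getServiceState() accessor and the HAServiceState import that appear in the diffs; the page output here is a hypothetical StringBuilder stand-in for the JSP markup:

import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.server.namenode.NameNode;

// Sketch of the guard added to corrupt_files.jsp, dfshealth.jsp and
// dfsnodelist.jsp: only an ACTIVE NameNode should offer filesystem browsing.
class BrowseLinkGuard {
  static void maybeRenderBrowseLink(NameNode nn, StringBuilder page) {
    HAServiceState nnHAState = nn.getServiceState();
    boolean isActive = (nnHAState == HAServiceState.ACTIVE);
    if (isActive) {
      // Stand-in for the link markup the JSPs emit.
      page.append("Browse the filesystem\n");
    }
  }
}

TestHAWebUI, added further down in this patch, exercises exactly this behavior by flipping a single NN between active and standby and checking the rendered page contents.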
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1238897 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../src/main/webapps/hdfs/corrupt_files.jsp | 9 ++- .../src/main/webapps/hdfs/dfshealth.jsp | 8 ++- .../src/main/webapps/hdfs/dfsnodelist.jsp | 7 +- .../hdfs/server/namenode/ha/TestHAWebUI.java | 70 +++++++++++++++++++ 5 files changed, 91 insertions(+), 5 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAWebUI.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index fe88dbbe97f..d3a725ba86b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -143,3 +143,5 @@ HDFS-2691. Fixes for pipeline recovery in an HA cluster: report RBW replicas imm HDFS-2824. Fix failover when prior NN died just after creating an edit log segment. (atm via todd) HDFS-2853. HA: NN fails to start if the shared edits dir is marked required (atm via eli) + +HDFS-2845. SBN should not allow browsing of the file system via web UI. (Bikas Saha via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/corrupt_files.jsp b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/corrupt_files.jsp index a71f40f26e6..a4906a58801 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/corrupt_files.jsp +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/corrupt_files.jsp @@ -23,6 +23,7 @@ import="org.apache.hadoop.fs.FileStatus" import="org.apache.hadoop.fs.FileUtil" import="org.apache.hadoop.fs.Path" + import="org.apache.hadoop.ha.HAServiceProtocol.HAServiceState" import="java.util.Collection" import="java.util.Arrays" %> <%!//for java.io.Serializable @@ -30,6 +31,8 @@ <% NameNode nn = NameNodeHttpServer.getNameNodeFromContext(application); FSNamesystem fsn = nn.getNamesystem(); + HAServiceState nnHAState = nn.getServiceState(); + boolean isActive = (nnHAState == HAServiceState.ACTIVE); String namenodeRole = nn.getRole().toString(); String namenodeLabel = nn.getNameNodeAddress().getHostName() + ":" + nn.getNameNodeAddress().getPort(); @@ -46,8 +49,10 @@

        <%=namenodeRole%> '<%=namenodeLabel%>'

        <%=NamenodeJspHelper.getVersionTable(fsn)%>
        -Browse the filesystem -
        +<% if (isActive) { %> + Browse the filesystem +
        +<% } %> <%=namenodeRole%> Logs
        Go back to DFS home diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp index 4c65701a1f9..81e595d718e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfshealth.jsp @@ -30,8 +30,10 @@ final NamenodeJspHelper.HealthJsp healthjsp = new NamenodeJspHelper.HealthJsp(); NameNode nn = NameNodeHttpServer.getNameNodeFromContext(application); FSNamesystem fsn = nn.getNamesystem(); + HAServiceState nnHAState = nn.getServiceState(); + boolean isActive = (nnHAState == HAServiceState.ACTIVE); String namenodeRole = nn.getRole().toString(); - String namenodeState = nn.getServiceState().toString(); + String namenodeState = nnHAState.toString(); String namenodeLabel = nn.getNameNodeAddress().getHostName() + ":" + nn.getNameNodeAddress().getPort(); %> @@ -45,7 +47,9 @@

        <%=namenodeRole%> '<%=namenodeLabel%>' (<%=namenodeState%>)

        <%= NamenodeJspHelper.getVersionTable(fsn) %>
        -Browse the filesystem
        +<% if (isActive) { %> + Browse the filesystem
        +<% } %> <%=namenodeRole%> Logs
        diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfsnodelist.jsp b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfsnodelist.jsp index 886fbeaa35d..35deb05f859 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfsnodelist.jsp +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/hdfs/dfsnodelist.jsp @@ -20,6 +20,7 @@ <%@ page contentType="text/html; charset=UTF-8" import="org.apache.hadoop.util.ServletUtil" + import="org.apache.hadoop.ha.HAServiceProtocol.HAServiceState" %> <%! //for java.io.Serializable @@ -30,6 +31,8 @@ final NamenodeJspHelper.NodeListJsp nodelistjsp = new NamenodeJspHelper.NodeList NameNode nn = NameNodeHttpServer.getNameNodeFromContext(application); String namenodeRole = nn.getRole().toString(); FSNamesystem fsn = nn.getNamesystem(); +HAServiceState nnHAState = nn.getServiceState(); +boolean isActive = (nnHAState == HAServiceState.ACTIVE); String namenodeLabel = nn.getNameNodeAddress().getHostName() + ":" + nn.getNameNodeAddress().getPort(); %> @@ -43,7 +46,9 @@ String namenodeLabel = nn.getNameNodeAddress().getHostName() + ":" + nn.getNameN

        <%=namenodeRole%> '<%=namenodeLabel%>'

        <%= NamenodeJspHelper.getVersionTable(fsn) %>
        -Browse the filesystem
        +<% if (isActive) { %> + Browse the filesystem
        +<% } %> <%=namenodeRole%> Logs
        Go back to DFS home
        diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAWebUI.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAWebUI.java new file mode 100644 index 00000000000..ccb4f5b5cdb --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAWebUI.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.net.URL; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.junit.Test; + +public class TestHAWebUI { + + /** + * Tests that the web UI of the name node provides a link to browse the file + * system only in active state + * + */ + @Test + public void testLinkToBrowseFilesystem() throws Exception { + Configuration conf = new Configuration(); + + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0) + .build(); + try { + cluster.waitActive(); + + cluster.transitionToActive(0); + String pageContents = DFSTestUtil.urlGet(new URL("http://localhost:" + + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + + "/dfshealth.jsp")); + assertTrue(pageContents.contains("Browse the filesystem")); + + cluster.transitionToStandby(0); + pageContents = DFSTestUtil.urlGet(new URL("http://localhost:" + + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + + "/dfshealth.jsp")); + assertFalse(pageContents.contains("Browse the filesystem")); + + cluster.transitionToActive(0); + pageContents = DFSTestUtil.urlGet(new URL("http://localhost:" + + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + + "/dfshealth.jsp")); + assertTrue(pageContents.contains("Browse the filesystem")); + + } finally { + cluster.shutdown(); + } + } +} From cf611255d6fcd7016e0ce2a3f80ccd0d4e051d9f Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Wed, 1 Feb 2012 05:16:49 +0000 Subject: [PATCH 110/177] HDFS-2742. HA: observed dataloss in replication stress test. 
Contributed by Todd Lipcon git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1238940 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../server/blockmanagement/BlockInfo.java | 2 +- .../server/blockmanagement/BlockManager.java | 172 ++++++++-- .../PendingDataNodeMessages.java | 134 ++++++++ .../hdfs/server/namenode/FSEditLogLoader.java | 55 ++-- .../hdfs/server/namenode/FSNamesystem.java | 204 +++++++----- .../server/namenode/NameNodeRpcServer.java | 29 -- .../hdfs/server/namenode/Namesystem.java | 6 + .../namenode/PendingDataNodeMessages.java | 201 ------------ .../TestPendingDataNodeMessages.java | 68 ++++ .../hdfs/server/namenode/NameNodeAdapter.java | 2 +- .../server/namenode/ha/TestDNFencing.java | 174 +++++++++- .../server/namenode/ha/TestHASafeMode.java | 305 +++++++++++++----- 13 files changed, 923 insertions(+), 431 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingDataNodeMessages.java delete mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingDataNodeMessages.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index d3a725ba86b..0406f192a24 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -145,3 +145,5 @@ HDFS-2824. Fix failover when prior NN died just after creating an edit log segme HDFS-2853. HA: NN fails to start if the shared edits dir is marked required (atm via eli) HDFS-2845. SBN should not allow browsing of the file system via web UI. (Bikas Saha via atm) + +HDFS-2742. HA: observed dataloss in replication stress test. (todd via eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java index 4c46d01b525..d0c7692228c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java @@ -180,7 +180,7 @@ public class BlockInfo extends Block implements LightWeightGSet.LinkedElement { /** * Count the number of data-nodes the block belongs to. 
*/ - int numNodes() { + public int numNodes() { assert this.triplets != null : "BlockInfo is not initialized"; assert triplets.length % 3 == 0 : "Malformed BlockInfo"; for(int idx = getCapacity()-1; idx >= 0; idx--) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 9f2dfba55ea..ca861318820 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import java.io.IOException; import java.io.PrintWriter; +import java.io.StringWriter; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -28,6 +29,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Queue; import java.util.Set; import java.util.TreeMap; import org.apache.commons.logging.Log; @@ -49,6 +51,7 @@ import org.apache.hadoop.hdfs.protocol.UnregisteredNodeException; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager.AccessMode; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; +import org.apache.hadoop.hdfs.server.blockmanagement.PendingDataNodeMessages.ReportedBlockInfo; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; import org.apache.hadoop.hdfs.server.common.Util; @@ -58,7 +61,6 @@ import org.apache.hadoop.hdfs.server.namenode.INodeFile; import org.apache.hadoop.hdfs.server.namenode.INodeFileUnderConstruction; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.Namesystem; -import org.apache.hadoop.hdfs.server.protocol.BlockCommand; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; @@ -69,7 +71,6 @@ import org.apache.hadoop.net.Node; import org.apache.hadoop.util.Daemon; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; import com.google.common.collect.Sets; /** @@ -83,11 +84,20 @@ public class BlockManager { /** Default load factor of map */ public static final float DEFAULT_MAP_LOAD_FACTOR = 0.75f; + private static final String QUEUE_REASON_CORRUPT_STATE = + "it has the wrong state or generation stamp"; + + private static final String QUEUE_REASON_FUTURE_GENSTAMP = + "generation stamp is in the future"; + private final Namesystem namesystem; private final DatanodeManager datanodeManager; private final HeartbeatManager heartbeatManager; private final BlockTokenSecretManager blockTokenSecretManager; + + private final PendingDataNodeMessages pendingDNMessages = + new PendingDataNodeMessages(); private volatile long pendingReplicationBlocksCount = 0L; private volatile long corruptReplicaBlocksCount = 0L; @@ -124,6 +134,10 @@ public class BlockManager { public long getPostponedMisreplicatedBlocksCount() { return postponedMisreplicatedBlocksCount; } + /** Used by metrics */ + public int 
getPendingDataNodeMessageCount() { + return pendingDNMessages.count(); + } /**replicationRecheckInterval is how often namenode checks for new replication work*/ private final long replicationRecheckInterval; @@ -479,12 +493,24 @@ public class BlockManager { if(curBlock.isComplete()) return curBlock; BlockInfoUnderConstruction ucBlock = (BlockInfoUnderConstruction)curBlock; - if (!force && ucBlock.numNodes() < minReplication) + int numNodes = ucBlock.numNodes(); + if (!force && numNodes < minReplication) throw new IOException("Cannot complete block: " + "block does not satisfy minimal replication requirement."); BlockInfo completeBlock = ucBlock.convertToCompleteBlock(); // replace penultimate block in file fileINode.setBlock(blkIndex, completeBlock); + + // Since safe-mode only counts complete blocks, and we now have + // one more complete block, we need to adjust the total up, and + // also count it as safe, if we have at least the minimum replica + // count. (We may not have the minimum replica count yet if this is + // a "forced" completion when a file is getting closed by an + // OP_CLOSE edit on the standby). + namesystem.adjustSafeModeBlockTotals(0, 1); + namesystem.incrementSafeBlockCount( + Math.min(numNodes, minReplication)); + // replace block in the blocksMap return blocksMap.replaceBlock(completeBlock); } @@ -547,6 +573,14 @@ public class BlockManager { String datanodeId = dd.getStorageID(); invalidateBlocks.remove(datanodeId, oldBlock); } + + // Adjust safe-mode totals, since under-construction blocks don't + // count in safe-mode. + namesystem.adjustSafeModeBlockTotals( + // decrement safe if we had enough + targets.length >= minReplication ? -1 : 0, + // always decrement total blocks + -1); final long fileLength = fileINode.computeContentSummary().getLength(); final long pos = fileLength - ucBlock.getNumBytes(); @@ -1483,9 +1517,19 @@ public class BlockManager { assert (node.numBlocks() == 0); BlockReportIterator itBR = report.getBlockReportIterator(); + boolean isStandby = namesystem.isInStandbyState(); + while(itBR.hasNext()) { Block iblk = itBR.next(); ReplicaState reportedState = itBR.getCurrentReplicaState(); + + if (isStandby && + namesystem.isGenStampInFuture(iblk.getGenerationStamp())) { + queueReportedBlock(node, iblk, reportedState, + QUEUE_REASON_FUTURE_GENSTAMP); + continue; + } + BlockInfo storedBlock = blocksMap.getStoredBlock(iblk); // If block does not belong to any file, we are done. if (storedBlock == null) continue; @@ -1493,7 +1537,14 @@ public class BlockManager { // If block is corrupt, mark it and continue to next block. BlockUCState ucState = storedBlock.getBlockUCState(); if (isReplicaCorrupt(iblk, reportedState, storedBlock, ucState, node)) { - markBlockAsCorrupt(storedBlock, node); + if (namesystem.isInStandbyState()) { + // In the Standby, we may receive a block report for a file that we + // just have an out-of-date gen-stamp or state for, for example. + queueReportedBlock(node, iblk, reportedState, + QUEUE_REASON_CORRUPT_STATE); + } else { + markBlockAsCorrupt(storedBlock, node); + } continue; } @@ -1576,7 +1627,8 @@ public class BlockManager { * @param toCorrupt replicas with unexpected length or generation stamp; * add to corrupt replicas * @param toUC replicas of blocks currently under construction - * @return + * @return the up-to-date stored block, if it should be kept. + * Otherwise, null. 
*/ private BlockInfo processReportedBlock(final DatanodeDescriptor dn, final Block block, final ReplicaState reportedState, @@ -1591,6 +1643,13 @@ public class BlockManager { + " replicaState = " + reportedState); } + if (namesystem.isInStandbyState() && + namesystem.isGenStampInFuture(block.getGenerationStamp())) { + queueReportedBlock(dn, block, reportedState, + QUEUE_REASON_FUTURE_GENSTAMP); + return null; + } + // find block by blockId BlockInfo storedBlock = blocksMap.getStoredBlock(block); if(storedBlock == null) { @@ -1615,7 +1674,16 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block } if (isReplicaCorrupt(block, reportedState, storedBlock, ucState, dn)) { - toCorrupt.add(storedBlock); + if (namesystem.isInStandbyState()) { + // If the block is an out-of-date generation stamp or state, + // but we're the standby, we shouldn't treat it as corrupt, + // but instead just queue it for later processing. + queueReportedBlock(dn, storedBlock, reportedState, + QUEUE_REASON_CORRUPT_STATE); + + } else { + toCorrupt.add(storedBlock); + } return storedBlock; } @@ -1633,6 +1701,68 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block return storedBlock; } + /** + * Queue the given reported block for later processing in the + * standby node. {@see PendingDataNodeMessages}. + * @param reason a textual reason to report in the debug logs + */ + private void queueReportedBlock(DatanodeDescriptor dn, Block block, + ReplicaState reportedState, String reason) { + assert namesystem.isInStandbyState(); + + if (LOG.isDebugEnabled()) { + LOG.debug("Queueing reported block " + block + + " in state " + reportedState + + " from datanode " + dn + " for later processing " + + "because " + reason + "."); + } + pendingDNMessages.enqueueReportedBlock(dn, block, reportedState); + } + + /** + * Try to process any messages that were previously queued for the given + * block. This is called from FSEditLogLoader whenever a block's state + * in the namespace has changed or a new block has been created. + */ + public void processQueuedMessagesForBlock(Block b) throws IOException { + Queue queue = pendingDNMessages.takeBlockQueue(b); + if (queue == null) { + // Nothing to re-process + return; + } + processQueuedMessages(queue); + } + + private void processQueuedMessages(Iterable rbis) + throws IOException { + for (ReportedBlockInfo rbi : rbis) { + if (LOG.isDebugEnabled()) { + LOG.debug("Processing previouly queued message " + rbi); + } + processAndHandleReportedBlock( + rbi.getNode(), rbi.getBlock(), rbi.getReportedState(), null); + } + } + + /** + * Process any remaining queued datanode messages after entering + * active state. At this point they will not be re-queued since + * we are the definitive master node and thus should be up-to-date + * with the namespace information. + */ + public void processAllPendingDNMessages() throws IOException { + assert !namesystem.isInStandbyState() : + "processAllPendingDNMessages() should be called after exiting " + + "standby state!"; + int count = pendingDNMessages.count(); + if (count > 0) { + LOG.info("Processing " + count + " messages from DataNodes " + + "that were previously queued during standby state."); + } + processQueuedMessages(pendingDNMessages.takeAll()); + assert pendingDNMessages.count() == 0; + } + /* * The next two methods test the various cases under which we must conclude * the replica is corrupt, or under construction. 
These are laid out @@ -1742,13 +1872,15 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block // Now check for completion of blocks and safe block count int numCurrentReplica = countLiveNodes(storedBlock); if (storedBlock.getBlockUCState() == BlockUCState.COMMITTED - && numCurrentReplica >= minReplication) + && numCurrentReplica >= minReplication) { storedBlock = completeBlock(storedBlock.getINode(), storedBlock, false); - - // check whether safe replication is reached for the block - // only complete blocks are counted towards that - if(storedBlock.isComplete()) + } else if (storedBlock.isComplete()) { + // check whether safe replication is reached for the block + // only complete blocks are counted towards that. + // In the case that the block just became complete above, completeBlock() + // handles the safe block count maintenance. namesystem.incrementSafeBlockCount(numCurrentReplica); + } } /** @@ -1807,15 +1939,17 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block + pendingReplications.getNumReplicas(storedBlock); if(storedBlock.getBlockUCState() == BlockUCState.COMMITTED && - numLiveReplicas >= minReplication) + numLiveReplicas >= minReplication) { storedBlock = completeBlock(fileINode, storedBlock, false); - - // check whether safe replication is reached for the block - // only complete blocks are counted towards that - // Is no-op if not in safe mode. - if(storedBlock.isComplete()) + } else if (storedBlock.isComplete()) { + // check whether safe replication is reached for the block + // only complete blocks are counted towards that + // Is no-op if not in safe mode. + // In the case that the block just became complete above, completeBlock() + // handles the safe block count maintenance. namesystem.incrementSafeBlockCount(numCurrentReplica); - + } + // if file is under construction, then done for now if (fileINode.isUnderConstruction()) { return storedBlock; @@ -2514,7 +2648,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block } public int getActiveBlockCount() { - return blocksMap.size() - (int)invalidateBlocks.numBlocks(); + return blocksMap.size(); } public DatanodeDescriptor[] getNodes(BlockInfo block) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingDataNodeMessages.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingDataNodeMessages.java new file mode 100644 index 00000000000..b7da1160484 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/PendingDataNodeMessages.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.blockmanagement; + +import java.util.List; +import java.util.Map; +import java.util.Queue; + +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.server.blockmanagement.PendingDataNodeMessages.ReportedBlockInfo; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; + +import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +/** + * In the Standby Node, we can receive messages about blocks + * before they are actually available in the namespace, or while + * they have an outdated state in the namespace. In those cases, + * we queue those block-related messages in this structure. + * */ +class PendingDataNodeMessages { + + Map> queueByBlockId = + Maps.newHashMap(); + private int count = 0; + + + static class ReportedBlockInfo { + private final Block block; + private final DatanodeDescriptor dn; + private final ReplicaState reportedState; + + ReportedBlockInfo(DatanodeDescriptor dn, Block block, + ReplicaState reportedState) { + this.dn = dn; + this.block = block; + this.reportedState = reportedState; + } + + Block getBlock() { + return block; + } + + DatanodeDescriptor getNode() { + return dn; + } + + ReplicaState getReportedState() { + return reportedState; + } + + @Override + public String toString() { + return "ReportedBlockInfo [block=" + block + ", dn=" + dn + + ", reportedState=" + reportedState + "]"; + } + } + + void enqueueReportedBlock(DatanodeDescriptor dn, Block block, + ReplicaState reportedState) { + block = new Block(block); + getBlockQueue(block).add( + new ReportedBlockInfo(dn, block, reportedState)); + count++; + } + + /** + * @return any messages that were previously queued for the given block, + * or null if no messages were queued. 
+ */ + Queue takeBlockQueue(Block block) { + Queue queue = queueByBlockId.remove(block); + if (queue != null) { + count -= queue.size(); + } + return queue; + } + + + private Queue getBlockQueue(Block block) { + Queue queue = queueByBlockId.get(block); + if (queue == null) { + queue = Lists.newLinkedList(); + queueByBlockId.put(block, queue); + } + return queue; + } + + public int count() { + return count ; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + for (Map.Entry> entry : + queueByBlockId.entrySet()) { + sb.append("Block " + entry.getKey() + ":\n"); + for (ReportedBlockInfo rbi : entry.getValue()) { + sb.append(" ").append(rbi).append("\n"); + } + } + return sb.toString(); + } + + public Iterable takeAll() { + List rbis = Lists.newArrayListWithCapacity( + count); + for (Queue q : queueByBlockId.values()) { + rbis.addAll(q); + } + queueByBlockId.clear(); + count = 0; + return rbis; + } +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 8c664d0695e..d51752f5a31 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -66,7 +66,6 @@ import com.google.common.base.Joiner; @InterfaceStability.Evolving public class FSEditLogLoader { private final FSNamesystem fsNamesys; - private long maxGenStamp = 0; public FSEditLogLoader(FSNamesystem fsNamesys) { this.fsNamesys = fsNamesys; @@ -91,15 +90,6 @@ public class FSEditLogLoader { + " of size " + edits.length() + " edits # " + numEdits + " loaded in " + (now()-startTime)/1000 + " seconds."); } finally { - fsNamesys.setBlockTotal(); - - // Delay the notification of genstamp updates until after - // setBlockTotal() above. Otherwise, we will mark blocks - // as "safe" before they've been incorporated in the expected - // totalBlocks and threshold for SafeMode -- triggering an - // assertion failure and/or exiting safemode too early! - fsNamesys.notifyGenStampUpdate(maxGenStamp); - edits.close(); fsNamesys.writeUnlock(); } @@ -183,6 +173,12 @@ public class FSEditLogLoader { switch (op.opCode) { case OP_ADD: { AddCloseOp addCloseOp = (AddCloseOp)op; + if (FSNamesystem.LOG.isDebugEnabled()) { + FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + + " numblocks : " + addCloseOp.blocks.length + + " clientHolder " + addCloseOp.clientName + + " clientMachine " + addCloseOp.clientMachine); + } // See if the file already exists (persistBlocks call) INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path); @@ -197,13 +193,6 @@ public class FSEditLogLoader { } long blockSize = addCloseOp.blockSize; - if (FSNamesystem.LOG.isDebugEnabled()) { - FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + - " numblocks : " + addCloseOp.blocks.length + - " clientHolder " + addCloseOp.clientName + - " clientMachine " + addCloseOp.clientMachine); - } - // Older versions of HDFS does not store the block size in inode. // If the file has more than one block, use the size of the // first block as the blocksize. 
Otherwise use the default @@ -227,12 +216,18 @@ public class FSEditLogLoader { addCloseOp.atime, blockSize); fsNamesys.prepareFileForWrite(addCloseOp.path, node, - addCloseOp.clientName, addCloseOp.clientMachine, null); + addCloseOp.clientName, addCloseOp.clientMachine, null, + false); } else { // This is OP_ADD on an existing file if (!oldFile.isUnderConstruction()) { // This is a call to append() on an already-closed file. + if (FSNamesystem.LOG.isDebugEnabled()) { + FSNamesystem.LOG.debug("Reopening an already-closed file " + + "for append"); + } fsNamesys.prepareFileForWrite(addCloseOp.path, oldFile, - addCloseOp.clientName, addCloseOp.clientMachine, null); + addCloseOp.clientName, addCloseOp.clientMachine, null, + false); oldFile = getINodeFile(fsDir, addCloseOp.path); } @@ -243,6 +238,13 @@ public class FSEditLogLoader { case OP_CLOSE: { AddCloseOp addCloseOp = (AddCloseOp)op; + if (FSNamesystem.LOG.isDebugEnabled()) { + FSNamesystem.LOG.debug(op.opCode + ": " + addCloseOp.path + + " numblocks : " + addCloseOp.blocks.length + + " clientHolder " + addCloseOp.clientName + + " clientMachine " + addCloseOp.clientMachine); + } + INodeFile oldFile = getINodeFile(fsDir, addCloseOp.path); if (oldFile == null) { throw new IOException("Operation trying to close non-existent file " + @@ -478,14 +480,23 @@ public class FSEditLogLoader { } oldBlock.setNumBytes(newBlock.getNumBytes()); + boolean changeMade = + oldBlock.getGenerationStamp() != newBlock.getGenerationStamp(); oldBlock.setGenerationStamp(newBlock.getGenerationStamp()); if (oldBlock instanceof BlockInfoUnderConstruction && (!isLastBlock || addCloseOp.opCode == FSEditLogOpCodes.OP_CLOSE)) { + changeMade = true; fsNamesys.getBlockManager().forceCompleteBlock( (INodeFileUnderConstruction)file, (BlockInfoUnderConstruction)oldBlock); } + if (changeMade) { + // The state or gen-stamp of the block has changed. So, we may be + // able to process some messages from datanodes that we previously + // were unable to process. 
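
Editor's note: the comment just above is the heart of this hunk; once a replayed edit advances a block's gen-stamp or completion state, any datanode reports that were parked for that block can finally be applied, which is what the processQueuedMessagesForBlock call that follows does. As a point of reference, a minimal standalone sketch of the per-block queueing pattern is shown below. The names are illustrative only; the real PendingDataNodeMessages (shown at the top of this patch) keys the map by Block and stores ReportedBlockInfo objects rather than strings.

import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;

// Illustrative sketch only -- not the patch's exact API.
class PendingBlockReports {
  // Reports that arrived before the namespace was ready for them,
  // grouped by block id so they can be drained one block at a time.
  private final Map<Long, Queue<String>> byBlockId =
      new HashMap<Long, Queue<String>>();
  private int count = 0;

  void enqueue(long blockId, String reportedState) {
    Queue<String> q = byBlockId.get(blockId);
    if (q == null) {
      q = new LinkedList<String>();
      byBlockId.put(blockId, q);
    }
    q.add(reportedState);
    count++;
  }

  /** Remove and return everything queued for the block, or null if nothing was. */
  Queue<String> takeBlockQueue(long blockId) {
    Queue<String> q = byBlockId.remove(blockId);
    if (q != null) {
      count -= q.size();
    }
    return q;
  }

  int count() {
    return count;
  }
}

With this structure, the edit-log loader only has to drain the queue at the two points where a block can change under it: when an existing block's gen-stamp or state is updated, and when a brand-new block is added.
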
+ fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock); + } } if (addCloseOp.blocks.length < oldBlocks.length) { @@ -517,13 +528,9 @@ public class FSEditLogLoader { } fsNamesys.getBlockManager().addINode(newBI, file); file.addBlock(newBI); + fsNamesys.getBlockManager().processQueuedMessagesForBlock(newBlock); } } - - // Record the max genstamp seen - for (Block b : addCloseOp.blocks) { - maxGenStamp = Math.max(maxGenStamp, b.getGenerationStamp()); - } } private static void dumpOpCounts( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index aef137c3650..bede75ebde5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -154,10 +154,6 @@ import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport; import org.apache.hadoop.hdfs.server.common.Util; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; -import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.BlockReceivedDeleteMessage; -import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.BlockReportMessage; -import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.CommitBlockSynchronizationMessage; -import org.apache.hadoop.hdfs.server.namenode.PendingDataNodeMessages.DataNodeMessage; import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer; import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; @@ -321,8 +317,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // lock to protect FSNamesystem. private ReentrantReadWriteLock fsLock; - private PendingDataNodeMessages pendingDatanodeMessages = new PendingDataNodeMessages(); - /** * Used when this NN is in standby state to read from the shared edit log. */ @@ -342,11 +336,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private boolean haEnabled; private final Configuration conf; - - PendingDataNodeMessages getPendingDataNodeMessages() { - return pendingDatanodeMessages; - } - + /** * Instantiates an FSNamesystem loaded from the image and edits * directories specified in the passed Configuration. @@ -481,6 +471,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { nnResourceChecker = new NameNodeResourceChecker(conf); checkAvailableResources(); + assert safeMode != null && + !safeMode.initializedReplQueues; setBlockTotal(); blockManager.activate(conf); this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); @@ -531,6 +523,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, LOG.info("Reprocessing replication and invalidation queues..."); blockManager.getDatanodeManager().markAllDatanodesStale(); blockManager.clearQueues(); + blockManager.processAllPendingDNMessages(); blockManager.processMisReplicatedBlocks(); if (LOG.isDebugEnabled()) { @@ -849,8 +842,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, public boolean isRunning() { return fsRunning; } - - private boolean isInStandbyState() { + + @Override + public boolean isInStandbyState() { if (haContext == null || haContext.getState() == null) { // We're still starting up. 
In this case, if HA is // on for the cluster, we always start in standby. Otherwise @@ -1543,7 +1537,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, blockManager.getDatanodeManager().getDatanodeByHost(clientMachine); if (append && myFile != null) { - return prepareFileForWrite(src, myFile, holder, clientMachine, clientNode); + return prepareFileForWrite( + src, myFile, holder, clientMachine, clientNode, true); } else { // Now we can add the name to the filesystem. This file has no // blocks associated with it. @@ -1581,12 +1576,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @param leaseHolder identifier of the lease holder on this file * @param clientMachine identifier of the client machine * @param clientNode if the client is collocated with a DN, that DN's descriptor + * @param writeToEditLog whether to persist this change to the edit log * @return the last block locations if the block is partial or null otherwise * @throws UnresolvedLinkException * @throws IOException */ public LocatedBlock prepareFileForWrite(String src, INode file, - String leaseHolder, String clientMachine, DatanodeDescriptor clientNode) + String leaseHolder, String clientMachine, DatanodeDescriptor clientNode, + boolean writeToEditLog) throws UnresolvedLinkException, IOException { INodeFile node = (INodeFile) file; INodeFileUnderConstruction cons = new INodeFileUnderConstruction( @@ -1601,6 +1598,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, clientNode); dir.replaceNode(src, node, cons); leaseManager.addLease(cons.getClientName(), src); + + if (writeToEditLog) { + getEditLog().logOpenFile(src, cons); + } return blockManager.convertLastBlockToUnderConstruction(cons); } @@ -2346,9 +2347,45 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (blocks == null) { return; } - for(Block b : blocks) { + + // In the case that we are a Standby tailing edits from the + // active while in safe-mode, we need to track the total number + // of blocks and safe blocks in the system. + boolean trackBlockCounts = isSafeModeTrackingBlocks(); + int numRemovedComplete = 0, numRemovedSafe = 0; + + for (Block b : blocks) { + if (trackBlockCounts) { + BlockInfo bi = blockManager.getStoredBlock(b); + if (bi.isComplete()) { + numRemovedComplete++; + if (bi.numNodes() >= blockManager.minReplication) { + numRemovedSafe++; + } + } + } blockManager.removeBlock(b); } + if (trackBlockCounts) { + if (LOG.isDebugEnabled()) { + LOG.debug("Adjusting safe-mode totals for deletion of " + src + ":" + + "decreasing safeBlocks by " + numRemovedSafe + + ", totalBlocks by " + numRemovedComplete); + } + adjustSafeModeBlockTotals(-numRemovedSafe, -numRemovedComplete); + } + } + + /** + * @see SafeModeInfo#shouldIncrementallyTrackBlocks + */ + private boolean isSafeModeTrackingBlocks() { + if (!haEnabled) { + // Never track blocks incrementally in non-HA code. + return false; + } + SafeModeInfo sm = this.safeMode; + return sm != null && sm.shouldIncrementallyTrackBlocks(); } /** @@ -2712,15 +2749,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, checkOperation(OperationCategory.WRITE); if (haContext.getState().equals(NameNode.STANDBY_STATE)) { // TODO(HA) we'll never get here, since we check for WRITE operation above! 
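
Editor's note: the removeBlocks() change a little further up is the deletion-time half of the accounting. While the standby tails edits in safe mode, deleting a file has to work out how many completed blocks, and how many "safe" ones (those with at least minReplication live replicas), are leaving the totals. The sketch below is a hedged illustration of that bookkeeping; BlockView is a hypothetical stand-in for the real BlockInfo, not an HDFS type. The removed lines that follow this note are the old gen-stamp-based queueing in commitBlockSynchronization, which the patch retires in favor of the per-block queue.

// Illustrative sketch only; BlockView is a stand-in for the real BlockInfo.
interface BlockView {
  boolean isComplete();
  int liveReplicas();
}

final class DeletionAccounting {
  private DeletionAccounting() {}

  /**
   * Returns {deltaSafe, deltaTotal} to apply to the safe-mode counters
   * when the given blocks are removed from the namespace.
   */
  static int[] deltasForRemoval(Iterable<BlockView> removed, int minReplication) {
    int removedComplete = 0;
    int removedSafe = 0;
    for (BlockView b : removed) {
      if (!b.isComplete()) {
        continue; // under-construction blocks were never in the totals
      }
      removedComplete++;
      if (b.liveReplicas() >= minReplication) {
        removedSafe++; // it had also been counted as a safe block
      }
    }
    return new int[] { -removedSafe, -removedComplete };
  }
}

The actual patch only does this extra walk when isSafeModeTrackingBlocks() returns true, i.e. when HA is enabled and safe mode is active, so the non-HA deletion path pays no additional cost.
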
- if (isGenStampInFuture(newgenerationstamp)) { - LOG.info("Required GS=" + newgenerationstamp - + ", Queuing commitBlockSynchronization message"); - getPendingDataNodeMessages().queueMessage( - new PendingDataNodeMessages.CommitBlockSynchronizationMessage( - lastblock, newgenerationstamp, newlength, closeFile, deleteblock, - newtargets, newgenerationstamp)); - return; - } + // Need to implement tests, etc, for this - block recovery spanning + // failover. } if (isInSafeMode()) { @@ -3264,6 +3294,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, boolean initializedReplQueues = false; /** Was safemode entered automatically because available resources were low. */ private boolean resourcesLow = false; + /** Should safemode adjust its block totals as blocks come in */ + private boolean shouldIncrementallyTrackBlocks = false; /** * Creates SafeModeInfo when the name node enters @@ -3291,6 +3323,18 @@ public class FSNamesystem implements Namesystem, FSClusterStats, this.blockSafe = 0; } + /** + * In the HA case, the StandbyNode can be in safemode while the namespace + * is modified by the edit log tailer. In this case, the number of total + * blocks changes as edits are processed (eg blocks are added and deleted). + * However, we don't want to do the incremental tracking during the + * startup-time loading process -- only once the initial total has been + * set after the image has been loaded. + */ + private boolean shouldIncrementallyTrackBlocks() { + return shouldIncrementallyTrackBlocks; + } + /** * Creates SafeModeInfo when safe mode is entered manually, or because * available resources are low. @@ -3476,6 +3520,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, this.blockThreshold = (int) (blockTotal * threshold); this.blockReplQueueThreshold = (int) (blockTotal * replQueueThreshold); + if (haEnabled) { + // After we initialize the block count, any further namespace + // modifications done while in safe mode need to keep track + // of the number of total blocks in the system. 
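
Editor's note: the comment above, together with the flag assignment that follows it, encodes an ordering constraint: the standby must not adjust its safe-mode totals while the image and edits are still being loaded at startup, and it only switches to incremental tracking once the initial block total has been established, and only when HA is enabled. A minimal sketch of that gating is below; the class and method names are hypothetical, not the real SafeModeInfo API.

// Illustrative sketch only -- hypothetical names, not the real SafeModeInfo.
class SafeModeCounters {
  private final boolean haEnabled;
  private boolean trackIncrementally = false;
  private int blockSafe = 0;
  private int blockTotal = 0;

  SafeModeCounters(boolean haEnabled) {
    this.haEnabled = haEnabled;
  }

  /** Called once the namespace has been loaded and the true total is known. */
  void setBlockTotal(int total) {
    this.blockTotal = total;
    if (haEnabled) {
      // From here on, edits applied by the tailer keep the totals current.
      this.trackIncrementally = true;
    }
  }

  /** Called as replayed edits add or remove completed blocks. */
  void adjust(int deltaSafe, int deltaTotal) {
    if (!trackIncrementally) {
      return; // startup-time loading: the total is set once at the end instead
    }
    blockSafe += deltaSafe;
    blockTotal += deltaTotal;
  }

  int safe() { return blockSafe; }
  int total() { return blockTotal; }
}
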
+ this.shouldIncrementallyTrackBlocks = true; + } + checkMode(); } @@ -3485,9 +3536,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @param replication current replication */ private synchronized void incrementSafeBlockCount(short replication) { - if (replication == safeReplication) + if (replication == safeReplication) { this.blockSafe++; - checkMode(); + checkMode(); + } } /** @@ -3496,9 +3548,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @param replication current replication */ private synchronized void decrementSafeBlockCount(short replication) { - if (replication == safeReplication-1) + if (replication == safeReplication-1) { this.blockSafe--; - checkMode(); + assert blockSafe >= 0 || isManual(); + checkMode(); + } } /** @@ -3636,6 +3690,26 @@ public class FSNamesystem implements Namesystem, FSClusterStats, + "BlockManager data: active=" + activeBlocks); } } + + private void adjustBlockTotals(int deltaSafe, int deltaTotal) { + if (!shouldIncrementallyTrackBlocks) { + return; + } + assert haEnabled; + + if (LOG.isDebugEnabled()) { + LOG.debug("Adjusting block totals from " + + blockSafe + "/" + blockTotal + " to " + + (blockSafe + deltaSafe) + "/" + (blockTotal + deltaTotal)); + } + assert blockSafe + deltaSafe >= 0 : "Can't reduce blockSafe " + + blockSafe + " by " + deltaSafe + ": would be negative"; + assert blockTotal + deltaTotal >= 0 : "Can't reduce blockTotal " + + blockTotal + " by " + deltaTotal + ": would be negative"; + + blockSafe += deltaSafe; + setBlockTotal(blockTotal + deltaTotal); + } } /** @@ -3741,7 +3815,24 @@ public class FSNamesystem implements Namesystem, FSClusterStats, SafeModeInfo safeMode = this.safeMode; if (safeMode == null) // mostly true return; - safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas()); + BlockInfo storedBlock = blockManager.getStoredBlock(b); + if (storedBlock.isComplete()) { + safeMode.decrementSafeBlockCount((short)blockManager.countNodes(b).liveReplicas()); + } + } + + /** + * Adjust the total number of blocks safe and expected during safe mode. + * If safe mode is not currently on, this is a no-op. + * @param deltaSafe the change in number of safe blocks + * @param deltaTotal the change i nnumber of total blocks expected + */ + public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal) { + // safeMode is volatile, and may be set to null at any time + SafeModeInfo safeMode = this.safeMode; + if (safeMode == null) + return; + safeMode.adjustBlockTotals(deltaSafe, deltaTotal); } /** @@ -4065,6 +4156,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return blockManager.getPostponedMisreplicatedBlocksCount(); } + @Metric + public int getPendingDataNodeMessageCount() { + return blockManager.getPendingDataNodeMessageCount(); + } + @Metric public int getBlockCapacity() { return blockManager.getCapacity(); @@ -4912,54 +5008,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, public boolean isGenStampInFuture(long genStamp) { return (genStamp > getGenerationStamp()); } - - public void notifyGenStampUpdate(long gs) { - if (LOG.isDebugEnabled()) { - LOG.debug("Generation stamp " + gs + " has been reached. 
" + - "Processing pending messages from DataNodes..."); - } - DataNodeMessage msg = pendingDatanodeMessages.take(gs); - while (msg != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("Processing previously pending message: " + msg); - } - try { - switch (msg.getType()) { - case BLOCK_RECEIVED_DELETE: - BlockReceivedDeleteMessage m = (BlockReceivedDeleteMessage) msg; - if (NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog - .debug("*BLOCK* NameNode.blockReceivedAndDeleted: " + "from " - + m.getNodeReg().getName() + " " - + m.getReceivedAndDeletedBlocks().length + " blocks."); - } - this.getBlockManager().processIncrementalBlockReport(m.getNodeReg(), - m.getPoolId(), m.getReceivedAndDeletedBlocks()); - break; - case BLOCK_REPORT: - BlockReportMessage mbr = (BlockReportMessage) msg; - if (NameNode.stateChangeLog.isDebugEnabled()) { - NameNode.stateChangeLog.debug("*BLOCK* NameNode.blockReport: " - + "from " + mbr.getNodeReg().getName() + " " - + mbr.getBlockList().getNumberOfBlocks() + " blocks"); - } - this.getBlockManager().processReport(mbr.getNodeReg(), - mbr.getPoolId(), mbr.getBlockList()); - break; - case COMMIT_BLOCK_SYNCHRONIZATION: - CommitBlockSynchronizationMessage mcbm = (CommitBlockSynchronizationMessage) msg; - this.commitBlockSynchronization(mcbm.getBlock(), - mcbm.getNewgenerationstamp(), mcbm.getNewlength(), - mcbm.isCloseFile(), mcbm.isDeleteblock(), mcbm.getNewtargets()); - break; - } - } catch (IOException ex) { - LOG.warn("Could not process the message " + msg.getType(), ex); - } - msg = pendingDatanodeMessages.take(gs); - } - } - @VisibleForTesting public EditLogTailer getEditLogTailer() { return editLogTailer; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 5920762ac83..b293b5a14fc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -878,16 +878,6 @@ class NameNodeRpcServer implements NamenodeProtocols { String poolId, long[] blocks) throws IOException { verifyRequest(nodeReg); BlockListAsLongs blist = new BlockListAsLongs(blocks); - if (nn.isStandbyState()) { - long maxGs = blist.getMaxGsInBlockList(); - if (namesystem.isGenStampInFuture(maxGs)) { - LOG.info("Required GS="+maxGs+", Queuing blockReport message"); - namesystem.getPendingDataNodeMessages().queueMessage( - new PendingDataNodeMessages.BlockReportMessage(nodeReg, poolId, - blist, maxGs)); - return null; - } - } if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*BLOCK* NameNode.blockReport: " + "from " + nodeReg.getName() + " " + blist.getNumberOfBlocks() @@ -904,25 +894,6 @@ class NameNodeRpcServer implements NamenodeProtocols { public void blockReceivedAndDeleted(DatanodeRegistration nodeReg, String poolId, ReceivedDeletedBlockInfo[] receivedAndDeletedBlocks) throws IOException { verifyRequest(nodeReg); - if (nn.isStandbyState()) { - if (receivedAndDeletedBlocks.length > 0) { - long maxGs = receivedAndDeletedBlocks[0].getBlock() - .getGenerationStamp(); - for (ReceivedDeletedBlockInfo binfo : receivedAndDeletedBlocks) { - if (binfo.getBlock().getGenerationStamp() > maxGs) { - maxGs = binfo.getBlock().getGenerationStamp(); - } - } - if (namesystem.isGenStampInFuture(maxGs)) { - LOG.info("Required GS=" + maxGs 
- + ", Queuing blockReceivedAndDeleted message"); - namesystem.getPendingDataNodeMessages().queueMessage( - new PendingDataNodeMessages.BlockReceivedDeleteMessage(nodeReg, - poolId, receivedAndDeletedBlocks, maxGs)); - return; - } - } - } if(stateChangeLog.isDebugEnabled()) { stateChangeLog.debug("*BLOCK* NameNode.blockReceivedAndDeleted: " +"from "+nodeReg.getName()+" "+receivedAndDeletedBlocks.length diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java index 6846e959a49..c453db561eb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/Namesystem.java @@ -32,4 +32,10 @@ public interface Namesystem extends RwLock, SafeMode { /** @return the block pool ID */ public String getBlockPoolId(); + + public boolean isInStandbyState(); + + public boolean isGenStampInFuture(long generationStamp); + + public void adjustSafeModeBlockTotals(int deltaSafe, int deltaTotal); } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java deleted file mode 100644 index 04eb4b9ccc0..00000000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/PendingDataNodeMessages.java +++ /dev/null @@ -1,201 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hdfs.server.namenode; - -import java.util.PriorityQueue; - -import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; -import org.apache.hadoop.hdfs.protocol.DatanodeID; -import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; -import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; - -public class PendingDataNodeMessages { - - PriorityQueue queue = new PriorityQueue(); - - enum MessageType { - BLOCK_RECEIVED_DELETE, - BLOCK_REPORT, - COMMIT_BLOCK_SYNCHRONIZATION - } - - static abstract class DataNodeMessage - implements Comparable { - - final MessageType type; - private final long targetGs; - - DataNodeMessage(MessageType type, long targetGenStamp) { - this.type = type; - this.targetGs = targetGenStamp; - } - - protected MessageType getType() { - return type; - } - - protected long getTargetGs() { - return targetGs; - } - - public int compareTo(DataNodeMessage other) { - if (targetGs == other.targetGs) { - return 0; - } else if (targetGs < other.targetGs) { - return -1; - } - return 1; - } - } - - static class BlockReceivedDeleteMessage extends DataNodeMessage { - final DatanodeRegistration nodeReg; - final String poolId; - final ReceivedDeletedBlockInfo[] receivedAndDeletedBlocks; - - BlockReceivedDeleteMessage(DatanodeRegistration nodeReg, String poolId, - ReceivedDeletedBlockInfo[] receivedAndDeletedBlocks, long targetGs) { - super(MessageType.BLOCK_RECEIVED_DELETE, targetGs); - this.nodeReg = nodeReg; - this.poolId = poolId; - this.receivedAndDeletedBlocks = receivedAndDeletedBlocks; - } - - DatanodeRegistration getNodeReg() { - return nodeReg; - } - - String getPoolId() { - return poolId; - } - - ReceivedDeletedBlockInfo[] getReceivedAndDeletedBlocks() { - return receivedAndDeletedBlocks; - } - - public String toString() { - return "BlockReceivedDeletedMessage with " + - receivedAndDeletedBlocks.length + " blocks"; - } - } - - static class CommitBlockSynchronizationMessage extends DataNodeMessage { - - private final ExtendedBlock block; - private final long newgenerationstamp; - private final long newlength; - private final boolean closeFile; - private final boolean deleteblock; - private final DatanodeID[] newtargets; - - CommitBlockSynchronizationMessage(ExtendedBlock block, - long newgenerationstamp, long newlength, boolean closeFile, - boolean deleteblock, DatanodeID[] newtargets, long targetGenStamp) { - super(MessageType.COMMIT_BLOCK_SYNCHRONIZATION, targetGenStamp); - this.block = block; - this.newgenerationstamp = newgenerationstamp; - this.newlength = newlength; - this.closeFile = closeFile; - this.deleteblock = deleteblock; - this.newtargets = newtargets; - } - - ExtendedBlock getBlock() { - return block; - } - - long getNewgenerationstamp() { - return newgenerationstamp; - } - - long getNewlength() { - return newlength; - } - - boolean isCloseFile() { - return closeFile; - } - - boolean isDeleteblock() { - return deleteblock; - } - - DatanodeID[] getNewtargets() { - return newtargets; - } - - public String toString() { - return "CommitBlockSynchronizationMessage for " + block; - } - } - - static class BlockReportMessage extends DataNodeMessage { - - private final DatanodeRegistration nodeReg; - private final String poolId; - private final BlockListAsLongs blockList; - - BlockReportMessage(DatanodeRegistration nodeReg, String poolId, - BlockListAsLongs blist, long targetGenStamp) { - super(MessageType.BLOCK_REPORT, targetGenStamp); - this.nodeReg = nodeReg; - 
this.poolId = poolId; - this.blockList = blist; - } - - DatanodeRegistration getNodeReg() { - return nodeReg; - } - - String getPoolId() { - return poolId; - } - - BlockListAsLongs getBlockList() { - return blockList; - } - - public String toString() { - return "BlockReport from " + nodeReg + " with " + blockList.getNumberOfBlocks() + " blocks"; - } - } - - synchronized void queueMessage(DataNodeMessage msg) { - queue.add(msg); - } - - /** - * Returns a message if contains a message less or equal to the given gs, - * otherwise returns null. - * - * @param gs - */ - synchronized DataNodeMessage take(long gs) { - DataNodeMessage m = queue.peek(); - if (m != null && m.getTargetGs() <= gs) { - return queue.remove(); - } else { - return null; - } - } - - synchronized boolean isEmpty() { - return queue.isEmpty(); - } -} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingDataNodeMessages.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingDataNodeMessages.java new file mode 100644 index 00000000000..16977bb820e --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingDataNodeMessages.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.blockmanagement; + +import static org.junit.Assert.*; + +import java.util.Queue; + +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.server.blockmanagement.PendingDataNodeMessages.ReportedBlockInfo; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; +import org.junit.Test; + +import com.google.common.base.Joiner; + + +public class TestPendingDataNodeMessages { + PendingDataNodeMessages msgs = new PendingDataNodeMessages(); + + private final Block block1Gs1 = new Block(1, 0, 1); + private final Block block1Gs2 = new Block(1, 0, 2); + private final Block block1Gs2DifferentInstance = + new Block(1, 0, 2); + private final Block block2Gs1 = new Block(2, 0, 1); + + private final DatanodeDescriptor fakeDN = new DatanodeDescriptor( + new DatanodeID("fake")); + + @Test + public void testQueues() { + msgs.enqueueReportedBlock(fakeDN, block1Gs1, ReplicaState.FINALIZED); + msgs.enqueueReportedBlock(fakeDN, block1Gs2, ReplicaState.FINALIZED); + + assertEquals(2, msgs.count()); + + // Nothing queued yet for block 2 + assertNull(msgs.takeBlockQueue(block2Gs1)); + assertEquals(2, msgs.count()); + + Queue q = + msgs.takeBlockQueue(block1Gs2DifferentInstance); + assertEquals( + "ReportedBlockInfo [block=blk_1_1, dn=fake, reportedState=FINALIZED]," + + "ReportedBlockInfo [block=blk_1_2, dn=fake, reportedState=FINALIZED]", + Joiner.on(",").join(q)); + assertEquals(0, msgs.count()); + + // Should be null if we pull again + assertNull(msgs.takeBlockQueue(block1Gs1)); + assertEquals(0, msgs.count()); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index 181de70f336..fead3b6162f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -30,8 +30,8 @@ import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretMan import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.MkdirOp; -import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo; +import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.ipc.Server; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java index 1f43e057f7b..a7a939c0081 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencing.java @@ -21,18 +21,18 @@ import static org.junit.Assert.*; import java.io.IOException; import java.io.PrintWriter; -import java.io.StringWriter; -import java.net.URISyntaxException; import java.util.Collection; import 
java.util.List; +import java.util.concurrent.CountDownLatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.AppendTestUtil; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSUtil; @@ -40,23 +40,29 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy; import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicyDefault; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.datanode.DataNode; -import org.apache.hadoop.hdfs.server.namenode.FSClusterStats; +import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.FSInodeInfo; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; -import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.apache.log4j.Level; import org.junit.After; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; +import org.mockito.Mockito; +import org.mockito.invocation.InvocationOnMock; import com.google.common.base.Supplier; import com.google.common.collect.Lists; @@ -360,6 +366,164 @@ public class TestDNFencing { FileSystem fs2 = cluster.getFileSystem(1); DFSTestUtil.readFile(fs2, TEST_FILE_PATH); } + + /** + * Regression test for HDFS-2742. The issue in this bug was: + * - DN does a block report while file is open. This BR contains + * the block in RBW state. + * - Standby queues the RBW state in PendingDatanodeMessages + * - Standby processes edit logs during failover. Before fixing + * this bug, it was mistakenly applying the RBW reported state + * after the block had been completed, causing the block to get + * marked corrupt. Instead, we should now be applying the RBW + * message on OP_ADD, and then the FINALIZED message on OP_CLOSE. + */ + @Test + public void testBlockReportsWhileFileBeingWritten() throws Exception { + FSDataOutputStream out = fs.create(TEST_FILE_PATH); + try { + AppendTestUtil.write(out, 0, 10); + out.hflush(); + + // Block report will include the RBW replica, but will be + // queued on the StandbyNode. + cluster.triggerBlockReports(); + + } finally { + IOUtils.closeStream(out); + } + + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + // Verify that no replicas are marked corrupt, and that the + // file is readable from the failed-over standby. 
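
Editor's note: both HDFS-2742 regression tests in this hunk hinge on the same standby-side decision; a replica report that is "ahead of" or inconsistent with what the tailed namespace currently shows must be parked in PendingDataNodeMessages rather than applied (and possibly marking the block corrupt). The exact condition lives in BlockManager and is not visible in this hunk, so the helper below is only an illustration of the kind of check involved, not the patch's real logic. The assertions that follow this note simply confirm that nothing was marked corrupt once the queued reports were applied in order.

// Illustration only -- the real check is in BlockManager and is more involved.
final class ReportOrdering {
  private ReportOrdering() {}

  /**
   * Decide whether a standby should queue a replica report for later
   * instead of applying it against its current view of the block.
   */
  static boolean shouldQueue(long reportedGenStamp,
                             long namespaceGenStamp,
                             boolean blockCompleteInNamespace,
                             boolean reportedFinalized) {
    if (reportedGenStamp > namespaceGenStamp) {
      // The edits that produced this gen-stamp have not been tailed yet.
      return true;
    }
    if (!reportedFinalized && blockCompleteInNamespace) {
      // A replica still reported as RBW for a block the namespace has
      // already completed (the delayed-report scenario above): park it
      // instead of treating the replica as corrupt.
      return true;
    }
    return false;
  }
}
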
+ BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager()); + BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager()); + assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks()); + assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks()); + + DFSTestUtil.readFile(fs, TEST_FILE_PATH); + } + + /** + * Test that, when a block is re-opened for append, the related + * datanode messages are correctly queued by the SBN because + * they have future states and genstamps. + */ + @Test + public void testQueueingWithAppend() throws Exception { + int numQueued = 0; + int numDN = cluster.getDataNodes().size(); + + FSDataOutputStream out = fs.create(TEST_FILE_PATH); + try { + AppendTestUtil.write(out, 0, 10); + out.hflush(); + + // Opening the file will report RBW replicas, but will be + // queued on the StandbyNode. + numQueued += numDN; // RBW messages + } finally { + IOUtils.closeStream(out); + numQueued += numDN; // blockReceived messages + } + + cluster.triggerBlockReports(); + numQueued += numDN; + + try { + out = fs.append(TEST_FILE_PATH); + AppendTestUtil.write(out, 10, 10); + // RBW replicas once it's opened for append + numQueued += numDN; + + } finally { + IOUtils.closeStream(out); + numQueued += numDN; // blockReceived + } + + cluster.triggerBlockReports(); + numQueued += numDN; + + assertEquals(numQueued, cluster.getNameNode(1).getNamesystem(). + getPendingDataNodeMessageCount()); + + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + // Verify that no replicas are marked corrupt, and that the + // file is readable from the failed-over standby. + BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager()); + BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager()); + assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks()); + assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks()); + + AppendTestUtil.check(fs, TEST_FILE_PATH, 20); + } + + /** + * Another regression test for HDFS-2742. This tests the following sequence: + * - DN does a block report while file is open. This BR contains + * the block in RBW state. + * - The block report is delayed in reaching the standby. + * - The file is closed. + * - The standby processes the OP_ADD and OP_CLOSE operations before + * the RBW block report arrives. + * - The standby should not mark the block as corrupt. + */ + @Test + public void testRBWReportArrivesAfterEdits() throws Exception { + final CountDownLatch brFinished = new CountDownLatch(1); + DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) { + @Override + protected Object passThrough(InvocationOnMock invocation) + throws Throwable { + try { + return super.passThrough(invocation); + } finally { + // inform the test that our block report went through. 
+ brFinished.countDown(); + } + } + }; + + FSDataOutputStream out = fs.create(TEST_FILE_PATH); + try { + AppendTestUtil.write(out, 0, 10); + out.hflush(); + + DataNode dn = cluster.getDataNodes().get(0); + DatanodeProtocolClientSideTranslatorPB spy = + DataNodeAdapter.spyOnBposToNN(dn, nn2); + + Mockito.doAnswer(delayer) + .when(spy).blockReport( + Mockito.anyObject(), + Mockito.anyString(), + Mockito.anyObject()); + dn.scheduleAllBlockReport(0); + delayer.waitForCall(); + + } finally { + IOUtils.closeStream(out); + } + + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + delayer.proceed(); + brFinished.await(); + + // Verify that no replicas are marked corrupt, and that the + // file is readable from the failed-over standby. + BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager()); + BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager()); + assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks()); + assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks()); + + DFSTestUtil.readFile(fs, TEST_FILE_PATH); + } /** * Print a big banner in the test log to make debug easier. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index d423ce26617..d6babb788a7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -25,10 +25,13 @@ import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import java.io.IOException; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -38,15 +41,19 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.log4j.Level; import org.junit.After; import org.junit.Before; import org.junit.Test; import com.google.common.base.Supplier; +import com.google.common.collect.Lists; /** * Tests that exercise safemode in an HA cluster. 
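
Editor's note: the safe-mode status strings asserted throughout the test class below ultimately reflect one comparison inside SafeModeInfo, namely whether the number of safe (completed and sufficiently reported) blocks has reached the configured fraction of the total. The sketch below is a simplified version of that check; the real code also applies an extension period and a separate replication-queue threshold.

// Simplified sketch of the safe-mode threshold comparison.
final class SafeModeThreshold {
  private SafeModeThreshold() {}

  /** True if safe mode still needs more blocks to be reported. */
  static boolean needsMoreBlocks(int blockSafe, int blockTotal, double threshold) {
    int blockThreshold = (int) (blockTotal * threshold);
    return blockSafe < blockThreshold;
  }

  public static void main(String[] args) {
    // With the 0.999 threshold that the test status strings mention:
    System.out.println(needsMoreBlocks(0, 3, 0.999));  // true  -> "needs additional ... blocks"
    System.out.println(needsMoreBlocks(3, 3, 0.999));  // false -> "reached the threshold"
  }
}
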
@@ -60,6 +67,12 @@ public class TestHASafeMode { private MiniDFSCluster cluster; private Runtime mockRuntime = mock(Runtime.class); + static { + ((Log4JLogger)LogFactory.getLog(FSImage.class)).getLogger().setLevel(Level.ALL); + ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); + ((Log4JLogger)NameNode.stateChangeLog).getLogger().setLevel(Level.ALL); + } + @Before public void setupCluster() throws Exception { Configuration conf = new Configuration(); @@ -112,7 +125,11 @@ public class TestHASafeMode { @Test public void testEnterSafeModeInANNShouldNotThrowNPE() throws Exception { banner("Restarting active"); + DFSTestUtil + .createFile(fs, new Path("/test"), 3 * BLOCK_SIZE, (short) 3, 1L); restartActive(); + nn0.getRpcServer().transitionToActive(); + FSNamesystem namesystem = nn0.getNamesystem(); String status = namesystem.getSafemode(); assertTrue("Bad safemode status: '" + status + "'", status @@ -187,24 +204,14 @@ public class TestHASafeMode { banner("Restarting standby"); restartStandby(); - // We expect it to be stuck in safemode (not the extension) because - // the block reports are delayed (since they include blocks - // from /test2 which are too-high genstamps. - String status = nn1.getNamesystem().getSafemode(); - assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." + - "The reported blocks 0 needs additional 3 blocks to reach")); + // We expect it not to be stuck in safemode, since those blocks + // that are already visible to the SBN should be processed + // in the initial block reports. + assertSafeMode(nn1, 3, 3); banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - - status = nn1.getNamesystem().getSafemode(); - assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." + - "The reported blocks 8 has reached the threshold 0.9990 of " + - "total blocks 8. Safe mode will be turned off automatically")); + assertSafeMode(nn1, 8, 8); } /** @@ -224,12 +231,7 @@ public class TestHASafeMode { banner("Restarting standby"); restartStandby(); - String status = nn1.getNamesystem().getSafemode(); - assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." + - "The reported blocks 3 has reached the threshold 0.9990 of " + - "total blocks 3. Safe mode will be turned off automatically")); + assertSafeMode(nn1, 3, 3); // Create a few blocks which will send blockReceived calls to the // SBN. @@ -240,12 +242,7 @@ public class TestHASafeMode { banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - status = nn1.getNamesystem().getSafemode(); - assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." + - "The reported blocks 8 has reached the threshold 0.9990 of " + - "total blocks 8. Safe mode will be turned off automatically")); + assertSafeMode(nn1, 8, 8); } /** @@ -285,20 +282,11 @@ public class TestHASafeMode { banner("Restarting standby"); restartStandby(); - String status = nn1.getNamesystem().getSafemode(); - assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." 
+ - "The reported blocks 0 needs additional 5 blocks to reach")); + assertSafeMode(nn1, 0, 5); banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - status = nn1.getNamesystem().getSafemode(); - assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." + - "The reported blocks 0 has reached the threshold 0.9990 of " + - "total blocks 0. Safe mode will be turned off automatically")); + assertSafeMode(nn1, 0, 0); } /** @@ -320,12 +308,7 @@ public class TestHASafeMode { restartStandby(); // It will initially have all of the blocks necessary. - String status = nn1.getNamesystem().getSafemode(); - assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." + - "The reported blocks 10 has reached the threshold 0.9990 of " + - "total blocks 10. Safe mode will be turned off automatically")); + assertSafeMode(nn1, 10, 10); // Delete those blocks while the SBN is in safe mode - this // should reduce it back below the threshold @@ -339,23 +322,123 @@ public class TestHASafeMode { HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); - status = nn1.getNamesystem().getSafemode(); - assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." + - "The reported blocks 0 needs additional 10 blocks")); + assertSafeMode(nn1, 0, 10); banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); - status = nn1.getNamesystem().getSafemode(); - assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." + - "The reported blocks 0 has reached the threshold 0.9990 of " + - "total blocks 0. Safe mode will be turned off automatically")); + assertSafeMode(nn1, 0, 0); } + /** + * Tests that the standby node properly tracks the number of total + * and safe blocks while it is in safe mode. Since safe-mode only + * counts completed blocks, append needs to decrement the total + * number of blocks and then re-increment when the file is closed + * again. + */ + @Test + public void testAppendWhileInSafeMode() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + // Make 4.5 blocks so that append() will re-open an existing block + // instead of just adding a new one + DFSTestUtil.createFile(fs, new Path("/test"), + 4*BLOCK_SIZE + BLOCK_SIZE/2, (short) 3, 1L); + + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup. + nn0.getRpcServer().rollEditLog(); + + banner("Restarting standby"); + restartStandby(); + + // It will initially have all of the blocks necessary. + assertSafeMode(nn1, 5, 5); + + // Append to a block while SBN is in safe mode. This should + // not affect safemode initially, since the DN message + // will get queued. + FSDataOutputStream stm = fs.append(new Path("/test")); + try { + assertSafeMode(nn1, 5, 5); + + // if we roll edits now, the SBN should see that it's under construction + // and change its total count and safe count down by one, since UC + // blocks are not counted by safe mode. 
+ HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + assertSafeMode(nn1, 4, 4); + } finally { + IOUtils.closeStream(stm); + } + + // Delete those blocks while the SBN is in safe mode - this + // should reduce it back below the threshold + banner("Removing the blocks without rolling the edit log"); + fs.delete(new Path("/test"), true); + BlockManagerTestUtil.computeAllPendingWork( + nn0.getNamesystem().getBlockManager()); + + banner("Triggering deletions on DNs and Deletion Reports"); + cluster.triggerHeartbeats(); + HATestUtil.waitForDNDeletions(cluster); + cluster.triggerDeletionReports(); + + assertSafeMode(nn1, 0, 4); + + banner("Waiting for standby to catch up to active namespace"); + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + + assertSafeMode(nn1, 0, 0); + } + + /** + * Regression test for a bug experienced while developing + * HDFS-2742. The scenario here is: + * - image contains some blocks + * - edits log contains at least one block addition, followed + * by deletion of more blocks than were added. + * - When node starts up, some incorrect accounting of block + * totals caused an assertion failure. + */ + @Test + public void testBlocksDeletedInEditLog() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + // Make 4 blocks persisted in the image. + DFSTestUtil.createFile(fs, new Path("/test"), + 4*BLOCK_SIZE, (short) 3, 1L); + NameNodeAdapter.enterSafeMode(nn0, false); + NameNodeAdapter.saveNamespace(nn0); + NameNodeAdapter.leaveSafeMode(nn0, false); + + // OP_ADD for 2 blocks + DFSTestUtil.createFile(fs, new Path("/test2"), + 2*BLOCK_SIZE, (short) 3, 1L); + + // OP_DELETE for 4 blocks + fs.delete(new Path("/test"), true); + + restartActive(); + } + + private void assertSafeMode(NameNode nn, int safe, int total) { + String status = nn1.getNamesystem().getSafemode(); + if (safe == total) { + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks " + safe + " has reached the threshold " + + "0.9990 of total blocks " + total + ". Safe mode will be " + + "turned off automatically")); + } else { + int additional = total - safe; + assertTrue("Bad safemode status: '" + status + "'", + status.startsWith( + "Safe mode is ON." + + "The reported blocks " + safe + " needs additional " + + additional + " blocks")); + } + } + /** * Set up a namesystem with several edits, both deletions and * additions, and failover to a new NN while that NN is in @@ -378,26 +461,107 @@ public class TestHASafeMode { banner("Restarting standby"); restartStandby(); - // We expect it to be stuck in safemode (not the extension) because - // the block reports are delayed (since they include blocks - // from /test2 which are too-high genstamps. - String status = nn1.getNamesystem().getSafemode(); - assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." + - "The reported blocks 0 needs additional 3 blocks to reach")); - + // We expect it to be on its way out of safemode, since all of the blocks + // from the edit log have been reported. + assertSafeMode(nn1, 3, 3); + // Initiate a failover into it while it's in safemode banner("Initiating a failover into NN1 in safemode"); NameNodeAdapter.abortEditLogs(nn0); cluster.transitionToActive(1); - status = nn1.getNamesystem().getSafemode(); + assertSafeMode(nn1, 5, 5); + } + + /** + * Similar to {@link #testBlocksRemovedWhileInSafeMode()} except that + * the OP_DELETE edits arrive at the SBN before the block deletion reports. 
+ * The tracking of safe blocks needs to properly account for the removal + * of the blocks as well as the safe count. This is a regression test for + * HDFS-2742. + */ + @Test + public void testBlocksRemovedWhileInSafeModeEditsArriveFirst() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some blocks"); + DFSTestUtil.createFile(fs, new Path("/test"), 10*BLOCK_SIZE, (short) 3, 1L); + + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup. + nn0.getRpcServer().rollEditLog(); + + banner("Restarting standby"); + restartStandby(); + + // It will initially have all of the blocks necessary. + String status = nn1.getNamesystem().getSafemode(); assertTrue("Bad safemode status: '" + status + "'", status.startsWith( "Safe mode is ON." + - "The reported blocks 5 has reached the threshold 0.9990 of " + - "total blocks 5. Safe mode will be turned off automatically")); + "The reported blocks 10 has reached the threshold 0.9990 of " + + "total blocks 10. Safe mode will be turned off automatically")); + + // Delete those blocks while the SBN is in safe mode. + // Immediately roll the edit log before the actual deletions are sent + // to the DNs. + banner("Removing the blocks without rolling the edit log"); + fs.delete(new Path("/test"), true); + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + + // Should see removal of the blocks as well as their contribution to safe block count. + assertSafeMode(nn1, 0, 0); + + + banner("Triggering sending deletions to DNs and Deletion Reports"); + BlockManagerTestUtil.computeAllPendingWork( + nn0.getNamesystem().getBlockManager()); + cluster.triggerHeartbeats(); + HATestUtil.waitForDNDeletions(cluster); + cluster.triggerDeletionReports(); + + // No change in assertion status here, but some of the consistency checks + // in safemode will fire here if we accidentally decrement safe block count + // below 0. + assertSafeMode(nn1, 0, 0); + } + + + /** + * Test that the number of safe blocks is accounted correctly even when + * blocks move between under-construction state and completed state. + * If a FINALIZED report arrives at the SBN before the block is marked + * COMPLETE, then when we get the OP_CLOSE we need to count it as "safe" + * at that point. This is a regression test for HDFS-2742. + */ + @Test + public void testSafeBlockTracking() throws Exception { + banner("Starting with NN0 active and NN1 standby, creating some " + + "UC blocks plus some other blocks to force safemode"); + DFSTestUtil.createFile(fs, new Path("/other-blocks"), 10*BLOCK_SIZE, (short) 3, 1L); + + List stms = Lists.newArrayList(); + try { + for (int i = 0; i < 5; i++) { + FSDataOutputStream stm = fs.create(new Path("/test-uc-" + i)); + stms.add(stm); + stm.write(1); + stm.hflush(); + } + // Roll edit log so that, when the SBN restarts, it will load + // the namespace during startup and enter safemode. + nn0.getRpcServer().rollEditLog(); + } finally { + for (FSDataOutputStream stm : stms) { + IOUtils.closeStream(stm); + } + } + + banner("Restarting SBN"); + restartStandby(); + assertSafeMode(nn1, 10, 10); + + banner("Allowing SBN to catch up"); + HATestUtil.waitForStandbyToCatchUp(nn0, nn1); + assertSafeMode(nn1, 15, 15); } /** @@ -425,12 +589,7 @@ public class TestHASafeMode { nn0.getRpcServer().rollEditLog(); restartStandby(); - String status = nn1.getNamesystem().getSafemode(); - assertTrue("Bad safemode status: '" + status + "'", - status.startsWith( - "Safe mode is ON." 
+ - "The reported blocks 6 has reached the threshold 0.9990 of " + - "total blocks 6. Safe mode will be turned off automatically")); + assertSafeMode(nn1, 6, 6); } /** From 048c416beb42ad27cf0e82b144da1d99e50c62b1 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 1 Feb 2012 19:23:28 +0000 Subject: [PATCH 111/177] HDFS-2870. Fix log level for block debug info in processMisReplicatedBlocks. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1239278 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hadoop/hdfs/server/blockmanagement/BlockManager.java | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 0406f192a24..293ebf96044 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -147,3 +147,5 @@ HDFS-2853. HA: NN fails to start if the shared edits dir is marked required (atm HDFS-2845. SBN should not allow browsing of the file system via web UI. (Bikas Saha via atm) HDFS-2742. HA: observed dataloss in replication stress test. (todd via eli) + +HDFS-2870. Fix log level for block debug info in processMisReplicatedBlocks (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index ca861318820..7143118a794 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2033,7 +2033,9 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block neededReplications.clear(); for (BlockInfo block : blocksMap.getBlocks()) { MisReplicationResult res = processMisReplicatedBlock(block); - LOG.info("block " + block + ": " + res); + if (LOG.isTraceEnabled()) { + LOG.trace("block " + block + ": " + res); + } switch (res) { case UNDER_REPLICATED: nrUnderReplicated++; From 4d779e088a30f958c9788366e0e251476cb18410 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 1 Feb 2012 22:02:29 +0000 Subject: [PATCH 112/177] HDFS-2859. LOCAL_ADDRESS_MATCHER.match has NPE when called from DFSUtil.getSuffixIDs when the host is incorrect. Contributed by Bikas Saha. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1239356 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../src/main/java/org/apache/hadoop/hdfs/DFSUtil.java | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 293ebf96044..7a4ef27f195 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -149,3 +149,5 @@ HDFS-2845. SBN should not allow browsing of the file system via web UI. (Bikas S HDFS-2742. HA: observed dataloss in replication stress test. (todd via eli) HDFS-2870. Fix log level for block debug info in processMisReplicatedBlocks (todd) + +HDFS-2859. 
LOCAL_ADDRESS_MATCHER.match has NPE when called from DFSUtil.getSuffixIDs when the host is incorrect (Bikas Saha via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 94605b932f7..c9ccf9f38c7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -61,6 +61,8 @@ import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import com.google.common.base.Joiner; import com.google.common.collect.Lists; @@ -69,6 +71,8 @@ import com.google.protobuf.BlockingService; @InterfaceAudience.Private public class DFSUtil { + private static final Log LOG = LogFactory.getLog(DFSUtil.class.getName()); + private DFSUtil() { /* Hidden constructor */ } private static final ThreadLocal RANDOM = new ThreadLocal() { @Override @@ -935,9 +939,10 @@ public class DFSUtil { try { s = NetUtils.createSocketAddr(addr); } catch (Exception e) { + LOG.warn("Exception in creating socket address", e); continue; } - if (matcher.match(s)) { + if (!s.isUnresolved() && matcher.match(s)) { nameserviceId = nsId; namenodeId = nnId; found++; From 4324e1bcd78a98e2fb92c81ee959e25b4193da4f Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Thu, 2 Feb 2012 19:20:32 +0000 Subject: [PATCH 113/177] HADOOP-7991. HA: the FailoverController should check the standby is ready before failing over. Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1239774 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 3 + .../apache/hadoop/ha/FailoverController.java | 32 ++++++++-- .../java/org/apache/hadoop/ha/HAAdmin.java | 14 +++-- .../apache/hadoop/ha/HAServiceProtocol.java | 11 ++++ .../hadoop/ha/TestFailoverController.java | 62 ++++++++++++++----- .../org/apache/hadoop/ha/TestHAAdmin.java | 13 +++- .../hadoop/hdfs/server/namenode/NameNode.java | 7 +++ .../server/namenode/NameNodeRpcServer.java | 5 ++ 8 files changed, 122 insertions(+), 25 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index f62c7177214..2170cd2a69a 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -39,3 +39,6 @@ HADOOP-7983. HA: failover should be able to pass args to fencers. (eli) HADOOP-7938. HA: the FailoverController should optionally fence the active during failover. (eli) + +HADOOP-7991. HA: the FailoverController should check the standby is +ready before failing over. 
(eli) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java index 711296d342f..7205f9f53b5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java @@ -46,12 +46,19 @@ public class FailoverController { * failover to, eg to prevent failing over to a service (eg due * to it being inaccessible, already active, not healthy, etc). * + * An option to ignore toSvc if it claims it is not ready to + * become active is provided in case performing a failover will + * allow it to become active, eg because it triggers a log roll + * so the standby can learn about new blocks and leave safemode. + * * @param toSvc service to make active * @param toSvcName name of service to make active + * @param forceActive ignore toSvc if it reports that it is not ready * @throws FailoverFailedException if we should avoid failover */ private static void preFailoverChecks(HAServiceProtocol toSvc, - InetSocketAddress toSvcAddr) + InetSocketAddress toSvcAddr, + boolean forceActive) throws FailoverFailedException { HAServiceState toSvcState; try { @@ -74,7 +81,17 @@ public class FailoverController { throw new FailoverFailedException( "Got an IO exception", e); } - // TODO(HA): ask toSvc if it's capable. Eg not in SM. + try { + if (!toSvc.readyToBecomeActive()) { + if (!forceActive) { + throw new FailoverFailedException( + toSvcAddr + " is not ready to become active"); + } + } + } catch (IOException e) { + throw new FailoverFailedException( + "Got an IO exception", e); + } } /** @@ -87,16 +104,19 @@ public class FailoverController { * @param toSvcAddr addr of the service to make active * @param fencer for fencing fromSvc * @param forceFence to fence fromSvc even if not strictly necessary + * @param forceActive try to make toSvc active even if it is not ready * @throws FailoverFailedException if the failover fails */ public static void failover(HAServiceProtocol fromSvc, InetSocketAddress fromSvcAddr, HAServiceProtocol toSvc, InetSocketAddress toSvcAddr, - NodeFencer fencer, boolean forceFence) + NodeFencer fencer, + boolean forceFence, + boolean forceActive) throws FailoverFailedException { Preconditions.checkArgument(fencer != null, "failover requires a fencer"); - preFailoverChecks(toSvc, toSvcAddr); + preFailoverChecks(toSvc, toSvcAddr, forceActive); // Try to make fromSvc standby boolean tryFence = true; @@ -145,7 +165,9 @@ public class FailoverController { try { // Unconditionally fence toSvc in case it is still trying to // become active, eg we timed out waiting for its response. - failover(toSvc, toSvcAddr, fromSvc, fromSvcAddr, fencer, true); + // Unconditionally force fromSvc to become active since it + // was previously active when we initiated failover. + failover(toSvc, toSvcAddr, fromSvc, fromSvcAddr, fencer, true, true); } catch (FailoverFailedException ffe) { msg += ". 
Failback to " + fromSvcAddr + " failed (" + ffe.getMessage() + ")"; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index 714fe6c110c..2286a357662 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -47,7 +47,8 @@ import com.google.common.collect.ImmutableMap; public abstract class HAAdmin extends Configured implements Tool { - private static final String FORCEFENCE = "forcefence"; + private static final String FORCEFENCE = "forcefence"; + private static final String FORCEACTIVE = "forceactive"; private static Map USAGE = ImmutableMap.builder() @@ -56,9 +57,11 @@ public abstract class HAAdmin extends Configured implements Tool { .put("-transitionToStandby", new UsageInfo("", "Transitions the daemon into Standby state")) .put("-failover", - new UsageInfo("[--"+FORCEFENCE+"] ", + new UsageInfo("[--"+FORCEFENCE+"] [--"+FORCEACTIVE+"] ", "Failover from the first daemon to the second.\n" + - "Unconditionally fence services if the "+FORCEFENCE+" option is used.")) + "Unconditionally fence services if the "+FORCEFENCE+" option is used.\n" + + "Try to failover to the target service even if it is not ready if the " + + FORCEACTIVE + " option is used.")) .put("-getServiceState", new UsageInfo("", "Returns the state of the daemon")) .put("-checkHealth", @@ -124,12 +127,14 @@ public abstract class HAAdmin extends Configured implements Tool { throws IOException, ServiceFailedException { Configuration conf = getConf(); boolean forceFence = false; + boolean forceActive = false; Options failoverOpts = new Options(); // "-failover" isn't really an option but we need to add // it to appease CommandLineParser failoverOpts.addOption("failover", false, "failover"); failoverOpts.addOption(FORCEFENCE, false, "force fencing"); + failoverOpts.addOption(FORCEACTIVE, false, "force failover"); CommandLineParser parser = new GnuParser(); CommandLine cmd; @@ -137,6 +142,7 @@ public abstract class HAAdmin extends Configured implements Tool { try { cmd = parser.parse(failoverOpts, argv); forceFence = cmd.hasOption(FORCEFENCE); + forceActive = cmd.hasOption(FORCEACTIVE); } catch (ParseException pe) { errOut.println("failover: incorrect arguments"); printUsage(errOut, "-failover"); @@ -172,7 +178,7 @@ public abstract class HAAdmin extends Configured implements Tool { try { FailoverController.failover(proto1, addr1, proto2, addr2, - fencer, forceFence); + fencer, forceFence, forceActive); out.println("Failover from "+args[0]+" to "+args[1]+" successful"); } catch (FailoverFailedException ffe) { errOut.println("Failover failed: " + ffe.getLocalizedMessage()); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java index 672c6d6fba3..9a7316db054 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java @@ -112,4 +112,15 @@ public interface HAServiceProtocol extends VersionedProtocol { * if other errors happen */ public HAServiceState getServiceState() throws IOException; + + /** + * Return true if the service is capable and ready to transition + * from the standby 
state to the active state. + * + * @return true if the service is ready to become active, false otherwise. + * @throws IOException + * if other errors happen + */ + public boolean readyToBecomeActive() throws ServiceFailedException, + IOException; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java index 36aead56b95..7b5cc32b765 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java @@ -79,6 +79,11 @@ public class TestFailoverController { public HAServiceState getServiceState() throws IOException { return state; } + + @Override + public boolean readyToBecomeActive() throws ServiceFailedException, IOException { + return true; + } } @Test @@ -88,13 +93,13 @@ public class TestFailoverController { NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); AlwaysSucceedFencer.fenceCalled = 0; - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); assertEquals(0, TestNodeFencer.AlwaysSucceedFencer.fenceCalled); assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); AlwaysSucceedFencer.fenceCalled = 0; - FailoverController.failover(svc2, svc2Addr, svc1, svc1Addr, fencer, false); + FailoverController.failover(svc2, svc2Addr, svc1, svc1Addr, fencer, false, false); assertEquals(0, TestNodeFencer.AlwaysSucceedFencer.fenceCalled); assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); @@ -106,7 +111,7 @@ public class TestFailoverController { DummyService svc2 = new DummyService(HAServiceState.STANDBY); NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); } @@ -118,7 +123,7 @@ public class TestFailoverController { NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); fail("Can't failover to an already active service"); } catch (FailoverFailedException ffe) { // Expected @@ -128,6 +133,33 @@ public class TestFailoverController { assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); } + @Test + public void testFailoverToUnreadyService() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE); + DummyService svc2 = new DummyService(HAServiceState.STANDBY) { + @Override + public boolean readyToBecomeActive() throws ServiceFailedException, IOException { + return false; + } + }; + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); + + try { + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); + fail("Can't failover to a service that's not ready"); + } catch (FailoverFailedException ffe) { + // Expected + } + + assertEquals(HAServiceState.ACTIVE, svc1.getServiceState()); + 
assertEquals(HAServiceState.STANDBY, svc2.getServiceState()); + + // Forcing it means we ignore readyToBecomeActive + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, true); + assertEquals(HAServiceState.STANDBY, svc1.getServiceState()); + assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); + } + @Test public void testFailoverToUnhealthyServiceFailsAndFailsback() throws Exception { DummyService svc1 = new DummyService(HAServiceState.ACTIVE); @@ -140,7 +172,7 @@ public class TestFailoverController { NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); fail("Failover to unhealthy service"); } catch (FailoverFailedException ffe) { // Expected @@ -162,7 +194,7 @@ public class TestFailoverController { AlwaysSucceedFencer.fenceCalled = 0; try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); } catch (FailoverFailedException ffe) { fail("Faulty active prevented failover"); } @@ -187,7 +219,7 @@ public class TestFailoverController { AlwaysFailFencer.fenceCalled = 0; try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); fail("Failed over even though fencing failed"); } catch (FailoverFailedException ffe) { // Expected @@ -207,7 +239,7 @@ public class TestFailoverController { AlwaysFailFencer.fenceCalled = 0; try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, true); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, true, false); fail("Failed over even though fencing requested and failed"); } catch (FailoverFailedException ffe) { // Expected @@ -238,7 +270,7 @@ public class TestFailoverController { NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); } catch (FailoverFailedException ffe) { fail("Non-existant active prevented failover"); } @@ -254,7 +286,7 @@ public class TestFailoverController { NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); fail("Failed over to a non-existant standby"); } catch (FailoverFailedException ffe) { // Expected @@ -275,7 +307,7 @@ public class TestFailoverController { NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); fail("Failover to already active service"); } catch (FailoverFailedException ffe) { // Expected @@ -300,7 +332,7 @@ public class TestFailoverController { NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, true); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, true, false); fail("Failed over to service that won't transition to active"); } catch (FailoverFailedException ffe) { // Expected @@ -325,7 +357,7 
@@ public class TestFailoverController { AlwaysSucceedFencer.fenceCalled = 0; try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); fail("Failed over to service that won't transition to active"); } catch (FailoverFailedException ffe) { // Expected @@ -352,7 +384,7 @@ public class TestFailoverController { AlwaysFailFencer.fenceCalled = 0; try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); fail("Failed over to service that won't transition to active"); } catch (FailoverFailedException ffe) { // Expected @@ -383,7 +415,7 @@ public class TestFailoverController { NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); try { - FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false); + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); fail("Failover to already active service"); } catch (FailoverFailedException ffe) { // Expected diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java index 02e7fffff32..a5a58648d46 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java @@ -31,6 +31,7 @@ import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; +import static org.mockito.Mockito.when; import com.google.common.base.Charsets; import com.google.common.base.Joiner; @@ -44,8 +45,9 @@ public class TestHAAdmin { private HAServiceProtocol mockProtocol; @Before - public void setup() { + public void setup() throws IOException { mockProtocol = Mockito.mock(HAServiceProtocol.class); + when(mockProtocol.readyToBecomeActive()).thenReturn(true); tool = new HAAdmin() { @Override protected HAServiceProtocol getProtocol(String target) throws IOException { @@ -130,6 +132,15 @@ public class TestHAAdmin { assertEquals(0, runTool("-failover", "foo:1234", "bar:5678", "--forcefence")); } + @Test + public void testFailoverWithForceActive() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + Configuration conf = new Configuration(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(0, runTool("-failover", "foo:1234", "bar:5678", "--forceactive")); + } + @Test public void testFailoverWithInvalidFenceArg() throws Exception { Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index bf31695eae8..27bd92d874d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -929,6 +929,13 @@ public class NameNode { return state.getServiceState(); } + synchronized boolean readyToBecomeActive() throws ServiceFailedException { + if (!haEnabled) { + throw new ServiceFailedException("HA for namenode is not 
enabled"); + } + return !isInSafeMode(); + } + /** * Class used as expose {@link NameNode} as context to {@link HAState} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index b293b5a14fc..45dd8ec55ce 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -1007,6 +1007,11 @@ class NameNodeRpcServer implements NamenodeProtocols { return nn.getServiceState(); } + @Override // HAServiceProtocol + public synchronized boolean readyToBecomeActive() throws ServiceFailedException { + return nn.readyToBecomeActive(); + } + /** * Verify version. * From 32c313d51cd2483ea510afe044c55eeaed7c2b2d Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 2 Feb 2012 22:21:57 +0000 Subject: [PATCH 114/177] HDFS-2861. checkpointing should verify that the dfs.http.address has been configured to a non-loopback for peer NN. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1239886 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 27 +++++++ .../java/org/apache/hadoop/hdfs/HAUtil.java | 20 ++++- .../hdfs/server/namenode/FSNamesystem.java | 16 ++-- .../server/namenode/SecondaryNameNode.java | 17 +--- .../namenode/ha/StandbyCheckpointer.java | 28 ++++++- .../apache/hadoop/hdfs/MiniDFSCluster.java | 10 +-- .../apache/hadoop/hdfs/MiniDFSNNTopology.java | 2 +- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 17 ++++ .../server/namenode/ha/TestEditLogTailer.java | 2 +- .../namenode/ha/TestFailureToReadEdits.java | 2 +- .../namenode/ha/TestHAConfiguration.java | 81 +++++++++++++++++++ .../namenode/ha/TestStandbyCheckpoints.java | 2 +- 13 files changed, 191 insertions(+), 35 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 7a4ef27f195..45169c41aec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -151,3 +151,5 @@ HDFS-2742. HA: observed dataloss in replication stress test. (todd via eli) HDFS-2870. Fix log level for block debug info in processMisReplicatedBlocks (todd) HDFS-2859. LOCAL_ADDRESS_MATCHER.match has NPE when called from DFSUtil.getSuffixIDs when the host is incorrect (Bikas Saha via todd) + +HDFS-2861. checkpointing should verify that the dfs.http.address has been configured to a non-loopback for peer NN (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index c9ccf9f38c7..31280675544 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -676,6 +676,33 @@ public class DFSUtil { return getSuffixedConf(conf, httpAddressKey, httpAddressDefault, suffixes); } + + /** + * Substitute a default host in the case that an address has been configured + * with a wildcard. 
This is used, for example, when determining the HTTP + * address of the NN -- if it's configured to bind to 0.0.0.0, we want to + * substitute the hostname from the filesystem URI rather than trying to + * connect to 0.0.0.0. + * @param configuredAddress the address found in the configuration + * @param defaultHost the host to substitute with, if configuredAddress + * is a local/wildcard address. + * @return the substituted address + * @throws IOException if it is a wildcard address and security is enabled + */ + public static String substituteForWildcardAddress(String configuredAddress, + String defaultHost) throws IOException { + InetSocketAddress sockAddr = NetUtils.createSocketAddr(configuredAddress); + if (sockAddr.getAddress().isAnyLocalAddress()) { + if(UserGroupInformation.isSecurityEnabled()) { + throw new IOException("Cannot use a wildcard address with security. " + + "Must explicitly set bind address for Kerberos"); + } + return defaultHost + ":" + sockAddr.getPort(); + } else { + return configuredAddress; + } + } + private static String getSuffixedConf(Configuration conf, String key, String defaultVal, String[] suffixes) { String ret = conf.get(DFSUtil.addKeySuffixes(key, suffixes)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index f12cd5e0bce..88122b9d8f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -129,16 +129,28 @@ public class HAUtil { Configuration myConf) { String nsId = DFSUtil.getNamenodeNameServiceId(myConf); + Preconditions.checkArgument(nsId != null, + "Could not determine namespace id. Please ensure that this " + + "machine is one of the machines listed as a NN RPC address, " + + "or configure " + DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID); + Collection nnIds = DFSUtil.getNameNodeIds(myConf, nsId); String myNNId = myConf.get(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY); Preconditions.checkArgument(nnIds != null, - "Could not determine namenode ids in namespace '%s'", + "Could not determine namenode ids in namespace '%s'. " + + "Please configure " + + DFSUtil.addKeySuffixes(DFSConfigKeys.DFS_HA_NAMENODES_KEY, + nsId), nsId); Preconditions.checkArgument(nnIds.size() == 2, - "Expected exactly 2 NameNodes in this namespace. Instead, got: '%s'", - Joiner.on("','").join(nnIds)); + "Expected exactly 2 NameNodes in namespace '%s'. " + + "Instead, got only %s (NN ids were '%s'", + nsId, nnIds.size(), Joiner.on("','").join(nnIds)); Preconditions.checkState(myNNId != null && !myNNId.isEmpty(), - "Could not determine own NN ID"); + "Could not determine own NN ID in namespace '%s'. 
Please " + + "ensure that this node is one of the machines listed as an " + + "NN RPC address, or configure " + DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, + nsId); ArrayList nnSet = Lists.newArrayList(nnIds); nnSet.remove(myNNId); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index bede75ebde5..b49005ff144 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -567,10 +567,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (leaseManager != null) { leaseManager.stopMonitor(); } - dir.fsImage.editLog.close(); - // Update the fsimage with the last txid that we wrote - // so that the tailer starts from the right spot. - dir.fsImage.updateLastAppliedTxIdFromWritten(); + if (dir != null && dir.fsImage != null) { + if (dir.fsImage.editLog != null) { + dir.fsImage.editLog.close(); + } + // Update the fsimage with the last txid that we wrote + // so that the tailer starts from the right spot. + dir.fsImage.updateLastAppliedTxIdFromWritten(); + } } finally { writeUnlock(); } @@ -612,7 +616,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (editLogTailer != null) { editLogTailer.stop(); } - dir.fsImage.editLog.close(); + if (dir != null && dir.fsImage != null && dir.fsImage.editLog != null) { + dir.fsImage.editLog.close(); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index 2aebb63bd0e..8069501e01f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -450,19 +450,10 @@ public class SecondaryNameNode implements Runnable { } String configuredAddress = DFSUtil.getInfoServer(null, conf, true); - InetSocketAddress sockAddr = NetUtils.createSocketAddr(configuredAddress); - if (sockAddr.getAddress().isAnyLocalAddress()) { - if(UserGroupInformation.isSecurityEnabled()) { - throw new IOException("Cannot use a wildcard address with security. 
" + - "Must explicitly set bind address for Kerberos"); - } - return fsName.getHost() + ":" + sockAddr.getPort(); - } else { - if(LOG.isDebugEnabled()) { - LOG.debug("configuredAddress = " + configuredAddress); - } - return configuredAddress; - } + String address = DFSUtil.substituteForWildcardAddress(configuredAddress, + fsName.getHost()); + LOG.debug("Will connect to NameNode at HTTP address: " + address); + return address; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java index 83e85f7709c..9c5a3e5cd2b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.server.namenode.CheckpointConf; import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.SaveNamespaceCancelledException; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage; import org.apache.hadoop.net.NetUtils; @@ -75,12 +76,11 @@ public class StandbyCheckpointer { */ private void setNameNodeAddresses(Configuration conf) { // Look up our own address. - String myAddrString = DFSUtil.getInfoServer(null, conf, true); + String myAddrString = getHttpAddress(conf); // Look up the active node's address Configuration confForActive = HAUtil.getConfForOtherNode(conf); - activeNNAddress = DFSUtil.getInfoServer(null, confForActive, true); - + activeNNAddress = getHttpAddress(confForActive); // Sanity-check. Preconditions.checkArgument(checkAddress(activeNNAddress), @@ -90,13 +90,28 @@ public class StandbyCheckpointer { myNNAddress = NetUtils.createSocketAddr(myAddrString); } + private String getHttpAddress(Configuration conf) { + String configuredAddr = DFSUtil.getInfoServer(null, conf, true); + + // Use the hostname from the RPC address as a default, in case + // the HTTP address is configured to 0.0.0.0. + String hostnameFromRpc = NameNode.getServiceAddress( + conf, true).getHostName(); + try { + return DFSUtil.substituteForWildcardAddress( + configuredAddr, hostnameFromRpc); + } catch (IOException e) { + throw new IllegalArgumentException(e); + } + } + /** * Ensure that the given address is valid and has a port * specified. 
*/ private boolean checkAddress(String addrStr) { InetSocketAddress addr = NetUtils.createSocketAddr(addrStr); - return addr.getPort() != 0; + return addr.getPort() != 0 && !addr.getAddress().isAnyLocalAddress(); } public void start() { @@ -287,4 +302,9 @@ public class StandbyCheckpointer { } } } + + @VisibleForTesting + String getActiveNNAddress() { + return activeNNAddress; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index dc3074aeb51..0173f46814d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -586,13 +586,13 @@ public class MiniDFSCluster { conf.set(FS_DEFAULT_NAME_KEY, "127.0.0.1:" + onlyNN.getIpcPort()); } - // If we have more than one nameservice, need to enumerate them in the - // config. - if (federation) { - List allNsIds = Lists.newArrayList(); - for (MiniDFSNNTopology.NSConf nameservice : nnTopology.getNameservices()) { + List allNsIds = Lists.newArrayList(); + for (MiniDFSNNTopology.NSConf nameservice : nnTopology.getNameservices()) { + if (nameservice.getId() != null) { allNsIds.add(nameservice.getId()); } + } + if (!allNsIds.isEmpty()) { conf.set(DFS_FEDERATION_NAMESERVICES, Joiner.on(",").join(allNsIds)); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java index 311e687526b..4dfbfd81d95 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSNNTopology.java @@ -57,7 +57,7 @@ public class MiniDFSNNTopology { */ public static MiniDFSNNTopology simpleHATopology() { return new MiniDFSNNTopology() - .addNameservice(new MiniDFSNNTopology.NSConf(null) + .addNameservice(new MiniDFSNNTopology.NSConf("minidfs-ns") .addNN(new MiniDFSNNTopology.NNConf("nn1")) .addNN(new MiniDFSNNTopology.NNConf("nn2"))); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index 9773a50d54a..d3ea10f87ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs; +import org.junit.Before; import org.junit.Test; import static org.junit.Assert.*; @@ -43,6 +44,15 @@ import org.apache.hadoop.fs.BlockLocation; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; public class TestDFSUtil { + + /** + * Reset to default UGI settings since some tests change them. 
+ */ + @Before + public void resetUGI() { + UserGroupInformation.setConfiguration(new Configuration()); + } + /** * Test conversion of LocatedBlock to BlockLocation */ @@ -398,4 +408,11 @@ public class TestDFSUtil { assertEquals(NS2_NN2_HOST, map.get("ns2").get("ns2-nn2").toString()); } + @Test + public void testSubstituteForWildcardAddress() throws IOException { + assertEquals("foo:12345", + DFSUtil.substituteForWildcardAddress("0.0.0.0:12345", "foo")); + assertEquals("127.0.0.1:12345", + DFSUtil.substituteForWildcardAddress("127.0.0.1:12345", "foo")); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java index 1f5822ee575..bc5c487a766 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java @@ -125,7 +125,7 @@ public class TestEditLogTailer { // Have to specify IPC ports so the NNs can talk to each other. MiniDFSNNTopology topology = new MiniDFSNNTopology() - .addNameservice(new MiniDFSNNTopology.NSConf(null) + .addNameservice(new MiniDFSNNTopology.NSConf("ns1") .addNN(new MiniDFSNNTopology.NNConf("nn1").setIpcPort(10001)) .addNN(new MiniDFSNNTopology.NNConf("nn2").setIpcPort(10002))); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java index e41a7a6e51b..7bc2d8e1645 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureToReadEdits.java @@ -82,7 +82,7 @@ public class TestFailureToReadEdits { HAUtil.setAllowStandbyReads(conf, true); MiniDFSNNTopology topology = new MiniDFSNNTopology() - .addNameservice(new MiniDFSNNTopology.NSConf(null) + .addNameservice(new MiniDFSNNTopology.NSConf("ns1") .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10001)) .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10002))); cluster = new MiniDFSCluster.Builder(conf) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java new file mode 100644 index 00000000000..89d7ddd084f --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Test; +import org.mockito.Mockito; + +/** + * Test cases that the HA configuration is reasonably validated and + * interpreted in various places. These should be proper unit tests + * which don't start daemons. + */ +public class TestHAConfiguration { + private static final String NSID = "ns1"; + private static String HOST_A = "1.2.3.1"; + private static String HOST_B = "1.2.3.2"; + + private FSNamesystem fsn = Mockito.mock(FSNamesystem.class); + private Configuration conf = new Configuration(); + + @Test + public void testCheckpointerValidityChecks() throws Exception { + try { + new StandbyCheckpointer(conf, fsn); + fail("Bad config did not throw an error"); + } catch (IllegalArgumentException iae) { + GenericTestUtils.assertExceptionContains( + "Invalid URI for NameNode address", iae); + } + } + + @Test + public void testGetOtherNNHttpAddress() { + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, NSID); + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID, NSID); + conf.set(DFSUtil.addKeySuffixes( + DFSConfigKeys.DFS_HA_NAMENODES_KEY, NSID), + "nn1,nn2"); + conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1"); + conf.set(DFSUtil.addKeySuffixes( + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, + NSID, "nn1"), + HOST_A + ":12345"); + conf.set(DFSUtil.addKeySuffixes( + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, + NSID, "nn2"), + HOST_B + ":12345"); + NameNode.initializeGenericKeys(conf, NSID, "nn1"); + + // Since we didn't configure the HTTP address, and the default is + // 0.0.0.0, it should substitute the address from the RPC configuratoin + // above. 
+ StandbyCheckpointer checkpointer = new StandbyCheckpointer(conf, fsn); + assertEquals(HOST_B + ":" + DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, + checkpointer.getActiveNNAddress()); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index 2c0c81947c8..a34d6bdfc21 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -60,7 +60,7 @@ public class TestStandbyCheckpoints { conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); MiniDFSNNTopology topology = new MiniDFSNNTopology() - .addNameservice(new MiniDFSNNTopology.NSConf(null) + .addNameservice(new MiniDFSNNTopology.NSConf("ns1") .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10001)) .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10002))); From 05ab55b705d7c52f6cd40107e2f3c54c9be75e56 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Fri, 3 Feb 2012 01:09:05 +0000 Subject: [PATCH 115/177] HDFS-2860. TestDFSRollback#testRollback is failing. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1239968 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 45169c41aec..e95eb5c94e6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -153,3 +153,5 @@ HDFS-2870. Fix log level for block debug info in processMisReplicatedBlocks (tod HDFS-2859. LOCAL_ADDRESS_MATCHER.match has NPE when called from DFSUtil.getSuffixIDs when the host is incorrect (Bikas Saha via todd) HDFS-2861. checkpointing should verify that the dfs.http.address has been configured to a non-loopback for peer NN (todd) + +HDFS-2860. TestDFSRollback#testRollback is failing. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java index 282ad68a37f..b07bad252ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSRollback.java @@ -248,7 +248,7 @@ public class TestDFSRollback extends TestCase { baseDirs = UpgradeUtilities.createNameNodeStorageDirs(nameNodeDirs, "previous"); deleteMatchingFiles(baseDirs, "edits.*"); startNameNodeShouldFail(StartupOption.ROLLBACK, - "Gap in transactions. Expected to be able to read up until at least txid "); + "No non-corrupt logs for txid "); UpgradeUtilities.createEmptyDirs(nameNodeDirs); log("NameNode rollback with no image file", numDirs); From d2b5531b0f034cbcde4f6dfaaba463b347fbc60b Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Fri, 3 Feb 2012 03:10:57 +0000 Subject: [PATCH 116/177] HDFS-2769. HA: When HA is enabled with a shared edits dir, that dir should be marked required. Contributed by Aaron T. Myers. 
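The HDFS-2861 hunks above introduce DFSUtil.substituteForWildcardAddress and exercise it from TestHAConfiguration and TestDFSUtil. The following is a JDK-only sketch of the same substitution under simplified assumptions: the host:port string is parsed by hand, the class and method names are illustrative, and the check that rejects wildcard binds when Kerberos security is enabled is omitted.

import java.net.InetSocketAddress;

public class WildcardSubstitutionSketch {

    // If the configured address binds to a wildcard (0.0.0.0), keep its port
    // but substitute the supplied default host; otherwise return it unchanged.
    static String substituteForWildcard(String configuredAddress, String defaultHost) {
        int colon = configuredAddress.lastIndexOf(':');
        String host = configuredAddress.substring(0, colon);
        int port = Integer.parseInt(configuredAddress.substring(colon + 1));
        InetSocketAddress sockAddr = new InetSocketAddress(host, port);
        if (sockAddr.getAddress() != null && sockAddr.getAddress().isAnyLocalAddress()) {
            return defaultHost + ":" + port;
        }
        return configuredAddress;
    }

    public static void main(String[] args) {
        // Mirrors the assertions in TestDFSUtil#testSubstituteForWildcardAddress.
        System.out.println(substituteForWildcard("0.0.0.0:12345", "foo"));   // foo:12345
        System.out.println(substituteForWildcard("127.0.0.1:12345", "foo")); // unchanged
    }
}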
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1239988 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 3 ++ .../hdfs/server/namenode/FSNamesystem.java | 14 +++++++-- .../namenode/ha/TestFailureOfSharedDir.java | 29 +++++++++++++++++-- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index e95eb5c94e6..9e24b2071fd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -155,3 +155,6 @@ HDFS-2859. LOCAL_ADDRESS_MATCHER.match has NPE when called from DFSUtil.getSuffi HDFS-2861. checkpointing should verify that the dfs.http.address has been configured to a non-loopback for peer NN (todd) HDFS-2860. TestDFSRollback#testRollback is failing. (atm) + +HDFS-2769. HA: When HA is enabled with a shared edits dir, that dir should be +marked required. (atm via eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index b49005ff144..7754a9085b2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -632,9 +632,19 @@ public class FSNamesystem implements Namesystem, FSClusterStats, public static Collection getNamespaceDirs(Configuration conf) { return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY); } - + + /** + * Get all edits dirs which are required. If any shared edits dirs are + * configured, these are also included in the set of required dirs. + * + * @param conf the HDFS configuration. + * @return all required dirs. 
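The HDFS-2769 commit above, whose FSNamesystem and TestFailureOfSharedDir hunks follow, folds every shared edits directory into the set of required edits directories. Below is a simplified sketch of that merge using plain JDK collections in place of the Hadoop Configuration lookups; the class and method names are illustrative.

import java.net.URI;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class RequiredEditsDirsSketch {

    // Required dirs = explicitly required dirs plus every shared edits dir.
    static Collection<URI> getRequiredEditsDirs(List<URI> explicitlyRequired,
                                                List<URI> sharedEditsDirs) {
        Set<URI> ret = new HashSet<>();
        ret.addAll(explicitlyRequired);
        ret.addAll(sharedEditsDirs);
        return ret;
    }

    public static void main(String[] args) {
        URI foo = URI.create("file:/foo");
        URI bar = URI.create("file:/bar");
        // Echoes testSharedDirIsAutomaticallyMarkedRequired: bar only becomes
        // required once it is configured as the shared edits dir.
        System.out.println(getRequiredEditsDirs(List.of(foo), List.of()).contains(bar));    // false
        System.out.println(getRequiredEditsDirs(List.of(foo), List.of(bar)).contains(bar)); // true
    }
}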
+ */ public static Collection getRequiredNamespaceEditsDirs(Configuration conf) { - return getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY); + Set ret = new HashSet(); + ret.addAll(getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY)); + ret.addAll(getSharedEditsDirs(conf)); + return ret; } private static Collection getStorageDirs(Configuration conf, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java index 20c93b7e734..1fad704dc45 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java @@ -23,6 +23,7 @@ import java.io.File; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.util.Collection; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -33,14 +34,38 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Test; +import com.google.common.base.Joiner; + public class TestFailureOfSharedDir { private static final Log LOG = LogFactory.getLog(TestFailureOfSharedDir.class); + /** + * Test that the shared edits dir is automatically added to the list of edits + * dirs that are marked required. + */ + @Test + public void testSharedDirIsAutomaticallyMarkedRequired() + throws URISyntaxException { + URI foo = new URI("file:/foo"); + URI bar = new URI("file:/bar"); + Configuration conf = new Configuration(); + conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, Joiner.on(",").join(foo, bar)); + conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY, foo.toString()); + assertFalse(FSNamesystem.getRequiredNamespaceEditsDirs(conf).contains( + bar)); + conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, bar.toString()); + Collection requiredEditsDirs = FSNamesystem + .getRequiredNamespaceEditsDirs(conf); + assertTrue(Joiner.on(",").join(requiredEditsDirs) + " does not contain " + bar, + requiredEditsDirs.contains(bar)); + } + /** * Test that marking the shared edits dir as being "required" causes the NN to * fail if that dir can't be accessed. @@ -48,11 +73,9 @@ public class TestFailureOfSharedDir { @Test public void testFailureOfSharedDir() throws Exception { Configuration conf = new Configuration(); + // The shared edits dir will automatically be marked required. URI sharedEditsUri = MiniDFSCluster.formatSharedEditsDir( new File(MiniDFSCluster.getBaseDirectory()), 0, 1); - // Mark the shared edits dir required. - conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY, - sharedEditsUri.toString()); MiniDFSCluster cluster = null; try { From 969318cfc2397110a686a0a728b56b6a0cab9f4d Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Fri, 3 Feb 2012 18:04:21 +0000 Subject: [PATCH 117/177] HDFS-2863. Failures observed if dfs.edits.dir and shared.edits.dir have same directories. Contributed by Bikas Saha. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1240267 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/FSNamesystem.java | 14 ++++++ .../server/namenode/TestFSNamesystem.java | 47 +++++++++++++++++++ .../namenode/ha/TestHAConfiguration.java | 22 ++++++++- 4 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 9e24b2071fd..92e634bb9ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -158,3 +158,5 @@ HDFS-2860. TestDFSRollback#testRollback is failing. (atm) HDFS-2769. HA: When HA is enabled with a shared edits dir, that dir should be marked required. (atm via eli) + +HDFS-2863. Failures observed if dfs.edits.dir and shared.edits.dir have same directories. (Bikas Saha via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 7754a9085b2..c0f8a82e61c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -190,6 +190,7 @@ import org.apache.hadoop.util.VersionInfo; import org.mortbay.util.ajax.JSON; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.annotations.VisibleForTesting; @@ -680,6 +681,19 @@ public class FSNamesystem implements Namesystem, FSClusterStats, public static Collection getNamespaceEditsDirs(Configuration conf) { Collection editsDirs = getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY); editsDirs.addAll(getSharedEditsDirs(conf)); + Set uniqueEditsDirs = new HashSet(); + uniqueEditsDirs.addAll(editsDirs); + if (uniqueEditsDirs.size() != editsDirs.size()) { + // clearing and re-initializing editsDirs to preserve Collection semantics + // assigning finalEditsDirs to editsDirs would leak Set semantics in the + // return value and cause unexpected results downstream. eg future addAll + // calls. Perf is not an issue since these are small lists. + editsDirs.clear(); + editsDirs.addAll(uniqueEditsDirs); + LOG.warn("Overlapping entries in " + DFS_NAMENODE_EDITS_DIR_KEY + + " and/or " + DFS_NAMENODE_SHARED_EDITS_DIR_KEY + + ". Using the following entries: " + Joiner.on(',').join(editsDirs)); + } if (editsDirs.isEmpty()) { // If this is the case, no edit dirs have been explicitly configured. // Image dirs are to be used for edits too. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java new file mode 100644 index 00000000000..d79f583efd4 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; +import static org.junit.Assert.*; + +import java.net.URI; +import java.util.Collection; + +import org.apache.hadoop.conf.Configuration; +import org.junit.Test; + +public class TestFSNamesystem { + + /** + * Tests that the namenode edits dirs are gotten with duplicates removed + */ + @Test + public void testUniqueEditDirs() { + Configuration config = new Configuration(); + + config.set(DFS_NAMENODE_EDITS_DIR_KEY, "file://edits/dir, " + + "file://edits/dir1,file://edits/dir1"); // overlapping internally + + // getNamespaceEditsDirs removes duplicates + Collection editsDirs = FSNamesystem.getNamespaceEditsDirs(config); + assertEquals(2, editsDirs.size()); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java index 89d7ddd084f..12472c4f10b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java @@ -17,8 +17,11 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; +import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import static org.junit.Assert.*; -import static org.junit.Assert.assertEquals; + +import java.net.URI; +import java.util.Collection; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -78,4 +81,21 @@ public class TestHAConfiguration { assertEquals(HOST_B + ":" + DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, checkpointer.getActiveNNAddress()); } + + /** + * Tests that the namenode edits dirs and shared edits dirs are gotten with + * duplicates removed + */ + @Test + public void testHAUniqueEditDirs() { + Configuration config = new Configuration(); + + config.set(DFS_NAMENODE_EDITS_DIR_KEY, "file://edits/dir, " + + "file://edits/shared/dir"); // overlapping + config.set(DFS_NAMENODE_SHARED_EDITS_DIR_KEY, "file://edits/shared/dir"); + + // getNamespaceEditsDirs removes duplicates across edits and shared.edits + Collection editsDirs = FSNamesystem.getNamespaceEditsDirs(config); + assertEquals(2, editsDirs.size()); + } } From 11df1c256171564b0578477c226723358be812c4 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Sat, 4 Feb 2012 02:52:53 +0000 Subject: [PATCH 118/177] HDFS-2874. Edit log should log to shared dirs before local dirs. Contributed by Todd Lipcon. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1240445 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hadoop/hdfs/server/common/Util.java | 5 +- .../hdfs/server/namenode/FSEditLog.java | 10 ++-- .../hadoop/hdfs/server/namenode/FSImage.java | 6 +- .../hdfs/server/namenode/FSNamesystem.java | 59 ++++++++++++------- .../hdfs/server/namenode/JournalSet.java | 27 ++++++++- .../hadoop/hdfs/server/namenode/NameNode.java | 2 +- .../server/namenode/SecondaryNameNode.java | 4 +- .../hdfs/server/namenode/TestClusterId.java | 3 +- .../namenode/TestEditLogJournalFailures.java | 51 +++++++++------- .../namenode/ha/TestFailureOfSharedDir.java | 51 ++++++++++++++-- 11 files changed, 156 insertions(+), 64 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 92e634bb9ed..2ff3c83ebfc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -160,3 +160,5 @@ HDFS-2769. HA: When HA is enabled with a shared edits dir, that dir should be marked required. (atm via eli) HDFS-2863. Failures observed if dfs.edits.dir and shared.edits.dir have same directories. (Bikas Saha via atm) + +HDFS-2874. Edit log should log to shared dirs before local dirs. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java index 642551e3793..1f4e9741664 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Util.java @@ -23,6 +23,7 @@ import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collection; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -97,9 +98,9 @@ public final class Util { * @param names collection of strings to convert to URIs * @return collection of URIs */ - public static Collection stringCollectionAsURIs( + public static List stringCollectionAsURIs( Collection names) { - Collection uris = new ArrayList(names.size()); + List uris = new ArrayList(names.size()); for(String name : names) { try { uris.add(stringAsURI(name)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index a78039f2c2d..d9a64589cec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -126,12 +126,12 @@ public class FSEditLog { private NNStorage storage; private Configuration conf; - private Collection editsDirs; + private List editsDirs; /** * The edit directories that are shared between primary and secondary. 
*/ - private Collection sharedEditsDirs; + private List sharedEditsDirs; private static class TransactionId { public long txid; @@ -170,11 +170,11 @@ public class FSEditLog { * @param storage Storage object used by namenode * @param editsDirs List of journals to use */ - FSEditLog(Configuration conf, NNStorage storage, Collection editsDirs) { + FSEditLog(Configuration conf, NNStorage storage, List editsDirs) { init(conf, storage, editsDirs); } - private void init(Configuration conf, NNStorage storage, Collection editsDirs) { + private void init(Configuration conf, NNStorage storage, List editsDirs) { isSyncRunning = false; this.conf = conf; this.storage = storage; @@ -209,7 +209,7 @@ public class FSEditLog { state = State.OPEN_FOR_READING; } - private void initJournals(Collection dirs) { + private void initJournals(List dirs) { int minimumRedundantJournals = conf.getInt( DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY, DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index d72523d29f8..4b9ccee14cf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -115,7 +115,7 @@ public class FSImage implements Closeable { */ protected FSImage(Configuration conf, Collection imageDirs, - Collection editsDirs) + List editsDirs) throws IOException { this.conf = conf; @@ -485,7 +485,7 @@ public class FSImage implements Closeable { void doImportCheckpoint(FSNamesystem target) throws IOException { Collection checkpointDirs = FSImage.getCheckpointDirs(conf, null); - Collection checkpointEditsDirs = + List checkpointEditsDirs = FSImage.getCheckpointEditsDirs(conf, null); if (checkpointDirs == null || checkpointDirs.isEmpty()) { @@ -1094,7 +1094,7 @@ public class FSImage implements Closeable { return Util.stringCollectionAsURIs(dirNames); } - static Collection getCheckpointEditsDirs(Configuration conf, + static List getCheckpointEditsDirs(Configuration conf, String defaultName) { Collection dirNames = conf.getStringCollection(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index c0f8a82e61c..58584be2918 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -86,6 +86,7 @@ import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -190,10 +191,8 @@ import org.apache.hadoop.util.VersionInfo; import org.mortbay.util.ajax.JSON; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Joiner; import com.google.common.base.Preconditions; - -import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; /*************************************************** * FSNamesystem does the actual bookkeeping work for the @@ -350,7 +349,7 @@ public class 
FSNamesystem implements Namesystem, FSClusterStats, public static FSNamesystem loadFromDisk(Configuration conf) throws IOException { Collection namespaceDirs = FSNamesystem.getNamespaceDirs(conf); - Collection namespaceEditsDirs = + List namespaceEditsDirs = FSNamesystem.getNamespaceEditsDirs(conf); if (namespaceDirs.size() == 1) { @@ -678,28 +677,44 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return Util.stringCollectionAsURIs(dirNames); } - public static Collection getNamespaceEditsDirs(Configuration conf) { - Collection editsDirs = getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY); - editsDirs.addAll(getSharedEditsDirs(conf)); - Set uniqueEditsDirs = new HashSet(); - uniqueEditsDirs.addAll(editsDirs); - if (uniqueEditsDirs.size() != editsDirs.size()) { - // clearing and re-initializing editsDirs to preserve Collection semantics - // assigning finalEditsDirs to editsDirs would leak Set semantics in the - // return value and cause unexpected results downstream. eg future addAll - // calls. Perf is not an issue since these are small lists. - editsDirs.clear(); - editsDirs.addAll(uniqueEditsDirs); - LOG.warn("Overlapping entries in " + DFS_NAMENODE_EDITS_DIR_KEY - + " and/or " + DFS_NAMENODE_SHARED_EDITS_DIR_KEY - + ". Using the following entries: " + Joiner.on(',').join(editsDirs)); + /** + * Return an ordered list of edits directories to write to. + * The list is ordered such that all shared edits directories + * are ordered before non-shared directories, and any duplicates + * are removed. The order they are specified in the configuration + * is retained. + */ + public static List getNamespaceEditsDirs(Configuration conf) { + // Use a LinkedHashSet so that order is maintained while we de-dup + // the entries. + LinkedHashSet editsDirs = new LinkedHashSet(); + + // First add the shared edits dirs. It's critical that the shared dirs + // are added first, since JournalSet syncs them in the order they are listed, + // and we need to make sure all edits are in place in the shared storage + // before they are replicated locally. See HDFS-2874. + for (URI dir : getSharedEditsDirs(conf)) { + if (!editsDirs.add(dir)) { + LOG.warn("Edits URI " + dir + " listed multiple times in " + + DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates."); + } } + + // Now add the non-shared dirs. + for (URI dir : getStorageDirs(conf, DFS_NAMENODE_EDITS_DIR_KEY)) { + if (!editsDirs.add(dir)) { + LOG.warn("Edits URI " + dir + " listed multiple times in " + + DFS_NAMENODE_SHARED_EDITS_DIR_KEY + " and " + + DFS_NAMENODE_EDITS_DIR_KEY + ". Ignoring duplicates."); + } + } + if (editsDirs.isEmpty()) { // If this is the case, no edit dirs have been explicitly configured. // Image dirs are to be used for edits too. - return getNamespaceDirs(conf); + return Lists.newArrayList(getNamespaceDirs(conf)); } else { - return editsDirs; + return Lists.newArrayList(editsDirs); } } @@ -708,7 +723,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @param conf * @return Collection of edit directories. 
*/ - public static Collection getSharedEditsDirs(Configuration conf) { + public static List getSharedEditsDirs(Configuration conf) { // don't use getStorageDirs here, because we want an empty default // rather than the dir in /tmp Collection dirNames = conf.getTrimmedStringCollection( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java index 3363c8b5b4c..8fc323c31d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java @@ -309,13 +309,25 @@ public class JournalSet implements JournalManager { */ private void mapJournalsAndReportErrors( JournalClosure closure, String status) throws IOException{ + List badJAS = Lists.newLinkedList(); for (JournalAndStream jas : journals) { try { closure.apply(jas); } catch (Throwable t) { - LOG.error("Error: " + status + " failed for (journal " + jas + ")", t); - badJAS.add(jas); + if (jas.isRequired()) { + String msg = "Error: " + status + " failed for required journal (" + + jas + ")"; + LOG.fatal(msg, t); + // If we fail on *any* of the required journals, then we must not + // continue on any of the other journals. Abort them to ensure that + // retry behavior doesn't allow them to keep going in any way. + abortAllJournals(); + throw new IOException(msg); + } else { + LOG.error("Error: " + status + " failed for (journal " + jas + ")", t); + badJAS.add(jas); + } } } disableAndReportErrorOnJournals(badJAS); @@ -327,6 +339,17 @@ public class JournalSet implements JournalManager { } } + /** + * Abort all of the underlying streams. + */ + private void abortAllJournals() { + for (JournalAndStream jas : journals) { + if (jas.isActive()) { + jas.abort(); + } + } + } + /** * An implementation of EditLogOutputStream that applies a requested method on * all the journals that are currently active. 
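As an aside, the ordering and de-duplication rule that getNamespaceEditsDirs now enforces (shared URIs first, configuration order preserved, duplicates dropped with a warning) can be sketched on its own. This is a minimal standalone illustration, not the patch's code; the class name and directory URIs are made up:

import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;

public class OrderedEditsDirsSketch {

  /**
   * Combine shared and local edits URIs into a single list that keeps the
   * configured order, puts the shared URIs first, and drops duplicates
   * with a warning.
   */
  static List<URI> combine(List<URI> sharedDirs, List<URI> localDirs) {
    LinkedHashSet<URI> dirs = new LinkedHashSet<URI>();
    for (URI dir : sharedDirs) {
      if (!dirs.add(dir)) {
        System.err.println("Shared edits URI listed twice, ignoring: " + dir);
      }
    }
    for (URI dir : localDirs) {
      if (!dirs.add(dir)) {
        System.err.println("Edits URI already listed, ignoring: " + dir);
      }
    }
    return new ArrayList<URI>(dirs);
  }

  public static void main(String[] args) {
    List<URI> shared = Arrays.asList(URI.create("file:///shared-A"));
    List<URI> local = Arrays.asList(
        URI.create("file:///local-B"),
        URI.create("file:///shared-A"),   // duplicate of a shared dir
        URI.create("file:///local-A"));
    // Prints [file:///shared-A, file:///local-B, file:///local-A]
    System.out.println(combine(shared, local));
  }
}

The same LinkedHashSet trick is what lets the real method return a List whose iteration order matches the configuration while still rejecting repeated URIs.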
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 27bd92d874d..eb7e3c667b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -661,7 +661,7 @@ public class NameNode { } Collection dirsToFormat = FSNamesystem.getNamespaceDirs(conf); - Collection editDirsToFormat = + List editDirsToFormat = FSNamesystem.getNamespaceEditsDirs(conf); for(Iterator it = dirsToFormat.iterator(); it.hasNext();) { File curDir = new File(it.next().getPath()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index 8069501e01f..73f59900d1b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -114,7 +114,7 @@ public class SecondaryNameNode implements Runnable { private String infoBindAddress; private Collection checkpointDirs; - private Collection checkpointEditsDirs; + private List checkpointEditsDirs; private CheckpointConf checkpointConf; private FSNamesystem namesystem; @@ -729,7 +729,7 @@ public class SecondaryNameNode implements Runnable { */ CheckpointStorage(Configuration conf, Collection imageDirs, - Collection editsDirs) throws IOException { + List editsDirs) throws IOException { super(conf, imageDirs, editsDirs); // the 2NN never writes edits -- it only downloads them. So diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestClusterId.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestClusterId.java index 68dc9f5b545..98c17a7b4d1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestClusterId.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestClusterId.java @@ -26,6 +26,7 @@ import java.net.URI; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; +import java.util.List; import java.util.Properties; import org.apache.commons.logging.Log; @@ -47,7 +48,7 @@ public class TestClusterId { private String getClusterId(Configuration config) throws IOException { // see if cluster id not empty. 
Collection dirsToFormat = FSNamesystem.getNamespaceDirs(config); - Collection editsToFormat = FSNamesystem.getNamespaceEditsDirs(config); + List editsToFormat = FSNamesystem.getNamespaceEditsDirs(config); FSImage fsImage = new FSImage(config, dirsToFormat, editsToFormat); Iterator sdit = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogJournalFailures.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogJournalFailures.java index 77fd68637e5..d14b2b24dfb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogJournalFailures.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogJournalFailures.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.mockito.Mockito; import org.mockito.verification.VerificationMode; public class TestEditLogJournalFailures { @@ -144,21 +145,35 @@ public class TestEditLogJournalFailures { DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY); shutDownMiniCluster(); Configuration conf = new HdfsConfiguration(); - conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY, editsDirs[1]); + conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY, editsDirs[0]); conf.setInt(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY, 0); conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKED_VOLUMES_MINIMUM_KEY, 0); setUpMiniCluster(conf, true); assertTrue(doAnEdit()); // Invalidated the one required edits journal. - invalidateEditsDirAtIndex(1, false, false); + invalidateEditsDirAtIndex(0, false, false); + JournalAndStream nonRequiredJas = getJournalAndStream(1); + EditLogFileOutputStream nonRequiredSpy = + spyOnStream(nonRequiredJas); + // Make sure runtime.exit(...) hasn't been called at all yet. assertExitInvocations(0); + // ..and that the other stream is active. + assertTrue(nonRequiredJas.isActive()); + // This will actually return true in the tests, since the NN will not in // fact call Runtime.exit(); doAnEdit(); + // Since the required directory failed setReadyToFlush, and that + // directory was listed prior to the non-required directory, + // we should not call setReadyToFlush on the non-required + // directory. Regression test for HDFS-2874. + Mockito.verify(nonRequiredSpy, Mockito.never()).setReadyToFlush(); + assertFalse(nonRequiredJas.isActive()); + // A single failure of a required journal should result in a call to // runtime.exit(...). assertExitInvocations(atLeast(1)); @@ -217,15 +232,10 @@ public class TestEditLogJournalFailures { * @param index the index of the journal to take offline. * @return the original EditLogOutputStream of the journal. 
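Stripped of the HDFS specifics, the failure-injection technique used by the test above is Mockito's spy/doThrow/verify pattern. A small self-contained sketch of just that pattern; the FlushableStream class and method names below are hypothetical stand-ins, not HDFS types:

import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.verify;

import java.io.IOException;

import org.junit.Test;

public class SpyOnStreamSketch {

  /** Stand-in for an edit log output stream. */
  public static class FlushableStream {
    public void setReadyToFlush() throws IOException {
      // no-op in this sketch
    }
  }

  @Test
  public void requiredFailureStopsLaterStreams() throws Exception {
    FlushableStream required = spy(new FlushableStream());
    FlushableStream nonRequired = spy(new FlushableStream());

    // Inject a failure into the "required" stream, the way the real test
    // does with its spied EditLogFileOutputStream.
    doThrow(new IOException("fail on setReadyToFlush()"))
        .when(required).setReadyToFlush();

    // The caller stops at the first required failure instead of moving on.
    try {
      required.setReadyToFlush();
      nonRequired.setReadyToFlush();
    } catch (IOException expected) {
      // the required stream failed; leave the others untouched
    }

    // The stream listed after the failed one was never flushed.
    verify(nonRequired, never()).setReadyToFlush();
  }
}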
*/ - private EditLogOutputStream invalidateEditsDirAtIndex(int index, + private void invalidateEditsDirAtIndex(int index, boolean failOnFlush, boolean failOnWrite) throws IOException { - FSImage fsimage = cluster.getNamesystem().getFSImage(); - FSEditLog editLog = fsimage.getEditLog(); - - JournalAndStream jas = editLog.getJournals().get(index); - EditLogFileOutputStream elos = - (EditLogFileOutputStream) jas.getCurrentStream(); - EditLogFileOutputStream spyElos = spy(elos); + JournalAndStream jas = getJournalAndStream(index); + EditLogFileOutputStream spyElos = spyOnStream(jas); if (failOnWrite) { doThrow(new IOException("fail on write()")).when(spyElos).write( (FSEditLogOp) any()); @@ -237,25 +247,24 @@ public class TestEditLogJournalFailures { .setReadyToFlush(); } doNothing().when(spyElos).abort(); - + } + + private EditLogFileOutputStream spyOnStream(JournalAndStream jas) { + EditLogFileOutputStream elos = + (EditLogFileOutputStream) jas.getCurrentStream(); + EditLogFileOutputStream spyElos = spy(elos); jas.setCurrentStreamForTests(spyElos); - - return elos; + return spyElos; } /** - * Restore the journal at index index with the passed - * {@link EditLogOutputStream}. - * - * @param index index of the journal to restore. - * @param elos the {@link EditLogOutputStream} to put at that index. + * Pull out one of the JournalAndStream objects from the edit log. */ - private void restoreEditsDirAtIndex(int index, EditLogOutputStream elos) { + private JournalAndStream getJournalAndStream(int index) { FSImage fsimage = cluster.getNamesystem().getFSImage(); FSEditLog editLog = fsimage.getEditLog(); - JournalAndStream jas = editLog.getJournals().get(index); - jas.setCurrentStreamForTests(elos); + return editLog.getJournals().get(index); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java index 1fad704dc45..1a4e797cc84 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; import java.util.Collection; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -35,6 +36,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.NNStorage; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Test; @@ -66,6 +68,35 @@ public class TestFailureOfSharedDir { requiredEditsDirs.contains(bar)); } + + /** + * Make sure that the shared edits dirs are listed before non-shared dirs + * when the configuration is parsed. This ensures that the shared journals + * are synced before the local ones. 
+ */ + @Test + public void testSharedDirsComeFirstInEditsList() throws Exception { + Configuration conf = new Configuration(); + URI sharedA = new URI("file:///shared-A"); + URI sharedB = new URI("file:///shared-B"); + URI localA = new URI("file:///local-A"); + URI localB = new URI("file:///local-B"); + URI localC = new URI("file:///local-C"); + + conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, + Joiner.on(",").join(sharedA,sharedB)); + // List them in reverse order, to make sure they show up in + // the order listed, regardless of lexical sort order. + conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, + Joiner.on(",").join(localC, localB, localA)); + List dirs = FSNamesystem.getNamespaceEditsDirs(conf); + assertEquals( + "Shared dirs should come first, then local dirs, in the order " + + "they were listed in the configuration.", + Joiner.on(",").join(sharedA, sharedB, localC, localB, localA), + Joiner.on(",").join(dirs)); + } + /** * Test that marking the shared edits dir as being "required" causes the NN to * fail if that dir can't be accessed. @@ -73,10 +104,8 @@ public class TestFailureOfSharedDir { @Test public void testFailureOfSharedDir() throws Exception { Configuration conf = new Configuration(); - // The shared edits dir will automatically be marked required. - URI sharedEditsUri = MiniDFSCluster.formatSharedEditsDir( - new File(MiniDFSCluster.getBaseDirectory()), 0, 1); + // The shared edits dir will automatically be marked required. MiniDFSCluster cluster = null; try { cluster = new MiniDFSCluster.Builder(conf) @@ -84,8 +113,6 @@ public class TestFailureOfSharedDir { .numDataNodes(0) .build(); - assertEquals(sharedEditsUri, cluster.getSharedEditsDir(0, 1)); - cluster.waitActive(); cluster.transitionToActive(0); @@ -94,6 +121,7 @@ public class TestFailureOfSharedDir { assertTrue(fs.mkdirs(new Path("/test1"))); // Blow away the shared edits dir. + URI sharedEditsUri = cluster.getSharedEditsDir(0, 1); FileUtil.fullyDelete(new File(sharedEditsUri)); NameNode nn0 = cluster.getNameNode(0); @@ -107,6 +135,19 @@ public class TestFailureOfSharedDir { ioe); LOG.info("Got expected exception", ioe); } + + // Check that none of the edits dirs rolled, since the shared edits + // dir didn't roll. Regression test for HDFS-2874. + for (URI editsUri : cluster.getNameEditsDirs(0)) { + if (editsUri.equals(sharedEditsUri)) { + continue; + } + File editsDir = new File(editsUri.getPath()); + File curDir = new File(editsDir, "current"); + GenericTestUtils.assertGlobEquals(curDir, + "edits_.*", + NNStorage.getInProgressEditsFileName(1)); + } } finally { if (cluster != null) { cluster.shutdown(); From 3cc71933e9fc7f9e17c9e4fc59e651aba8dc0a5a Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Sat, 4 Feb 2012 03:00:17 +0000 Subject: [PATCH 119/177] HDFS-2890. DFSUtil#getSuffixIDs should skip unset configurations. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1240447 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../src/main/java/org/apache/hadoop/hdfs/DFSUtil.java | 10 ++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 2ff3c83ebfc..4905a289af5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -162,3 +162,5 @@ marked required. (atm via eli) HDFS-2863. 
Failures observed if dfs.edits.dir and shared.edits.dir have same directories. (Bikas Saha via atm) HDFS-2874. Edit log should log to shared dirs before local dirs. (todd) + +HDFS-2890. DFSUtil#getSuffixIDs should skip unset configurations. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 31280675544..0ae198a25f6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -655,7 +655,6 @@ public class DFSUtil { */ public static String getInfoServer( InetSocketAddress namenodeAddr, Configuration conf, boolean httpsAddress) { - String httpAddress = null; boolean securityOn = UserGroupInformation.isSecurityEnabled(); String httpAddressKey = (securityOn && httpsAddress) ? DFS_NAMENODE_HTTPS_ADDRESS_KEY : DFS_NAMENODE_HTTP_ADDRESS_KEY; @@ -957,16 +956,23 @@ public class DFSUtil { Collection nnIds = getNameNodeIds(conf, nsId); for (String nnId : emptyAsSingletonNull(nnIds)) { + if (LOG.isTraceEnabled()) { + LOG.trace(String.format("addressKey: %s nsId: %s nnId: %s", + addressKey, nsId, nnId)); + } if (knownNNId != null && !knownNNId.equals(nnId)) { continue; } String key = addKeySuffixes(addressKey, nsId, nnId); String addr = conf.get(key); + if (addr == null) { + continue; + } InetSocketAddress s = null; try { s = NetUtils.createSocketAddr(addr); } catch (Exception e) { - LOG.warn("Exception in creating socket address", e); + LOG.warn("Exception in creating socket address " + addr, e); continue; } if (!s.isUnresolved() && matcher.match(s)) { From ec6961b39c6e05a1ed0016e815c2e17c052d2462 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Sat, 4 Feb 2012 03:10:39 +0000 Subject: [PATCH 120/177] HDFS-2792. Make fsck work. Contributed by Aaron T. Myers. 
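The null check added to DFSUtil#getSuffixIDs above boils down to a simple rule: when scanning per-namenode keys, an unset key means "no address of this kind is configured for this namenode", not an error. A standalone sketch of that scan; the key strings and IDs are illustrative, and only Configuration.get's null-on-missing behaviour is relied on:

import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;

public class SuffixedAddressScanSketch {

  /**
   * Collect the addresses configured under "base.nsId.nnId" for each of
   * the given namenode IDs, skipping any ID whose key is not set.
   */
  static List<InetSocketAddress> resolve(Configuration conf, String base,
      String nsId, List<String> nnIds) {
    List<InetSocketAddress> addrs = new ArrayList<InetSocketAddress>();
    for (String nnId : nnIds) {
      String key = base + "." + nsId + "." + nnId;
      String addr = conf.get(key);
      if (addr == null) {
        continue;  // no address of this kind configured for this NN
      }
      addrs.add(NetUtils.createSocketAddr(addr));
    }
    return addrs;
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("dfs.namenode.rpc-address.ns1.nn1", "nn1.example.com:8020");
    // nn2 is deliberately left unset; resolve() simply skips it.
    System.out.println(resolve(conf, "dfs.namenode.rpc-address", "ns1",
        Arrays.asList("nn1", "nn2")));
  }
}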
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1240449 13f79535-47bb-0310-9956-ffa450edef68 --- .../io/retry/RetryInvocationHandler.java | 6 +- .../apache/hadoop/ipc/ProtocolTranslator.java | 35 ++++++ .../main/java/org/apache/hadoop/ipc/RPC.java | 18 ++- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 2 +- .../ClientNamenodeProtocolTranslatorPB.java | 8 +- .../org/apache/hadoop/hdfs/tools/DFSck.java | 32 ++++-- .../hdfs/server/namenode/ha/HATestUtil.java | 1 + .../hdfs/server/namenode/ha/TestHAFsck.java | 103 ++++++++++++++++++ 9 files changed, 188 insertions(+), 19 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolTranslator.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java index 3f94abfbb2a..28e88501d0a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java @@ -20,7 +20,6 @@ package org.apache.hadoop.io.retry; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -import java.lang.reflect.Proxy; import java.util.Collections; import java.util.Map; @@ -29,6 +28,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.retry.RetryPolicy.RetryAction; import org.apache.hadoop.util.ThreadUtil; import org.apache.hadoop.ipc.Client.ConnectionId; +import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcInvocationHandler; class RetryInvocationHandler implements RpcInvocationHandler { @@ -163,9 +163,7 @@ class RetryInvocationHandler implements RpcInvocationHandler { @Override //RpcInvocationHandler public ConnectionId getConnectionId() { - RpcInvocationHandler inv = (RpcInvocationHandler) Proxy - .getInvocationHandler(currentProxy); - return inv.getConnectionId(); + return RPC.getConnectionIdForProxy(currentProxy); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolTranslator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolTranslator.java new file mode 100644 index 00000000000..5bf9dbaed17 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtocolTranslator.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ipc; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * An interface implemented by client-side protocol translators to get the + * underlying proxy object the translator is operating on. + */ +@InterfaceAudience.Private +public interface ProtocolTranslator { + + /** + * Return the proxy object underlying this protocol translator. + * @return the proxy object underlying this protocol translator. + */ + public Object getUnderlyingProxyObject(); + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java index 4f85e905cd3..069841b1c9b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java @@ -40,6 +40,7 @@ import javax.net.SocketFactory; import org.apache.commons.logging.*; import org.apache.hadoop.io.*; +import org.apache.hadoop.ipc.Client.ConnectionId; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; import org.apache.hadoop.ipc.protobuf.ProtocolInfoProtos.ProtocolInfoService; import org.apache.hadoop.net.NetUtils; @@ -530,9 +531,24 @@ public class RPC { * Returns the server address for a given proxy. */ public static InetSocketAddress getServerAddress(Object proxy) { + return getConnectionIdForProxy(proxy).getAddress(); + } + + /** + * Return the connection ID of the given object. If the provided object is in + * fact a protocol translator, we'll get the connection ID of the underlying + * proxy object. + * + * @param proxy the proxy object to get the connection ID of. + * @return the connection ID for the provided proxy object. + */ + public static ConnectionId getConnectionIdForProxy(Object proxy) { + if (proxy instanceof ProtocolTranslator) { + proxy = ((ProtocolTranslator)proxy).getUnderlyingProxyObject(); + } RpcInvocationHandler inv = (RpcInvocationHandler) Proxy .getInvocationHandler(proxy); - return inv.getConnectionId().getAddress(); + return inv.getConnectionId(); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 4905a289af5..52b87db187d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -164,3 +164,5 @@ HDFS-2863. Failures observed if dfs.edits.dir and shared.edits.dir have same dir HDFS-2874. Edit log should log to shared dirs before local dirs. (todd) HDFS-2890. DFSUtil#getSuffixIDs should skip unset configurations. (atm) + +HDFS-2792. Make fsck work. 
(atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 0ae198a25f6..9fd24381768 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -71,7 +71,7 @@ import com.google.protobuf.BlockingService; @InterfaceAudience.Private public class DFSUtil { - private static final Log LOG = LogFactory.getLog(DFSUtil.class.getName()); + public static final Log LOG = LogFactory.getLog(DFSUtil.class.getName()); private DFSUtil() { /* Hidden constructor */ } private static final ThreadLocal RANDOM = new ThreadLocal() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index 5860d3a13af..f38467e07b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -63,6 +63,7 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryProxy; +import org.apache.hadoop.ipc.ProtocolTranslator; import org.apache.hadoop.ipc.ProtobufHelper; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolSignature; @@ -138,7 +139,7 @@ import com.google.protobuf.ServiceException; @InterfaceAudience.Private @InterfaceStability.Stable public class ClientNamenodeProtocolTranslatorPB implements - ClientProtocol, Closeable { + ClientProtocol, Closeable, ProtocolTranslator { final private ClientNamenodeProtocolPB rpcProxy; private static ClientNamenodeProtocolPB createNamenode( @@ -874,4 +875,9 @@ public class ClientNamenodeProtocolTranslatorPB implements throw ProtobufHelper.getRemoteException(e); } } + + @Override + public Object getUnderlyingProxyObject() { + return rpcProxy; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java index bc98995af30..1a99fcb62ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java @@ -32,11 +32,13 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hdfs.server.namenode.NameNode; -import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSClient; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.server.namenode.NamenodeFsck; +import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.security.Krb5AndCertsSslSocketConnector; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; @@ -204,8 +206,9 @@ public class DFSck extends Configured 
implements Tool { * Derive the namenode http address from the current file system, * either default or as set by "-fs" in the generic options. * @return Returns http address or null if failure. + * @throws IOException if we can't determine the active NN address */ - private String getCurrentNamenodeAddress() { + private String getCurrentNamenodeAddress() throws IOException { //String nnAddress = null; Configuration conf = getConf(); @@ -222,16 +225,21 @@ public class DFSck extends Configured implements Tool { System.err.println("FileSystem is " + fs.getUri()); return null; } - DistributedFileSystem dfs = (DistributedFileSystem) fs; - - // Derive the nameservice ID from the filesystem URI. - // The URI may have been provided by a human, and the server name may be - // aliased, so compare InetSocketAddresses instead of URI strings, and - // test against both possible variants of RPC address. - InetSocketAddress namenode = - NameNode.getAddress(dfs.getUri().getAuthority()); - return DFSUtil.getInfoServer(namenode, conf, true); + // force client address resolution. + fs.exists(new Path("/")); + + // Derive the nameservice ID from the filesystem connection. The URI may + // have been provided by a human, the server name may be aliased, or there + // may be multiple possible actual addresses (e.g. in an HA setup) so + // compare InetSocketAddresses instead of URI strings, and test against both + // possible configurations of RPC address (DFS_NAMENODE_RPC_ADDRESS_KEY and + // DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY). + DistributedFileSystem dfs = (DistributedFileSystem) fs; + DFSClient dfsClient = dfs.getClient(); + InetSocketAddress addr = RPC.getServerAddress(dfsClient.getNamenode()); + + return DFSUtil.getInfoServer(addr, conf, true); } private int doWork(final String[] args) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java index 5439d15b814..572b97dc07f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -175,6 +175,7 @@ public abstract class HATestUtil { nameNodeId1 + "," + nameNodeId2); conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + logicalName, ConfiguredFailoverProxyProvider.class.getName()); + conf.set("fs.defaultFS", "hdfs://" + logicalName); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java new file mode 100644 index 00000000000..10218f218ec --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; + +import junit.framework.Assert; + +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.tools.DFSck; +import org.apache.hadoop.util.ToolRunner; +import org.apache.log4j.Level; +import org.junit.Test; + +public class TestHAFsck { + + static { + ((Log4JLogger)LogFactory.getLog(DFSUtil.class)).getLogger().setLevel(Level.ALL); + } + + /** + * Test that fsck still works with HA enabled. + */ + @Test + public void testHaFsck() throws Exception { + Configuration conf = new Configuration(); + + // need some HTTP ports + MiniDFSNNTopology topology = new MiniDFSNNTopology() + .addNameservice(new MiniDFSNNTopology.NSConf("ha-nn-uri-0") + .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10001)) + .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10002))); + + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(topology) + .numDataNodes(0) + .build(); + FileSystem fs = null; + try { + cluster.waitActive(); + + cluster.transitionToActive(0); + + // Make sure conf has the relevant HA configs. + HATestUtil.setFailoverConfigurations(cluster, conf, "ha-nn-uri-0", 0); + + fs = HATestUtil.configureFailoverFs(cluster, conf); + fs.mkdirs(new Path("/test1")); + fs.mkdirs(new Path("/test2")); + + runFsck(conf); + + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + runFsck(conf); + } finally { + if (fs != null) { + fs.close(); + } + if (cluster != null) { + cluster.shutdown(); + } + } + } + + static void runFsck(Configuration conf) throws Exception { + ByteArrayOutputStream bStream = new ByteArrayOutputStream(); + PrintStream out = new PrintStream(bStream, true); + int errCode = ToolRunner.run(new DFSck(conf, out), + new String[]{"/", "-files"}); + String result = bStream.toString(); + System.out.println("output from fsck:\n" + result); + Assert.assertEquals(0, errCode); + assertTrue(result.contains("/test1")); + assertTrue(result.contains("/test2")); + } +} From 7b913180be9cb8f5aaded964179d6313add2f13f Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Sat, 4 Feb 2012 20:12:51 +0000 Subject: [PATCH 121/177] HDFS-2808. HA: haadmin should use namenode ids. 
Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1240600 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/hadoop/ha/HAAdmin.java | 60 +++++++---- .../java/org/apache/hadoop/ha/NodeFencer.java | 2 +- .../org/apache/hadoop/ha/TestHAAdmin.java | 101 +----------------- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 35 ++++++ .../apache/hadoop/hdfs/tools/DFSHAAdmin.java | 65 ++++++++++- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 47 ++++++++ .../TestCheckPointForSecurityTokens.java | 2 +- .../hdfs/server/namenode/TestCheckpoint.java | 2 +- 9 files changed, 192 insertions(+), 124 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index 2286a357662..6ceafb9ea69 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -40,7 +40,7 @@ import com.google.common.collect.ImmutableMap; /** * A command-line tool for making calls in the HAServiceProtocol. - * For example,. this can be used to force a daemon to standby or active + * For example,. this can be used to force a service to standby or active * mode, or to trigger a health-check. */ @InterfaceAudience.Private @@ -53,20 +53,20 @@ public abstract class HAAdmin extends Configured implements Tool { private static Map USAGE = ImmutableMap.builder() .put("-transitionToActive", - new UsageInfo("", "Transitions the daemon into Active state")) + new UsageInfo("", "Transitions the service into Active state")) .put("-transitionToStandby", - new UsageInfo("", "Transitions the daemon into Standby state")) + new UsageInfo("", "Transitions the service into Standby state")) .put("-failover", - new UsageInfo("[--"+FORCEFENCE+"] [--"+FORCEACTIVE+"] ", - "Failover from the first daemon to the second.\n" + + new UsageInfo("[--"+FORCEFENCE+"] [--"+FORCEACTIVE+"] ", + "Failover from the first service to the second.\n" + "Unconditionally fence services if the "+FORCEFENCE+" option is used.\n" + "Try to failover to the target service even if it is not ready if the " + FORCEACTIVE + " option is used.")) .put("-getServiceState", - new UsageInfo("", "Returns the state of the daemon")) + new UsageInfo("", "Returns the state of the service")) .put("-checkHealth", - new UsageInfo("", - "Requests that the daemon perform a health check.\n" + + new UsageInfo("", + "Requests that the service perform a health check.\n" + "The HAAdmin tool will exit with a non-zero exit code\n" + "if the check fails.")) .put("-help", @@ -74,11 +74,15 @@ public abstract class HAAdmin extends Configured implements Tool { .build(); /** Output stream for errors, for use in tests */ - PrintStream errOut = System.err; + protected PrintStream errOut = System.err; PrintStream out = System.out; - private static void printUsage(PrintStream errOut) { - errOut.println("Usage: java HAAdmin"); + protected String getUsageString() { + return "Usage: HAAdmin"; + } + + protected void printUsage(PrintStream errOut) { + errOut.println(getUsageString()); for (Map.Entry e : USAGE.entrySet()) { String cmd = e.getKey(); UsageInfo usage = e.getValue(); @@ -94,7 +98,7 @@ public abstract class HAAdmin extends Configured implements Tool { if (usage == null) { throw new RuntimeException("No usage for cmd " + cmd); } - errOut.println("Usage: 
java HAAdmin [" + cmd + " " + usage.args + "]"); + errOut.println("Usage: HAAdmin [" + cmd + " " + usage.args + "]"); } private int transitionToActive(final String[] argv) @@ -171,8 +175,10 @@ public abstract class HAAdmin extends Configured implements Tool { return -1; } - InetSocketAddress addr1 = NetUtils.createSocketAddr(args[0]); - InetSocketAddress addr2 = NetUtils.createSocketAddr(args[1]); + InetSocketAddress addr1 = + NetUtils.createSocketAddr(getServiceAddr(args[0])); + InetSocketAddress addr2 = + NetUtils.createSocketAddr(getServiceAddr(args[1])); HAServiceProtocol proto1 = getProtocol(args[0]); HAServiceProtocol proto2 = getProtocol(args[1]); @@ -219,11 +225,20 @@ public abstract class HAAdmin extends Configured implements Tool { } /** - * Return a proxy to the specified target host:port. + * Return the serviceId as is, we are assuming it was + * given as a service address of form . */ - protected HAServiceProtocol getProtocol(String target) + protected String getServiceAddr(String serviceId) { + return serviceId; + } + + /** + * Return a proxy to the specified target service. + */ + protected HAServiceProtocol getProtocol(String serviceId) throws IOException { - InetSocketAddress addr = NetUtils.createSocketAddr(target); + String serviceAddr = getServiceAddr(serviceId); + InetSocketAddress addr = NetUtils.createSocketAddr(serviceAddr); return (HAServiceProtocol)RPC.getProxy( HAServiceProtocol.class, HAServiceProtocol.versionID, addr, getConf()); @@ -231,6 +246,15 @@ public abstract class HAAdmin extends Configured implements Tool { @Override public int run(String[] argv) throws Exception { + try { + return runCmd(argv); + } catch (IllegalArgumentException iae) { + errOut.println("Illegal argument: " + iae.getMessage()); + return -1; + } + } + + protected int runCmd(String[] argv) throws Exception { if (argv.length < 1) { printUsage(errOut); return -1; @@ -244,7 +268,7 @@ public abstract class HAAdmin extends Configured implements Tool { printUsage(errOut); return -1; } - + if ("-transitionToActive".equals(cmd)) { return transitionToActive(argv); } else if ("-transitionToStandby".equals(cmd)) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java index 85a5b424852..90e682172f1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java @@ -54,7 +54,7 @@ import com.google.common.collect.Lists; @InterfaceAudience.Private @InterfaceStability.Evolving public class NodeFencer { - static final String CONF_METHODS_KEY = + public static final String CONF_METHODS_KEY = "dfs.namenode.ha.fencing.methods"; private static final String CLASS_RE = "([a-zA-Z0-9\\.\\$]+)"; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java index a5a58648d46..f22056a1f61 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestHAAdmin.java @@ -26,7 +26,6 @@ import java.io.PrintStream; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.Log; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import 
org.junit.Before; import org.junit.Test; @@ -88,107 +87,12 @@ public class TestHAAdmin { assertEquals(-1, runTool("-failover", "foo:1234")); assertOutputContains("failover: incorrect arguments"); } - + @Test public void testHelp() throws Exception { assertEquals(-1, runTool("-help")); assertEquals(0, runTool("-help", "transitionToActive")); - assertOutputContains("Transitions the daemon into Active"); - } - - @Test - public void testTransitionToActive() throws Exception { - assertEquals(0, runTool("-transitionToActive", "foo:1234")); - Mockito.verify(mockProtocol).transitionToActive(); - } - - @Test - public void testTransitionToStandby() throws Exception { - assertEquals(0, runTool("-transitionToStandby", "foo:1234")); - Mockito.verify(mockProtocol).transitionToStandby(); - } - - @Test - public void testFailoverWithNoFencerConfigured() throws Exception { - Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); - assertEquals(-1, runTool("-failover", "foo:1234", "bar:5678")); - } - - @Test - public void testFailoverWithFencerConfigured() throws Exception { - Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); - Configuration conf = new Configuration(); - conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); - tool.setConf(conf); - assertEquals(0, runTool("-failover", "foo:1234", "bar:5678")); - } - - @Test - public void testFailoverWithFencerConfiguredAndForce() throws Exception { - Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); - Configuration conf = new Configuration(); - conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); - tool.setConf(conf); - assertEquals(0, runTool("-failover", "foo:1234", "bar:5678", "--forcefence")); - } - - @Test - public void testFailoverWithForceActive() throws Exception { - Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); - Configuration conf = new Configuration(); - conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); - tool.setConf(conf); - assertEquals(0, runTool("-failover", "foo:1234", "bar:5678", "--forceactive")); - } - - @Test - public void testFailoverWithInvalidFenceArg() throws Exception { - Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); - Configuration conf = new Configuration(); - conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); - tool.setConf(conf); - assertEquals(-1, runTool("-failover", "foo:1234", "bar:5678", "notforcefence")); - } - - @Test - public void testFailoverWithFenceButNoFencer() throws Exception { - Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); - assertEquals(-1, runTool("-failover", "foo:1234", "bar:5678", "--forcefence")); - } - - @Test - public void testFailoverWithFenceAndBadFencer() throws Exception { - Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); - Configuration conf = new Configuration(); - conf.set(NodeFencer.CONF_METHODS_KEY, "foobar!"); - tool.setConf(conf); - assertEquals(-1, runTool("-failover", "foo:1234", "bar:5678", "--forcefence")); - } - - @Test - public void testForceFenceOptionListedBeforeArgs() throws Exception { - Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); - Configuration conf = new Configuration(); - conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); - tool.setConf(conf); - assertEquals(0, runTool("-failover", "--forcefence", "foo:1234", "bar:5678")); - } - - @Test - public void testGetServiceState() throws Exception { - assertEquals(0, 
runTool("-getServiceState", "foo:1234")); - Mockito.verify(mockProtocol).getServiceState(); - } - - @Test - public void testCheckHealth() throws Exception { - assertEquals(0, runTool("-checkHealth", "foo:1234")); - Mockito.verify(mockProtocol).monitorHealth(); - - Mockito.doThrow(new HealthCheckFailedException("fake health check failure")) - .when(mockProtocol).monitorHealth(); - assertEquals(-1, runTool("-checkHealth", "foo:1234")); - assertOutputContains("Health check failed: fake health check failure"); + assertOutputContains("Transitions the service into Active"); } private Object runTool(String ... args) throws Exception { @@ -199,5 +103,4 @@ public class TestHAAdmin { LOG.info("Output:\n" + errOutput); return ret; } - } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 52b87db187d..31bf349df4e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -166,3 +166,5 @@ HDFS-2874. Edit log should log to shared dirs before local dirs. (todd) HDFS-2890. DFSUtil#getSuffixIDs should skip unset configurations. (atm) HDFS-2792. Make fsck work. (atm) + +HDFS-2808. HA: haadmin should use namenode ids. (eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 9fd24381768..be447f9093e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -1042,4 +1042,39 @@ public class DFSUtil { RPC.setProtocolEngine(conf, protocol, ProtobufRpcEngine.class); server.addProtocol(RpcKind.RPC_PROTOCOL_BUFFER, protocol, service); } + + /** + * Map a logical namenode ID to its service address. Use the given + * nameservice if specified, or the configured one if none is given. 
+ * + * @param conf Configuration + * @param nsId which nameservice nnId is a part of, optional + * @param nnId the namenode ID to get the service addr for + * @return the service addr, null if it could not be determined + */ + public static String getNamenodeServiceAddr(final Configuration conf, + String nsId, String nnId) { + + if (nsId == null) { + Collection nsIds = getNameServiceIds(conf); + if (nsIds.size() != 1) { + // No nameservice ID was given and more than one is configured + return null; + } else { + nsId = nsIds.toArray(new String[1])[0]; + } + } + + String serviceAddrKey = concatSuffixes( + DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, nsId, nnId); + + String addrKey = concatSuffixes( + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, nsId, nnId); + + String serviceRpcAddr = conf.get(serviceAddrKey); + if (serviceRpcAddr == null) { + serviceRpcAddr = conf.get(addrKey); + } + return serviceRpcAddr; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java index aae99de8aff..13bde2ae533 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java @@ -17,12 +17,16 @@ */ package org.apache.hadoop.hdfs.tools; +import java.io.PrintStream; +import java.util.Arrays; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.ha.HAAdmin; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.util.ToolRunner; @@ -30,10 +34,15 @@ import org.apache.hadoop.util.ToolRunner; * Class to extend HAAdmin to do a little bit of HDFS-specific configuration. */ public class DFSHAAdmin extends HAAdmin { - - private static final Log LOG = - LogFactory.getLog(DFSHAAdmin.class); - + + private static final Log LOG = LogFactory.getLog(DFSHAAdmin.class); + + private String nameserviceId; + + protected void setErrOut(PrintStream errOut) { + this.errOut = errOut; + } + @Override public void setConf(Configuration conf) { if (conf != null) { @@ -52,6 +61,54 @@ public class DFSHAAdmin extends HAAdmin { super.setConf(conf); } + /** + * Try to map the given namenode ID to its service address. 
+ */ + @Override + protected String getServiceAddr(String nnId) { + HdfsConfiguration conf = (HdfsConfiguration)getConf(); + String serviceAddr = + DFSUtil.getNamenodeServiceAddr(conf, nameserviceId, nnId); + if (serviceAddr == null) { + throw new IllegalArgumentException( + "Unable to determine service address for namenode '" + nnId + "'"); + } + return serviceAddr; + } + + @Override + protected String getUsageString() { + return "Usage: DFSHAAdmin [-ns ]"; + } + + @Override + protected int runCmd(String[] argv) throws Exception { + if (argv.length < 1) { + printUsage(errOut); + return -1; + } + + int i = 0; + String cmd = argv[i++]; + + if ("-ns".equals(cmd)) { + if (i == argv.length) { + errOut.println("Missing nameservice ID"); + printUsage(errOut); + return -1; + } + nameserviceId = argv[i++]; + if (i >= argv.length) { + errOut.println("Missing command"); + printUsage(errOut); + return -1; + } + argv = Arrays.copyOfRange(argv, i, argv.length); + } + + return super.runCmd(argv); + } + public static void main(String[] argv) throws Exception { int res = ToolRunner.run(new DFSHAAdmin(), argv); System.exit(res); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index d3ea10f87ed..916bcbdbdc9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -406,6 +406,53 @@ public class TestDFSUtil { assertEquals(NS1_NN2_HOST, map.get("ns1").get("ns1-nn2").toString()); assertEquals(NS2_NN1_HOST, map.get("ns2").get("ns2-nn1").toString()); assertEquals(NS2_NN2_HOST, map.get("ns2").get("ns2-nn2").toString()); + + assertEquals(NS1_NN1_HOST, + DFSUtil.getNamenodeServiceAddr(conf, "ns1", "ns1-nn1")); + assertEquals(NS1_NN2_HOST, + DFSUtil.getNamenodeServiceAddr(conf, "ns1", "ns1-nn2")); + assertEquals(NS2_NN1_HOST, + DFSUtil.getNamenodeServiceAddr(conf, "ns2", "ns2-nn1")); + + // No nameservice was given and we can't determine which to use + // as two nameservices could share a namenode ID. 
+ assertEquals(null, DFSUtil.getNamenodeServiceAddr(conf, null, "ns1-nn1")); + } + + @Test + public void getNameNodeServiceAddr() throws IOException { + HdfsConfiguration conf = new HdfsConfiguration(); + + // One nameservice with two NNs + final String NS1_NN1_HOST = "ns1-nn1.example.com:8020"; + final String NS1_NN1_HOST_SVC = "ns1-nn2.example.com:8021"; + final String NS1_NN2_HOST = "ns1-nn1.example.com:8020"; + final String NS1_NN2_HOST_SVC = "ns1-nn2.example.com:8021"; + + conf.set(DFS_FEDERATION_NAMESERVICES, "ns1"); + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, "ns1"),"nn1,nn2"); + + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn1"), NS1_NN1_HOST); + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn2"), NS1_NN2_HOST); + + // The rpc address is used if no service address is defined + assertEquals(NS1_NN1_HOST, DFSUtil.getNamenodeServiceAddr(conf, null, "nn1")); + assertEquals(NS1_NN2_HOST, DFSUtil.getNamenodeServiceAddr(conf, null, "nn2")); + + // A nameservice is specified explicitly + assertEquals(NS1_NN1_HOST, DFSUtil.getNamenodeServiceAddr(conf, "ns1", "nn1")); + assertEquals(null, DFSUtil.getNamenodeServiceAddr(conf, "invalid", "nn1")); + + // The service addrs are used when they are defined + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, "ns1", "nn1"), NS1_NN1_HOST_SVC); + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, "ns1", "nn2"), NS1_NN2_HOST_SVC); + + assertEquals(NS1_NN1_HOST_SVC, DFSUtil.getNamenodeServiceAddr(conf, null, "nn1")); + assertEquals(NS1_NN2_HOST_SVC, DFSUtil.getNamenodeServiceAddr(conf, null, "nn2")); } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckPointForSecurityTokens.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckPointForSecurityTokens.java index fbbcfc72f85..1ba527702b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckPointForSecurityTokens.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckPointForSecurityTokens.java @@ -57,7 +57,7 @@ public class TestCheckPointForSecurityTokens { } /** - * Tests save namepsace. + * Tests save namespace. */ @Test public void testSaveNamespace() throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index 7c7017cea94..c1277299899 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -861,7 +861,7 @@ public class TestCheckpoint extends TestCase { } /** - * Tests save namepsace. + * Tests save namespace. */ public void testSaveNamespace() throws IOException { MiniDFSCluster cluster = null; From a660c5dd2c9cdeff4b11fa224c9b7edda0a418d0 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Sat, 4 Feb 2012 20:13:48 +0000 Subject: [PATCH 122/177] Add missing file from previous commit. 
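The lookup order that getNamenodeServiceAddr and the getNameNodeServiceAddr test above pin down is "service RPC key first, client RPC key as the fallback". A compact sketch of that rule; the literal key strings are written out here for readability and should be treated as illustrative:

import org.apache.hadoop.conf.Configuration;

public class ServiceAddrLookupSketch {

  /**
   * Return the value of the first key that is set, or null if none are.
   * Used here to prefer the service RPC address over the client RPC
   * address for a namenode.
   */
  static String firstSet(Configuration conf, String... keys) {
    for (String key : keys) {
      String value = conf.get(key);
      if (value != null) {
        return value;
      }
    }
    return null;
  }

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("dfs.namenode.rpc-address.ns1.nn1", "nn1.example.com:8020");
    // No service RPC address is set, so the client RPC address is used.
    System.out.println(firstSet(conf,
        "dfs.namenode.servicerpc-address.ns1.nn1",
        "dfs.namenode.rpc-address.ns1.nn1"));
  }
}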
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1240601 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop/hdfs/tools/TestDFSHAAdmin.java | 234 ++++++++++++++++++ 1 file changed, 234 insertions(+) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java new file mode 100644 index 00000000000..5b539b17145 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.tools; + +import static org.junit.Assert.*; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.PrintStream; + +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.Log; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.ha.HAServiceProtocol; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.ha.HealthCheckFailedException; +import org.apache.hadoop.ha.NodeFencer; + +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; +import static org.mockito.Mockito.when; + +import com.google.common.base.Charsets; +import com.google.common.base.Joiner; + +public class TestDFSHAAdmin { + private static final Log LOG = LogFactory.getLog(TestDFSHAAdmin.class); + + private DFSHAAdmin tool; + private ByteArrayOutputStream errOutBytes = new ByteArrayOutputStream(); + private String errOutput; + private HAServiceProtocol mockProtocol; + + private static final String NSID = "ns1"; + private static String HOST_A = "1.2.3.1"; + private static String HOST_B = "1.2.3.2"; + + private HdfsConfiguration getHAConf() { + HdfsConfiguration conf = new HdfsConfiguration(); + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, NSID); + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID, NSID); + conf.set(DFSUtil.addKeySuffixes( + DFSConfigKeys.DFS_HA_NAMENODES_KEY, NSID), "nn1,nn2"); + conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1"); + conf.set(DFSUtil.addKeySuffixes( + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, NSID, "nn1"), + HOST_A + ":12345"); + conf.set(DFSUtil.addKeySuffixes( + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, NSID, "nn2"), + HOST_B + ":12345"); + return conf; + } + + @Before + public void setup() throws IOException { + mockProtocol = Mockito.mock(HAServiceProtocol.class); + 
when(mockProtocol.readyToBecomeActive()).thenReturn(true); + tool = new DFSHAAdmin() { + @Override + protected HAServiceProtocol getProtocol(String serviceId) throws IOException { + getServiceAddr(serviceId); + return mockProtocol; + } + }; + tool.setConf(getHAConf()); + tool.setErrOut(new PrintStream(errOutBytes)); + } + + private void assertOutputContains(String string) { + if (!errOutput.contains(string)) { + fail("Expected output to contain '" + string + "' but was:\n" + + errOutput); + } + } + + @Test + public void testNameserviceOption() throws Exception { + assertEquals(-1, runTool("-ns")); + assertOutputContains("Missing nameservice ID"); + assertEquals(-1, runTool("-ns", "ns1")); + assertOutputContains("Missing command"); + // "ns1" isn't defined but we check this lazily and help doesn't use the ns + assertEquals(0, runTool("-ns", "ns1", "-help", "transitionToActive")); + assertOutputContains("Transitions the service into Active"); + } + + @Test + public void testNamenodeResolution() throws Exception { + assertEquals(0, runTool("-getServiceState", "nn1")); + Mockito.verify(mockProtocol).getServiceState(); + assertEquals(-1, runTool("-getServiceState", "undefined")); + assertOutputContains( + "Unable to determine service address for namenode 'undefined'"); + } + + @Test + public void testHelp() throws Exception { + assertEquals(-1, runTool("-help")); + assertEquals(0, runTool("-help", "transitionToActive")); + assertOutputContains("Transitions the service into Active"); + } + + @Test + public void testTransitionToActive() throws Exception { + assertEquals(0, runTool("-transitionToActive", "nn1")); + Mockito.verify(mockProtocol).transitionToActive(); + } + + @Test + public void testTransitionToStandby() throws Exception { + assertEquals(0, runTool("-transitionToStandby", "nn1")); + Mockito.verify(mockProtocol).transitionToStandby(); + } + + @Test + public void testFailoverWithNoFencerConfigured() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + assertEquals(-1, runTool("-failover", "nn1", "nn2")); + } + + @Test + public void testFailoverWithFencerConfigured() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + HdfsConfiguration conf = getHAConf(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(0, runTool("-failover", "nn1", "nn2")); + } + + @Test + public void testFailoverWithFencerAndNameservice() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + HdfsConfiguration conf = getHAConf(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(0, runTool("-ns", "ns1", "-failover", "nn1", "nn2")); + } + + @Test + public void testFailoverWithFencerConfiguredAndForce() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + HdfsConfiguration conf = getHAConf(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(0, runTool("-failover", "nn1", "nn2", "--forcefence")); + } + + @Test + public void testFailoverWithForceActive() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + HdfsConfiguration conf = getHAConf(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(0, runTool("-failover", "nn1", "nn2", "--forceactive")); + } + + @Test + public void testFailoverWithInvalidFenceArg() 
throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + HdfsConfiguration conf = getHAConf(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(-1, runTool("-failover", "nn1", "nn2", "notforcefence")); + } + + @Test + public void testFailoverWithFenceButNoFencer() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + assertEquals(-1, runTool("-failover", "nn1", "nn2", "--forcefence")); + } + + @Test + public void testFailoverWithFenceAndBadFencer() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + HdfsConfiguration conf = getHAConf(); + conf.set(NodeFencer.CONF_METHODS_KEY, "foobar!"); + tool.setConf(conf); + assertEquals(-1, runTool("-failover", "nn1", "nn2", "--forcefence")); + } + + @Test + public void testForceFenceOptionListedBeforeArgs() throws Exception { + Mockito.doReturn(HAServiceState.STANDBY).when(mockProtocol).getServiceState(); + HdfsConfiguration conf = getHAConf(); + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(0, runTool("-failover", "--forcefence", "nn1", "nn2")); + } + + @Test + public void testGetServiceState() throws Exception { + assertEquals(0, runTool("-getServiceState", "nn1")); + Mockito.verify(mockProtocol).getServiceState(); + } + + @Test + public void testCheckHealth() throws Exception { + assertEquals(0, runTool("-checkHealth", "nn1")); + Mockito.verify(mockProtocol).monitorHealth(); + + Mockito.doThrow(new HealthCheckFailedException("fake health check failure")) + .when(mockProtocol).monitorHealth(); + assertEquals(-1, runTool("-checkHealth", "nn1")); + assertOutputContains("Health check failed: fake health check failure"); + } + + private Object runTool(String ... args) throws Exception { + errOutBytes.reset(); + LOG.info("Running: DFSHAAdmin " + Joiner.on(" ").join(args)); + int ret = tool.run(args); + errOutput = new String(errOutBytes.toByteArray(), Charsets.UTF_8); + LOG.info("Output:\n" + errOutput); + return ret; + } +} From c17b4f8eefe5b77b77761a0bb46b49cd1ea6965d Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Mon, 6 Feb 2012 08:21:06 +0000 Subject: [PATCH 123/177] HDFS-2819. Document new HA-related configs in hdfs-default.xml. 
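A minimal sketch, not part of this patch, of the fencing configuration that this change documents and renames to the dfs.ha.* prefix (see the core-default.xml additions below); the private key path is a hypothetical placeholder and 30000 is the documented default connect timeout.

  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/exampleuser/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>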
Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1240914 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/hadoop/ha/NodeFencer.java | 6 +- .../apache/hadoop/ha/SshFenceByTcpPort.java | 6 +- .../src/main/resources/core-default.xml | 26 ++++ .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 4 +- .../java/org/apache/hadoop/hdfs/DFSUtil.java | 2 +- .../java/org/apache/hadoop/hdfs/HAUtil.java | 2 +- .../src/main/resources/hdfs-default.xml | 122 +++++++++++++++++- .../apache/hadoop/hdfs/MiniDFSCluster.java | 2 +- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 8 +- .../hdfs/server/namenode/ha/HATestUtil.java | 4 +- .../namenode/ha/TestHAConfiguration.java | 2 +- .../hadoop/hdfs/tools/TestDFSHAAdmin.java | 2 +- 13 files changed, 167 insertions(+), 21 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java index 90e682172f1..34a2c8b823a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java @@ -55,7 +55,7 @@ import com.google.common.collect.Lists; @InterfaceStability.Evolving public class NodeFencer { public static final String CONF_METHODS_KEY = - "dfs.namenode.ha.fencing.methods"; + "dfs.ha.fencing.methods"; private static final String CLASS_RE = "([a-zA-Z0-9\\.\\$]+)"; private static final Pattern CLASS_WITH_ARGUMENT = @@ -92,7 +92,7 @@ public class NodeFencer { } public boolean fence(InetSocketAddress serviceAddr) { - LOG.info("====== Beginning NameNode Fencing Process... ======"); + LOG.info("====== Beginning Service Fencing Process... ======"); int i = 0; for (FenceMethodWithArg method : methods) { LOG.info("Trying method " + (++i) + "/" + methods.size() +": " + method); @@ -112,7 +112,7 @@ public class NodeFencer { LOG.warn("Fencing method " + method + " was unsuccessful."); } - LOG.error("Unable to fence NameNode by any configured method."); + LOG.error("Unable to fence service by any configured method."); return false; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java index c9272491ebf..88404b92fd4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java @@ -52,7 +52,7 @@ import com.jcraft.jsch.Session; * with ssh. *

        * In order to achieve passwordless SSH, the operator must also configure - * dfs.namenode.ha.fencing.ssh.private-key-files to point to an + * dfs.ha.fencing.ssh.private-key-files to point to an * SSH key that has passphrase-less access to the given username and host. */ public class SshFenceByTcpPort extends Configured @@ -62,11 +62,11 @@ public class SshFenceByTcpPort extends Configured SshFenceByTcpPort.class); static final String CONF_CONNECT_TIMEOUT_KEY = - "dfs.namenode.ha.fencing.ssh.connect-timeout"; + "dfs.ha.fencing.ssh.connect-timeout"; private static final int CONF_CONNECT_TIMEOUT_DEFAULT = 30*1000; static final String CONF_IDENTITIES_KEY = - "dfs.namenode.ha.fencing.ssh.private-key-files"; + "dfs.ha.fencing.ssh.private-key-files"; /** * Verify that the argument, if given, in the conf is parseable. diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 9cf1eaf3116..c284faf9e6a 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -842,4 +842,30 @@ + + dfs.ha.fencing.methods + + + List of fencing methods to use for service fencing. May contain + builtin methods (eg shell and sshfence) or user-defined method. + + + + + dfs.ha.fencing.ssh.connect-timeout + 30000 + + SSH connection timeout, in milliseconds, to use with the builtin + sshfence fencer. + + + + + dfs.ha.fencing.ssh.private-key-files + + + The SSH private key files to use with the builtin sshfence fencer. + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 31bf349df4e..7b7deca9170 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -168,3 +168,5 @@ HDFS-2890. DFSUtil#getSuffixIDs should skip unset configurations. (atm) HDFS-2792. Make fsck work. (atm) HDFS-2808. HA: haadmin should use namenode ids. (eli) + +HDFS-2819. Document new HA-related configs in hdfs-default.xml. 
(eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 55d1ccd1ce0..b655bbddaad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -48,6 +48,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_POLICY_DEFAULT = "DEFAULT"; public static final String DFS_CLIENT_SOCKET_CACHE_CAPACITY_KEY = "dfs.client.socketcache.capacity"; public static final int DFS_CLIENT_SOCKET_CACHE_CAPACITY_DEFAULT = 16; + + // HA related configuration public static final String DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX = "dfs.client.failover.proxy.provider"; public static final String DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY = "dfs.client.failover.max.attempts"; public static final int DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT = 15; @@ -329,7 +331,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY = "dfs.block.local-path-access.user"; // HA related configuration - public static final String DFS_HA_NAMENODES_KEY = "dfs.ha.namenodes"; + public static final String DFS_HA_NAMENODES_KEY_PREFIX = "dfs.ha.namenodes"; public static final String DFS_HA_NAMENODE_ID_KEY = "dfs.ha.namenode.id"; public static final String DFS_HA_STANDBY_CHECKPOINTS_KEY = "dfs.ha.standby.checkpoints"; public static final boolean DFS_HA_STANDBY_CHECKPOINTS_DEFAULT = true; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index be447f9093e..f13e99ff1ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -349,7 +349,7 @@ public class DFSUtil { * @return collection of namenode Ids */ public static Collection getNameNodeIds(Configuration conf, String nsId) { - String key = addSuffix(DFS_HA_NAMENODES_KEY, nsId); + String key = addSuffix(DFS_HA_NAMENODES_KEY_PREFIX, nsId); return conf.getTrimmedStringCollection(key); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 88122b9d8f8..753cb3bf678 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -139,7 +139,7 @@ public class HAUtil { Preconditions.checkArgument(nnIds != null, "Could not determine namenode ids in namespace '%s'. 
" + "Please configure " + - DFSUtil.addKeySuffixes(DFSConfigKeys.DFS_HA_NAMENODES_KEY, + DFSUtil.addKeySuffixes(DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX, nsId), nsId); Preconditions.checkArgument(nnIds.size() == 2, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 2682c7f1750..9fa8e26d4d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -33,9 +33,11 @@ dfs.namenode.logging.level info - The logging level for dfs namenode. Other values are "dir"(trac -e namespace mutations), "block"(trace block under/over replications and block -creations/deletions), or "all". + + The logging level for dfs namenode. Other values are "dir" (trace + namespace mutations), "block" (trace block under/over replications + and block creations/deletions), or "all". + @@ -710,4 +712,118 @@ creations/deletions), or "all". + + dfs.client.failover.max.attempts + 15 + + Expert only. The number of client failover attempts that should be + made before the failover is considered failed. + + + + + dfs.client.failover.sleep.base.millis + 500 + + Expert only. The time to wait, in milliseconds, between failover + attempts increases exponentially as a function of the number of + attempts made so far, with a random factor of +/- 50%. This option + specifies the base value used in the failover calculation. The + first failover will retry immediately. The 2nd failover attempt + will delay at least dfs.client.failover.sleep.base.millis + milliseconds. And so on. + + + + + dfs.client.failover.sleep.max.millis + 15000 + + Expert only. The time to wait, in milliseconds, between failover + attempts increases exponentially as a function of the number of + attempts made so far, with a random factor of +/- 50%. This option + specifies the maximum value to wait between failovers. + Specifically, the time between two failover attempts will not + exceed +/- 50% of dfs.client.failover.sleep.max.millis + milliseconds. + + + + + dfs.client.failover.connection.retries + 0 + + Expert only. Indicates the number of retries a failover IPC client + will make to establish a server connection. + + + + + dfs.client.failover.connection.retries.on.timeouts + 0 + + Expert only. The number of retry attempts a failover IPC client + will make on socket timeout when establishing a server connection. + + + + + dfs.federation.nameservices + + + Comma-separated list of nameservices. + + + + + dfs.federation.nameservice.id + + + The ID of this nameservice. If the nameservice ID is not + configured or more than one nameservice is configured for + dfs.federation.nameservices it is determined automatically by + matching the local node's address with the configured address. + + + + + dfs.ha.namenodes.EXAMPLENAMESERVICE + + + The prefix for a given nameservice, contains a comma-separated + list of namenodes for a given nameservice (eg EXAMPLENAMESERVICE). + + + + + dfs.ha.namenode.id + + + The ID of this namenode. If the namenode ID is not configured it + is determined automatically by matching the local node's address + with the configured address. + + + + + dfs.ha.log-roll.period + 120 + + How often, in seconds, the StandbyNode should ask the active to + roll edit logs. Since the StandbyNode only reads from finalized + log segments, the StandbyNode will only be as up-to-date as how + often the logs are rolled. 
Note that failover triggers a log roll + so the StandbyNode will be up to date before it becomes active. + + + + + dfs.ha.tail-edits.period + 60 + + How often, in seconds, the StandbyNode should check for new + finalized log segments in the shared edits log. + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 0173f46814d..c7dea88ed81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -618,7 +618,7 @@ public class MiniDFSCluster { // If HA is enabled on this nameservice, enumerate all the namenodes // in the configuration. Also need to set a shared edits dir if (nnIds.size() > 1) { - conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, nameservice.getId()), + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, nameservice.getId()), Joiner.on(",").join(nnIds)); if (manageNameDfsDirs) { URI sharedEditsUri = getSharedEditsDir(nnCounter, nnCounter+nnIds.size()-1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index 916bcbdbdc9..ea7bcdec9ff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -303,7 +303,7 @@ public class TestDFSUtil { conf.set(DFS_FEDERATION_NAMESERVICES, nsId); conf.set(DFS_FEDERATION_NAMESERVICE_ID, nsId); - conf.set(DFS_HA_NAMENODES_KEY + "." + nsId, nnId); + conf.set(DFS_HA_NAMENODES_KEY_PREFIX + "." + nsId, nnId); // Set the nameservice specific keys with nameserviceId in the config key for (String key : NameNode.NAMESERVICE_SPECIFIC_KEYS) { @@ -380,9 +380,9 @@ public class TestDFSUtil { // Two nameservices, each with two NNs. 
conf.set(DFS_FEDERATION_NAMESERVICES, "ns1,ns2"); - conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, "ns1"), + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns1"), "ns1-nn1,ns1-nn2"); - conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, "ns2"), + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns2"), "ns2-nn1,ns2-nn2"); conf.set(DFSUtil.addKeySuffixes( DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "ns1-nn1"), @@ -430,7 +430,7 @@ public class TestDFSUtil { final String NS1_NN2_HOST_SVC = "ns1-nn2.example.com:8021"; conf.set(DFS_FEDERATION_NAMESERVICES, "ns1"); - conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, "ns1"),"nn1,nn2"); + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns1"),"nn1,nn2"); conf.set(DFSUtil.addKeySuffixes( DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn1"), NS1_NN1_HOST); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java index 572b97dc07f..b844b60b1bc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX; -import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; import java.io.IOException; @@ -171,7 +171,7 @@ public abstract class HATestUtil { logicalName, nameNodeId2), address2); conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, logicalName); - conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY, logicalName), + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, logicalName), nameNodeId1 + "," + nameNodeId2); conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." 
+ logicalName, ConfiguredFailoverProxyProvider.class.getName()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java index 12472c4f10b..a51648d9cf2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java @@ -61,7 +61,7 @@ public class TestHAConfiguration { conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, NSID); conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID, NSID); conf.set(DFSUtil.addKeySuffixes( - DFSConfigKeys.DFS_HA_NAMENODES_KEY, NSID), + DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX, NSID), "nn1,nn2"); conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1"); conf.set(DFSUtil.addKeySuffixes( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java index 5b539b17145..355009a765b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdmin.java @@ -59,7 +59,7 @@ public class TestDFSHAAdmin { conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, NSID); conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID, NSID); conf.set(DFSUtil.addKeySuffixes( - DFSConfigKeys.DFS_HA_NAMENODES_KEY, NSID), "nn1,nn2"); + DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX, NSID), "nn1,nn2"); conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1"); conf.set(DFSUtil.addKeySuffixes( DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, NSID, "nn1"), From 2e4cf977aea30c2a8b35be4eb876874e0055e5f4 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Mon, 6 Feb 2012 08:24:11 +0000 Subject: [PATCH 124/177] HDFS-2752. HA: exit if multiple shared dirs are configured. Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1240916 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hdfs/server/namenode/FSNamesystem.java | 15 ++++++++-- .../apache/hadoop/hdfs/MiniDFSCluster.java | 2 +- .../server/namenode/TestFSNamesystem.java | 3 +- .../namenode/ha/TestFailureOfSharedDir.java | 28 +++++++++++++++++-- .../namenode/ha/TestHAConfiguration.java | 3 +- 6 files changed, 45 insertions(+), 8 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 7b7deca9170..627c6caeeb7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -170,3 +170,5 @@ HDFS-2792. Make fsck work. (atm) HDFS-2808. HA: haadmin should use namenode ids. (eli) HDFS-2819. Document new HA-related configs in hdfs-default.xml. (eli) + +HDFS-2752. HA: exit if multiple shared dirs are configured. 
(eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 58584be2918..13cb6536b0c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -683,17 +683,28 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * are ordered before non-shared directories, and any duplicates * are removed. The order they are specified in the configuration * is retained. + * @return Collection of shared edits directories. + * @throws IOException if multiple shared edits directories are configured */ - public static List getNamespaceEditsDirs(Configuration conf) { + public static List getNamespaceEditsDirs(Configuration conf) + throws IOException { // Use a LinkedHashSet so that order is maintained while we de-dup // the entries. LinkedHashSet editsDirs = new LinkedHashSet(); + List sharedDirs = getSharedEditsDirs(conf); + + // Fail until multiple shared edits directories are supported (HDFS-2782) + if (sharedDirs.size() > 1) { + throw new IOException( + "Multiple shared edits directories are not yet supported"); + } + // First add the shared edits dirs. It's critical that the shared dirs // are added first, since JournalSet syncs them in the order they are listed, // and we need to make sure all edits are in place in the shared storage // before they are replicated locally. See HDFS-2874. - for (URI dir : getSharedEditsDirs(conf)) { + for (URI dir : sharedDirs) { if (!editsDirs.add(dir)) { LOG.warn("Edits URI " + dir + " listed multiple times in " + DFS_NAMENODE_SHARED_EDITS_DIR_KEY + ". Ignoring duplicates."); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index c7dea88ed81..9f969477d15 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -1594,7 +1594,7 @@ public class MiniDFSCluster { /** * Get the directories where the namenode stores its edits. 
*/ - public Collection getNameEditsDirs(int nnIndex) { + public Collection getNameEditsDirs(int nnIndex) throws IOException { return FSNamesystem.getNamespaceEditsDirs(nameNodes[nnIndex].conf); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java index d79f583efd4..de3a89c0834 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystem.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import static org.junit.Assert.*; +import java.io.IOException; import java.net.URI; import java.util.Collection; @@ -33,7 +34,7 @@ public class TestFSNamesystem { * Tests that the namenode edits dirs are gotten with duplicates removed */ @Test - public void testUniqueEditDirs() { + public void testUniqueEditDirs() throws IOException { Configuration config = new Configuration(); config.set(DFS_NAMENODE_EDITS_DIR_KEY, "file://edits/dir, " diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java index 1a4e797cc84..84d45c05914 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java @@ -68,6 +68,29 @@ public class TestFailureOfSharedDir { requiredEditsDirs.contains(bar)); } + /** + * Multiple shared edits directories is an invalid configuration. + */ + @Test + public void testMultipleSharedDirsFails() throws Exception { + Configuration conf = new Configuration(); + URI sharedA = new URI("file:///shared-A"); + URI sharedB = new URI("file:///shared-B"); + URI localA = new URI("file:///local-A"); + + conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, + Joiner.on(",").join(sharedA,sharedB)); + conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, + localA.toString()); + + try { + FSNamesystem.getNamespaceEditsDirs(conf); + fail("Allowed multiple shared edits directories"); + } catch (IOException ioe) { + assertEquals("Multiple shared edits directories are not yet supported", + ioe.getMessage()); + } + } /** * Make sure that the shared edits dirs are listed before non-shared dirs @@ -78,13 +101,12 @@ public class TestFailureOfSharedDir { public void testSharedDirsComeFirstInEditsList() throws Exception { Configuration conf = new Configuration(); URI sharedA = new URI("file:///shared-A"); - URI sharedB = new URI("file:///shared-B"); URI localA = new URI("file:///local-A"); URI localB = new URI("file:///local-B"); URI localC = new URI("file:///local-C"); conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, - Joiner.on(",").join(sharedA,sharedB)); + sharedA.toString()); // List them in reverse order, to make sure they show up in // the order listed, regardless of lexical sort order. 
conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY, @@ -93,7 +115,7 @@ public class TestFailureOfSharedDir { assertEquals( "Shared dirs should come first, then local dirs, in the order " + "they were listed in the configuration.", - Joiner.on(",").join(sharedA, sharedB, localC, localB, localA), + Joiner.on(",").join(sharedA, localC, localB, localA), Joiner.on(",").join(dirs)); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java index a51648d9cf2..785f4b22374 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import static org.junit.Assert.*; +import java.io.IOException; import java.net.URI; import java.util.Collection; @@ -87,7 +88,7 @@ public class TestHAConfiguration { * duplicates removed */ @Test - public void testHAUniqueEditDirs() { + public void testHAUniqueEditDirs() throws IOException { Configuration config = new Configuration(); config.set(DFS_NAMENODE_EDITS_DIR_KEY, "file://edits/dir, " From 296b6c0063a319f4b80e8f62468be95f39d4f4e3 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Mon, 6 Feb 2012 08:25:52 +0000 Subject: [PATCH 125/177] HDFS-2894. HA: automatically determine the nameservice Id if only one nameservice is configured. Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1240917 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 17 +++-- .../server/namenode/SecondaryNameNode.java | 21 +++--- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 22 ++++-- .../namenode/ha/TestHAConfiguration.java | 70 +++++++++++++------ 5 files changed, 89 insertions(+), 43 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 627c6caeeb7..edddfb11477 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -172,3 +172,5 @@ HDFS-2808. HA: haadmin should use namenode ids. (eli) HDFS-2819. Document new HA-related configs in hdfs-default.xml. (eli) HDFS-2752. HA: exit if multiple shared dirs are configured. (eli) + +HDFS-2894. HA: automatically determine the nameservice Id if only one nameservice is configured. (eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index f13e99ff1ad..8356b41053b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -907,9 +907,10 @@ public class DFSUtil { * the address of the local node. * * If {@link DFSConfigKeys#DFS_FEDERATION_NAMESERVICE_ID} is not specifically - * configured, this method determines the nameservice Id by matching the local - * node's address with the configured addresses. When a match is found, it - * returns the nameservice Id from the corresponding configuration key. 
+ * configured, and more than one nameservice Id is configured, this method + * determines the nameservice Id by matching the local node's address with the + * configured addresses. When a match is found, it returns the nameservice Id + * from the corresponding configuration key. * * @param conf Configuration * @param addressKey configuration key to get the address. @@ -921,6 +922,10 @@ public class DFSUtil { if (nameserviceId != null) { return nameserviceId; } + Collection nsIds = getNameServiceIds(conf); + if (1 == nsIds.size()) { + return nsIds.toArray(new String[1])[0]; + } String nnId = conf.get(DFS_HA_NAMENODE_ID_KEY); return getSuffixIDs(conf, addressKey, null, nnId, LOCAL_ADDRESS_MATCHER)[0]; @@ -1057,11 +1062,11 @@ public class DFSUtil { if (nsId == null) { Collection nsIds = getNameServiceIds(conf); - if (nsIds.size() != 1) { + if (1 == nsIds.size()) { + nsId = nsIds.toArray(new String[1])[0]; + } else { // No nameservice ID was given and more than one is configured return null; - } else { - nsId = nsIds.toArray(new String[1])[0]; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index 73f59900d1b..12127064e26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -47,7 +47,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.DFSUtil.ErrorSimulator; -import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; @@ -170,20 +169,17 @@ public class SecondaryNameNode implements Runnable { try { String nsId = DFSUtil.getSecondaryNameServiceId(conf); if (HAUtil.isHAEnabled(conf, nsId)) { - LOG.fatal("Cannot use SecondaryNameNode in an HA cluster." + + throw new IOException( + "Cannot use SecondaryNameNode in an HA cluster." + " The Standby Namenode will perform checkpointing."); - shutdown(); - return; } NameNode.initializeGenericKeys(conf, nsId, null); initialize(conf, commandLineOpts); - } catch(IOException e) { + } catch (IOException e) { shutdown(); - LOG.fatal("Failed to start secondary namenode. ", e); throw e; - } catch(HadoopIllegalArgumentException e) { + } catch (HadoopIllegalArgumentException e) { shutdown(); - LOG.fatal("Failed to start secondary namenode. ", e); throw e; } } @@ -335,7 +331,6 @@ public class SecondaryNameNode implements Runnable { // The main work loop // public void doWork() { - // // Poll the Namenode (once every checkpointCheckPeriod seconds) to find the // number of transactions in the edit log that haven't yet been checkpointed. 
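An illustrative sketch only, not part of the patch: a single-nameservice HA configuration of the kind the DFSUtil change above can now resolve without an explicit dfs.federation.nameservice.id (the lone nameservice is picked automatically), and which the SecondaryNameNode changes in this file now reject at startup with an IOException. Addresses and ports are placeholders borrowed from the tests.

  <property>
    <name>dfs.federation.nameservices</name>
    <value>ns1</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.ns1</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns1.nn1</name>
    <value>1.2.3.1:12345</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.ns1.nn2</name>
    <value>1.2.3.2:12345</value>
  </property>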
@@ -612,7 +607,13 @@ public class SecondaryNameNode implements Runnable { StringUtils.startupShutdownMessage(SecondaryNameNode.class, argv, LOG); Configuration tconf = new HdfsConfiguration(); - SecondaryNameNode secondary = new SecondaryNameNode(tconf, opts); + SecondaryNameNode secondary = null; + try { + secondary = new SecondaryNameNode(tconf, opts); + } catch (IOException ioe) { + LOG.fatal("Failed to start secondary namenode", ioe); + System.exit(-1); + } if (opts.getCommand() != null) { int ret = secondary.processStartupCommand(opts); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index ea7bcdec9ff..5b67cf5491d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -214,6 +214,10 @@ public class TestDFSUtil { checkNameServiceId(conf, NN1_ADDRESS, "nn1"); checkNameServiceId(conf, NN2_ADDRESS, "nn2"); checkNameServiceId(conf, NN3_ADDRESS, null); + + // HA is not enabled in a purely federated config + assertFalse(HAUtil.isHAEnabled(conf, "nn1")); + assertFalse(HAUtil.isHAEnabled(conf, "nn2")); } public void checkNameServiceId(Configuration conf, String addr, @@ -399,8 +403,10 @@ public class TestDFSUtil { Map> map = DFSUtil.getHaNnRpcAddresses(conf); - System.err.println("TestHANameNodesWithFederation:\n" + - DFSUtil.addressMapToString(map)); + + assertTrue(HAUtil.isHAEnabled(conf, "ns1")); + assertTrue(HAUtil.isHAEnabled(conf, "ns2")); + assertFalse(HAUtil.isHAEnabled(conf, "ns3")); assertEquals(NS1_NN1_HOST, map.get("ns1").get("ns1-nn1").toString()); assertEquals(NS1_NN2_HOST, map.get("ns1").get("ns1-nn2").toString()); @@ -414,9 +420,13 @@ public class TestDFSUtil { assertEquals(NS2_NN1_HOST, DFSUtil.getNamenodeServiceAddr(conf, "ns2", "ns2-nn1")); - // No nameservice was given and we can't determine which to use - // as two nameservices could share a namenode ID. + // No nameservice was given and we can't determine which service addr + // to use as two nameservices could share a namenode ID. 
assertEquals(null, DFSUtil.getNamenodeServiceAddr(conf, null, "ns1-nn1")); + + // Ditto for nameservice IDs, if multiple are defined + assertEquals(null, DFSUtil.getNamenodeNameServiceId(conf)); + assertEquals(null, DFSUtil.getSecondaryNameServiceId(conf)); } @Test @@ -453,6 +463,10 @@ public class TestDFSUtil { assertEquals(NS1_NN1_HOST_SVC, DFSUtil.getNamenodeServiceAddr(conf, null, "nn1")); assertEquals(NS1_NN2_HOST_SVC, DFSUtil.getNamenodeServiceAddr(conf, null, "nn2")); + + // We can determine the nameservice ID, there's only one listed + assertEquals("ns1", DFSUtil.getNamenodeNameServiceId(conf)); + assertEquals("ns1", DFSUtil.getSecondaryNameServiceId(conf)); } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java index 785f4b22374..9cd6ab7089f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAConfiguration.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Test; import org.mockito.Mockito; @@ -39,16 +40,13 @@ import org.mockito.Mockito; * which don't start daemons. */ public class TestHAConfiguration { - private static final String NSID = "ns1"; - private static String HOST_A = "1.2.3.1"; - private static String HOST_B = "1.2.3.2"; private FSNamesystem fsn = Mockito.mock(FSNamesystem.class); - private Configuration conf = new Configuration(); @Test public void testCheckpointerValidityChecks() throws Exception { try { + Configuration conf = new Configuration(); new StandbyCheckpointer(conf, fsn); fail("Bad config did not throw an error"); } catch (IllegalArgumentException iae) { @@ -56,30 +54,37 @@ public class TestHAConfiguration { "Invalid URI for NameNode address", iae); } } - - @Test - public void testGetOtherNNHttpAddress() { - conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, NSID); - conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID, NSID); + + private Configuration getHAConf(String nsId, String host1, String host2) { + Configuration conf = new Configuration(); + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, nsId); conf.set(DFSUtil.addKeySuffixes( - DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX, NSID), + DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX, nsId), "nn1,nn2"); conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1"); conf.set(DFSUtil.addKeySuffixes( - DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, - NSID, "nn1"), - HOST_A + ":12345"); + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, nsId, "nn1"), + host1 + ":12345"); conf.set(DFSUtil.addKeySuffixes( - DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, - NSID, "nn2"), - HOST_B + ":12345"); - NameNode.initializeGenericKeys(conf, NSID, "nn1"); + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY, nsId, "nn2"), + host2 + ":12345"); + return conf; + } + + @Test + public void testGetOtherNNHttpAddress() { + // Use non-local addresses to avoid host address matching + Configuration conf = getHAConf("ns1", "1.2.3.1", "1.2.3.2"); + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICE_ID, 
"ns1"); + + // This is done by the NN before the StandbyCheckpointer is created + NameNode.initializeGenericKeys(conf, "ns1", "nn1"); // Since we didn't configure the HTTP address, and the default is - // 0.0.0.0, it should substitute the address from the RPC configuratoin + // 0.0.0.0, it should substitute the address from the RPC configuration // above. StandbyCheckpointer checkpointer = new StandbyCheckpointer(conf, fsn); - assertEquals(HOST_B + ":" + DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, + assertEquals("1.2.3.2:" + DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT, checkpointer.getActiveNNAddress()); } @@ -89,14 +94,33 @@ public class TestHAConfiguration { */ @Test public void testHAUniqueEditDirs() throws IOException { - Configuration config = new Configuration(); + Configuration conf = new Configuration(); - config.set(DFS_NAMENODE_EDITS_DIR_KEY, "file://edits/dir, " + conf.set(DFS_NAMENODE_EDITS_DIR_KEY, "file://edits/dir, " + "file://edits/shared/dir"); // overlapping - config.set(DFS_NAMENODE_SHARED_EDITS_DIR_KEY, "file://edits/shared/dir"); + conf.set(DFS_NAMENODE_SHARED_EDITS_DIR_KEY, "file://edits/shared/dir"); // getNamespaceEditsDirs removes duplicates across edits and shared.edits - Collection editsDirs = FSNamesystem.getNamespaceEditsDirs(config); + Collection editsDirs = FSNamesystem.getNamespaceEditsDirs(conf); assertEquals(2, editsDirs.size()); } + + /** + * Test that the 2NN does not start if given a config with HA NNs. + */ + @Test + public void testSecondaryNameNodeDoesNotStart() throws IOException { + // Note we're not explicitly setting the nameservice Id in the + // config as it is not required to be set and we want to test + // that we can determine if HA is enabled when the nameservice Id + // is not explicitly defined. + Configuration conf = getHAConf("ns1", "1.2.3.1", "1.2.3.2"); + try { + new SecondaryNameNode(conf); + fail("Created a 2NN with an HA config"); + } catch (IOException ioe) { + GenericTestUtils.assertExceptionContains( + "Cannot use SecondaryNameNode in an HA cluster", ioe); + } + } } From db7c013877f4c711ba5899d03b59994e20671e5a Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Mon, 6 Feb 2012 21:18:11 +0000 Subject: [PATCH 126/177] HDFS-2733. Document HA configuration and CLI. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1241183 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../src/site/apt/HDFSHighAvailability.apt.vm | 434 ++++++++++++++++++ hadoop-project/src/site/site.xml | 1 + 3 files changed, 437 insertions(+) create mode 100644 hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/HDFSHighAvailability.apt.vm diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index edddfb11477..10706d0abda 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -174,3 +174,5 @@ HDFS-2819. Document new HA-related configs in hdfs-default.xml. (eli) HDFS-2752. HA: exit if multiple shared dirs are configured. (eli) HDFS-2894. HA: automatically determine the nameservice Id if only one nameservice is configured. (eli) + +HDFS-2733. Document HA configuration and CLI. 
(atm) diff --git a/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/HDFSHighAvailability.apt.vm b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/HDFSHighAvailability.apt.vm new file mode 100644 index 00000000000..c66506734eb --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/HDFSHighAvailability.apt.vm @@ -0,0 +1,434 @@ +~~ Licensed under the Apache License, Version 2.0 (the "License"); +~~ you may not use this file except in compliance with the License. +~~ You may obtain a copy of the License at +~~ +~~ http://www.apache.org/licenses/LICENSE-2.0 +~~ +~~ Unless required by applicable law or agreed to in writing, software +~~ distributed under the License is distributed on an "AS IS" BASIS, +~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +~~ See the License for the specific language governing permissions and +~~ limitations under the License. See accompanying LICENSE file. + + --- + Hadoop Distributed File System-${project.version} - High Availability + --- + --- + ${maven.build.timestamp} + +HDFS High Availability + + \[ {{{./index.html}Go Back}} \] + +%{toc|section=1|fromDepth=0} + +* {Purpose} + + This guide provides an overview of the HDFS High Availability (HA) feature and + how to configure and manage an HA HDFS cluster. + + This document assumes that the reader has a general understanding of + general components and node types in an HDFS cluster. Please refer to the + HDFS Architecture guide for details. + +* {Background} + + Prior to Hadoop 0.23.2, the NameNode was a single point of failure (SPOF) in + an HDFS cluster. Each cluster had a single NameNode, and if that machine or + process became unavailable, the cluster as a whole would be unavailable + until the NameNode was either restarted or brought up on a separate machine. + + This impacted the total availability of the HDFS cluster in two major ways: + + * In the case of an unplanned event such as a machine crash, the cluster would + be unavailable until an operator restarted the NameNode. + + * Planned maintenance events such as software or hardware upgrades on the + NameNode machine would result in windows of cluster downtime. + + The HDFS High Availability feature addresses the above problems by providing + the option of running two redundant NameNodes in the same cluster in an + Active/Passive configuration with a hot standby. This allows a fast failover to + a new NameNode in the case that a machine crashes, or a graceful + administrator-initiated failover for the purpose of planned maintenance. + +* {Architecture} + + In a typical HA cluster, two separate machines are configured as NameNodes. + At any point in time, exactly one of the NameNodes is in an state, + and the other is in a state. The Active NameNode is responsible + for all client operations in the cluster, while the Standby is simply acting + as a slave, maintaining enough state to provide a fast failover if + necessary. + + In order for the Standby node to keep its state synchronized with the Active + node, the current implementation requires that the two nodes both have access + to a directory on a shared storage device (eg an NFS mount from a NAS). This + restriction will likely be relaxed in future versions. + + When any namespace modification is performed by the Active node, it durably + logs a record of the modification to an edit log file stored in the shared + directory. 
The Standby node is constantly watching this directory for edits, + and as it sees the edits, it applies them to its own namespace. In the event of + a failover, the Standby will ensure that it has read all of the edits from the + shared storage before promoting itself to the Active state. This ensures that + the namespace state is fully synchronized before a failover occurs. + + In order to provide a fast failover, it is also necessary that the Standby node + have up-to-date information regarding the location of blocks in the cluster. + In order to achieve this, the DataNodes are configured with the location of + both NameNodes, and send block location information and heartbeats to both. + + It is vital for the correct operation of an HA cluster that only one of the + NameNodes be Active at a time. Otherwise, the namespace state would quickly + diverge between the two, risking data loss or other incorrect results. In + order to ensure this property and prevent the so-called "split-brain scenario," + the administrator must configure at least one for the shared + storage. During a failover, if it cannot be verified that the previous Active + node has relinquished its Active state, the fencing process is responsible for + cutting off the previous Active's access to the shared edits storage. This + prevents it from making any further edits to the namespace, allowing the new + Active to safely proceed with failover. + + <> Currently, only manual failover is supported. This means the HA + NameNodes are incapable of automatically detecting a failure of the Active + NameNode, and instead rely on the operator to manually initiate a failover. + Automatic failure detection and initiation of a failover will be implemented in + future versions. + +* {Hardware resources} + + In order to deploy an HA cluster, you should prepare the following: + + * <> - the machines on which you run the Active and + Standby NameNodes should have equivalent hardware to each other, and + equivalent hardware to what would be used in a non-HA cluster. + + * <> - you will need to have a shared directory which both + NameNode machines can have read/write access to. Typically this is a remote + filer which supports NFS and is mounted on each of the NameNode machines. + Currently only a single shared edits directory is supported. Thus, the + availability of the system is limited by the availability of this shared edits + directory, and therefore in order to remove all single points of failure there + needs to be redundancy for the shared edits directory. Specifically, multiple + network paths to the storage, and redundancy in the storage itself (disk, + network, and power). Beacuse of this, it is recommended that the shared storage + server be a high-quality dedicated NAS appliance rather than a simple Linux + server. + + Note that, in an HA cluster, the Standby NameNode also performs checkpoints of + the namespace state, and thus it is not necessary to run a Secondary NameNode, + CheckpointNode, or BackupNode in an HA cluster. In fact, to do so would be an + error. This also allows one who is reconfiguring a non-HA-enabled HDFS cluster + to be HA-enabled to reuse the hardware which they had previously dedicated to + the Secondary NameNode. + +* {Deployment} + +** Configuration overview + + Similar to Federation configuration, HA configuration is backward compatible + and allows existing single NameNode configurations to work without change. 
+ The new configuration is designed such that all the nodes in the cluster may + have the same configuration without the need for deploying different + configuration files to different machines based on the type of the node. + + Like HDFS Federation, HA clusters reuse the <<>> to identify a + single HDFS instance that may in fact consist of multiple HA NameNodes. In + addition, a new abstraction called <<>> is added with HA. Each + distinct NameNode in the cluster has a different NameNode ID to distinguish it. + To support a single configuration file for all of the NameNodes, the relevant + configuration parameters are suffixed with the <> as well as + the <>. + +** Configuration details + + To configure HA NameNodes, you must add several configuration options to your + <> configuration file. + + The order in which you set these configurations is unimportant, but the values + you choose for <> and + <> will determine the keys of those that + follow. Thus, you should decide on these values before setting the rest of the + configuration options. + + * <> - the logical name for this new nameservice + + Choose a logical name for this nameservice, for example "mycluster", and use + this logical name for the value of this config option. The name you choose is + arbitrary. It will be used both for configuration and as the authority + component of absolute HDFS paths in the cluster. + + <> If you are also using HDFS Federation, this configuration setting + should also include the list of other nameservices, HA or otherwise, as a + comma-separated list. + +---- + + dfs.federation.nameservices + mycluster + +---- + + * <> - unique identifiers for each NameNode in the nameservice + + Configure with a list of comma-separated NameNode IDs. This will be used by + DataNodes to determine all the NameNodes in the cluster. For example, if you + used "mycluster" as the nameservice ID previously, and you wanted to use "nn1" + and "nn2" as the individual IDs of the NameNodes, you would configure this as + such: + +---- + + dfs.ha.namenodes.mycluster + nn1,nn2 + +---- + + <> Currently, only a maximum of two NameNodes may be configured per + nameservice. + + * <> - the fully-qualified RPC address for each NameNode to listen on + + For both of the previously-configured NameNode IDs, set the full address and + IPC port of the NameNode processs. Note that this results in two separate + configuration options. For example: + +---- + + dfs.namenode.rpc-address.mycluster.nn1 + machine1.example.com:8020 + + + dfs.namenode.rpc-address.mycluster.nn2 + machine2.example.com:8020 + +---- + + <> You may similarly configure the "<>" setting if + you so desire. + + * <> - the fully-qualified HTTP address for each NameNode to listen on + + Similarly to above, set the addresses for both NameNodes' HTTP + servers to listen on. For example: + +---- + + dfs.namenode.http-address.mycluster.nn1 + machine1.example.com:50070 + + + dfs.namenode.http-address.mycluster.nn2 + machine2.example.com:50070 + +---- + + <> If you have Hadoop's security features enabled, you should also set + the similarly for each NameNode. + + * <> - the location of the shared storage directory + + This is where one configures the path to the remote shared edits directory + which the Standby NameNode uses to stay up-to-date with all the file system + changes the Active NameNode makes. <> This directory should be mounted r/w on both NameNode machines. + The value of this setting should be the absolute path to this directory on the + NameNode machines. 
For example: + +---- + + dfs.namenode.shared.edits.dir + file:///mnt/filer1/dfs/ha-name-dir-shared + +---- + + * <> - the Java class that HDFS clients use to contact the Active NameNode + + Configure the name of the Java class which will be used by the DFS Client to + determine which NameNode is the current Active, and therefore which NameNode is + currently serving client requests. The only implementation which currently + ships with Hadoop is the <>, so use this + unless you are using a custom one. For example: + +---- + + dfs.client.failover.proxy.provider.mycluster + org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider + +---- + + * <> - a list of scripts or Java classes which will be used to fence the Active NameNode during a failover + + It is critical for correctness of the system that only one NameNode be in the + Active state at any given time. Thus, during a failover, we first ensure that + the Active NameNode is either in the Standby state, or the process has + terminated, before transitioning the other NameNode to the Active state. In + order to do this, you must configure at least one <> These are + configured as a carriage-return-separated list, which will be attempted in order + until one indicates that fencing has succeeded. There are two methods which + ship with Hadoop: and . For information on implementing + your own custom fencing method, see the class. + + * <> - SSH to the Active NameNode and kill the process + + The option SSHes to the target node and uses to kill the + process listening on the service's TCP port. In order for this fencing option + to work, it must be able to SSH to the target node without providing a + passphrase. Thus, one must also configure the + <> option, which is a + comma-separated list of SSH private key files. For example: + +--- + + dfs.ha.fencing.methods + sshfence + + + + dfs.ha.fencing.ssh.private-key-files + /home/exampleuser/.ssh/id_rsa + +--- + + Optionally, one may configure a non-standard username or port to perform the + SSH. One may also configure a timeout, in milliseconds, for the SSH, after + which this fencing method will be considered to have failed. It may be + configured like so: + +--- + + dfs.ha.fencing.methods + sshfence([[username][:port]]) + + + dfs.ha.fencing.ssh.connect-timeout + + +--- + + * <> - run an arbitrary shell command to fence the Active NameNode + + The fencing method runs an arbitrary shell command. It may be + configured like so: + +--- + + dfs.ha.fencing.methods + shell(/path/to/my/script.sh arg1 arg2 ...) + +--- + + The string between '(' and ')' is passed directly to a bash shell and may not + include any closing parentheses. + + When executed, the first argument to the configured script will be the address + of the NameNode to be fenced, followed by all arguments specified in the + configuration. + + The shell command will be run with an environment set up to contain all of the + current Hadoop configuration variables, with the '_' character replacing any + '.' characters in the configuration keys. If the shell command returns an exit + code of 0, the fencing is determined to be successful. If it returns any other + exit code, the fencing was not successful and the next fencing method in the + list will be attempted. + + <> This fencing method does not implement any timeout. If timeouts are + necessary, they should be implemented in the shell script itself (eg by forking + a subshell to kill its parent in some number of seconds). 
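  As a sketch only (the fallback script path is a hypothetical placeholder), several fencing methods may be listed on separate lines within the same value; they are attempted in order until one of them reports success:

---
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence
shell(/path/to/my/fence-fallback.sh)</value>
  </property>
---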
+
+  * <<fs.defaultFS>> - the default path prefix used by the Hadoop FS client when
+    none is given
+
+    Optionally, you may now configure the default path for Hadoop clients to use
+    the new HA-enabled logical URI. If you used "mycluster" as the nameservice ID
+    earlier, this will be the value of the authority portion of all of your HDFS
+    paths. This may be configured like so, in your <<core-site.xml>> file:
+
+---
+ <property>
+   <name>fs.defaultFS</name>
+   <value>hdfs://mycluster</value>
+ </property>
+---
+
+** Deployment details
+
+  After all of the necessary configuration options have been set, one must
+  initially synchronize the two HA NameNodes' on-disk metadata. If you are
+  setting up a fresh HDFS cluster, you should first run the format command
+  (<hdfs namenode -format>) on one of the NameNodes. If you have already
+  formatted the NameNode, or are converting a non-HA-enabled cluster to be
+  HA-enabled, you should now copy over the contents of your NameNode metadata
+  directories to the other, unformatted NameNode using <scp> or a similar
+  utility. The location of the directories containing the NameNode metadata is
+  configured via the configuration options <<dfs.namenode.name.dir>> and/or
+  <<dfs.namenode.edits.dir>>. At this time, you should also ensure that the
+  shared edits dir (as configured by <<dfs.namenode.shared.edits.dir>>) includes
+  all recent edits files which are in your NameNode metadata directories.
+
+  At this point you may start both of your HA NameNodes as you normally would
+  start a NameNode.
+
+  You can visit each of the NameNodes' web pages separately by browsing to their
+  configured HTTP addresses. You should notice that next to the configured
+  address will be the HA state of the NameNode (either "standby" or "active").
+  Whenever an HA NameNode starts, it is initially in the Standby state.
+
+** Administrative commands
+
+  Now that your HA NameNodes are configured and started, you will have access
+  to some additional commands to administer your HA HDFS cluster. Specifically,
+  you should familiarize yourself with all of the subcommands of the
+  "<hdfs haadmin>" command. Running this command without any additional
+  arguments will display the following usage information:
+
+---
+Usage: DFSHAAdmin [-ns <nameserviceId>]
+    [-transitionToActive <serviceId>]
+    [-transitionToStandby <serviceId>]
+    [-failover [--forcefence] [--forceactive] <serviceId> <serviceId>]
+    [-getServiceState <serviceId>]
+    [-checkHealth <serviceId>]
+    [-help <command>]
+---
+
+  This guide describes high-level uses of each of these subcommands. For
+  specific usage information of each subcommand, you should run
+  "hdfs haadmin -help <command>".
+
+  * <<transitionToActive>> and <<transitionToStandby>> - transition the state of
+    the given NameNode to Active or Standby
+
+    These subcommands cause a given NameNode to transition to the Active or
+    Standby state, respectively. <<These commands do not attempt to perform any
+    fencing, and thus should rarely be used.>> Instead, one should almost always
+    prefer to use the "<hdfs haadmin -failover>" subcommand.
+
+  * <<failover>> - initiate a failover between two NameNodes
+
+    This subcommand causes a failover from the first provided NameNode to the
+    second. If the first NameNode is in the Standby state, this command simply
+    transitions the second to the Active state without error. If the first
+    NameNode is in the Active state, an attempt will be made to gracefully
+    transition it to the Standby state. If this fails, the fencing methods (as
+    configured by <<dfs.ha.fencing.methods>>) will be attempted in order until
+    one succeeds. Only after this process will the second NameNode be
+    transitioned to the Active state. If no fencing method succeeds, the second
+    NameNode will not be transitioned to the Active state, and an error will be
+    returned.
+
+  * <<getServiceState>> - determine whether the given NameNode is Active or
+    Standby
+
+    Connect to the provided NameNode to determine its current state, printing
+    either "standby" or "active" to STDOUT appropriately.
This subcommand might be + used by cron jobs or monitoring scripts which need to behave differently based + on whether the NameNode is currently Active or Standby. + + * <> - check the health of the given NameNode + + Connect to the provided NameNode to check its health. The NameNode is capable + of performing some diagnostics on itself, including checking if internal + services are running as expected. This command will return 0 if the NameNode is + healthy, non-zero otherwise. One might use this command for monitoring + purposes. + + <> This is not yet implemented, and at present will always return + success, unless the given NameNode is completely down. diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index df7afbb05f8..9c48b080d49 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -53,6 +53,7 @@

        + From 7f0a99f6e63fcbc4e6971b719d235af1cf5cb514 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 7 Feb 2012 01:39:14 +0000 Subject: [PATCH 127/177] HDFS-2794. Active NN may purge edit log files before standby NN has a chance to read them. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1241317 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 3 ++ .../namenode/NNStorageRetentionManager.java | 19 ++++++++-- .../src/main/resources/hdfs-default.xml | 13 +++++++ .../TestNNStorageRetentionFunctional.java | 1 + .../TestNNStorageRetentionManager.java | 35 +++++++++++++++++-- 6 files changed, 69 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 10706d0abda..baeffd7bf34 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -176,3 +176,5 @@ HDFS-2752. HA: exit if multiple shared dirs are configured. (eli) HDFS-2894. HA: automatically determine the nameservice Id if only one nameservice is configured. (eli) HDFS-2733. Document HA configuration and CLI. (atm) + +HDFS-2794. Active NN may purge edit log files before standby NN has a chance to read them (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index b655bbddaad..2843d8d4074 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -144,6 +144,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final boolean DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT = true; public static final String DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY = "dfs.namenode.num.checkpoints.retained"; public static final int DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT = 2; + public static final String DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY = "dfs.namenode.num.extra.edits.retained"; + public static final int DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_DEFAULT = 1000000; //1M + public static final String DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY = "dfs.namenode.edits.dir.minimum"; public static final int DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT = 1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java index fe651001aa3..fe75247b8e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NNStorageRetentionManager.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFil import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile; import org.apache.hadoop.hdfs.util.MD5FileUtils; +import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -46,6 +47,7 @@ import com.google.common.collect.Sets; public class NNStorageRetentionManager { private final int numCheckpointsToRetain; 
+ private final long numExtraEditsToRetain; private static final Log LOG = LogFactory.getLog( NNStorageRetentionManager.class); private final NNStorage storage; @@ -60,6 +62,15 @@ public class NNStorageRetentionManager { this.numCheckpointsToRetain = conf.getInt( DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_DEFAULT); + this.numExtraEditsToRetain = conf.getLong( + DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, + DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_DEFAULT); + Preconditions.checkArgument(numCheckpointsToRetain > 0, + "Must retain at least one checkpoint"); + Preconditions.checkArgument(numExtraEditsToRetain >= 0, + DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY + + " must not be negative"); + this.storage = storage; this.editLog = editLog; this.purger = purger; @@ -79,8 +90,12 @@ public class NNStorageRetentionManager { purgeCheckpointsOlderThan(inspector, minImageTxId); // If fsimage_N is the image we want to keep, then we need to keep // all txns > N. We can remove anything < N+1, since fsimage_N - // reflects the state up to and including N. - editLog.purgeLogsOlderThan(minImageTxId + 1); + // reflects the state up to and including N. However, we also + // provide a "cushion" of older txns that we keep, which is + // handy for HA, where a remote node may not have as many + // new images. + long purgeLogsFrom = Math.max(0, minImageTxId + 1 - numExtraEditsToRetain); + editLog.purgeLogsOlderThan(purgeLogsFrom); } private void purgeCheckpointsOlderThan( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 9fa8e26d4d2..d84f5da65e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -637,6 +637,19 @@ + + dfs.namenode.num.extra.edits.retained + 1000000 + The number of extra transactions which should be retained + beyond what is minimally necessary for a NN restart. This can be useful for + audit purposes or for an HA setup where a remote Standby Node may have + been offline for some time and need to have a longer backlog of retained + edits in order to start again. + Typically each edit is on the order of a few hundred bytes, so the default + of 1 million edits should be on the order of hundreds of MBs or low GBs. 
+ + + dfs.namenode.delegation.key.update-interval 86400000 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java index aad8d7dc0a2..e7a9cc1d49a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionFunctional.java @@ -61,6 +61,7 @@ public class TestNNStorageRetentionFunctional { throws IOException { MiniDFSCluster cluster = null; Configuration conf = new HdfsConfiguration(); + conf.setLong(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, 0); File sd0 = new File(TEST_ROOT_DIR, "nn0"); File sd1 = new File(TEST_ROOT_DIR, "nn1"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java index aadca5cc20d..6ff91f41a28 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.Set; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.FileJournalManager.EditLogFile; import org.apache.hadoop.hdfs.server.namenode.FSImageStorageInspector.FSImageFile; @@ -33,6 +34,7 @@ import static org.apache.hadoop.hdfs.server.namenode.NNStorage.getImageFileName; import org.apache.hadoop.hdfs.server.namenode.NNStorageRetentionManager.StoragePurger; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentCaptor; import org.mockito.Mockito; @@ -46,6 +48,17 @@ import com.google.common.collect.Sets; public class TestNNStorageRetentionManager { + Configuration conf = new Configuration(); + + /** + * For the purpose of this test, purge as many edits as we can + * with no extra "safety cushion" + */ + @Before + public void setNoExtraEditRetention() { + conf.setLong(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, 0); + } + /** * Test the "easy case" where we have more images in the * directory than we need to keep. 
Should purge the @@ -163,9 +176,27 @@ public class TestNNStorageRetentionManager { runTest(tc); } - private void runTest(TestCaseDescription tc) throws IOException { - Configuration conf = new Configuration(); + @Test + public void testRetainExtraLogs() throws IOException { + conf.setLong(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, + 50); + TestCaseDescription tc = new TestCaseDescription(); + tc.addRoot("/foo1", NameNodeDirType.IMAGE); + tc.addRoot("/foo2", NameNodeDirType.EDITS); + tc.addImage("/foo1/current/" + getImageFileName(100), true); + tc.addImage("/foo1/current/" + getImageFileName(200), true); + tc.addImage("/foo1/current/" + getImageFileName(300), false); + tc.addImage("/foo1/current/" + getImageFileName(400), false); + tc.addLog("/foo2/current/" + getFinalizedEditsFileName(101, 200), true); + // Since we need 50 extra edits, *do* retain the 201-300 segment + tc.addLog("/foo2/current/" + getFinalizedEditsFileName(201, 300), false); + tc.addLog("/foo2/current/" + getFinalizedEditsFileName(301, 400), false); + tc.addLog("/foo2/current/" + getInProgressEditsFileName(401), false); + runTest(tc); + } + + private void runTest(TestCaseDescription tc) throws IOException { StoragePurger mockPurger = Mockito.mock(NNStorageRetentionManager.StoragePurger.class); ArgumentCaptor imagesPurgedCaptor = From acacde55e6a4488cd749eba630ff2e68c4dc5c63 Mon Sep 17 00:00:00 2001 From: Jitendra Nath Pandey Date: Tue, 7 Feb 2012 19:29:39 +0000 Subject: [PATCH 128/177] HDFS-2901. Improvements for SBN web UI - not show under-replicated/missing blocks. Contributed by Brandon Li. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1241568 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../server/namenode/NamenodeJspHelper.java | 18 +++++++++++++----- .../hdfs/server/namenode/ha/TestHAWebUI.java | 7 +++++-- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index baeffd7bf34..8b10741ed4e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -178,3 +178,5 @@ HDFS-2894. HA: automatically determine the nameservice Id if only one nameservic HDFS-2733. Document HA configuration and CLI. (atm) HDFS-2794. Active NN may purge edit log files before standby NN has a chance to read them (todd) + +HDFS-2901. Improvements for SBN web UI - not show under-replicated/missing blocks. 
(Brandon Li via jitendra) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java index 496423d4a67..6f81b940751 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java @@ -36,6 +36,7 @@ import javax.servlet.http.HttpServletResponse; import javax.servlet.jsp.JspWriter; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.Block; @@ -308,7 +309,16 @@ class NamenodeJspHelper { long bpUsed = fsnStats[6]; float percentBpUsed = DFSUtil.getPercentUsed(bpUsed, total); - + + // don't show under-replicated/missing blocks or corrupt files for SBN + // since the standby namenode doesn't compute replication queues + String underReplicatedBlocks = ""; + if (nn.getServiceState() == HAServiceState.ACTIVE) { + underReplicatedBlocks = new String(rowTxt() + + colTxt("Excludes missing blocks.") + + "Number of Under-Replicated Blocks" + colTxt() + ":" + colTxt() + + fsn.getBlockManager().getUnderReplicatedNotMissingBlocks()); + } out.print("
        \n" + rowTxt() + colTxt() + "Configured Capacity" + colTxt() + ":" + colTxt() + StringUtils.byteDesc(total) + rowTxt() + colTxt() + "DFS Used" @@ -343,10 +353,8 @@ class NamenodeJspHelper { + rowTxt() + colTxt() + "" + "Decommissioning Nodes " - + colTxt() + ":" + colTxt() + decommissioning.size() - + rowTxt() + colTxt("Excludes missing blocks.") - + "Number of Under-Replicated Blocks" + colTxt() + ":" + colTxt() - + fsn.getBlockManager().getUnderReplicatedNotMissingBlocks() + + colTxt() + ":" + colTxt() + decommissioning.size() + + underReplicatedBlocks + "

        \n"); if (live.isEmpty() && dead.isEmpty()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAWebUI.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAWebUI.java index ccb4f5b5cdb..be014301179 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAWebUI.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAWebUI.java @@ -32,11 +32,11 @@ public class TestHAWebUI { /** * Tests that the web UI of the name node provides a link to browse the file - * system only in active state + * system and summary of under-replicated blocks only in active state * */ @Test - public void testLinkToBrowseFilesystem() throws Exception { + public void testLinkAndClusterSummary() throws Exception { Configuration conf = new Configuration(); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) @@ -50,18 +50,21 @@ public class TestHAWebUI { + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + "/dfshealth.jsp")); assertTrue(pageContents.contains("Browse the filesystem")); + assertTrue(pageContents.contains("Number of Under-Replicated Blocks")); cluster.transitionToStandby(0); pageContents = DFSTestUtil.urlGet(new URL("http://localhost:" + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + "/dfshealth.jsp")); assertFalse(pageContents.contains("Browse the filesystem")); + assertFalse(pageContents.contains("Number of Under-Replicated Blocks")); cluster.transitionToActive(0); pageContents = DFSTestUtil.urlGet(new URL("http://localhost:" + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + "/dfshealth.jsp")); assertTrue(pageContents.contains("Browse the filesystem")); + assertTrue(pageContents.contains("Number of Under-Replicated Blocks")); } finally { cluster.shutdown(); From b9e74da41b750ff93f2524da09f06ded1a7bd6e2 Mon Sep 17 00:00:00 2001 From: Jitendra Nath Pandey Date: Wed, 8 Feb 2012 03:17:09 +0000 Subject: [PATCH 129/177] HDFS-2905. HA: Standby NN NPE when shared edits dir is deleted. Contributed by Bikas Saha. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1241757 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../server/namenode/FileJournalManager.java | 21 +++++++++++++++---- .../hdfs/server/namenode/FSImageTestUtil.java | 2 +- .../namenode/TestFileJournalManager.java | 9 ++++++++ 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 8b10741ed4e..36c162482b0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -180,3 +180,5 @@ HDFS-2733. Document HA configuration and CLI. (atm) HDFS-2794. Active NN may purge edit log files before standby NN has a chance to read them (todd) HDFS-2901. Improvements for SBN web UI - not show under-replicated/missing blocks. (Brandon Li via jitendra) + +HDFS-2905. HA: Standby NN NPE when shared edits dir is deleted. 
(Bikas Saha via jitendra) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index 90bf1a77def..1eca2797b44 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -135,8 +135,7 @@ class FileJournalManager implements JournalManager { */ List getRemoteEditLogs(long firstTxId) throws IOException { File currentDir = sd.getCurrentDir(); - List allLogFiles = matchEditLogs( - FileUtil.listFiles(currentDir)); + List allLogFiles = matchEditLogs(currentDir); List ret = Lists.newArrayListWithCapacity( allLogFiles.size()); @@ -155,6 +154,20 @@ class FileJournalManager implements JournalManager { return ret; } + /** + * returns matching edit logs via the log directory. Simple helper function + * that lists the files in the logDir and calls matchEditLogs(File[]) + * + * @param logDir + * directory to match edit logs in + * @return matched edit logs + * @throws IOException + * IOException thrown for invalid logDir + */ + static List matchEditLogs(File logDir) throws IOException { + return matchEditLogs(FileUtil.listFiles(logDir)); + } + static List matchEditLogs(File[] filesInStorage) { List ret = Lists.newArrayList(); for (File f : filesInStorage) { @@ -278,7 +291,7 @@ class FileJournalManager implements JournalManager { synchronized public void recoverUnfinalizedSegments() throws IOException { File currentDir = sd.getCurrentDir(); LOG.info("Recovering unfinalized segments in " + currentDir); - List allLogFiles = matchEditLogs(currentDir.listFiles()); + List allLogFiles = matchEditLogs(currentDir); for (EditLogFile elf : allLogFiles) { if (elf.getFile().equals(currentInProgress)) { @@ -318,7 +331,7 @@ class FileJournalManager implements JournalManager { private List getLogFiles(long fromTxId) throws IOException { File currentDir = sd.getCurrentDir(); - List allLogFiles = matchEditLogs(currentDir.listFiles()); + List allLogFiles = matchEditLogs(currentDir); List logFiles = Lists.newArrayList(); for (EditLogFile elf : allLogFiles) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java index f0b8a6d2b30..665e088cb80 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/FSImageTestUtil.java @@ -440,7 +440,7 @@ public abstract class FSImageTestUtil { throws IOException { File currentDir = sd.getCurrentDir(); List foundEditLogs - = Lists.newArrayList(FileJournalManager.matchEditLogs(currentDir.listFiles())); + = Lists.newArrayList(FileJournalManager.matchEditLogs(currentDir)); return Collections.max(foundEditLogs, EditLogFile.COMPARE_BY_START_TXID); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java index b862727b0e6..def29365776 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java @@ -315,6 +315,15 @@ public class TestFileJournalManager { "", getLogsAsString(fjm, 9999)); } + /** + * tests that passing an invalid dir to matchEditLogs throws IOException + */ + @Test(expected = IOException.class) + public void testMatchEditLogInvalidDirThrowsIOException() throws IOException { + File badDir = new File("does not exist"); + FileJournalManager.matchEditLogs(badDir); + } + /** * Make sure that we starting reading the correct op when we request a stream * with a txid in the middle of an edit log file. From da10820fe558ab481ec9cb74617b1f2ee85fd82a Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 8 Feb 2012 17:57:36 +0000 Subject: [PATCH 130/177] HADOOP-8038. Add 'ipc.client.connect.max.retries.on.timeouts' entry in core-default.xml file. Contributed by Uma Maheswara Rao G. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1242017 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt | 3 +++ .../hadoop-common/src/main/resources/core-default.xml | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 2170cd2a69a..d2e6b1725ea 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -42,3 +42,6 @@ active during failover. (eli) HADOOP-7991. HA: the FailoverController should check the standby is ready before failing over. (eli) + +HADOOP-8038. Add 'ipc.client.connect.max.retries.on.timeouts' entry in +core-default.xml file. (Uma Maheswara Rao G via atm) diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index c284faf9e6a..8e08568b5a5 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -480,6 +480,14 @@
        + + ipc.client.connect.max.retries.on.timeouts + 45 + Indicates the number of retries a client will make on socket timeout + to establish a server connection. + + + ipc.server.listen.queue.size 128 From e918b91e23985fa1bb353c54a2e733f8ba6dbe49 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 9 Feb 2012 06:12:01 +0000 Subject: [PATCH 131/177] HDFS-2579. Starting delegation token manager during safemode fails. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1242225 13f79535-47bb-0310-9956-ffa450edef68 --- .../AbstractDelegationTokenSecretManager.java | 24 +++- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 2 + .../DelegationTokenSecretManager.java | 14 +- .../namenode/EditLogFileOutputStream.java | 5 + .../hdfs/server/namenode/FSNamesystem.java | 64 ++++++--- .../hadoop/fs/TestResolveHdfsSymlink.java | 5 +- .../fs/viewfs/TestViewFileSystemHdfs.java | 7 +- .../hadoop/fs/viewfs/TestViewFsHdfs.java | 6 +- .../apache/hadoop/hdfs/MiniDFSCluster.java | 4 + .../hdfs/security/TestDelegationToken.java | 56 +++++++- .../TestDelegationTokenForProxyUser.java | 4 +- .../namenode/OfflineEditsViewerHelper.java | 3 +- .../TestCheckPointForSecurityTokens.java | 7 +- .../namenode/TestSecurityTokenEditLog.java | 5 +- .../namenode/ha/TestHAStateTransitions.java | 126 +++++++++++++++++- 16 files changed, 295 insertions(+), 39 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java index 3c2e666a39e..11df9811b29 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java @@ -40,6 +40,8 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.util.Daemon; +import com.google.common.base.Preconditions; + @InterfaceAudience.LimitedPrivate({"HDFS", "MapReduce"}) @InterfaceStability.Evolving public abstract @@ -84,6 +86,12 @@ extends AbstractDelegationTokenIdentifier> private Thread tokenRemoverThread; protected volatile boolean running; + /** + * If the delegation token update thread holds this lock, it will + * not get interrupted. 
+ */ + protected Object noInterruptsLock = new Object(); + public AbstractDelegationTokenSecretManager(long delegationKeyUpdateInterval, long delegationTokenMaxLifetime, long delegationTokenRenewInterval, long delegationTokenRemoverScanInterval) { @@ -95,6 +103,7 @@ extends AbstractDelegationTokenIdentifier> /** should be called before this object is used */ public void startThreads() throws IOException { + Preconditions.checkState(!running); updateCurrentKey(); synchronized (this) { running = true; @@ -354,12 +363,21 @@ extends AbstractDelegationTokenIdentifier> } } - public synchronized void stopThreads() { + public void stopThreads() { if (LOG.isDebugEnabled()) LOG.debug("Stopping expired delegation token remover thread"); running = false; + if (tokenRemoverThread != null) { - tokenRemoverThread.interrupt(); + synchronized (noInterruptsLock) { + tokenRemoverThread.interrupt(); + } + try { + tokenRemoverThread.join(); + } catch (InterruptedException e) { + throw new RuntimeException( + "Unable to join on token removal thread", e); + } } } @@ -395,7 +413,7 @@ extends AbstractDelegationTokenIdentifier> lastTokenCacheCleanup = now; } try { - Thread.sleep(5000); // 5 seconds + Thread.sleep(Math.min(5000, keyUpdateInterval)); // 5 seconds } catch (InterruptedException ie) { LOG .error("InterruptedExcpetion recieved for ExpiredTokenRemover thread " diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 36c162482b0..628c0dcbf2f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -182,3 +182,5 @@ HDFS-2794. Active NN may purge edit log files before standby NN has a chance to HDFS-2901. Improvements for SBN web UI - not show under-replicated/missing blocks. (Brandon Li via jitendra) HDFS-2905. HA: Standby NN NPE when shared edits dir is deleted. (Bikas Saha via jitendra) + +HDFS-2579. Starting delegation token manager during safemode fails. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 2843d8d4074..b655b6ebd45 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -166,6 +166,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final long DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT = 24*60*60*1000; // 1 day public static final String DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY = "dfs.namenode.delegation.token.max-lifetime"; public static final long DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT = 7*24*60*60*1000; // 7 days + public static final String DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY = "dfs.namenode.delegation.token.always-use"; // for tests + public static final boolean DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT = false; //Filesystem limit keys public static final String DFS_NAMENODE_MAX_COMPONENT_LENGTH_KEY = "dfs.namenode.fs-limits.max-component-length"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java index 8f8ef8e0676..af083108f03 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSecretManager.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hdfs.security.token.delegation; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import java.io.InterruptedIOException; import java.net.InetSocketAddress; import java.util.Iterator; @@ -282,7 +283,18 @@ public class DelegationTokenSecretManager @Override //AbstractDelegationTokenManager protected void logUpdateMasterKey(DelegationKey key) throws IOException { - namesystem.logUpdateMasterKey(key); + synchronized (noInterruptsLock) { + // The edit logging code will fail catastrophically if it + // is interrupted during a logSync, since the interrupt + // closes the edit log files. Doing this inside the + // above lock and then checking interruption status + // prevents this bug. + if (Thread.interrupted()) { + throw new InterruptedIOException( + "Interrupted before updating master key"); + } + namesystem.logUpdateMasterKey(key); + } } /** A utility method for creating credentials. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java index 938c3e085f8..f7e1f01250b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogFileOutputStream.java @@ -219,6 +219,11 @@ public class EditLogFileOutputStream extends EditLogOutputStream { File getFile() { return file; } + + @Override + public String toString() { + return "EditLogFileOutputStream(" + file + ")"; + } /** * @return true if this stream is currently open. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 13cb6536b0c..e42407a33c8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -32,6 +32,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_REQUIRED_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_MAX_OBJECTS_DEFAULT; @@ -269,6 +271,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private static final long DELEGATION_TOKEN_REMOVER_SCAN_INTERVAL = TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS); private DelegationTokenSecretManager dtSecretManager; + private boolean alwaysUseDelegationTokensForTests; + // // Stores the correct file name hierarchy @@ -447,13 +451,28 @@ public class FSNamesystem implements Namesystem, FSClusterStats, dir.imageLoadComplete(); } - void startSecretManager() throws IOException { + private void startSecretManager() { if (dtSecretManager != null) { - dtSecretManager.startThreads(); + try { + dtSecretManager.startThreads(); + } catch (IOException e) { + // Inability to start secret manager + // can't be recovered from. + throw new RuntimeException(e); + } } } - void stopSecretManager() { + private void startSecretManagerIfNecessary() { + boolean shouldRun = shouldUseDelegationTokens() && + !isInSafeMode() && getEditLog().isOpenForWrite(); + boolean running = dtSecretManager.isRunning(); + if (shouldRun && !running) { + startSecretManager(); + } + } + + private void stopSecretManager() { if (dtSecretManager != null) { dtSecretManager.stopThreads(); } @@ -539,9 +558,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, dir.fsImage.editLog.openForWrite(); } - if (UserGroupInformation.isSecurityEnabled()) { - startSecretManager(); - } if (haEnabled) { // Renew all of the leases before becoming active. 
// This is because, while we were in standby mode, @@ -550,11 +566,17 @@ public class FSNamesystem implements Namesystem, FSClusterStats, leaseManager.renewAllLeases(); } leaseManager.startMonitor(); + startSecretManagerIfNecessary(); } finally { writeUnlock(); } } + private boolean shouldUseDelegationTokens() { + return UserGroupInformation.isSecurityEnabled() || + alwaysUseDelegationTokensForTests; + } + /** * Stop services required in active state * @throws InterruptedException @@ -839,6 +861,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, this.standbyShouldCheckpoint = conf.getBoolean( DFS_HA_STANDBY_CHECKPOINTS_KEY, DFS_HA_STANDBY_CHECKPOINTS_DEFAULT); + + // For testing purposes, allow the DT secret manager to be started regardless + // of whether security is enabled. + alwaysUseDelegationTokensForTests = + conf.getBoolean(DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, + DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT); } /** @@ -3479,6 +3507,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, + nt.getNumOfLeaves() + " datanodes"); NameNode.stateChangeLog.info("STATE* UnderReplicatedBlocks has " + blockManager.numOfUnderReplicatedBlocks() + " blocks"); + + startSecretManagerIfNecessary(); } /** @@ -3956,6 +3986,10 @@ public class FSNamesystem implements Namesystem, FSClusterStats, void enterSafeMode(boolean resourcesLow) throws IOException { writeLock(); try { + // Stop the secret manager, since rolling the master key would + // try to write to the edit log + stopSecretManager(); + // Ensure that any concurrent operations have been fully synced // before entering safe mode. This ensures that the FSImage // is entirely stable on disk as soon as we're in safe mode. @@ -4805,16 +4839,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * @param key new delegation key. */ public void logUpdateMasterKey(DelegationKey key) throws IOException { - writeLock(); - try { - if (isInSafeMode()) { - throw new SafeModeException( - "Cannot log master key update in safe mode", safeMode); - } - getEditLog().logUpdateMasterKey(key); - } finally { - writeUnlock(); - } + + assert !isInSafeMode() : + "this should never be called while in safemode, since we stop " + + "the DT manager before entering safemode!"; + // No need to hold FSN lock since we don't access any internal + // structures, and this is stopped before the FSN shuts itself + // down, etc. 
+ getEditLog().logUpdateMasterKey(key); getEditLog().logSync(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestResolveHdfsSymlink.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestResolveHdfsSymlink.java index 17608ac1f7b..1d5def6b484 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestResolveHdfsSymlink.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestResolveHdfsSymlink.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Set; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -47,9 +48,11 @@ public class TestResolveHdfsSymlink { @BeforeClass public static void setUp() throws IOException { Configuration conf = new HdfsConfiguration(); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); cluster = new MiniDFSCluster.Builder(conf).build(); cluster.waitActive(); - NameNodeAdapter.getDtSecretManager(cluster.getNamesystem()).startThreads(); + } @AfterClass diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemHdfs.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemHdfs.java index f2b9a232372..7ad56c0e93c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemHdfs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemHdfs.java @@ -27,9 +27,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystemTestHelper; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.security.UserGroupInformation; import org.junit.After; import org.junit.AfterClass; @@ -52,14 +52,15 @@ public class TestViewFileSystemHdfs extends ViewFileSystemBaseTest { public static void clusterSetupAtBegining() throws IOException, LoginException, URISyntaxException { SupportsBlocks = true; + CONF.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); + cluster = new MiniDFSCluster.Builder(CONF).nnTopology( MiniDFSNNTopology.simpleFederatedTopology(2)) .numDataNodes(2) .build(); cluster.waitClusterUp(); - NameNodeAdapter.getDtSecretManager(cluster.getNamesystem(0)).startThreads(); - NameNodeAdapter.getDtSecretManager(cluster.getNamesystem(1)).startThreads(); fHdfs = cluster.getFileSystem(0); fHdfs2 = cluster.getFileSystem(1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsHdfs.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsHdfs.java index 4a60556a43f..0e94b4eb3d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsHdfs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFsHdfs.java @@ -26,9 +26,9 @@ import javax.security.auth.login.LoginException; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; import 
org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; @@ -51,9 +51,11 @@ public class TestViewFsHdfs extends ViewFsBaseTest { public static void clusterSetupAtBegining() throws IOException, LoginException, URISyntaxException { SupportsBlocks = true; + CONF.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); + cluster = new MiniDFSCluster.Builder(CONF).numDataNodes(2).build(); cluster.waitClusterUp(); - NameNodeAdapter.getDtSecretManager(cluster.getNamesystem()).startThreads(); fc = FileContext.getFileContext(cluster.getURI(0), CONF); defaultWorkingDirectory = fc.makeQualified( new Path("/user/" + UserGroupInformation.getCurrentUser().getShortUserName())); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 9f969477d15..e458213a1d0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -1827,6 +1827,10 @@ public class MiniDFSCluster { public void setLeasePeriod(long soft, long hard) { NameNodeAdapter.setLeasePeriod(getNamesystem(), soft, hard); } + + public void setWaitSafeMode(boolean wait) { + this.waitSafeMode = wait; + } /** * Returns the current set of datanodes diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java index 4d18e98d1db..c2aaf0615c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java @@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.security; +import static org.junit.Assert.*; + import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; @@ -32,12 +34,16 @@ import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; +import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; @@ -64,6 +70,7 @@ public class TestDelegationToken { config.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true); config.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_MAX_LIFETIME_KEY, 10000); config.setLong(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 5000); + 
config.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); config.set("hadoop.security.auth_to_local", "RULE:[2:$1@$0](JobTracker@.*FOO.COM)s/@.*//" + "DEFAULT"); FileSystem.setDefaultUri(config, "hdfs://localhost:" + "0"); @@ -71,7 +78,6 @@ public class TestDelegationToken { cluster.waitActive(); dtSecretManager = NameNodeAdapter.getDtSecretManager( cluster.getNamesystem()); - dtSecretManager.startThreads(); } @After @@ -269,5 +275,51 @@ public class TestDelegationToken { } }); } - + + /** + * Test that the delegation token secret manager only runs when the + * NN is out of safe mode. This is because the secret manager + * has to log to the edit log, which should not be written in + * safe mode. Regression test for HDFS-2579. + */ + @Test + public void testDTManagerInSafeMode() throws Exception { + cluster.startDataNodes(config, 1, true, StartupOption.REGULAR, null); + FileSystem fs = cluster.getFileSystem(); + for (int i = 0; i < 5; i++) { + DFSTestUtil.createFile(fs, new Path("/test-" + i), 100, (short)1, 1L); + } + cluster.getConfiguration(0).setInt( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY, 500); + cluster.getConfiguration(0).setInt( + DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 30000); + cluster.setWaitSafeMode(false); + cluster.restartNameNode(); + NameNode nn = cluster.getNameNode(); + assertTrue(nn.isInSafeMode()); + DelegationTokenSecretManager sm = + NameNodeAdapter.getDtSecretManager(nn.getNamesystem()); + assertFalse("Secret manager should not run in safe mode", sm.isRunning()); + + NameNodeAdapter.leaveSafeMode(nn, false); + assertTrue("Secret manager should start when safe mode is exited", + sm.isRunning()); + + LOG.info("========= entering safemode again"); + + NameNodeAdapter.enterSafeMode(nn, false); + assertFalse("Secret manager should stop again when safe mode " + + "is manually entered", sm.isRunning()); + + // Set the cluster to leave safemode quickly on its own. 
+ cluster.getConfiguration(0).setInt( + DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 0); + cluster.setWaitSafeMode(true); + cluster.restartNameNode(); + nn = cluster.getNameNode(); + sm = NameNodeAdapter.getDtSecretManager(nn.getNamesystem()); + + assertFalse(nn.isInSafeMode()); + assertTrue(sm.isRunning()); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationTokenForProxyUser.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationTokenForProxyUser.java index cdad31cc9b1..6837f65afce 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationTokenForProxyUser.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationTokenForProxyUser.java @@ -48,7 +48,6 @@ import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; -import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.web.resources.NamenodeWebHdfsMethods; import org.apache.hadoop.hdfs.web.WebHdfsFileSystem; import org.apache.hadoop.hdfs.web.WebHdfsTestUtil; @@ -114,11 +113,12 @@ public class TestDelegationTokenForProxyUser { DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 5000); config.setStrings(ProxyUsers.getProxySuperuserGroupConfKey(REAL_USER), "group1"); + config.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); configureSuperUserIPAddresses(config, REAL_USER); FileSystem.setDefaultUri(config, "hdfs://localhost:" + "0"); cluster = new MiniDFSCluster.Builder(config).build(); cluster.waitActive(); - NameNodeAdapter.getDtSecretManager(cluster.getNamesystem()).startThreads(); ProxyUsers.refreshSuperUserGroupsConfiguration(config); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java index e22fa29927a..392cc9dd913 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java @@ -108,10 +108,11 @@ public class OfflineEditsViewerHelper { // for security to work (fake JobTracker user) config.set("hadoop.security.auth_to_local", "RULE:[2:$1@$0](JobTracker@.*FOO.COM)s/@.*//" + "DEFAULT"); + config.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); cluster = new MiniDFSCluster.Builder(config).manageNameDfsDirs(false).build(); cluster.waitClusterUp(); - cluster.getNamesystem().getDelegationTokenSecretManager().startThreads(); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckPointForSecurityTokens.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckPointForSecurityTokens.java index 1ba527702b0..20d4c720dec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckPointForSecurityTokens.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckPointForSecurityTokens.java @@ 
-22,6 +22,7 @@ import junit.framework.Assert; import java.io.*; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -64,11 +65,12 @@ public class TestCheckPointForSecurityTokens { DistributedFileSystem fs = null; try { Configuration conf = new HdfsConfiguration(); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes).build(); cluster.waitActive(); fs = (DistributedFileSystem)(cluster.getFileSystem()); FSNamesystem namesystem = cluster.getNamesystem(); - namesystem.getDelegationTokenSecretManager().startThreads(); String renewer = UserGroupInformation.getLoginUser().getUserName(); Token token1 = namesystem .getDelegationToken(new Text(renewer)); @@ -122,7 +124,6 @@ public class TestCheckPointForSecurityTokens { } namesystem = cluster.getNamesystem(); - namesystem.getDelegationTokenSecretManager().startThreads(); Token token3 = namesystem .getDelegationToken(new Text(renewer)); Token token4 = namesystem @@ -136,7 +137,6 @@ public class TestCheckPointForSecurityTokens { cluster.waitActive(); namesystem = cluster.getNamesystem(); - namesystem.getDelegationTokenSecretManager().startThreads(); Token token5 = namesystem .getDelegationToken(new Text(renewer)); @@ -159,7 +159,6 @@ public class TestCheckPointForSecurityTokens { cluster.waitActive(); namesystem = cluster.getNamesystem(); - namesystem.getDelegationTokenSecretManager().startThreads(); try { renewToken(token1); cancelToken(token1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java index c0012be5baa..596df8d76b1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSecurityTokenEditLog.java @@ -24,6 +24,7 @@ import java.util.Iterator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; @@ -91,6 +92,9 @@ public class TestSecurityTokenEditLog extends TestCase { FileSystem fileSys = null; try { + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATA_NODES).build(); cluster.waitActive(); fileSys = cluster.getFileSystem(); @@ -106,7 +110,6 @@ public class TestSecurityTokenEditLog extends TestCase { // set small size of flush buffer editLog.setOutputBufferCapacity(2048); - namesystem.getDelegationTokenSecretManager().startThreads(); // Create threads and make them run transactions concurrently. 
Thread threadId[] = new Thread[NUM_THREADS]; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index 97a88d1e739..2595621641a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -317,6 +317,9 @@ public class TestHAStateTransitions { public void testDelegationTokensAfterFailover() throws IOException, URISyntaxException { Configuration conf = new Configuration(); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) .numDataNodes(0) @@ -326,7 +329,6 @@ public class TestHAStateTransitions { cluster.transitionToActive(0); NameNode nn1 = cluster.getNameNode(0); NameNode nn2 = cluster.getNameNode(1); - NameNodeAdapter.getDtSecretManager(nn1.getNamesystem()).startThreads(); String renewer = UserGroupInformation.getLoginUser().getUserName(); Token token = nn1.getRpcServer() @@ -335,8 +337,6 @@ public class TestHAStateTransitions { LOG.info("Failing over to NN 1"); cluster.transitionToStandby(0); cluster.transitionToActive(1); - // Need to explicitly start threads because security is not enabled. - NameNodeAdapter.getDtSecretManager(nn2.getNamesystem()).startThreads(); nn2.getRpcServer().renewDelegationToken(token); nn2.getRpcServer().cancelDelegationToken(token); @@ -421,4 +421,124 @@ public class TestHAStateTransitions { EditLogFileOutputStream.writeHeader(out); } } + + + /** + * The secret manager needs to start/stop - the invariant should be that + * the secret manager runs if and only if the NN is active and not in + * safe mode. As a state diagram, we need to test all of the following + * transitions to make sure the secret manager is started when we transition + * into state 4, but none of the others. + *
+   *         SafeMode     Not SafeMode
+   * Standby   1 <------> 2
+   *           ^          ^
+   *           |          |
+   *           v          v
+   * Active    3 <------> 4
+   *
        + */ + @Test(timeout=60000) + public void testSecretManagerState() throws Exception { + Configuration conf = new Configuration(); + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); + conf.setInt( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY, 50); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(1) + .waitSafeMode(false) + .build(); + try { + cluster.transitionToActive(0); + DFSTestUtil.createFile(cluster.getFileSystem(0), + TEST_FILE_PATH, 6000, (short)1, 1L); + + cluster.getConfiguration(0).setInt( + DFSConfigKeys.DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, 60000); + + cluster.restartNameNode(0); + NameNode nn = cluster.getNameNode(0); + + banner("Started in state 1."); + assertTrue(nn.isStandbyState()); + assertTrue(nn.isInSafeMode()); + assertFalse(isDTRunning(nn)); + + banner("Transition 1->2. Should not start secret manager"); + NameNodeAdapter.leaveSafeMode(nn, false); + assertTrue(nn.isStandbyState()); + assertFalse(nn.isInSafeMode()); + assertFalse(isDTRunning(nn)); + + banner("Transition 2->1. Should not start secret manager."); + NameNodeAdapter.enterSafeMode(nn, false); + assertTrue(nn.isStandbyState()); + assertTrue(nn.isInSafeMode()); + assertFalse(isDTRunning(nn)); + + banner("Transition 1->3. Should not start secret manager."); + nn.getRpcServer().transitionToActive(); + assertFalse(nn.isStandbyState()); + assertTrue(nn.isInSafeMode()); + assertFalse(isDTRunning(nn)); + + banner("Transition 3->1. Should not start secret manager."); + nn.getRpcServer().transitionToStandby(); + assertTrue(nn.isStandbyState()); + assertTrue(nn.isInSafeMode()); + assertFalse(isDTRunning(nn)); + + banner("Transition 1->3->4. Should start secret manager."); + nn.getRpcServer().transitionToActive(); + NameNodeAdapter.leaveSafeMode(nn, false); + assertFalse(nn.isStandbyState()); + assertFalse(nn.isInSafeMode()); + assertTrue(isDTRunning(nn)); + + banner("Transition 4->3. Should stop secret manager"); + NameNodeAdapter.enterSafeMode(nn, false); + assertFalse(nn.isStandbyState()); + assertTrue(nn.isInSafeMode()); + assertFalse(isDTRunning(nn)); + + banner("Transition 3->4. Should start secret manager"); + NameNodeAdapter.leaveSafeMode(nn, false); + assertFalse(nn.isStandbyState()); + assertFalse(nn.isInSafeMode()); + assertTrue(isDTRunning(nn)); + + for (int i = 0; i < 20; i++) { + // Loop the last check to suss out races. + banner("Transition 4->2. Should stop secret manager."); + nn.getRpcServer().transitionToStandby(); + assertTrue(nn.isStandbyState()); + assertFalse(nn.isInSafeMode()); + assertFalse(isDTRunning(nn)); + + banner("Transition 2->4. Should start secret manager"); + nn.getRpcServer().transitionToActive(); + assertFalse(nn.isStandbyState()); + assertFalse(nn.isInSafeMode()); + assertTrue(isDTRunning(nn)); + } + } finally { + cluster.shutdown(); + } + } + + private boolean isDTRunning(NameNode nn) { + return NameNodeAdapter.getDtSecretManager(nn.getNamesystem()).isRunning(); + } + + /** + * Print a big banner in the test log to make debug easier. 
+ */ + static void banner(String string) { + LOG.info("\n\n\n\n================================================\n" + + string + "\n" + + "==================================================\n\n"); + } } From bad0a2a4a14efe2f1d19015d3d59aa3c75faa2db Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 9 Feb 2012 17:39:50 +0000 Subject: [PATCH 132/177] HDFS-2510. Add HA-related metrics. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1242410 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/FSNamesystem.java | 21 +++- .../server/namenode/ha/EditLogTailer.java | 7 ++ .../server/namenode/ha/TestHAMetrics.java | 109 ++++++++++++++++++ 4 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 628c0dcbf2f..c9688ae64c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -184,3 +184,5 @@ HDFS-2901. Improvements for SBN web UI - not show under-replicated/missing block HDFS-2905. HA: Standby NN NPE when shared edits dir is deleted. (Bikas Saha via jitendra) HDFS-2579. Starting delegation token manager during safemode fails. (todd) + +HDFS-2510. Add HA-related metrics. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index e42407a33c8..453511d34e6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -118,6 +118,7 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; @@ -4241,16 +4242,34 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return blockManager.getExcessBlocksCount(); } + // HA-only metric @Metric public long getPostponedMisreplicatedBlocks() { return blockManager.getPostponedMisreplicatedBlocksCount(); } - + + // HA-only metric @Metric public int getPendingDataNodeMessageCount() { return blockManager.getPendingDataNodeMessageCount(); } + // HA-only metric + @Metric + public String getHAState() { + return haContext.getState().toString(); + } + + // HA-only metric + @Metric + public long getMillisSinceLastLoadedEdits() { + if (isInStandbyState() && editLogTailer != null) { + return now() - editLogTailer.getLastLoadTimestamp(); + } else { + return 0; + } + } + @Metric public int getBlockCapacity() { return blockManager.getCapacity(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index 8a2312e08aa..219dd80f151 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -239,6 +239,13 @@ public class EditLogTailer { } } + /** + * @return timestamp (in msec) of when we last loaded a non-zero number of edits. + */ + public long getLastLoadTimestamp() { + return lastLoadTimestamp; + } + /** * @return true if the configured log roll period has elapsed. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java new file mode 100644 index 00000000000..cc85c83b3d7 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAMetrics.java @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.io.IOUtils; +import org.junit.Test; + +/** + * Make sure HA-related metrics are updated and reported appropriately. 
+ */ +public class TestHAMetrics { + + private static final Log LOG = LogFactory.getLog(TestHAMetrics.class); + + @Test + public void testHAMetrics() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, Integer.MAX_VALUE); + + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1) + .build(); + FileSystem fs = null; + try { + cluster.waitActive(); + + FSNamesystem nn0 = cluster.getNamesystem(0); + FSNamesystem nn1 = cluster.getNamesystem(1); + + assertEquals(nn0.getHAState(), "standby"); + assertTrue(0 < nn0.getMillisSinceLastLoadedEdits()); + assertEquals(nn1.getHAState(), "standby"); + assertTrue(0 < nn1.getMillisSinceLastLoadedEdits()); + + cluster.transitionToActive(0); + + assertEquals("active", nn0.getHAState()); + assertEquals(0, nn0.getMillisSinceLastLoadedEdits()); + assertEquals("standby", nn1.getHAState()); + assertTrue(0 < nn1.getMillisSinceLastLoadedEdits()); + + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + assertEquals("standby", nn0.getHAState()); + assertTrue(0 < nn0.getMillisSinceLastLoadedEdits()); + assertEquals("active", nn1.getHAState()); + assertEquals(0, nn1.getMillisSinceLastLoadedEdits()); + + Thread.sleep(2000); // make sure standby gets a little out-of-date + assertTrue(2000 <= nn0.getMillisSinceLastLoadedEdits()); + + assertEquals(0, nn0.getPendingDataNodeMessageCount()); + assertEquals(0, nn1.getPendingDataNodeMessageCount()); + + fs = HATestUtil.configureFailoverFs(cluster, conf); + DFSTestUtil.createFile(fs, new Path("/foo"), + 10, (short)1, 1L); + + assertTrue(0 < nn0.getPendingDataNodeMessageCount()); + assertEquals(0, nn1.getPendingDataNodeMessageCount()); + long millisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits(); + + HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(1), + cluster.getNameNode(0)); + + assertEquals(0, nn0.getPendingDataNodeMessageCount()); + assertEquals(0, nn1.getPendingDataNodeMessageCount()); + long newMillisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits(); + // Since we just waited for the standby to catch up, the time since we + // last loaded edits should be very low. + assertTrue("expected " + millisSinceLastLoadedEdits + " > " + + newMillisSinceLastLoadedEdits, + millisSinceLastLoadedEdits > newMillisSinceLastLoadedEdits); + } finally { + IOUtils.cleanup(LOG, fs); + cluster.shutdown(); + } + } +} From 5e26de982b1ab68fffeb897fef4c97458ad46708 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 9 Feb 2012 18:22:02 +0000 Subject: [PATCH 133/177] HDFS-2924. Standby checkpointing fails to authenticate in secure cluster. Contributed by Todd Lipcon. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1242439 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/security/SecurityUtil.java | 24 ++++++++++++++++++- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../server/namenode/SecondaryNameNode.java | 15 ++---------- .../server/namenode/ha/EditLogTailer.java | 13 ++++++++++ .../namenode/ha/StandbyCheckpointer.java | 17 +++---------- 5 files changed, 43 insertions(+), 28 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java index 26858874649..43132d263af 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java @@ -23,6 +23,7 @@ import java.net.URI; import java.net.URL; import java.net.UnknownHostException; import java.security.AccessController; +import java.security.PrivilegedAction; import java.util.Arrays; import java.util.List; import java.util.ServiceLoader; @@ -448,6 +449,27 @@ public class SecurityUtil { return buildTokenService(NetUtils.createSocketAddr(uri.getAuthority())); } + /** + * Perform the given action as the daemon's login user. If the login + * user cannot be determined, this will log a FATAL error and exit + * the whole JVM. + */ + public static T doAsLoginUserOrFatal(PrivilegedAction action) { + if (UserGroupInformation.isSecurityEnabled()) { + UserGroupInformation ugi = null; + try { + ugi = UserGroupInformation.getLoginUser(); + } catch (IOException e) { + LOG.fatal("Exception while getting login user", e); + e.printStackTrace(); + Runtime.getRuntime().exit(-1); + } + return ugi.doAs(action); + } else { + return action.run(); + } + } + /** * Resolves a host subject to the security requirements determined by * hadoop.security.token.service.use_ip. @@ -597,5 +619,5 @@ public class SecurityUtil { void setSearchDomains(String ... domains) { searchDomains = Arrays.asList(domains); } - } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index c9688ae64c0..f84ed7b63fd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -186,3 +186,5 @@ HDFS-2905. HA: Standby NN NPE when shared edits dir is deleted. (Bikas Saha via HDFS-2579. Starting delegation token manager during safemode fails. (todd) HDFS-2510. Add HA-related metrics. (atm) + +HDFS-2924. Standby checkpointing fails to authenticate in secure cluster. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index 12127064e26..61b533d3442 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -307,25 +307,14 @@ public class SecondaryNameNode implements Runnable { } public void run() { - if (UserGroupInformation.isSecurityEnabled()) { - UserGroupInformation ugi = null; - try { - ugi = UserGroupInformation.getLoginUser(); - } catch (IOException e) { - LOG.error("Exception while getting login user", e); - e.printStackTrace(); - Runtime.getRuntime().exit(-1); - } - ugi.doAs(new PrivilegedAction() { + SecurityUtil.doAsLoginUserOrFatal( + new PrivilegedAction() { @Override public Object run() { doWork(); return null; } }); - } else { - doWork(); - } } // // The main work loop diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java index 219dd80f151..780bad72e94 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/EditLogTailer.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import java.io.IOException; import java.net.InetSocketAddress; +import java.security.PrivilegedAction; import java.util.Collection; import org.apache.commons.logging.Log; @@ -40,6 +41,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.security.SecurityUtil; import static org.apache.hadoop.hdfs.server.common.Util.now; @@ -284,6 +286,17 @@ public class EditLogTailer { @Override public void run() { + SecurityUtil.doAsLoginUserOrFatal( + new PrivilegedAction() { + @Override + public Object run() { + doWork(); + return null; + } + }); + } + + private void doWork() { while (shouldRun) { try { // There's no point in triggering a log roll if the Standby hasn't diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java index 9c5a3e5cd2b..edfc53fb12c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.SaveNamespaceCancelledException; import org.apache.hadoop.hdfs.server.namenode.TransferFsImage; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import static org.apache.hadoop.hdfs.server.common.Util.now; @@ -212,26 +213,14 @@ public class StandbyCheckpointer { public void run() { // We have to make sure 
we're logged in as far as JAAS // is concerned, in order to use kerberized SSL properly. - // This code copied from SecondaryNameNode - TODO: refactor - // to a utility function. - if (UserGroupInformation.isSecurityEnabled()) { - UserGroupInformation ugi = null; - try { - ugi = UserGroupInformation.getLoginUser(); - } catch (IOException e) { - LOG.error("Exception while getting login user", e); - Runtime.getRuntime().exit(-1); - } - ugi.doAs(new PrivilegedAction() { + SecurityUtil.doAsLoginUserOrFatal( + new PrivilegedAction() { @Override public Object run() { doWork(); return null; } }); - } else { - doWork(); - } } /** From 1b4c990b61fa4527e6dd4e8bb7e10f1c11ad280f Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 9 Feb 2012 18:25:44 +0000 Subject: [PATCH 134/177] HADOOP-8041. Log a warning when a failover is first attempted. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1242441 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 2 ++ .../io/retry/RetryInvocationHandler.java | 27 +++++++++++++------ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index d2e6b1725ea..3f23987ef1f 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -45,3 +45,5 @@ ready before failing over. (eli) HADOOP-8038. Add 'ipc.client.connect.max.retries.on.timeouts' entry in core-default.xml file. (Uma Maheswara Rao G via atm) + +HADOOP-8041. Log a warning when a failover is first attempted (todd) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java index 28e88501d0a..3d702d9879d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java @@ -39,6 +39,7 @@ class RetryInvocationHandler implements RpcInvocationHandler { * The number of times the associated proxyProvider has ever been failed over. 
*/ private long proxyProviderFailoverCount = 0; + private volatile boolean hasMadeASuccessfulCall = false; private RetryPolicy defaultPolicy; private Map methodNameToPolicyMap; @@ -79,7 +80,9 @@ class RetryInvocationHandler implements RpcInvocationHandler { invocationAttemptFailoverCount = proxyProviderFailoverCount; } try { - return invokeMethod(method, args); + Object ret = invokeMethod(method, args); + hasMadeASuccessfulCall = true; + return ret; } catch (Exception e) { boolean isMethodIdempotent = proxyProvider.getInterface() .getMethod(method.getName(), method.getParameterTypes()) @@ -94,12 +97,20 @@ class RetryInvocationHandler implements RpcInvocationHandler { } return null; } else { // retry or failover - - if (action.action == RetryAction.RetryDecision.FAILOVER_AND_RETRY) { + // avoid logging the failover if this is the first call on this + // proxy object, and we successfully achieve the failover without + // any flip-flopping + boolean worthLogging = + !(invocationFailoverCount == 0 && !hasMadeASuccessfulCall); + worthLogging |= LOG.isDebugEnabled(); + if (action.action == RetryAction.RetryDecision.FAILOVER_AND_RETRY && + worthLogging) { String msg = "Exception while invoking " + method.getName() - + " of " + currentProxy.getClass() - + " after " + invocationFailoverCount + " fail over attempts." - + " Trying to fail over " + formatSleepMessage(action.delayMillis); + + " of class " + currentProxy.getClass().getSimpleName(); + if (invocationFailoverCount > 0) { + msg += " after " + invocationFailoverCount + " fail over attempts"; + } + msg += ". Trying to fail over " + formatSleepMessage(action.delayMillis); if (LOG.isDebugEnabled()) { LOG.debug(msg, e); } else { @@ -108,8 +119,8 @@ class RetryInvocationHandler implements RpcInvocationHandler { } else { if(LOG.isDebugEnabled()) { LOG.debug("Exception while invoking " + method.getName() - + " of " + currentProxy.getClass() + ". Retrying " + - formatSleepMessage(action.delayMillis), e); + + " of class " + currentProxy.getClass().getSimpleName() + + ". Retrying " + formatSleepMessage(action.delayMillis), e); } } From b5d02a63cbce062874e3b51816cf9ec962995dbd Mon Sep 17 00:00:00 2001 From: Jitendra Nath Pandey Date: Thu, 9 Feb 2012 21:08:17 +0000 Subject: [PATCH 135/177] HDFS-2915. HA: TestFailureOfSharedDir.testFailureOfSharedDir() has race condition. Contributed by Bikas Saha. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1242522 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 3 +++ .../namenode/ha/TestFailureOfSharedDir.java | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index f84ed7b63fd..eac0563d7f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -188,3 +188,6 @@ HDFS-2579. Starting delegation token manager during safemode fails. (todd) HDFS-2510. Add HA-related metrics. (atm) HDFS-2924. Standby checkpointing fails to authenticate in secure cluster. (todd) + +HDFS-2915. HA: TestFailureOfSharedDir.testFailureOfSharedDir() has race condition. 
(Bikas Saha via jitendra) + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java index 84d45c05914..71098d8aaa9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java @@ -129,6 +129,8 @@ public class TestFailureOfSharedDir { // The shared edits dir will automatically be marked required. MiniDFSCluster cluster = null; + int chmodSucceeded = -1; + File sharedEditsDir = null; try { cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()) @@ -143,9 +145,15 @@ public class TestFailureOfSharedDir { assertTrue(fs.mkdirs(new Path("/test1"))); // Blow away the shared edits dir. - URI sharedEditsUri = cluster.getSharedEditsDir(0, 1); - FileUtil.fullyDelete(new File(sharedEditsUri)); - + URI sharedEditsUri = cluster.getSharedEditsDir(0, 1); + sharedEditsDir = new File(sharedEditsUri); + chmodSucceeded = FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w", + true); + if (chmodSucceeded != 0) { + LOG.error("Failed to remove write permissions on shared edits dir:" + + sharedEditsDir.getAbsolutePath()); + } + NameNode nn0 = cluster.getNameNode(0); try { // Make sure that subsequent operations on the NN fail. @@ -171,6 +179,10 @@ public class TestFailureOfSharedDir { NNStorage.getInProgressEditsFileName(1)); } } finally { + if (chmodSucceeded == 0) { + // without this test cleanup will fail + FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "+w", true); + } if (cluster != null) { cluster.shutdown(); } From 2c9ca86c9cef1d0d1c54b1f61432c420259775d4 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 9 Feb 2012 21:57:16 +0000 Subject: [PATCH 136/177] Fix weird unicode character in CHANGES.txt on HA branch. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1242549 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 3f23987ef1f..1d8ce4dd255 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -25,7 +25,7 @@ HAServiceProtocol (eli via todd) HADOOP-7932. Make client connection retries on socket time outs configurable. (Uma Maheswara Rao G via todd) -HADOOP-7924. 
FailoverController for client-based configuration (eli) +HADOOP-7924. FailoverController for client-based configuration (eli) HADOOP-7961. Move HA fencing to common. (eli) From 467059b4ab5fcb8251b57c60ec3ddfce30c486c2 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 9 Feb 2012 22:23:47 +0000 Subject: [PATCH 137/177] HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. Contributed by Bikas Saha. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1242564 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 1 + .../hdfs/server/namenode/FSEditLog.java | 8 +++++++ .../hdfs/server/namenode/JournalSet.java | 16 ++++++++++++-- .../namenode/ha/TestFailureOfSharedDir.java | 21 ++++++++++++------- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index eac0563d7f9..07d61b60834 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -191,3 +191,4 @@ HDFS-2924. Standby checkpointing fails to authenticate in secure cluster. (todd) HDFS-2915. HA: TestFailureOfSharedDir.testFailureOfSharedDir() has race condition. (Bikas Saha via jitendra) +HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. (Bikas Saha via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index d9a64589cec..2c72a7d5f2b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -805,6 +805,14 @@ public class FSEditLog { return journalSet.getAllJournalStreams(); } + /** + * Used only by tests. + */ + @VisibleForTesting + public JournalSet getJournalSet() { + return journalSet; + } + /** * Used only by unit tests. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java index 8fc323c31d5..d84d79dcb5a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/JournalSet.java @@ -25,8 +25,10 @@ import java.util.SortedSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; + import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -35,8 +37,6 @@ import com.google.common.collect.Lists; import com.google.common.collect.Multimaps; import com.google.common.collect.Sets; -import org.apache.hadoop.classification.InterfaceAudience; - /** * Manages a collection of Journals. 
None of the methods are synchronized, it is * assumed that FSEditLog methods, that use this class, use proper @@ -148,11 +148,17 @@ public class JournalSet implements JournalManager { private List journals = Lists.newArrayList(); final int minimumRedundantJournals; + private volatile Runtime runtime = Runtime.getRuntime(); JournalSet(int minimumRedundantResources) { this.minimumRedundantJournals = minimumRedundantResources; } + @VisibleForTesting + public void setRuntimeForTesting(Runtime runtime) { + this.runtime = runtime; + } + @Override public EditLogOutputStream startLogSegment(final long txId) throws IOException { mapJournalsAndReportErrors(new JournalClosure() { @@ -323,6 +329,12 @@ public class JournalSet implements JournalManager { // continue on any of the other journals. Abort them to ensure that // retry behavior doesn't allow them to keep going in any way. abortAllJournals(); + // the current policy is to shutdown the NN on errors to shared edits + // dir. There are many code paths to shared edits failures - syncs, + // roll of edits etc. All of them go through this common function + // where the isRequired() check is made. Applying exit policy here + // to catch all code paths. + runtime.exit(1); throw new IOException(msg); } else { LOG.error("Error: " + status + " failed for (journal " + jas + ")", t); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java index 71098d8aaa9..cc9552aec2a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestFailureOfSharedDir.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.server.namenode.NNStorage; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Test; +import org.mockito.Mockito; import com.google.common.base.Joiner; @@ -129,7 +130,6 @@ public class TestFailureOfSharedDir { // The shared edits dir will automatically be marked required. MiniDFSCluster cluster = null; - int chmodSucceeded = -1; File sharedEditsDir = null; try { cluster = new MiniDFSCluster.Builder(conf) @@ -145,16 +145,15 @@ public class TestFailureOfSharedDir { assertTrue(fs.mkdirs(new Path("/test1"))); // Blow away the shared edits dir. + Runtime mockRuntime = Mockito.mock(Runtime.class); URI sharedEditsUri = cluster.getSharedEditsDir(0, 1); sharedEditsDir = new File(sharedEditsUri); - chmodSucceeded = FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w", - true); - if (chmodSucceeded != 0) { - LOG.error("Failed to remove write permissions on shared edits dir:" - + sharedEditsDir.getAbsolutePath()); - } + assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w", + true)); NameNode nn0 = cluster.getNameNode(0); + nn0.getNamesystem().getFSImage().getEditLog().getJournalSet() + .setRuntimeForTesting(mockRuntime); try { // Make sure that subsequent operations on the NN fail. nn0.getRpcServer().rollEditLog(); @@ -163,6 +162,12 @@ public class TestFailureOfSharedDir { GenericTestUtils.assertExceptionContains( "Unable to start log segment 4: too few journals successfully started", ioe); + // By current policy the NN should exit upon this error. 
+ // exit() should be called once, but since it is mocked, exit gets + // called once during FSEditsLog.endCurrentLogSegment() and then after + // that during FSEditsLog.startLogSegment(). So the check is atLeast(1) + Mockito.verify(mockRuntime, Mockito.atLeastOnce()).exit( + Mockito.anyInt()); LOG.info("Got expected exception", ioe); } @@ -179,7 +184,7 @@ public class TestFailureOfSharedDir { NNStorage.getInProgressEditsFileName(1)); } } finally { - if (chmodSucceeded == 0) { + if (sharedEditsDir != null) { // without this test cleanup will fail FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "+w", true); } From a63e12c4c8b6d637eb6ab04f84de183e8d34bb00 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Thu, 9 Feb 2012 22:33:20 +0000 Subject: [PATCH 138/177] HDFS-2922. HA: close out operation categories. Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1242572 13f79535-47bb-0310-9956-ffa450edef68 --- .../io/retry/RetryInvocationHandler.java | 2 -- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hadoop/hdfs/DistributedFileSystem.java | 4 +-- .../server/namenode/NameNodeRpcServer.java | 26 +++++++++---------- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java index 3d702d9879d..dc65f2c2aba 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java @@ -113,8 +113,6 @@ class RetryInvocationHandler implements RpcInvocationHandler { msg += ". Trying to fail over " + formatSleepMessage(action.delayMillis); if (LOG.isDebugEnabled()) { LOG.debug(msg, e); - } else { - LOG.warn(msg); } } else { if(LOG.isDebugEnabled()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 07d61b60834..943cf86ec5a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -192,3 +192,5 @@ HDFS-2924. Standby checkpointing fails to authenticate in secure cluster. (todd) HDFS-2915. HA: TestFailureOfSharedDir.testFailureOfSharedDir() has race condition. (Bikas Saha via jitendra) HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. (Bikas Saha via atm) + +HDFS-2922. HA: close out operation categories. (eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index 7f42f12cc5c..be5b70549f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -681,7 +681,7 @@ public class DistributedFileSystem extends FileSystem { } /* - * Requests the namenode to dump data strcutures into specified + * Requests the namenode to dump data structures into specified * file. */ public void metaSave(String pathname) throws IOException { @@ -699,7 +699,7 @@ public class DistributedFileSystem extends FileSystem { * we can consider figuring out exactly which block is corrupt. 
*/ // We do not see a need for user to report block checksum errors and do not - // want to rely on user to report block corruptions. + // want to rely on user to report block corruption. @Deprecated public boolean reportChecksumFailure(Path f, FSDataInputStream in, long inPos, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 45dd8ec55ce..d63dfae11a7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -697,7 +697,7 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public DatanodeInfo[] getDatanodeReport(DatanodeReportType type) throws IOException { - // TODO(HA): decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.READ); DatanodeInfo results[] = namesystem.datanodeReport(type); if (results == null ) { throw new IOException("Cannot find datanode report"); @@ -707,32 +707,32 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public boolean setSafeMode(SafeModeAction action) throws IOException { - // TODO:HA decide on OperationCategory for this + // NB: not checking OperationCategory so this works on a standby return namesystem.setSafeMode(action); } @Override // ClientProtocol public boolean restoreFailedStorage(String arg) throws AccessControlException { - // TODO:HA decide on OperationCategory for this + // NB: not checking OperationCategory so this works on a standby return namesystem.restoreFailedStorage(arg); } @Override // ClientProtocol public void saveNamespace() throws IOException { - // TODO:HA decide on OperationCategory for this + // NB: not checking OperationCategory so this works on a standby namesystem.saveNamespace(); } @Override // ClientProtocol public void refreshNodes() throws IOException { - // TODO:HA decide on OperationCategory for this + // NB: not checking OperationCategory so this works on a standby namesystem.getBlockManager().getDatanodeManager().refreshNodes( new HdfsConfiguration()); } @Override // NamenodeProtocol - public long getTransactionID() { - // TODO:HA decide on OperationCategory for this + public long getTransactionID() throws IOException { + namesystem.checkOperation(OperationCategory.READ); return namesystem.getEditLog().getSyncTxId(); } @@ -744,26 +744,26 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // NamenodeProtocol public RemoteEditLogManifest getEditLogManifest(long sinceTxId) throws IOException { - // TODO:HA decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.READ); return namesystem.getEditLog().getEditLogManifest(sinceTxId); } @Override // ClientProtocol public void finalizeUpgrade() throws IOException { - // TODO:HA decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.WRITE); namesystem.finalizeUpgrade(); } @Override // ClientProtocol public UpgradeStatusReport distributedUpgradeProgress(UpgradeAction action) throws IOException { - // TODO:HA decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.READ); return namesystem.distributedUpgradeProgress(action); } @Override // ClientProtocol public void metaSave(String filename) throws IOException { - // TODO:HA 
decide on OperationCategory for this + // NB: not checking OperationCategory so this works on a standby namesystem.metaSave(filename); } @Override // ClientProtocol @@ -784,12 +784,12 @@ class NameNodeRpcServer implements NamenodeProtocols { /** * Tell all datanodes to use a new, non-persistent bandwidth value for * dfs.datanode.balance.bandwidthPerSec. - * @param bandwidth Blanacer bandwidth in bytes per second for all datanodes. + * @param bandwidth Balancer bandwidth in bytes per second for all datanodes. * @throws IOException */ @Override // ClientProtocol public void setBalancerBandwidth(long bandwidth) throws IOException { - // TODO:HA decide on OperationCategory for this + // NB: not checking OperationCategory so this works on a standby namesystem.getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth); } From 8af96c7b22f92ab84c142c37252f85df7b9b98aa Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Fri, 10 Feb 2012 00:46:17 +0000 Subject: [PATCH 139/177] HDFS-2917. HA: haadmin should not work if run by regular user. Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1242626 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/ha/FailoverController.java | 4 +++ .../java/org/apache/hadoop/ha/HAAdmin.java | 5 +++- .../apache/hadoop/ha/HAServiceProtocol.java | 18 ++++++++++++- .../hadoop/ha/TestFailoverController.java | 26 +++++++++++++++++++ .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hadoop/hdfs/server/namenode/NameNode.java | 22 +++++++++++----- .../server/namenode/NameNodeRpcServer.java | 15 +++++++---- 7 files changed, 79 insertions(+), 13 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java index 7205f9f53b5..0960fb7cbd1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java @@ -61,6 +61,7 @@ public class FailoverController { boolean forceActive) throws FailoverFailedException { HAServiceState toSvcState; + try { toSvcState = toSvc.getServiceState(); } catch (IOException e) { @@ -68,10 +69,12 @@ public class FailoverController { LOG.error(msg, e); throw new FailoverFailedException(msg, e); } + if (!toSvcState.equals(HAServiceState.STANDBY)) { throw new FailoverFailedException( "Can't failover to an active service"); } + try { HAServiceProtocolHelper.monitorHealth(toSvc); } catch (HealthCheckFailedException hce) { @@ -81,6 +84,7 @@ public class FailoverController { throw new FailoverFailedException( "Got an IO exception", e); } + try { if (!toSvc.readyToBecomeActive()) { if (!forceActive) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index 6ceafb9ea69..ccfa11f43dd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -249,7 +249,10 @@ public abstract class HAAdmin extends Configured implements Tool { try { return runCmd(argv); } catch (IllegalArgumentException iae) { - errOut.println("Illegal argument: " + iae.getMessage()); + errOut.println("Illegal argument: " + iae.getLocalizedMessage()); + return -1; + } catch 
(IOException ioe) { + errOut.println("Operation failed: " + ioe.getLocalizedMessage()); return -1; } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java index 9a7316db054..c0e0d2b389e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceProtocol.java @@ -21,6 +21,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.ipc.VersionedProtocol; +import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.KerberosInfo; import java.io.IOException; @@ -75,10 +76,13 @@ public interface HAServiceProtocol extends VersionedProtocol { * * @throws HealthCheckFailedException * if the health check of a service fails. + * @throws AccessControlException + * if access is denied. * @throws IOException * if other errors happen */ public void monitorHealth() throws HealthCheckFailedException, + AccessControlException, IOException; /** @@ -87,10 +91,13 @@ public interface HAServiceProtocol extends VersionedProtocol { * * @throws ServiceFailedException * if transition from standby to active fails. + * @throws AccessControlException + * if access is denied. * @throws IOException * if other errors happen */ public void transitionToActive() throws ServiceFailedException, + AccessControlException, IOException; /** @@ -99,28 +106,37 @@ public interface HAServiceProtocol extends VersionedProtocol { * * @throws ServiceFailedException * if transition from active to standby fails. + * @throws AccessControlException + * if access is denied. * @throws IOException * if other errors happen */ public void transitionToStandby() throws ServiceFailedException, + AccessControlException, IOException; /** * Return the current state of the service. * + * @throws AccessControlException + * if access is denied. * @throws IOException * if other errors happen */ - public HAServiceState getServiceState() throws IOException; + public HAServiceState getServiceState() throws AccessControlException, + IOException; /** * Return true if the service is capable and ready to transition * from the standby state to the active state. * * @return true if the service is ready to become active, false otherwise. + * @throws AccessControlException + * if access is denied. 
* @throws IOException * if other errors happen */ public boolean readyToBecomeActive() throws ServiceFailedException, + AccessControlException, IOException; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java index 7b5cc32b765..39fc47ef406 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java @@ -32,6 +32,7 @@ import static org.apache.hadoop.ha.TestNodeFencer.setupFencer; import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.AccessControlException; import org.junit.Test; import static org.junit.Assert.*; @@ -133,6 +134,31 @@ public class TestFailoverController { assertEquals(HAServiceState.ACTIVE, svc2.getServiceState()); } + @Test + public void testFailoverWithoutPermission() throws Exception { + DummyService svc1 = new DummyService(HAServiceState.ACTIVE) { + @Override + public HAServiceState getServiceState() throws IOException { + throw new AccessControlException("Access denied"); + } + }; + DummyService svc2 = new DummyService(HAServiceState.STANDBY) { + @Override + public HAServiceState getServiceState() throws IOException { + throw new AccessControlException("Access denied"); + } + }; + NodeFencer fencer = setupFencer(AlwaysSucceedFencer.class.getName()); + + try { + FailoverController.failover(svc1, svc1Addr, svc2, svc2Addr, fencer, false, false); + fail("Can't failover when access is denied"); + } catch (FailoverFailedException ffe) { + assertTrue(ffe.getCause().getMessage().contains("Access denied")); + } + } + + @Test public void testFailoverToUnreadyService() throws Exception { DummyService svc1 = new DummyService(HAServiceState.ACTIVE); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 943cf86ec5a..23028003649 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -194,3 +194,5 @@ HDFS-2915. HA: TestFailureOfSharedDir.testFailureOfSharedDir() has race conditio HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. (Bikas Saha via atm) HDFS-2922. HA: close out operation categories. (eli) + +HDFS-2917. 
HA: haadmin should not work if run by regular user (eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index eb7e3c667b6..eb7f4616909 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -58,6 +58,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.RefreshUserMappingsProtocol; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; @@ -900,36 +901,45 @@ public class NameNode { } } - synchronized void monitorHealth() throws HealthCheckFailedException { + synchronized void monitorHealth() + throws HealthCheckFailedException, AccessControlException { + namesystem.checkSuperuserPrivilege(); if (!haEnabled) { - return; // no-op, if HA is not eanbled + return; // no-op, if HA is not enabled } // TODO:HA implement health check return; } - synchronized void transitionToActive() throws ServiceFailedException { + synchronized void transitionToActive() + throws ServiceFailedException, AccessControlException { + namesystem.checkSuperuserPrivilege(); if (!haEnabled) { throw new ServiceFailedException("HA for namenode is not enabled"); } state.setState(haContext, ACTIVE_STATE); } - synchronized void transitionToStandby() throws ServiceFailedException { + synchronized void transitionToStandby() + throws ServiceFailedException, AccessControlException { + namesystem.checkSuperuserPrivilege(); if (!haEnabled) { throw new ServiceFailedException("HA for namenode is not enabled"); } state.setState(haContext, STANDBY_STATE); } - synchronized HAServiceState getServiceState() { + synchronized HAServiceState getServiceState() throws AccessControlException { + namesystem.checkSuperuserPrivilege(); if (state == null) { return HAServiceState.INITIALIZING; } return state.getServiceState(); } - synchronized boolean readyToBecomeActive() throws ServiceFailedException { + synchronized boolean readyToBecomeActive() + throws ServiceFailedException, AccessControlException { + namesystem.checkSuperuserPrivilege(); if (!haEnabled) { throw new ServiceFailedException("HA for namenode is not enabled"); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index d63dfae11a7..aa5e8134218 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -988,27 +988,32 @@ class NameNodeRpcServer implements NamenodeProtocols { } @Override // HAServiceProtocol - public synchronized void monitorHealth() throws HealthCheckFailedException { + public synchronized void monitorHealth() + throws HealthCheckFailedException, AccessControlException { nn.monitorHealth(); } @Override // HAServiceProtocol - public synchronized void transitionToActive() throws 
ServiceFailedException { + public synchronized void transitionToActive() + throws ServiceFailedException, AccessControlException { nn.transitionToActive(); } @Override // HAServiceProtocol - public synchronized void transitionToStandby() throws ServiceFailedException { + public synchronized void transitionToStandby() + throws ServiceFailedException, AccessControlException { nn.transitionToStandby(); } @Override // HAServiceProtocol - public synchronized HAServiceState getServiceState() { + public synchronized HAServiceState getServiceState() + throws AccessControlException { return nn.getServiceState(); } @Override // HAServiceProtocol - public synchronized boolean readyToBecomeActive() throws ServiceFailedException { + public synchronized boolean readyToBecomeActive() + throws ServiceFailedException, AccessControlException { return nn.readyToBecomeActive(); } From a626fa04f983623b1e2c00189df6f0b83b806b5f Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 10 Feb 2012 18:17:21 +0000 Subject: [PATCH 140/177] Revert HDFS-2922 via svn merge -c -1242572 The patch broke a lot of unit tests in the nightly build. Will recommit after it is fixed. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1242874 13f79535-47bb-0310-9956-ffa450edef68 --- .../io/retry/RetryInvocationHandler.java | 2 ++ .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 -- .../hadoop/hdfs/DistributedFileSystem.java | 4 +-- .../server/namenode/NameNodeRpcServer.java | 26 +++++++++---------- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java index dc65f2c2aba..3d702d9879d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java @@ -113,6 +113,8 @@ class RetryInvocationHandler implements RpcInvocationHandler { msg += ". Trying to fail over " + formatSleepMessage(action.delayMillis); if (LOG.isDebugEnabled()) { LOG.debug(msg, e); + } else { + LOG.warn(msg); } } else { if(LOG.isDebugEnabled()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 23028003649..3eec7452792 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -193,6 +193,4 @@ HDFS-2915. HA: TestFailureOfSharedDir.testFailureOfSharedDir() has race conditio HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. (Bikas Saha via atm) -HDFS-2922. HA: close out operation categories. (eli) - HDFS-2917. 
HA: haadmin should not work if run by regular user (eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index eae4cf1d3ac..8dfced350a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -646,7 +646,7 @@ public class DistributedFileSystem extends FileSystem { } /* - * Requests the namenode to dump data structures into specified + * Requests the namenode to dump data strcutures into specified * file. */ public void metaSave(String pathname) throws IOException { @@ -664,7 +664,7 @@ public class DistributedFileSystem extends FileSystem { * we can consider figuring out exactly which block is corrupt. */ // We do not see a need for user to report block checksum errors and do not - // want to rely on user to report block corruption. + // want to rely on user to report block corruptions. @Deprecated public boolean reportChecksumFailure(Path f, FSDataInputStream in, long inPos, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index c6058851086..9f85dacc624 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -700,7 +700,7 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public DatanodeInfo[] getDatanodeReport(DatanodeReportType type) throws IOException { - namesystem.checkOperation(OperationCategory.READ); + // TODO(HA): decide on OperationCategory for this DatanodeInfo results[] = namesystem.datanodeReport(type); if (results == null ) { throw new IOException("Cannot find datanode report"); @@ -710,32 +710,32 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public boolean setSafeMode(SafeModeAction action) throws IOException { - // NB: not checking OperationCategory so this works on a standby + // TODO:HA decide on OperationCategory for this return namesystem.setSafeMode(action); } @Override // ClientProtocol public boolean restoreFailedStorage(String arg) throws AccessControlException { - // NB: not checking OperationCategory so this works on a standby + // TODO:HA decide on OperationCategory for this return namesystem.restoreFailedStorage(arg); } @Override // ClientProtocol public void saveNamespace() throws IOException { - // NB: not checking OperationCategory so this works on a standby + // TODO:HA decide on OperationCategory for this namesystem.saveNamespace(); } @Override // ClientProtocol public void refreshNodes() throws IOException { - // NB: not checking OperationCategory so this works on a standby + // TODO:HA decide on OperationCategory for this namesystem.getBlockManager().getDatanodeManager().refreshNodes( new HdfsConfiguration()); } @Override // NamenodeProtocol - public long getTransactionID() throws IOException { - namesystem.checkOperation(OperationCategory.READ); + public long getTransactionID() { + // TODO:HA decide on OperationCategory for this return namesystem.getEditLog().getSyncTxId(); } @@ -747,26 +747,26 @@ class 
NameNodeRpcServer implements NamenodeProtocols { @Override // NamenodeProtocol public RemoteEditLogManifest getEditLogManifest(long sinceTxId) throws IOException { - namesystem.checkOperation(OperationCategory.READ); + // TODO:HA decide on OperationCategory for this return namesystem.getEditLog().getEditLogManifest(sinceTxId); } @Override // ClientProtocol public void finalizeUpgrade() throws IOException { - namesystem.checkOperation(OperationCategory.WRITE); + // TODO:HA decide on OperationCategory for this namesystem.finalizeUpgrade(); } @Override // ClientProtocol public UpgradeStatusReport distributedUpgradeProgress(UpgradeAction action) throws IOException { - namesystem.checkOperation(OperationCategory.READ); + // TODO:HA decide on OperationCategory for this return namesystem.distributedUpgradeProgress(action); } @Override // ClientProtocol public void metaSave(String filename) throws IOException { - // NB: not checking OperationCategory so this works on a standby + // TODO:HA decide on OperationCategory for this namesystem.metaSave(filename); } @Override // ClientProtocol @@ -787,12 +787,12 @@ class NameNodeRpcServer implements NamenodeProtocols { /** * Tell all datanodes to use a new, non-persistent bandwidth value for * dfs.datanode.balance.bandwidthPerSec. - * @param bandwidth Balancer bandwidth in bytes per second for all datanodes. + * @param bandwidth Blanacer bandwidth in bytes per second for all datanodes. * @throws IOException */ @Override // ClientProtocol public void setBalancerBandwidth(long bandwidth) throws IOException { - // NB: not checking OperationCategory so this works on a standby + // TODO:HA decide on OperationCategory for this namesystem.getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth); } From f6e407b436370903650e0120132078113a1984b6 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Sun, 12 Feb 2012 18:43:39 +0000 Subject: [PATCH 141/177] HDFS-2939. TestHAStateTransitions fails on Windows. Contributed by Uma Maheswara Rao G. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1243293 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java | 1 + 2 files changed, 3 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 3eec7452792..4cc37a3d1c3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -194,3 +194,5 @@ HDFS-2915. HA: TestFailureOfSharedDir.testFailureOfSharedDir() has race conditio HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. (Bikas Saha via atm) HDFS-2917. HA: haadmin should not work if run by regular user (eli) + +HDFS-2939. TestHAStateTransitions fails on Windows. 
(Uma Maheswara Rao G via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index 2595621641a..092bb5af4ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -419,6 +419,7 @@ public class TestHAStateTransitions { DataOutputStream out = new DataOutputStream(new FileOutputStream( inProgressFile)); EditLogFileOutputStream.writeHeader(out); + out.close(); } } From 1b2ecc6336ae15ede5f2e4b2bcee8c3ceb7db3e5 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Tue, 14 Feb 2012 08:42:25 +0000 Subject: [PATCH 142/177] HDFS-2947. On startup NN throws an NPE in the metrics system. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1243826 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../apache/hadoop/hdfs/server/namenode/NameNode.java | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 4cc37a3d1c3..523c3c81684 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -196,3 +196,5 @@ HDFS-2912. Namenode not shutting down when shared edits dir is inaccessible. (Bi HDFS-2917. HA: haadmin should not work if run by regular user (eli) HDFS-2939. TestHAStateTransitions fails on Windows. (Uma Maheswara Rao G via atm) + +HDFS-2947. On startup NN throws an NPE in the metrics system. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index eb7f4616909..2e7292229cf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -534,16 +534,16 @@ public class NameNode { String nsId = getNameServiceId(conf); String namenodeId = HAUtil.getNameNodeId(conf, nsId); this.haEnabled = HAUtil.isHAEnabled(conf, nsId); + if (!haEnabled) { + state = ACTIVE_STATE; + } else { + state = STANDBY_STATE;; + } this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); this.haContext = createHAContext(); try { initializeGenericKeys(conf, nsId, namenodeId); initialize(conf); - if (!haEnabled) { - state = ACTIVE_STATE; - } else { - state = STANDBY_STATE;; - } state.prepareToEnterState(haContext); state.enterState(haContext); } catch (IOException e) { From a0e4ae55f2d0f64c1e869213513af5e22bab07b0 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Tue, 14 Feb 2012 08:43:58 +0000 Subject: [PATCH 143/177] HDFS-2942. TestActiveStandbyElectorRealZK fails if build dir does not exist. Contributed by Aaron T. Myers. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1243827 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/ha/TestActiveStandbyElectorRealZK.java | 8 ++++++++ hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ 2 files changed, 10 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java index 85a5f8b682c..672e8d30d1e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ha; +import java.io.File; import java.io.IOException; import java.util.List; @@ -39,6 +40,13 @@ public class TestActiveStandbyElectorRealZK extends ClientBase { static final int NUM_ELECTORS = 2; static ZooKeeper[] zkClient = new ZooKeeper[NUM_ELECTORS]; static int currentClientIndex = 0; + + @Override + public void setUp() throws Exception { + // build.test.dir is used by zookeeper + new File(System.getProperty("build.test.dir", "build")).mkdirs(); + super.setUp(); + } class ActiveStandbyElectorTesterRealZK extends ActiveStandbyElector { ActiveStandbyElectorTesterRealZK(String hostPort, int timeout, diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 523c3c81684..2eac773f966 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -198,3 +198,5 @@ HDFS-2917. HA: haadmin should not work if run by regular user (eli) HDFS-2939. TestHAStateTransitions fails on Windows. (Uma Maheswara Rao G via atm) HDFS-2947. On startup NN throws an NPE in the metrics system. (atm) + +HDFS-2942. TestActiveStandbyElectorRealZK fails if build dir does not exist. (atm) From 439490f9158741234e638f41a5c0bdb236c4e988 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 14 Feb 2012 19:38:02 +0000 Subject: [PATCH 144/177] HDFS-2948. NN throws NPE during shutdown if it fails to startup. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1244186 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../java/org/apache/hadoop/hdfs/server/namenode/NameNode.java | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 2eac773f966..d6eb3c83f7b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -200,3 +200,5 @@ HDFS-2939. TestHAStateTransitions fails on Windows. (Uma Maheswara Rao G via atm HDFS-2947. On startup NN throws an NPE in the metrics system. (atm) HDFS-2942. TestActiveStandbyElectorRealZK fails if build dir does not exist. (atm) + +HDFS-2948. 
NN throws NPE during shutdown if it fails to startup (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 2e7292229cf..f1bfa74888f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -974,7 +974,9 @@ public class NameNode { @Override public void stopActiveServices() throws IOException { - namesystem.stopActiveServices(); + if (namesystem != null) { + namesystem.stopActiveServices(); + } stopTrashEmptier(); } From cdc8146bd46ae8bf3395498c529513730d446824 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 15 Feb 2012 18:20:11 +0000 Subject: [PATCH 145/177] HADOOP-8068. void methods can swallow exceptions when going through failover path. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1244628 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 2 ++ .../io/retry/RetryInvocationHandler.java | 10 ++++---- .../apache/hadoop/io/retry/RetryPolicies.java | 24 +++++-------------- .../apache/hadoop/io/retry/RetryPolicy.java | 8 ++++++- .../hadoop/io/retry/TestFailoverProxy.java | 23 +++++++++++++++++- .../hadoop/io/retry/TestRetryProxy.java | 14 ----------- .../io/retry/UnreliableImplementation.java | 12 ++++++++++ .../hadoop/io/retry/UnreliableInterface.java | 3 +++ 8 files changed, 57 insertions(+), 39 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index 1d8ce4dd255..c9dd46062f4 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -47,3 +47,5 @@ HADOOP-8038. Add 'ipc.client.connect.max.retries.on.timeouts' entry in core-default.xml file. (Uma Maheswara Rao G via atm) HADOOP-8041. Log a warning when a failover is first attempted (todd) + +HADOOP-8068. void methods can swallow exceptions when going through failover path (todd) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java index 3d702d9879d..323542cbd39 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryInvocationHandler.java @@ -90,12 +90,12 @@ class RetryInvocationHandler implements RpcInvocationHandler { RetryAction action = policy.shouldRetry(e, retries++, invocationFailoverCount, isMethodIdempotent); if (action.action == RetryAction.RetryDecision.FAIL) { - LOG.warn("Exception while invoking " + method.getName() - + " of " + currentProxy.getClass() + ". Not retrying.", e); - if (!method.getReturnType().equals(Void.TYPE)) { - throw e; // non-void methods can't fail without an exception + if (action.reason != null) { + LOG.warn("Exception while invoking " + + currentProxy.getClass() + "." + method.getName() + + ". 
Not retrying because " + action.reason, e); } - return null; + throw e; } else { // retry or failover // avoid logging the failover if this is the first call on this // proxy object, and we successfully achieve the failover without diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java index a96dc9ee0bc..2be8b759998 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java @@ -54,14 +54,6 @@ public class RetryPolicies { */ public static final RetryPolicy TRY_ONCE_THEN_FAIL = new TryOnceThenFail(); - /** - *

-   * Try once, and fail silently for void methods, or by
-   * re-throwing the exception for non-void methods.
-   *
-   */
-  public static final RetryPolicy TRY_ONCE_DONT_FAIL = new TryOnceDontFail();

  /**
   *
        * Keep trying forever. @@ -152,12 +144,6 @@ public class RetryPolicies { } static class TryOnceThenFail implements RetryPolicy { - public RetryAction shouldRetry(Exception e, int retries, int failovers, - boolean isMethodIdempotent) throws Exception { - throw e; - } - } - static class TryOnceDontFail implements RetryPolicy { public RetryAction shouldRetry(Exception e, int retries, int failovers, boolean isMethodIdempotent) throws Exception { return RetryAction.FAIL; @@ -185,7 +171,7 @@ public class RetryPolicies { public RetryAction shouldRetry(Exception e, int retries, int failovers, boolean isMethodIdempotent) throws Exception { if (retries >= maxRetries) { - throw e; + return RetryAction.FAIL; } return new RetryAction(RetryAction.RetryDecision.RETRY, timeUnit.toMillis(calculateSleepTime(retries))); @@ -325,9 +311,9 @@ public class RetryPolicies { public RetryAction shouldRetry(Exception e, int retries, int failovers, boolean isMethodIdempotent) throws Exception { if (failovers >= maxFailovers) { - LOG.info("Failovers (" + failovers + ") exceeded maximum allowed (" + return new RetryAction(RetryAction.RetryDecision.FAIL, 0, + "failovers (" + failovers + ") exceeded maximum allowed (" + maxFailovers + ")"); - return RetryAction.FAIL; } if (e instanceof ConnectException || @@ -345,7 +331,9 @@ public class RetryPolicies { if (isMethodIdempotent) { return RetryAction.FAILOVER_AND_RETRY; } else { - return RetryAction.FAIL; + return new RetryAction(RetryAction.RetryDecision.FAIL, 0, + "the invoked method is not idempotent, and unable to determine " + + "whether it was invoked"); } } else { return fallbackPolicy.shouldRetry(e, retries, failovers, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicy.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicy.java index 90e5eaea671..ed673e950f8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicy.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicy.java @@ -44,14 +44,20 @@ public interface RetryPolicy { public final RetryDecision action; public final long delayMillis; + public final String reason; public RetryAction(RetryDecision action) { - this(action, 0); + this(action, 0, null); } public RetryAction(RetryDecision action, long delayTime) { + this(action, delayTime, null); + } + + public RetryAction(RetryDecision action, long delayTime, String reason) { this.action = action; this.delayMillis = delayTime; + this.reason = reason; } public enum RetryDecision { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java index 2a6dc2622fd..4949ef31406 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestFailoverProxy.java @@ -128,7 +128,7 @@ public class TestFailoverProxy { new FlipFlopProxyProvider(UnreliableInterface.class, new UnreliableImplementation("impl1"), new UnreliableImplementation("impl2")), - RetryPolicies.TRY_ONCE_DONT_FAIL); + RetryPolicies.TRY_ONCE_THEN_FAIL); unreliable.succeedsOnceThenFailsReturningString(); try { @@ -196,6 +196,27 @@ public class TestFailoverProxy { assertEquals("impl2", unreliable.succeedsOnceThenFailsReturningStringIdempotent()); 
} + /** + * Test that if a non-idempotent void function is called, and there is an exception, + * the exception is properly propagated + */ + @Test + public void testExceptionPropagatedForNonIdempotentVoid() throws Exception { + UnreliableInterface unreliable = (UnreliableInterface)RetryProxy + .create(UnreliableInterface.class, + new FlipFlopProxyProvider(UnreliableInterface.class, + new UnreliableImplementation("impl1", TypeOfExceptionToFailWith.IO_EXCEPTION), + new UnreliableImplementation("impl2", TypeOfExceptionToFailWith.UNRELIABLE_EXCEPTION)), + RetryPolicies.failoverOnNetworkException(1)); + + try { + unreliable.nonIdempotentVoidFailsIfIdentifierDoesntMatch("impl2"); + fail("did not throw an exception"); + } catch (Exception e) { + } + + } + private static class SynchronizedUnreliableImplementation extends UnreliableImplementation { private CountDownLatch methodLatch; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java index c48e87b7dd9..696f40d8376 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/TestRetryProxy.java @@ -19,7 +19,6 @@ package org.apache.hadoop.io.retry; import static org.apache.hadoop.io.retry.RetryPolicies.RETRY_FOREVER; -import static org.apache.hadoop.io.retry.RetryPolicies.TRY_ONCE_DONT_FAIL; import static org.apache.hadoop.io.retry.RetryPolicies.TRY_ONCE_THEN_FAIL; import static org.apache.hadoop.io.retry.RetryPolicies.retryByException; import static org.apache.hadoop.io.retry.RetryPolicies.retryByRemoteException; @@ -59,19 +58,6 @@ public class TestRetryProxy extends TestCase { } } - public void testTryOnceDontFail() throws UnreliableException { - UnreliableInterface unreliable = (UnreliableInterface) - RetryProxy.create(UnreliableInterface.class, unreliableImpl, TRY_ONCE_DONT_FAIL); - unreliable.alwaysSucceeds(); - unreliable.failsOnceThenSucceeds(); - try { - unreliable.failsOnceThenSucceedsWithReturnValue(); - fail("Should fail"); - } catch (UnreliableException e) { - // expected - } - } - public void testRetryForever() throws UnreliableException { UnreliableInterface unreliable = (UnreliableInterface) RetryProxy.create(UnreliableInterface.class, unreliableImpl, RETRY_FOREVER); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java index 185ed2a4426..54fe6778440 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableImplementation.java @@ -136,6 +136,18 @@ public class UnreliableImplementation implements UnreliableInterface { return null; } } + + @Override + public void nonIdempotentVoidFailsIfIdentifierDoesntMatch(String identifier) + throws UnreliableException, StandbyException, IOException { + if (this.identifier.equals(identifier)) { + return; + } else { + String message = "expected '" + this.identifier + "' but received '" + + identifier + "'"; + throwAppropriateException(exceptionToFailWith, message); + } + } private static void throwAppropriateException(TypeOfExceptionToFailWith eType, String message) throws UnreliableException, 
StandbyException, IOException { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java index e794c1686c2..66a8b853606 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/retry/UnreliableInterface.java @@ -67,4 +67,7 @@ public interface UnreliableInterface { @Idempotent public String failsIfIdentifierDoesntMatch(String identifier) throws UnreliableException, StandbyException, IOException; + + void nonIdempotentVoidFailsIfIdentifierDoesntMatch(String identifier) + throws UnreliableException, StandbyException, IOException; } From 3c145d3492331959d21f6d0c3b8c7e71d35de69f Mon Sep 17 00:00:00 2001 From: Jitendra Nath Pandey Date: Wed, 15 Feb 2012 22:00:18 +0000 Subject: [PATCH 146/177] HDFS-2909. HA: Inaccessible shared edits dir not getting removed from FSImage storage dirs upon error. Contributed by Bikas Saha. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1244753 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/FSEditLog.java | 2 +- .../server/namenode/FileJournalManager.java | 20 +++++--- .../namenode/TestFileJournalManager.java | 51 +++++++++++++------ .../TestNNStorageRetentionManager.java | 3 +- 5 files changed, 55 insertions(+), 23 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index d6eb3c83f7b..6a3d45bdc26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -202,3 +202,5 @@ HDFS-2947. On startup NN throws an NPE in the metrics system. (atm) HDFS-2942. TestActiveStandbyElectorRealZK fails if build dir does not exist. (atm) HDFS-2948. NN throws NPE during shutdown if it fails to startup (todd) + +HDFS-2909. HA: Inaccessible shared edits dir not getting removed from FSImage storage dirs upon error. 
(Bikas Saha via jitendra) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 2c72a7d5f2b..5cd8be26a95 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -221,7 +221,7 @@ public class FSEditLog { if (u.getScheme().equals(NNStorage.LOCAL_URI_SCHEME)) { StorageDirectory sd = storage.getStorageDirectory(u); if (sd != null) { - journalSet.add(new FileJournalManager(sd), required); + journalSet.add(new FileJournalManager(sd, storage), required); } } else { journalSet.add(createJournal(u), required); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index 1eca2797b44..eaaf65b5fc2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -52,6 +52,7 @@ class FileJournalManager implements JournalManager { private static final Log LOG = LogFactory.getLog(FileJournalManager.class); private final StorageDirectory sd; + private final NNStorage storage; private int outputBufferCapacity = 512*1024; private static final Pattern EDITS_REGEX = Pattern.compile( @@ -65,8 +66,9 @@ class FileJournalManager implements JournalManager { StoragePurger purger = new NNStorageRetentionManager.DeletionStoragePurger(); - public FileJournalManager(StorageDirectory sd) { + public FileJournalManager(StorageDirectory sd, NNStorage storage) { this.sd = sd; + this.storage = storage; } @Override @@ -75,11 +77,16 @@ class FileJournalManager implements JournalManager { @Override synchronized public EditLogOutputStream startLogSegment(long txid) throws IOException { - currentInProgress = NNStorage.getInProgressEditsFile(sd, txid); - EditLogOutputStream stm = new EditLogFileOutputStream(currentInProgress, - outputBufferCapacity); - stm.create(); - return stm; + try { + currentInProgress = NNStorage.getInProgressEditsFile(sd, txid); + EditLogOutputStream stm = new EditLogFileOutputStream(currentInProgress, + outputBufferCapacity); + stm.create(); + return stm; + } catch (IOException e) { + storage.reportErrorsOnDirectory(sd); + throw e; + } } @Override @@ -95,6 +102,7 @@ class FileJournalManager implements JournalManager { "Can't finalize edits file " + inprogressFile + " since finalized file " + "already exists"); if (!inprogressFile.renameTo(dstFile)) { + storage.reportErrorsOnDirectory(sd); throw new IllegalStateException("Unable to finalize edits file " + inprogressFile); } if (inprogressFile.equals(currentInProgress)) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java index def29365776..0ac194439d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFileJournalManager.java @@ 
-29,6 +29,7 @@ import java.io.File; import java.io.FilenameFilter; import java.io.IOException; import org.junit.Test; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.JournalManager.CorruptionException; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; @@ -59,7 +60,7 @@ public class TestFileJournalManager { long numJournals = 0; for (StorageDirectory sd : storage.dirIterable(NameNodeDirType.EDITS)) { - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true)); numJournals++; } @@ -79,7 +80,7 @@ public class TestFileJournalManager { 5, new AbortSpec(5, 0)); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, true)); } @@ -102,16 +103,16 @@ public class TestFileJournalManager { 5, new AbortSpec(5, 1)); Iterator dirs = storage.dirIterator(NameNodeDirType.EDITS); StorageDirectory sd = dirs.next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true)); sd = dirs.next(); - jm = new FileJournalManager(sd); + jm = new FileJournalManager(sd, storage); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, true)); sd = dirs.next(); - jm = new FileJournalManager(sd); + jm = new FileJournalManager(sd, storage); assertEquals(6*TXNS_PER_ROLL, jm.getNumberOfTransactions(1, true)); } @@ -135,17 +136,17 @@ public class TestFileJournalManager { new AbortSpec(5, 2)); Iterator dirs = storage.dirIterator(NameNodeDirType.EDITS); StorageDirectory sd = dirs.next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, true)); sd = dirs.next(); - jm = new FileJournalManager(sd); + jm = new FileJournalManager(sd, storage); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, true)); sd = dirs.next(); - jm = new FileJournalManager(sd); + jm = new FileJournalManager(sd, storage); assertEquals(5*TXNS_PER_ROLL + TXNS_PER_FAIL, jm.getNumberOfTransactions(1, true)); } @@ -161,6 +162,25 @@ public class TestFileJournalManager { } raf.close(); } + + @Test(expected=IllegalStateException.class) + public void testFinalizeErrorReportedToNNStorage() throws IOException, InterruptedException { + File f = new File(TestEditLog.TEST_DIR + "/filejournaltestError"); + // abort after 10th roll + NNStorage storage = setupEdits(Collections.singletonList(f.toURI()), + 10, new AbortSpec(10, 0)); + StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); + + FileJournalManager jm = new FileJournalManager(sd, storage); + String sdRootPath = sd.getRoot().getAbsolutePath(); + FileUtil.chmod(sdRootPath, "-w", true); + try { + jm.finalizeLogSegment(0, 1); + } finally { + assertTrue(storage.getRemovedStorageDirs().contains(sd)); + FileUtil.chmod(sdRootPath, "+w", true); + } + } /** * Test that we can read from a stream created by FileJournalManager. 
@@ -176,7 +196,7 @@ public class TestFileJournalManager { 10, new AbortSpec(10, 0)); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); long expectedTotalTxnCount = TXNS_PER_ROLL*10 + TXNS_PER_FAIL; assertEquals(expectedTotalTxnCount, jm.getNumberOfTransactions(1, true)); @@ -211,7 +231,7 @@ public class TestFileJournalManager { 10); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); // 10 rolls, so 11 rolled files, 110 txids total. final int TOTAL_TXIDS = 10 * 11; @@ -248,7 +268,7 @@ public class TestFileJournalManager { assertEquals(1, files.length); assertTrue(files[0].delete()); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(startGapTxId-1, jm.getNumberOfTransactions(1, true)); try { @@ -286,7 +306,7 @@ public class TestFileJournalManager { corruptAfterStartSegment(files[0]); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); assertEquals(10*TXNS_PER_ROLL+1, jm.getNumberOfTransactions(1, true)); } @@ -300,7 +320,8 @@ public class TestFileJournalManager { NNStorage.getInProgressEditsFileName(201), NNStorage.getFinalizedEditsFileName(1001, 1100)); - FileJournalManager fjm = new FileJournalManager(sd); + // passing null for NNStorage because this unit test will not use it + FileJournalManager fjm = new FileJournalManager(sd, null); assertEquals("[1,100],[101,200],[1001,1100]", getLogsAsString(fjm, 1)); assertEquals("[101,200],[1001,1100]", getLogsAsString(fjm, 101)); assertEquals("[1001,1100]", getLogsAsString(fjm, 201)); @@ -336,7 +357,7 @@ public class TestFileJournalManager { 10); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); EditLogInputStream elis = jm.getInputStream(5, true); FSEditLogOp op = elis.readOp(); @@ -357,7 +378,7 @@ public class TestFileJournalManager { 10, false); StorageDirectory sd = storage.dirIterator(NameNodeDirType.EDITS).next(); - FileJournalManager jm = new FileJournalManager(sd); + FileJournalManager jm = new FileJournalManager(sd, storage); // If we exclude the in-progess stream, we should only have 100 tx. 
assertEquals(100, jm.getNumberOfTransactions(1, false)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java index 6ff91f41a28..4c6334f53ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNStorageRetentionManager.java @@ -292,8 +292,9 @@ public class TestNNStorageRetentionManager { for (FakeRoot root : dirRoots.values()) { if (!root.type.isOfType(NameNodeDirType.EDITS)) continue; + // passing null NNStorage for unit test because it does not use it FileJournalManager fjm = new FileJournalManager( - root.mockStorageDir()); + root.mockStorageDir(), null); fjm.purger = purger; jms.add(fjm); } From 05151ecf79a5d3dff38fe738a3e5be9f3c253b86 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 15 Feb 2012 22:19:12 +0000 Subject: [PATCH 147/177] HDFS-2934. Allow configs to be scoped to all NNs in the nameservice. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1244759 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../java/org/apache/hadoop/hdfs/DFSUtil.java | 10 +++++- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 33 +++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 6a3d45bdc26..de3ba8c18c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -204,3 +204,5 @@ HDFS-2942. TestActiveStandbyElectorRealZK fails if build dir does not exist. (at HDFS-2948. NN throws NPE during shutdown if it fails to startup (todd) HDFS-2909. HA: Inaccessible shared edits dir not getting removed from FSImage storage dirs upon error. (Bikas Saha via jitendra) + +HDFS-2934. Allow configs to be scoped to all NNs in the nameservice. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index c56a6ad387f..1c7afd40bac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -746,7 +746,10 @@ public class DFSUtil { /** * Sets the node specific setting into generic configuration key. Looks up * value of "key.nameserviceId.namenodeId" and if found sets that value into - * generic key in the conf. Note that this only modifies the runtime conf. + * generic key in the conf. If this is not found, falls back to + * "key.nameserviceId" and then the unmodified key. + * + * Note that this only modifies the runtime conf. * * @param conf * Configuration object to lookup specific key and to set the value @@ -764,6 +767,11 @@ public class DFSUtil { String nameserviceId, String nnId, String... 
keys) { for (String key : keys) { String value = conf.get(addKeySuffixes(key, nameserviceId, nnId)); + if (value != null) { + conf.set(key, value); + continue; + } + value = conf.get(addKeySuffixes(key, nameserviceId)); if (value != null) { conf.set(key, value); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index 5b67cf5491d..e49bb107e20 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -325,6 +325,39 @@ public class TestDFSUtil { } } + /** + * Regression test for HDFS-2934. + */ + @Test + public void testSomeConfsNNSpecificSomeNSSpecific() { + final HdfsConfiguration conf = new HdfsConfiguration(); + + String key = DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY; + conf.set(key, "global-default"); + conf.set(key + ".ns1", "ns1-override"); + conf.set(key + ".ns1.nn1", "nn1-override"); + + // A namenode in another nameservice should get the global default. + Configuration newConf = new Configuration(conf); + NameNode.initializeGenericKeys(newConf, "ns2", "nn1"); + assertEquals("global-default", newConf.get(key)); + + // A namenode in another non-HA nameservice should get global default. + newConf = new Configuration(conf); + NameNode.initializeGenericKeys(newConf, "ns2", null); + assertEquals("global-default", newConf.get(key)); + + // A namenode in the same nameservice should get the ns setting + newConf = new Configuration(conf); + NameNode.initializeGenericKeys(newConf, "ns1", "nn2"); + assertEquals("ns1-override", newConf.get(key)); + + // The nn with the nn-specific setting should get its own override + newConf = new Configuration(conf); + NameNode.initializeGenericKeys(newConf, "ns1", "nn1"); + assertEquals("nn1-override", newConf.get(key)); + } + /** * Tests for empty configuration, an exception is thrown from * {@link DFSUtil#getNNServiceRpcAddresses(Configuration)} From f7f2ef4447f462b2dd56c26ba2b813f1a3166cec Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 15 Feb 2012 22:20:37 +0000 Subject: [PATCH 148/177] HDFS-2935. Shared edits dir property should be suffixed with nameservice and namenodeID. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1244761 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../java/org/apache/hadoop/hdfs/server/namenode/NameNode.java | 1 + 2 files changed, 3 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index de3ba8c18c1..7c67b9887ca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -206,3 +206,5 @@ HDFS-2948. NN throws NPE during shutdown if it fails to startup (todd) HDFS-2909. HA: Inaccessible shared edits dir not getting removed from FSImage storage dirs upon error. (Bikas Saha via jitendra) HDFS-2934. Allow configs to be scoped to all NNs in the nameservice. (todd) + +HDFS-2935. 
Shared edits dir property should be suffixed with nameservice and namenodeID (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index fec017664b8..7886f2dee7a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -139,6 +139,7 @@ public class NameNode { DFS_NAMENODE_RPC_ADDRESS_KEY, DFS_NAMENODE_NAME_DIR_KEY, DFS_NAMENODE_EDITS_DIR_KEY, + DFS_NAMENODE_SHARED_EDITS_DIR_KEY, DFS_NAMENODE_CHECKPOINT_DIR_KEY, DFS_NAMENODE_CHECKPOINT_EDITS_DIR_KEY, DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, From 0663b51ed44db97740096a1002d2b63f3e17a3eb Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 16 Feb 2012 00:58:03 +0000 Subject: [PATCH 149/177] Amend HDFS-2948. NN throws NPE during shutdown if it fails to startup Previous patch missed the case where the NN fails to start up when it's starting in standby state. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1244813 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/hadoop/hdfs/server/namenode/NameNode.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 7886f2dee7a..847dc040532 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -995,7 +995,9 @@ public class NameNode { @Override public void stopStandbyServices() throws IOException { // TODO(HA): Are we guaranteed to be the only active here? - namesystem.stopStandbyServices(); + if (namesystem != null) { + namesystem.stopStandbyServices(); + } } @Override From 83a922b55ee22ef8e643dc4148474deb84dad38a Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 16 Feb 2012 03:59:09 +0000 Subject: [PATCH 150/177] ConfiguredFailoverProxyProvider should not create a NameNode proxy with an underlying retry proxy. Contributed by Uma Maheswara Rao G. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1244845 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 144 ++++++++++++++---- .../ClientNamenodeProtocolTranslatorPB.java | 66 +++----- .../ha/ConfiguredFailoverProxyProvider.java | 8 +- 4 files changed, 137 insertions(+), 83 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 7c67b9887ca..c87faa70d34 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -208,3 +208,5 @@ HDFS-2909. HA: Inaccessible shared edits dir not getting removed from FSImage st HDFS-2934. Allow configs to be scoped to all NNs in the nameservice. (todd) HDFS-2935. Shared edits dir property should be suffixed with nameservice and namenodeID (todd) + +HDFS-2928. ConfiguredFailoverProxyProvider should not create a NameNode proxy with an underlying retry proxy. 
(Uma Maheswara Rao G via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 1c7afd40bac..52b1eb9f35e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -42,13 +42,17 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolTranslatorPB; +import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.server.namenode.NameNode; @@ -57,6 +61,7 @@ import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NodeBase; @@ -807,23 +812,16 @@ public class DFSUtil { /** Create a {@link NameNode} proxy */ public static ClientProtocol createNamenode(InetSocketAddress nameNodeAddr, Configuration conf) throws IOException { - return createNamenode(nameNodeAddr, conf, + return createNamenode(nameNodeAddr, conf, UserGroupInformation.getCurrentUser()); } /** Create a {@link NameNode} proxy */ - public static ClientProtocol createNamenode( InetSocketAddress nameNodeAddr, + public static ClientProtocol createNamenode(InetSocketAddress nameNodeAddr, Configuration conf, UserGroupInformation ugi) throws IOException { - /** - * Currently we have simply burnt-in support for a SINGLE - * protocol - protocolPB. This will be replaced - * by a way to pick the right protocol based on the - * version of the target server. - */ - return new org.apache.hadoop.hdfs.protocolPB. - ClientNamenodeProtocolTranslatorPB(nameNodeAddr, conf, ugi); + return createNNProxyWithClientProtocol(nameNodeAddr, conf, ugi, true); } - + /** Create a {@link ClientDatanodeProtocol} proxy */ public static ClientDatanodeProtocol createClientDatanodeProtocolProxy( DatanodeID datanodeid, Configuration conf, int socketTimeout, @@ -846,31 +844,115 @@ public class DFSUtil { SocketFactory factory) throws IOException { return new ClientDatanodeProtocolTranslatorPB(addr, ticket, conf, factory); } - + /** - * Build a NamenodeProtocol connection to the namenode and set up the retry - * policy + * Build a proxy connection to the namenode with NamenodeProtocol and set up + * the proxy with retry policy. 
+ * @param address - namenode address + * @param conf - configuration + * @param ugi - User group information + * @return a proxy connection with NamenodeProtocol + * @throws - IOException */ public static NamenodeProtocolTranslatorPB createNNProxyWithNamenodeProtocol( InetSocketAddress address, Configuration conf, UserGroupInformation ugi) throws IOException { - RetryPolicy timeoutPolicy = RetryPolicies.exponentialBackoffRetry(5, 200, - TimeUnit.MILLISECONDS); - Map, RetryPolicy> exceptionToPolicyMap - = new HashMap, RetryPolicy>(); - RetryPolicy methodPolicy = RetryPolicies.retryByException(timeoutPolicy, - exceptionToPolicyMap); - Map methodNameToPolicyMap = new HashMap(); - methodNameToPolicyMap.put("getBlocks", methodPolicy); - methodNameToPolicyMap.put("getAccessKeys", methodPolicy); - RPC.setProtocolEngine(conf, NamenodeProtocolPB.class, - ProtobufRpcEngine.class); - NamenodeProtocolPB proxy = RPC.getProxy(NamenodeProtocolPB.class, RPC - .getProtocolVersion(NamenodeProtocolPB.class), address, ugi, conf, - NetUtils.getDefaultSocketFactory(conf)); - NamenodeProtocolPB retryProxy = (NamenodeProtocolPB) RetryProxy.create( - NamenodeProtocolPB.class, proxy, methodNameToPolicyMap); - return new NamenodeProtocolTranslatorPB(retryProxy); + return createNNProxyWithNamenodeProtocol(address, conf, ugi, true); + } + + /** + * Build a proxy connection to the namenode with NamenodeProtocol. + * @param address - namenode address + * @param conf - configuration + * @param ugi - User group information + * @param withRetries - indicates whether to create retry proxy or not + * @return a proxy connection with NamenodeProtocol + * @throws - IOException + */ + public static NamenodeProtocolTranslatorPB createNNProxyWithNamenodeProtocol( + InetSocketAddress address, Configuration conf, UserGroupInformation ugi, + boolean withRetries) throws IOException { + NamenodeProtocolPB proxy = (NamenodeProtocolPB) createNameNodeProxy( + address, conf, ugi, NamenodeProtocolPB.class); + if (withRetries) { // create the proxy with retries + RetryPolicy timeoutPolicy = RetryPolicies.exponentialBackoffRetry(5, 200, + TimeUnit.MILLISECONDS); + Map, RetryPolicy> exceptionToPolicyMap + = new HashMap, RetryPolicy>(); + RetryPolicy methodPolicy = RetryPolicies.retryByException(timeoutPolicy, + exceptionToPolicyMap); + Map methodNameToPolicyMap + = new HashMap(); + methodNameToPolicyMap.put("getBlocks", methodPolicy); + methodNameToPolicyMap.put("getAccessKeys", methodPolicy); + proxy = (NamenodeProtocolPB) RetryProxy.create(NamenodeProtocolPB.class, + proxy, methodNameToPolicyMap); + } + return new NamenodeProtocolTranslatorPB(proxy); + } + + /** + * Build a proxy connection to the namenode with ClientProtocol. 
+ * @param address - namenode address + * @param conf - configuration + * @param ugi - User group information + * @param withRetries - indicates whether to create retry proxy or not + * @return a proxy connection with ClientProtocol + * @throws IOException + */ + public static ClientNamenodeProtocolTranslatorPB createNNProxyWithClientProtocol( + InetSocketAddress address, Configuration conf, UserGroupInformation ugi, + boolean withRetries) throws IOException { + ClientNamenodeProtocolPB proxy + = (ClientNamenodeProtocolPB) createNameNodeProxy(address, conf, ugi, + ClientNamenodeProtocolPB.class); + if (withRetries) { // create the proxy with retries + proxy = createNameNodeProxyWithRetries(proxy); + } + return new ClientNamenodeProtocolTranslatorPB(proxy); + } + + /** + * Creates the retry proxy by setting up the retry policy. + * @param proxy - non retry proxy connection + * @return a retry proxy connection + */ + public static ClientNamenodeProtocolPB createNameNodeProxyWithRetries( + ClientNamenodeProtocolPB proxy) { + RetryPolicy createPolicy = RetryPolicies + .retryUpToMaximumCountWithFixedSleep(5, + HdfsConstants.LEASE_SOFTLIMIT_PERIOD, TimeUnit.MILLISECONDS); + + Map, RetryPolicy> remoteExceptionToPolicyMap + = new HashMap, RetryPolicy>(); + remoteExceptionToPolicyMap.put(AlreadyBeingCreatedException.class, + createPolicy); + + Map, RetryPolicy> exceptionToPolicyMap + = new HashMap, RetryPolicy>(); + exceptionToPolicyMap.put(RemoteException.class, RetryPolicies + .retryByRemoteException(RetryPolicies.TRY_ONCE_THEN_FAIL, + remoteExceptionToPolicyMap)); + RetryPolicy methodPolicy = RetryPolicies.retryByException( + RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); + Map methodNameToPolicyMap + = new HashMap(); + + methodNameToPolicyMap.put("create", methodPolicy); + + ClientNamenodeProtocolPB retryProxy = (ClientNamenodeProtocolPB) RetryProxy + .create(ClientNamenodeProtocolPB.class, proxy, methodNameToPolicyMap); + return retryProxy; + } + + @SuppressWarnings("unchecked") + private static Object createNameNodeProxy(InetSocketAddress address, + Configuration conf, UserGroupInformation ugi, Class xface) + throws IOException { + RPC.setProtocolEngine(conf, xface, ProtobufRpcEngine.class); + Object proxy = RPC.getProxy(xface, RPC.getProtocolVersion(xface), address, + ugi, conf, NetUtils.getDefaultSocketFactory(conf)); + return proxy; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index dd3dc723a65..c6dc3e3a2bc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -22,9 +22,6 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.net.InetSocketAddress; import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.TimeUnit; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -37,6 +34,7 @@ import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.Options.Rename; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSUtil; 
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.LocatedBlock; @@ -55,28 +53,22 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants.UpgradeAction; import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NotReplicatedYetException; import org.apache.hadoop.hdfs.server.namenode.SafeModeException; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.retry.RetryPolicies; -import org.apache.hadoop.io.retry.RetryPolicy; -import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.ipc.ProtocolTranslator; import org.apache.hadoop.ipc.ProtobufHelper; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.AbandonBlockRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.AddBlockRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.AppendRequestProto; @@ -143,49 +135,29 @@ public class ClientNamenodeProtocolTranslatorPB implements ProtocolMetaInterface, ClientProtocol, Closeable, ProtocolTranslator { final private ClientNamenodeProtocolPB rpcProxy; - private static ClientNamenodeProtocolPB createNamenode( + public ClientNamenodeProtocolTranslatorPB(InetSocketAddress nameNodeAddr, + Configuration conf, UserGroupInformation ugi) throws IOException { + this(getNamenodeRetryProxy(nameNodeAddr, conf, ugi)); + } + + public ClientNamenodeProtocolTranslatorPB(ClientNamenodeProtocolPB proxy) + throws IOException { + rpcProxy = proxy; + } + + private static ClientNamenodeProtocolPB getNamenodeRetryProxy( InetSocketAddress nameNodeAddr, Configuration conf, UserGroupInformation ugi) throws IOException { RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class, ProtobufRpcEngine.class); - return RPC.getProxy(ClientNamenodeProtocolPB.class, - RPC.getProtocolVersion(ClientNamenodeProtocolPB.class), nameNodeAddr, ugi, conf, - NetUtils.getSocketFactory(conf, ClientNamenodeProtocolPB.class)); + ClientNamenodeProtocolPB proxy = RPC.getProxy( + ClientNamenodeProtocolPB.class, RPC + .getProtocolVersion(ClientNamenodeProtocolPB.class), nameNodeAddr, + ugi, conf, NetUtils.getSocketFactory(conf, + ClientNamenodeProtocolPB.class)); + return DFSUtil.createNameNodeProxyWithRetries(proxy); } - - /** Create a {@link NameNode} proxy */ - static ClientNamenodeProtocolPB createNamenodeWithRetry( - ClientNamenodeProtocolPB rpcNamenode) { - RetryPolicy createPolicy = RetryPolicies - .retryUpToMaximumCountWithFixedSleep(5, - HdfsConstants.LEASE_SOFTLIMIT_PERIOD, TimeUnit.MILLISECONDS); - - Map, RetryPolicy> 
remoteExceptionToPolicyMap - = new HashMap, RetryPolicy>(); - remoteExceptionToPolicyMap.put(AlreadyBeingCreatedException.class, - createPolicy); - - Map, RetryPolicy> exceptionToPolicyMap = - new HashMap, RetryPolicy>(); - exceptionToPolicyMap.put(RemoteException.class, RetryPolicies - .retryByRemoteException(RetryPolicies.TRY_ONCE_THEN_FAIL, - remoteExceptionToPolicyMap)); - RetryPolicy methodPolicy = RetryPolicies.retryByException( - RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); - Map methodNameToPolicyMap = new HashMap(); - - methodNameToPolicyMap.put("create", methodPolicy); - - return (ClientNamenodeProtocolPB) RetryProxy.create( - ClientNamenodeProtocolPB.class, rpcNamenode, methodNameToPolicyMap); - } - - public ClientNamenodeProtocolTranslatorPB(InetSocketAddress nameNodeAddr, - Configuration conf, UserGroupInformation ugi) throws IOException { - - rpcProxy = createNamenodeWithRetry(createNamenode(nameNodeAddr, conf, ugi)); - } - + public void close() { RPC.stopProxy(rpcProxy); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index d2d0c00b557..6f6f88f9e8e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -27,7 +27,6 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -118,11 +117,10 @@ public class ConfiguredFailoverProxyProvider implements try { if (NamenodeProtocol.class.equals(xface)) { current.namenode = DFSUtil.createNNProxyWithNamenodeProtocol( - current.address, conf, ugi); + current.address, conf, ugi, false); } else if (ClientProtocol.class.equals(xface)) { - // TODO(HA): This will create a NN proxy with an underlying retry - // proxy. We don't want this. - current.namenode = DFSUtil.createNamenode(current.address, conf, ugi); + current.namenode = DFSUtil.createNNProxyWithClientProtocol( + current.address, conf, ugi, false); } else { throw new IllegalStateException( "Upsupported protocol found when creating the proxy conection to NameNode. " From 153e0cc37aacf04fec3de51ebc1690e50f16b614 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 16 Feb 2012 22:45:40 +0000 Subject: [PATCH 151/177] HDFS-2955. IllegalStateException during standby startup in getCurSegmentTxId. Contributed by Hari Mankude. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1245230 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../apache/hadoop/hdfs/server/namenode/FSNamesystem.java | 8 ++++++-- .../hadoop/hdfs/server/namenode/ha/TestHASafeMode.java | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index c87faa70d34..4a8b185d9ba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -210,3 +210,5 @@ HDFS-2934. 
Allow configs to be scoped to all NNs in the nameservice. (todd) HDFS-2935. Shared edits dir property should be suffixed with nameservice and namenodeID (todd) HDFS-2928. ConfiguredFailoverProxyProvider should not create a NameNode proxy with an underlying retry proxy. (Uma Maheswara Rao G via atm) + +HDFS-2955. IllegalStateException during standby startup in getCurSegmentTxId. (Hari Mankude via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 128222bce0a..8edc4bc88a0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -3168,8 +3168,12 @@ public class FSNamesystem implements Namesystem, FSClusterStats, @Metric({"TransactionsSinceLastLogRoll", "Number of transactions since last edit log roll"}) public long getTransactionsSinceLastLogRoll() { - return (getEditLog().getLastWrittenTxId() - - getEditLog().getCurSegmentTxId()) + 1; + if (isInStandbyState()) { + return 0; + } else { + return getEditLog().getLastWrittenTxId() - + getEditLog().getCurSegmentTxId() + 1; + } } @Metric({"LastWrittenTransactionId", "Transaction ID written to the edit log"}) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index d6babb788a7..e07338f8c82 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -116,6 +116,7 @@ public class TestHASafeMode { cluster.restartNameNode(1); nn1 = cluster.getNameNode(1); + assertEquals(nn1.getNamesystem().getTransactionsSinceLastLogRoll(), 0L); } /** From 1a03127385d1613b31567630559e34cd83e08376 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Fri, 17 Feb 2012 06:40:40 +0000 Subject: [PATCH 152/177] HDFS-2937. TestDFSHAAdmin needs tests with MiniDFSCluster. Contributed by Brandon Li. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1245326 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/tools/TestDFSHAAdminMiniCluster.java | 143 ++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 4a8b185d9ba..c3780a7df6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -212,3 +212,5 @@ HDFS-2935. Shared edits dir property should be suffixed with nameservice and nam HDFS-2928. ConfiguredFailoverProxyProvider should not create a NameNode proxy with an underlying retry proxy. (Uma Maheswara Rao G via atm) HDFS-2955. IllegalStateException during standby startup in getCurSegmentTxId. (Hari Mankude via atm) + +HDFS-2937. TestDFSHAAdmin needs tests with MiniDFSCluster. 
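The HDFS-2955 change above short-circuits the TransactionsSinceLastLogRoll metric while the NameNode is in standby state: a standby tails finalized edits and has no open edit segment of its own, so asking the edit log for the current segment's first txid is what raised the IllegalStateException named in the JIRA title. A standalone illustration of the guarded arithmetic follows; the class and method names are invented for the example and are not the real FSNamesystem members.

  // If the open segment started at txid 101 and the last written txid is 150,
  // the active NN reports 150 - 101 + 1 = 50; a standby reports 0 because it
  // has no open segment to measure against.
  public class TxSinceRollExample {
    static long txSinceLastRoll(boolean standby, long lastWrittenTxId,
                                long curSegmentTxId) {
      if (standby) {
        return 0;
      }
      return lastWrittenTxId - curSegmentTxId + 1;
    }

    public static void main(String[] args) {
      System.out.println(txSinceLastRoll(false, 150L, 101L)); // 50
      System.out.println(txSinceLastRoll(true, 150L, 101L));  // 0
    }
  }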
(Brandon Li via suresh) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java new file mode 100644 index 00000000000..0302c8e9036 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java @@ -0,0 +1,143 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.tools; + +import static org.junit.Assert.*; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.Log; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.ha.NodeFencer; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import com.google.common.base.Charsets; +import com.google.common.base.Joiner; + +/** + * Tests for HAAdmin command with {@link MiniDFSCluster} set up in HA mode. 
+ */ +public class TestDFSHAAdminMiniCluster { + private static final Log LOG = LogFactory.getLog(TestDFSHAAdminMiniCluster.class); + + private MiniDFSCluster cluster; + private Configuration conf; + private DFSHAAdmin tool; + + @Before + public void setup() throws IOException { + conf = new Configuration(); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0) + .build(); + tool = new DFSHAAdmin(); + tool.setConf(conf); + cluster.waitActive(); + } + + @After + public void shutdown() throws Exception { + cluster.shutdown(); + } + + @Test + public void testGetServiceState() throws Exception { + assertEquals(0, runTool("-getServiceState", "nn1")); + assertEquals(0, runTool("-getServiceState", "nn2")); + } + + @Test + public void testStateTransition() throws Exception { + NameNode nnode1 = cluster.getNameNode(0); + assertTrue(nnode1.isStandbyState()); + assertEquals(0, runTool("-transitionToActive", "nn1")); + assertFalse(nnode1.isStandbyState()); + assertEquals(0, runTool("-transitionToStandby", "nn1")); + assertTrue(nnode1.isStandbyState()); + + NameNode nnode2 = cluster.getNameNode(1); + assertTrue(nnode2.isStandbyState()); + assertEquals(0, runTool("-transitionToActive", "nn2")); + assertFalse(nnode2.isStandbyState()); + assertEquals(0, runTool("-transitionToStandby", "nn2")); + assertTrue(nnode2.isStandbyState()); + } + + /** + * Test failover with various options + */ + @Test + public void testFencer() throws Exception { + // Test failover with no fencer + assertEquals(-1, runTool("-failover", "nn1", "nn2")); + + // Test failover with fencer + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(0, runTool("-transitionToActive", "nn1")); + assertEquals(0, runTool("-failover", "nn1", "nn2")); + + // Test failover with fencer and nameservice + assertEquals(0, runTool("-ns", "minidfs-ns", "-failover", "nn2", "nn1")); + + // Test failover with fencer and forcefence option + assertEquals(0, runTool("-failover", "nn1", "nn2", "--forcefence")); + + // Test failover with forceactive option + assertEquals(0, runTool("-failover", "nn2", "nn1", "--forceactive")); + + // Test failover with not fencer and forcefence option + conf.unset(NodeFencer.CONF_METHODS_KEY); + tool.setConf(conf); + assertEquals(-1, runTool("-failover", "nn1", "nn2", "--forcefence")); + + // Test failover with bad fencer and forcefence option + conf.set(NodeFencer.CONF_METHODS_KEY, "foobar!"); + tool.setConf(conf); + assertEquals(-1, runTool("-failover", "nn1", "nn2", "--forcefence")); + + // Test failover with force fence listed before the other arguments + conf.set(NodeFencer.CONF_METHODS_KEY, "shell(true)"); + tool.setConf(conf); + assertEquals(0, runTool("-failover", "--forcefence", "nn1", "nn2")); + } + + @Test + public void testCheckHealth() throws Exception { + assertEquals(0, runTool("-checkHealth", "nn1")); + assertEquals(0, runTool("-checkHealth", "nn2")); + } + + private int runTool(String ... args) throws Exception { + ByteArrayOutputStream errOutBytes = new ByteArrayOutputStream(); + errOutBytes.reset(); + LOG.info("Running: DFSHAAdmin " + Joiner.on(" ").join(args)); + int ret = tool.run(args); + String errOutput = new String(errOutBytes.toByteArray(), Charsets.UTF_8); + LOG.info("Output:\n" + errOutput); + return ret; + } +} From 7933dc583838fa7273cf55c03400a591a41d23db Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Fri, 17 Feb 2012 07:37:43 +0000 Subject: [PATCH 153/177] HDFS-2586. 
Add protobuf service and implementation for HAServiceProtocol. Contributed by Suresh Srinivas. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1245338 13f79535-47bb-0310-9956-ffa450edef68 --- .../java/org/apache/hadoop/ha/HAAdmin.java | 5 +- ...ServiceProtocolClientSideTranslatorPB.java | 148 ++++++++++++++++ .../ha/protocolPB/HAServiceProtocolPB.java | 39 +++++ ...ServiceProtocolServerSideTranslatorPB.java | 158 ++++++++++++++++++ .../src/main/proto/HAServiceProtocol.proto | 128 ++++++++++++++ .../hadoop/ha/TestFailoverController.java | 4 +- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../server/namenode/NameNodeRpcServer.java | 16 +- .../apache/hadoop/hdfs/MiniDFSCluster.java | 4 +- 9 files changed, 493 insertions(+), 11 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolPB.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolServerSideTranslatorPB.java create mode 100644 hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index ccfa11f43dd..dedbebb58b5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -31,6 +31,7 @@ import org.apache.commons.cli.ParseException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.ha.protocolPB.HAServiceProtocolClientSideTranslatorPB; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.Tool; @@ -239,9 +240,7 @@ public abstract class HAAdmin extends Configured implements Tool { throws IOException { String serviceAddr = getServiceAddr(serviceId); InetSocketAddress addr = NetUtils.createSocketAddr(serviceAddr); - return (HAServiceProtocol)RPC.getProxy( - HAServiceProtocol.class, HAServiceProtocol.versionID, - addr, getConf()); + return new HAServiceProtocolClientSideTranslatorPB(addr, getConf()); } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java new file mode 100644 index 00000000000..4a67aa61410 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.ha.protocolPB; + +import java.io.Closeable; +import java.io.IOException; +import java.net.InetSocketAddress; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.HAServiceProtocol; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStateRequestProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceStateProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.MonitorHealthRequestProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.ReadyToBecomeActiveRequestProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveRequestProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToStandbyRequestProto; +import org.apache.hadoop.ipc.ProtobufHelper; +import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.ProtocolSignature; +import org.apache.hadoop.ipc.RPC; + +import com.google.protobuf.RpcController; +import com.google.protobuf.ServiceException; + +/** + * This class is the client side translator to translate the requests made on + * {@link HAServiceProtocol} interfaces to the RPC server implementing + * {@link HAServiceProtocolPB}. + */ +@InterfaceAudience.Private +@InterfaceStability.Stable +public class HAServiceProtocolClientSideTranslatorPB implements + HAServiceProtocol, Closeable { + /** RpcController is not used and hence is set to null */ + private final static RpcController NULL_CONTROLLER = null; + private final static MonitorHealthRequestProto MONITOR_HEALTH_REQ = + MonitorHealthRequestProto.newBuilder().build(); + private final static TransitionToActiveRequestProto TRANSITION_TO_ACTIVE_REQ = + TransitionToActiveRequestProto.newBuilder().build(); + private final static TransitionToStandbyRequestProto TRANSITION_TO_STANDBY_REQ = + TransitionToStandbyRequestProto.newBuilder().build(); + private final static GetServiceStateRequestProto GET_SERVICE_STATE_REQ = + GetServiceStateRequestProto.newBuilder().build(); + private final static ReadyToBecomeActiveRequestProto ACTIVE_READY_REQ = + ReadyToBecomeActiveRequestProto.newBuilder().build(); + + private final HAServiceProtocolPB rpcProxy; + + public HAServiceProtocolClientSideTranslatorPB(InetSocketAddress addr, + Configuration conf) throws IOException { + RPC.setProtocolEngine(conf, HAServiceProtocolPB.class, + ProtobufRpcEngine.class); + rpcProxy = RPC.getProxy(HAServiceProtocolPB.class, + RPC.getProtocolVersion(HAServiceProtocolPB.class), addr, conf); + } + + @Override + public long getProtocolVersion(String protocol, long clientVersion) + throws IOException { + return rpcProxy.getProtocolVersion(protocol, clientVersion); + } + + @Override + public ProtocolSignature getProtocolSignature(String protocol, + long clientVersion, int clientMethodsHash) throws IOException { + return rpcProxy.getProtocolSignature(protocol, clientVersion, + clientMethodsHash); + } + + @Override + public void monitorHealth() throws IOException { + try 
{ + rpcProxy.monitorHealth(NULL_CONTROLLER, MONITOR_HEALTH_REQ); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } + + @Override + public void transitionToActive() throws IOException { + try { + rpcProxy.transitionToActive(NULL_CONTROLLER, TRANSITION_TO_ACTIVE_REQ); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } + + @Override + public void transitionToStandby() throws IOException { + try { + rpcProxy.transitionToStandby(NULL_CONTROLLER, TRANSITION_TO_STANDBY_REQ); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } + + @Override + public HAServiceState getServiceState() throws IOException { + HAServiceStateProto state; + try { + state = rpcProxy.getServiceState(NULL_CONTROLLER, + GET_SERVICE_STATE_REQ).getState(); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + switch(state) { + case ACTIVE: + return HAServiceState.ACTIVE; + case STANDBY: + return HAServiceState.STANDBY; + case INITIALIZING: + default: + return HAServiceState.INITIALIZING; + } + } + + @Override + public void close() { + RPC.stopProxy(rpcProxy); + } + + @Override + public boolean readyToBecomeActive() throws IOException { + try { + return rpcProxy.readyToBecomeActive(NULL_CONTROLLER, ACTIVE_READY_REQ) + .getReadyToBecomeActive(); + } catch (ServiceException e) { + throw ProtobufHelper.getRemoteException(e); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolPB.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolPB.java new file mode 100644 index 00000000000..57eefce54a4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolPB.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ha.protocolPB; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceProtocolService; +import org.apache.hadoop.ipc.ProtocolInfo; +import org.apache.hadoop.ipc.VersionedProtocol; +import org.apache.hadoop.security.KerberosInfo; + +@KerberosInfo( + serverPrincipal=CommonConfigurationKeys.HADOOP_SECURITY_SERVICE_USER_NAME_KEY) +@ProtocolInfo(protocolName = "org.apache.hadoop.ha.HAServiceProtocol", + protocolVersion = 1) +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface HAServiceProtocolPB extends + HAServiceProtocolService.BlockingInterface, VersionedProtocol { + /** + * If any methods need annotation, it can be added here + */ +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolServerSideTranslatorPB.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolServerSideTranslatorPB.java new file mode 100644 index 00000000000..3655a4e7121 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolServerSideTranslatorPB.java @@ -0,0 +1,158 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.ha.protocolPB; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.ha.HAServiceProtocol; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStateRequestProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.GetServiceStateResponseProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceStateProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.MonitorHealthRequestProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.MonitorHealthResponseProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.ReadyToBecomeActiveRequestProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.ReadyToBecomeActiveResponseProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveRequestProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToActiveResponseProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToStandbyRequestProto; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.TransitionToStandbyResponseProto; +import org.apache.hadoop.ipc.ProtocolSignature; +import org.apache.hadoop.ipc.RPC; + +import com.google.protobuf.RpcController; +import com.google.protobuf.ServiceException; + +/** + * This class is used on the server side. Calls come across the wire for the + * for protocol {@link HAServiceProtocolPB}. + * This class translates the PB data types + * to the native data types used inside the NN as specified in the generic + * ClientProtocol. + */ +@InterfaceAudience.Private +@InterfaceStability.Stable +public class HAServiceProtocolServerSideTranslatorPB implements + HAServiceProtocolPB { + private final HAServiceProtocol server; + private static final MonitorHealthResponseProto MONITOR_HEALTH_RESP = + MonitorHealthResponseProto.newBuilder().build(); + private static final TransitionToActiveResponseProto TRANSITION_TO_ACTIVE_RESP = + TransitionToActiveResponseProto.newBuilder().build(); + private static final TransitionToStandbyResponseProto TRANSITION_TO_STANDBY_RESP = + TransitionToStandbyResponseProto.newBuilder().build(); + + public HAServiceProtocolServerSideTranslatorPB(HAServiceProtocol server) { + this.server = server; + } + + @Override + public MonitorHealthResponseProto monitorHealth(RpcController controller, + MonitorHealthRequestProto request) throws ServiceException { + try { + server.monitorHealth(); + return MONITOR_HEALTH_RESP; + } catch(IOException e) { + throw new ServiceException(e); + } + } + + @Override + public TransitionToActiveResponseProto transitionToActive( + RpcController controller, TransitionToActiveRequestProto request) + throws ServiceException { + try { + server.transitionToActive(); + return TRANSITION_TO_ACTIVE_RESP; + } catch(IOException e) { + throw new ServiceException(e); + } + } + + @Override + public TransitionToStandbyResponseProto transitionToStandby( + RpcController controller, TransitionToStandbyRequestProto request) + throws ServiceException { + try { + server.transitionToStandby(); + return TRANSITION_TO_STANDBY_RESP; + } catch(IOException e) { + throw new ServiceException(e); + } + } + + @Override + public GetServiceStateResponseProto getServiceState(RpcController controller, + GetServiceStateRequestProto request) throws ServiceException { + HAServiceState s; + try { + s = 
server.getServiceState(); + } catch(IOException e) { + throw new ServiceException(e); + } + + HAServiceStateProto ret; + switch (s) { + case ACTIVE: + ret = HAServiceStateProto.ACTIVE; + break; + case STANDBY: + ret = HAServiceStateProto.STANDBY; + break; + case INITIALIZING: + default: + ret = HAServiceStateProto.INITIALIZING; + break; + } + return GetServiceStateResponseProto.newBuilder().setState(ret).build(); + } + + @Override + public long getProtocolVersion(String protocol, long clientVersion) + throws IOException { + return RPC.getProtocolVersion(HAServiceProtocolPB.class); + } + + @Override + public ProtocolSignature getProtocolSignature(String protocol, + long clientVersion, int clientMethodsHash) throws IOException { + if (!protocol.equals(RPC.getProtocolName(HAServiceProtocolPB.class))) { + throw new IOException("Serverside implements " + + RPC.getProtocolName(HAServiceProtocolPB.class) + + ". The following requested protocol is unknown: " + protocol); + } + + return ProtocolSignature.getProtocolSignature(clientMethodsHash, + RPC.getProtocolVersion(HAServiceProtocolPB.class), + HAServiceProtocolPB.class); + } + + @Override + public ReadyToBecomeActiveResponseProto readyToBecomeActive( + RpcController controller, ReadyToBecomeActiveRequestProto request) + throws ServiceException { + try { + return ReadyToBecomeActiveResponseProto.newBuilder() + .setReadyToBecomeActive(server.readyToBecomeActive()).build(); + } catch (IOException e) { + throw new ServiceException(e); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto b/hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto new file mode 100644 index 00000000000..a3fd86c0401 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/proto/HAServiceProtocol.proto @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +option java_package = "org.apache.hadoop.ha.proto"; +option java_outer_classname = "HAServiceProtocolProtos"; +option java_generic_services = true; +option java_generate_equals_and_hash = true; + +enum HAServiceStateProto { + INITIALIZING = 0; + ACTIVE = 1; + STANDBY = 2; +} + +/** + * void request + */ +message MonitorHealthRequestProto { +} + +/** + * void response + */ +message MonitorHealthResponseProto { +} + +/** + * void request + */ +message TransitionToActiveRequestProto { +} + +/** + * void response + */ +message TransitionToActiveResponseProto { +} + +/** + * void request + */ +message TransitionToStandbyRequestProto { +} + +/** + * void response + */ +message TransitionToStandbyResponseProto { +} + +/** + * void request + */ +message GetServiceStateRequestProto { +} + +/** + * Returns the state of the service + */ +message GetServiceStateResponseProto { + required HAServiceStateProto state = 1; +} + +/** + * void request + */ +message ReadyToBecomeActiveRequestProto { +} + +/** + * Returns true if service is ready to become active + */ +message ReadyToBecomeActiveResponseProto { + required bool readyToBecomeActive = 1; +} + +/** + * Protocol interface provides High availability related + * primitives to monitor and failover a service. + * + * For details see o.a.h.ha.HAServiceProtocol. + */ +service HAServiceProtocolService { + /** + * Monitor the health of a service. + */ + rpc monitorHealth(MonitorHealthRequestProto) + returns(MonitorHealthResponseProto); + + /** + * Request service to tranisition to active state. + */ + rpc transitionToActive(TransitionToActiveRequestProto) + returns(TransitionToActiveResponseProto); + + /** + * Request service to transition to standby state. + */ + rpc transitionToStandby(TransitionToStandbyRequestProto) + returns(TransitionToStandbyResponseProto); + + /** + * Get the current state of the service. 
+ */ + rpc getServiceState(GetServiceStateRequestProto) + returns(GetServiceStateResponseProto); + + /** + * Check if the service is ready to become active + */ + rpc readyToBecomeActive(ReadyToBecomeActiveRequestProto) + returns(ReadyToBecomeActiveResponseProto); +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java index 39fc47ef406..1e206b4c3b8 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java @@ -26,6 +26,7 @@ import static org.mockito.Mockito.verify; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.ha.protocolPB.HAServiceProtocolClientSideTranslatorPB; import org.apache.hadoop.ha.TestNodeFencer.AlwaysSucceedFencer; import org.apache.hadoop.ha.TestNodeFencer.AlwaysFailFencer; import static org.apache.hadoop.ha.TestNodeFencer.setupFencer; @@ -285,8 +286,7 @@ public class TestFailoverController { Configuration conf = new Configuration(); // Lower the timeout so we quickly fail to connect conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 1); - return (HAServiceProtocol)RPC.getProxy( - HAServiceProtocol.class, HAServiceProtocol.versionID, addr, conf); + return new HAServiceProtocolClientSideTranslatorPB(addr, conf); } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index c3780a7df6e..48edb091173 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -214,3 +214,5 @@ HDFS-2928. ConfiguredFailoverProxyProvider should not create a NameNode proxy wi HDFS-2955. IllegalStateException during standby startup in getCurSegmentTxId. (Hari Mankude via atm) HDFS-2937. TestDFSHAAdmin needs tests with MiniDFSCluster. (Brandon Li via suresh) + +HDFS-2586. Add protobuf service and implementation for HAServiceProtocol. 
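To show how the new translator is consumed, both HAAdmin and MiniDFSCluster in this patch replace the raw RPC.getProxy(HAServiceProtocol.class, ...) call with the protobuf client-side translator. A minimal sketch of that call pattern follows; the hostname and port are placeholders, and error handling and security setup are omitted.

  import java.net.InetSocketAddress;
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.ha.protocolPB.HAServiceProtocolClientSideTranslatorPB;

  public class HaServiceClientSketch {
    public static void main(String[] args) throws Exception {
      // Placeholder service RPC address of one NameNode.
      InetSocketAddress addr = new InetSocketAddress("nn1.example.com", 8021);
      Configuration conf = new Configuration();
      HAServiceProtocolClientSideTranslatorPB proxy =
          new HAServiceProtocolClientSideTranslatorPB(addr, conf);
      try {
        // Each call is marshalled through the HAServiceProtocolService
        // protobuf service added by this patch.
        System.out.println("state: " + proxy.getServiceState());
        System.out.println("ready: " + proxy.readyToBecomeActive());
      } finally {
        proxy.close();
      }
    }
  }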
(suresh via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 9f85dacc624..98d09101e8b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -42,6 +42,9 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HealthCheckFailedException; import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.ha.proto.HAServiceProtocolProtos.HAServiceProtocolService; +import org.apache.hadoop.ha.protocolPB.HAServiceProtocolPB; +import org.apache.hadoop.ha.protocolPB.HAServiceProtocolServerSideTranslatorPB; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HDFSPolicyProvider; @@ -194,6 +197,11 @@ class NameNodeRpcServer implements NamenodeProtocols { new GetUserMappingsProtocolServerSideTranslatorPB(this); BlockingService getUserMappingService = GetUserMappingsProtocolService .newReflectiveBlockingService(getUserMappingXlator); + + HAServiceProtocolServerSideTranslatorPB haServiceProtocolXlator = + new HAServiceProtocolServerSideTranslatorPB(this); + BlockingService haPbService = HAServiceProtocolService + .newReflectiveBlockingService(haServiceProtocolXlator); WritableRpcEngine.ensureInitialized(); @@ -209,8 +217,8 @@ class NameNodeRpcServer implements NamenodeProtocols { dnSocketAddr.getHostName(), dnSocketAddr.getPort(), serviceHandlerCount, false, conf, namesystem.getDelegationTokenSecretManager()); - this.serviceRpcServer.addProtocol(RpcKind.RPC_WRITABLE, - HAServiceProtocol.class, this); + DFSUtil.addPBProtocol(conf, HAServiceProtocolPB.class, haPbService, + serviceRpcServer); DFSUtil.addPBProtocol(conf, NamenodeProtocolPB.class, NNPbService, serviceRpcServer); DFSUtil.addPBProtocol(conf, DatanodeProtocolPB.class, dnProtoPbService, @@ -234,8 +242,8 @@ class NameNodeRpcServer implements NamenodeProtocols { clientNNPbService, socAddr.getHostName(), socAddr.getPort(), handlerCount, false, conf, namesystem.getDelegationTokenSecretManager()); - this.clientRpcServer.addProtocol(RpcKind.RPC_WRITABLE, - HAServiceProtocol.class, this); + DFSUtil.addPBProtocol(conf, HAServiceProtocolPB.class, haPbService, + clientRpcServer); DFSUtil.addPBProtocol(conf, NamenodeProtocolPB.class, NNPbService, clientRpcServer); DFSUtil.addPBProtocol(conf, DatanodeProtocolPB.class, dnProtoPbService, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 35a05885699..c324111a2fb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -49,6 +49,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceProtocolHelper; import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.ha.protocolPB.HAServiceProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf; import org.apache.hadoop.hdfs.protocol.Block; import 
org.apache.hadoop.hdfs.protocol.BlockListAsLongs; @@ -1590,8 +1591,7 @@ public class MiniDFSCluster { private HAServiceProtocol getHaServiceClient(int nnIndex) throws IOException { InetSocketAddress addr = nameNodes[nnIndex].nameNode.getServiceRpcAddress(); - return RPC.getProxy(HAServiceProtocol.class, - HAServiceProtocol.versionID, addr, conf); + return new HAServiceProtocolClientSideTranslatorPB(addr, conf); } public void transitionToActive(int nnIndex) throws IOException, From 41e56dfecee0db1975c9859017c0de1226afb4b5 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Sat, 18 Feb 2012 07:12:27 +0000 Subject: [PATCH 154/177] HDFS-2952. NN should not start with upgrade option or with a pending an unfinalized upgrade. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1245875 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hadoop/hdfs/server/namenode/FSImage.java | 4 +- .../hadoop/hdfs/server/namenode/NameNode.java | 10 +- .../apache/hadoop/hdfs/MiniDFSCluster.java | 9 ++ .../namenode/ha/TestDFSUpgradeWithHA.java | 107 ++++++++++++++++++ 5 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 48edb091173..36faf4e569c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -216,3 +216,5 @@ HDFS-2955. IllegalStateException during standby startup in getCurSegmentTxId. (H HDFS-2937. TestDFSHAAdmin needs tests with MiniDFSCluster. (Brandon Li via suresh) HDFS-2586. Add protobuf service and implementation for HAServiceProtocol. (suresh via atm) + +HDFS-2952. NN should not start with upgrade option or with a pending an unfinalized upgrade. 
(atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index 12b2016b008..adc3b46b7f5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -340,8 +340,8 @@ public class FSImage implements Closeable { File prevDir = sd.getPreviousDir(); File tmpDir = sd.getPreviousTmp(); assert curDir.exists() : "Current directory must exist."; - assert !prevDir.exists() : "prvious directory must not exist."; - assert !tmpDir.exists() : "prvious.tmp directory must not exist."; + assert !prevDir.exists() : "previous directory must not exist."; + assert !tmpDir.exists() : "previous.tmp directory must not exist."; assert !editLog.isSegmentOpen() : "Edits log must not be open."; // rename current to tmp diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 847dc040532..caedb5bae35 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -539,7 +539,7 @@ public class NameNode { if (!haEnabled) { state = ACTIVE_STATE; } else { - state = STANDBY_STATE;; + state = STANDBY_STATE; } this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); this.haContext = createHAContext(); @@ -814,6 +814,14 @@ public class NameNode { return null; } setStartupOption(conf, startOpt); + + if (HAUtil.isHAEnabled(conf, DFSUtil.getNamenodeNameServiceId(conf)) && + (startOpt == StartupOption.UPGRADE || + startOpt == StartupOption.ROLLBACK || + startOpt == StartupOption.FINALIZE)) { + throw new HadoopIllegalArgumentException("Invalid startup option. " + + "Cannot perform DFS upgrade with HA enabled."); + } switch (startOpt) { case FORMAT: diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index c324111a2fb..4511095cb45 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -1252,6 +1252,15 @@ public class MiniDFSCluster { } } + /** + * Restart all namenodes. + */ + public synchronized void restartNameNodes() throws IOException { + for (int i = 0; i < nameNodes.length; i++) { + restartNameNode(i); + } + } + /** * Restart the namenode. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java new file mode 100644 index 00000000000..ccc46a204b3 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDFSUpgradeWithHA.java @@ -0,0 +1,107 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.common.Storage; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Test; + +import com.google.common.collect.Lists; + +/** + * Tests for upgrading with HA enabled. + */ +public class TestDFSUpgradeWithHA { + + private static final Log LOG = LogFactory.getLog(TestDFSUpgradeWithHA.class); + + /** + * Make sure that an HA NN refuses to start if given an upgrade-related + * startup option. + */ + @Test + public void testStartingWithUpgradeOptionsFails() throws IOException { + for (StartupOption startOpt : Lists.newArrayList(new StartupOption[] { + StartupOption.UPGRADE, StartupOption.FINALIZE, + StartupOption.ROLLBACK })) { + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(new Configuration()) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .startupOption(startOpt) + .numDataNodes(0) + .build(); + fail("Should not have been able to start an HA NN in upgrade mode"); + } catch (IllegalArgumentException iae) { + GenericTestUtils.assertExceptionContains( + "Cannot perform DFS upgrade with HA enabled.", iae); + LOG.info("Got expected exception", iae); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } + } + + /** + * Make sure that an HA NN won't start if a previous upgrade was in progress. + */ + @Test + public void testStartingWithUpgradeInProgressFails() throws Exception { + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(new Configuration()) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + + // Simulate an upgrade having started. + for (int i = 0; i < 2; i++) { + for (URI uri : cluster.getNameDirs(i)) { + File prevTmp = new File(new File(uri), Storage.STORAGE_TMP_PREVIOUS); + LOG.info("creating previous tmp dir: " + prevTmp); + assertTrue(prevTmp.mkdirs()); + } + } + + cluster.restartNameNodes(); + fail("Should not have been able to start an HA NN with an in-progress upgrade"); + } catch (IOException ioe) { + GenericTestUtils.assertExceptionContains( + "Cannot start an HA namenode with name dirs that need recovery.", + ioe); + LOG.info("Got expected exception", ioe); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } +} From d03dac743206046c2db7b73d09e177310e217dcb Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Mon, 20 Feb 2012 04:12:01 +0000 Subject: [PATCH 155/177] HDFS-2974. MiniDFSCluster does not delete standby NN name dirs during format. 
Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1291126 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../java/org/apache/hadoop/hdfs/MiniDFSCluster.java | 13 +++++++++++-- .../hadoop/hdfs/server/namenode/TestCheckpoint.java | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 36faf4e569c..4d96857e727 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -218,3 +218,5 @@ HDFS-2937. TestDFSHAAdmin needs tests with MiniDFSCluster. (Brandon Li via sures HDFS-2586. Add protobuf service and implementation for HAServiceProtocol. (suresh via atm) HDFS-2952. NN should not start with upgrade option or with a pending an unfinalized upgrade. (atm) + +HDFS-2974. MiniDFSCluster does not delete standby NN name dirs during format. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 4511095cb45..7d40ed6108a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -626,6 +626,15 @@ public class MiniDFSCluster { for (NNConf nn : nameservice.getNNs()) { initNameNodeConf(conf, nsId, nn.getNnId(), manageNameDfsDirs, nnCounterForFormat); + Collection namespaceDirs = FSNamesystem.getNamespaceDirs(conf); + if (format) { + for (URI nameDirUri : namespaceDirs) { + File nameDir = new File(nameDirUri); + if (nameDir.exists() && !FileUtil.fullyDelete(nameDir)) { + throw new IOException("Could not fully delete " + nameDir); + } + } + } boolean formatThisOne = format; if (format && i++ > 0) { @@ -635,14 +644,14 @@ public class MiniDFSCluster { // from the first one. formatThisOne = false; assert (null != prevNNDirs); - copyNameDirs(prevNNDirs, FSNamesystem.getNamespaceDirs(conf), conf); + copyNameDirs(prevNNDirs, namespaceDirs, conf); } nnCounterForFormat++; if (formatThisOne) { DFSTestUtil.formatNameNode(conf); } - prevNNDirs = FSNamesystem.getNamespaceDirs(conf); + prevNNDirs = namespaceDirs; } // Start all Namenodes diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index c1277299899..b298fd48b0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -655,6 +655,7 @@ public class TestCheckpoint extends TestCase { sdToLock.lock(); try { MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .format(false) .manageNameDfsDirs(false) .numDataNodes(0) .build(); From c78f6aa299f5fd271e1a83ee8e700cef230b8048 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 22 Feb 2012 14:53:21 +0000 Subject: [PATCH 156/177] Merge trunk into HA branch. 
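The HDFS-2974 fix matters because, per the MiniDFSCluster diff above, only the first NameNode of an HA nameservice is actually formatted; subsequent NNs copy the first NN's name dirs, so stale standby directories left over from a previous run now have to be deleted explicitly before formatting. A sketch of the two-NN mini-cluster pattern these tests rely on (the test path is illustrative; everything else mirrors calls that appear in the patches):

  // Placed in the ha test package so HATestUtil is definitely visible.
  package org.apache.hadoop.hdfs.server.namenode.ha;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hdfs.MiniDFSCluster;
  import org.apache.hadoop.hdfs.MiniDFSNNTopology;

  public class HaClusterFormatSketch {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
          .nnTopology(MiniDFSNNTopology.simpleHATopology()) // nn1 + nn2
          .numDataNodes(1)
          .build(); // format defaults to true: name dirs are wiped, nn1 is
                    // formatted, and nn2's dirs are copied from nn1's
      try {
        cluster.waitActive();
        cluster.transitionToActive(0); // both NNs come up in standby
        FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
        fs.mkdirs(new Path("/ha-format-sketch")); // illustrative path
      } finally {
        cluster.shutdown();
      }
    }
  }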
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1292326 13f79535-47bb-0310-9956-ffa450edef68 From c14912785d22734d735b5c4f8638b57dff009a97 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 22 Feb 2012 20:31:52 +0000 Subject: [PATCH 157/177] HDFS-2929. Stress test and fixes for block synchronization. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1292494 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hadoop/hdfs/server/datanode/DataNode.java | 8 +- .../hdfs/server/namenode/FSNamesystem.java | 9 +- .../namenode/ha/HAStressTestHarness.java | 150 +++++++++ .../ha/TestDNFencingWithReplication.java | 75 +---- .../namenode/ha/TestPipelinesFailover.java | 295 +++++++++++++++++- .../apache/hadoop/test/GenericTestUtils.java | 41 ++- 7 files changed, 493 insertions(+), 87 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HAStressTestHarness.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 4d96857e727..4893c54829f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -220,3 +220,5 @@ HDFS-2586. Add protobuf service and implementation for HAServiceProtocol. (sures HDFS-2952. NN should not start with upgrade option or with a pending an unfinalized upgrade. (atm) HDFS-2974. MiniDFSCluster does not delete standby NN name dirs during format. (atm) + +HDFS-2929. Stress test and fixes for block synchronization (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 76371918e51..b13041b3ea8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -1804,6 +1804,13 @@ public class DataNode extends Configured long newLength) throws IOException { ReplicaInfo r = data.updateReplicaUnderRecovery(oldBlock, recoveryId, newLength); + // Notify the namenode of the updated block info. This is important + // for HA, since otherwise the standby node may lose track of the + // block locations until the next block report. + ExtendedBlock newBlock = new ExtendedBlock(oldBlock); + newBlock.setGenerationStamp(recoveryId); + newBlock.setNumBytes(newLength); + notifyNamenodeReceivedBlock(newBlock, ""); return new ExtendedBlock(oldBlock.getBlockPoolId(), r); } @@ -1930,7 +1937,6 @@ public class DataNode extends Configured // or their replicas have 0 length. // The block can be deleted. if (syncList.isEmpty()) { - // TODO: how does this work in HA?? 
nn.commitBlockSynchronization(block, recoveryId, 0, true, true, DatanodeID.EMPTY_ARRAY); return; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 8edc4bc88a0..823ce8bce93 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -2826,12 +2826,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeLock(); try { checkOperation(OperationCategory.WRITE); - if (haContext.getState().equals(NameNode.STANDBY_STATE)) { - // TODO(HA) we'll never get here, since we check for WRITE operation above! - // Need to implement tests, etc, for this - block recovery spanning - // failover. - } - + // If a DN tries to commit to the standby, the recovery will + // fail, and the next retry will succeed on the new NN. + if (isInSafeMode()) { throw new SafeModeException( "Cannot commitBlockSynchronization while in safe mode", diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HAStressTestHarness.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HAStressTestHarness.java new file mode 100644 index 00000000000..39667eddf17 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HAStressTestHarness.java @@ -0,0 +1,150 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.IOException; +import java.net.URISyntaxException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; +import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; + +/** + * Utility class to start an HA cluster, and then start threads + * to periodically fail back and forth, accelerate block deletion + * processing, etc. 
+ */ +public class HAStressTestHarness { + Configuration conf; + private MiniDFSCluster cluster; + static final int BLOCK_SIZE = 1024; + TestContext testCtx = new TestContext(); + + public HAStressTestHarness() { + conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + // Increase max streams so that we re-replicate quickly. + conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 1000); + } + + /** + * Start and return the MiniDFSCluster. + */ + public MiniDFSCluster startCluster() throws IOException { + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(3) + .build(); + return cluster; + } + + /** + * Return a filesystem with client-failover configured for the + * cluster. + */ + public FileSystem getFailoverFs() throws IOException, URISyntaxException { + return HATestUtil.configureFailoverFs(cluster, conf); + } + + /** + * Add a thread which periodically triggers deletion reports, + * heartbeats, and NN-side block work. + * @param interval millisecond period on which to run + */ + public void addReplicationTriggerThread(final int interval) { + + testCtx.addThread(new RepeatingTestThread(testCtx) { + + @Override + public void doAnAction() throws Exception { + for (DataNode dn : cluster.getDataNodes()) { + DataNodeAdapter.triggerDeletionReport(dn); + DataNodeAdapter.triggerHeartbeat(dn); + } + for (int i = 0; i < 2; i++) { + NameNode nn = cluster.getNameNode(i); + BlockManagerTestUtil.computeAllPendingWork( + nn.getNamesystem().getBlockManager()); + } + Thread.sleep(interval); + } + }); + } + + /** + * Add a thread which periodically triggers failover back and forth between + * the two namenodes. + */ + public void addFailoverThread(final int msBetweenFailovers) { + testCtx.addThread(new RepeatingTestThread(testCtx) { + + @Override + public void doAnAction() throws Exception { + System.err.println("==============================\n" + + "Failing over from 0->1\n" + + "=================================="); + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + Thread.sleep(msBetweenFailovers); + System.err.println("==============================\n" + + "Failing over from 1->0\n" + + "=================================="); + + cluster.transitionToStandby(1); + cluster.transitionToActive(0); + Thread.sleep(msBetweenFailovers); + } + }); + } + + /** + * Start all of the threads which have been added. + */ + public void startThreads() { + this.testCtx.startThreads(); + } + + /** + * Stop threads, propagating any exceptions that might have been thrown. + */ + public void stopThreads() throws Exception { + this.testCtx.stop(); + } + + /** + * Shutdown the minicluster, as well as any of the running threads. 
+ */ + public void shutdown() throws Exception { + this.testCtx.stop(); + if (cluster != null) { + this.cluster.shutdown(); + cluster = null; + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java index 44bc01d1cdc..95d5eb941ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java @@ -22,19 +22,13 @@ import java.util.concurrent.TimeoutException; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; -import org.apache.hadoop.hdfs.server.datanode.DataNode; -import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; @@ -111,28 +105,16 @@ public class TestDNFencingWithReplication { @Test public void testFencingStress() throws Exception { - Configuration conf = new Configuration(); - conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); - conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000); - conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); - conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); - // Increase max streams so that we re-replicate quickly. - conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 1000); + HAStressTestHarness harness = new HAStressTestHarness(); + harness.conf.setInt( + DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000); - - final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) - .nnTopology(MiniDFSNNTopology.simpleHATopology()) - .numDataNodes(3) - .build(); + final MiniDFSCluster cluster = harness.startCluster(); try { cluster.waitActive(); cluster.transitionToActive(0); - final NameNode nn1 = cluster.getNameNode(0); - final NameNode nn2 = cluster.getNameNode(1); - - FileSystem fs = HATestUtil.configureFailoverFs( - cluster, conf); + FileSystem fs = harness.getFailoverFs(); TestContext togglers = new TestContext(); for (int i = 0; i < NUM_THREADS; i++) { Path p = new Path("/test-" + i); @@ -143,51 +125,14 @@ public class TestDNFencingWithReplication { // Start a separate thread which will make sure that replication // happens quickly by triggering deletion reports and replication // work calculation frequently. 
- TestContext triggerCtx = new TestContext(); - triggerCtx.addThread(new RepeatingTestThread(triggerCtx) { - - @Override - public void doAnAction() throws Exception { - for (DataNode dn : cluster.getDataNodes()) { - DataNodeAdapter.triggerDeletionReport(dn); - DataNodeAdapter.triggerHeartbeat(dn); - } - for (int i = 0; i < 2; i++) { - NameNode nn = cluster.getNameNode(i); - BlockManagerTestUtil.computeAllPendingWork( - nn.getNamesystem().getBlockManager()); - } - Thread.sleep(500); - } - }); - - triggerCtx.addThread(new RepeatingTestThread(triggerCtx) { - - @Override - public void doAnAction() throws Exception { - System.err.println("==============================\n" + - "Failing over from 0->1\n" + - "=================================="); - cluster.transitionToStandby(0); - cluster.transitionToActive(1); - - Thread.sleep(5000); - System.err.println("==============================\n" + - "Failing over from 1->0\n" + - "=================================="); - - cluster.transitionToStandby(1); - cluster.transitionToActive(0); - Thread.sleep(5000); - } - }); - - triggerCtx.startThreads(); + harness.addReplicationTriggerThread(500); + harness.addFailoverThread(5000); + harness.startThreads(); togglers.startThreads(); togglers.waitFor(RUNTIME); togglers.stop(); - triggerCtx.stop(); + harness.stopThreads(); // CHeck that the files can be read without throwing for (int i = 0; i < NUM_THREADS; i++) { @@ -196,7 +141,7 @@ public class TestDNFencingWithReplication { } } finally { System.err.println("===========================\n\n\n\n"); - cluster.shutdown(); + harness.shutdown(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java index ce7347cdf06..465987c6cb1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java @@ -18,9 +18,10 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import static org.junit.Assert.*; -import static org.junit.Assert.assertTrue; +import java.io.IOException; import java.security.PrivilegedExceptionAction; +import java.util.concurrent.TimeoutException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -31,19 +32,35 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.AppendTestUtil; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.hdfs.TestDFSClientFailover; +import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; +import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import org.apache.hadoop.hdfs.server.datanode.DataNode; +import 
org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; +import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; +import org.apache.hadoop.test.MultithreadedTestUtil.TestContext; + import org.apache.log4j.Level; -import org.junit.Ignore; + import org.junit.Test; +import org.mockito.Mockito; + +import com.google.common.base.Supplier; /** * Test cases regarding pipeline recovery during NN failover. @@ -64,6 +81,9 @@ public class TestPipelinesFailover { new Path("/test-file"); private static final int BLOCK_SIZE = 4096; private static final int BLOCK_AND_A_HALF = BLOCK_SIZE * 3 / 2; + + private static final int STRESS_NUM_THREADS = 25; + private static final int STRESS_RUNTIME = 40000; /** * Tests continuing a write pipeline over a failover. @@ -216,22 +236,271 @@ public class TestPipelinesFailover { cluster.transitionToActive(1); assertTrue(fs.exists(TEST_PATH)); - - FileSystem fsOtherUser = UserGroupInformation.createUserForTesting( - "otheruser", new String[] { "othergroup"}) - .doAs(new PrivilegedExceptionAction() { - @Override - public FileSystem run() throws Exception { - return HATestUtil.configureFailoverFs(cluster, conf); - } - }); - ((DistributedFileSystem)fsOtherUser).recoverLease(TEST_PATH); + + FileSystem fsOtherUser = createFsAsOtherUser(cluster, conf); + loopRecoverLease(fsOtherUser, TEST_PATH); AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF); + + // Fail back to ensure that the block locations weren't lost on the + // original node. + cluster.transitionToStandby(1); + cluster.transitionToActive(0); + AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF); } finally { IOUtils.closeStream(stm); cluster.shutdown(); } } + /** + * Test the scenario where the NN fails over after issuing a block + * synchronization request, but before it is committed. The + * DN running the recovery should then fail to commit the synchronization + * and a later retry will succeed. + */ + @Test(timeout=30000) + public void testFailoverRightBeforeCommitSynchronization() throws Exception { + final Configuration conf = new Configuration(); + // Disable permissions so that another user can recover the lease. + conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false); + conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + + FSDataOutputStream stm = null; + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(3) + .build(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + Thread.sleep(500); + + LOG.info("Starting with NN 0 active"); + FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf); + stm = fs.create(TEST_PATH); + + // write a half block + AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2); + stm.hflush(); + + // Look into the block manager on the active node for the block + // under construction. + + NameNode nn0 = cluster.getNameNode(0); + ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH); + DatanodeDescriptor expectedPrimary = getExpectedPrimaryNode(nn0, blk); + LOG.info("Expecting block recovery to be triggered on DN " + + expectedPrimary); + + // Find the corresponding DN daemon, and spy on its connection to the + // active. 
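      // [Editor's note] DataNodeAdapter.spyOnBposToNN returns a Mockito spy
      // standing in for this DN's RPC proxy to nn0, so stubbing
      // commitBlockSynchronization with the DelayAnswer below lets the test
      // hold that call across the failover and then inspect its outcome.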
+ DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort()); + DatanodeProtocolClientSideTranslatorPB nnSpy = + DataNodeAdapter.spyOnBposToNN(primaryDN, nn0); + + // Delay the commitBlockSynchronization call + DelayAnswer delayer = new DelayAnswer(LOG); + Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization( + Mockito.eq(blk), + Mockito.anyInt(), // new genstamp + Mockito.anyLong(), // new length + Mockito.eq(true), // close file + Mockito.eq(false), // delete block + (DatanodeID[]) Mockito.anyObject()); // new targets + + DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf); + assertFalse(fsOtherUser.recoverLease(TEST_PATH)); + + LOG.info("Waiting for commitBlockSynchronization call from primary"); + delayer.waitForCall(); + + LOG.info("Failing over to NN 1"); + + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + + // Let the commitBlockSynchronization call go through, and check that + // it failed with the correct exception. + delayer.proceed(); + delayer.waitForResult(); + Throwable t = delayer.getThrown(); + if (t == null) { + fail("commitBlockSynchronization call did not fail on standby"); + } + GenericTestUtils.assertExceptionContains( + "Operation category WRITE is not supported", + t); + + // Now, if we try again to recover the block, it should succeed on the new + // active. + loopRecoverLease(fsOtherUser, TEST_PATH); + + AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE/2); + } finally { + IOUtils.closeStream(stm); + cluster.shutdown(); + } + } + + /** + * Stress test for pipeline/lease recovery. Starts a number of + * threads, each of which creates a file and has another client + * break the lease. While these threads run, failover proceeds + * back and forth between two namenodes. + */ + @Test(timeout=STRESS_RUNTIME*3) + public void testPipelineRecoveryStress() throws Exception { + HAStressTestHarness harness = new HAStressTestHarness(); + // Disable permissions so that another user can recover the lease. + harness.conf.setBoolean( + DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false); + + final MiniDFSCluster cluster = harness.startCluster(); + try { + cluster.waitActive(); + cluster.transitionToActive(0); + + FileSystem fs = harness.getFailoverFs(); + DistributedFileSystem fsAsOtherUser = createFsAsOtherUser( + cluster, harness.conf); + + TestContext testers = new TestContext(); + for (int i = 0; i < STRESS_NUM_THREADS; i++) { + Path p = new Path("/test-" + i); + testers.addThread(new PipelineTestThread( + testers, fs, fsAsOtherUser, p)); + } + + // Start a separate thread which will make sure that replication + // happens quickly by triggering deletion reports and replication + // work calculation frequently. + harness.addReplicationTriggerThread(500); + harness.addFailoverThread(5000); + harness.startThreads(); + testers.startThreads(); + + testers.waitFor(STRESS_RUNTIME); + testers.stop(); + harness.stopThreads(); + } finally { + System.err.println("===========================\n\n\n\n"); + harness.shutdown(); + } + } + + /** + * Test thread which creates a file, has another fake user recover + * the lease on the file, and then ensures that the file's contents + * are properly readable. If any of these steps fails, propagates + * an exception back to the test context, causing the test case + * to fail. 
+ */ + private static class PipelineTestThread extends RepeatingTestThread { + private final FileSystem fs; + private final FileSystem fsOtherUser; + private final Path path; + + + public PipelineTestThread(TestContext ctx, + FileSystem fs, FileSystem fsOtherUser, Path p) { + super(ctx); + this.fs = fs; + this.fsOtherUser = fsOtherUser; + this.path = p; + } + + @Override + public void doAnAction() throws Exception { + FSDataOutputStream stm = fs.create(path, true); + try { + AppendTestUtil.write(stm, 0, 100); + stm.hflush(); + loopRecoverLease(fsOtherUser, path); + AppendTestUtil.check(fs, path, 100); + } finally { + try { + stm.close(); + } catch (IOException e) { + // should expect this since we lost the lease + } + } + } + + @Override + public String toString() { + return "Pipeline test thread for " + path; + } + } + + + + /** + * @return the node which is expected to run the recovery of the + * given block, which is known to be under construction inside the + * given NameNOde. + */ + private DatanodeDescriptor getExpectedPrimaryNode(NameNode nn, + ExtendedBlock blk) { + BlockManager bm0 = nn.getNamesystem().getBlockManager(); + BlockInfo storedBlock = bm0.getStoredBlock(blk.getLocalBlock()); + assertTrue("Block " + blk + " should be under construction, " + + "got: " + storedBlock, + storedBlock instanceof BlockInfoUnderConstruction); + BlockInfoUnderConstruction ucBlock = + (BlockInfoUnderConstruction)storedBlock; + // We expect that the first indexed replica will be the one + // to be in charge of the synchronization / recovery protocol. + DatanodeDescriptor expectedPrimary = ucBlock.getExpectedLocations()[0]; + return expectedPrimary; + } + + private DistributedFileSystem createFsAsOtherUser( + final MiniDFSCluster cluster, final Configuration conf) + throws IOException, InterruptedException { + return (DistributedFileSystem) UserGroupInformation.createUserForTesting( + "otheruser", new String[] { "othergroup"}) + .doAs(new PrivilegedExceptionAction() { + @Override + public FileSystem run() throws Exception { + return HATestUtil.configureFailoverFs( + cluster, conf); + } + }); + } + + /** + * Try to cover the lease on the given file for up to 30 + * seconds. 
+ * @param fsOtherUser the filesystem to use for the recoverLease call + * @param testPath the path on which to run lease recovery + * @throws TimeoutException if lease recover does not succeed within 30 + * seconds + * @throws InterruptedException if the thread is interrupted + */ + private static void loopRecoverLease( + final FileSystem fsOtherUser, final Path testPath) + throws TimeoutException, InterruptedException { + try { + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + boolean success; + try { + success = ((DistributedFileSystem)fsOtherUser) + .recoverLease(testPath); + } catch (IOException e) { + throw new RuntimeException(e); + } + if (!success) { + LOG.info("Waiting to recover lease successfully"); + } + return success; + } + }, 1000, 30000); + } catch (TimeoutException e) { + throw new TimeoutException("Timed out recovering lease for " + + testPath); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java index f723a85bf45..23d1bb13a55 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/test/GenericTestUtils.java @@ -110,7 +110,11 @@ public abstract class GenericTestUtils { private final CountDownLatch fireLatch = new CountDownLatch(1); private final CountDownLatch waitLatch = new CountDownLatch(1); - + private final CountDownLatch resultLatch = new CountDownLatch(1); + + // Result fields set after proceed() is called. + private volatile Throwable thrown; + private volatile Object returnValue; public DelayAnswer(Log log) { this.LOG = log; @@ -145,7 +149,40 @@ public abstract class GenericTestUtils { } protected Object passThrough(InvocationOnMock invocation) throws Throwable { - return invocation.callRealMethod(); + try { + Object ret = invocation.callRealMethod(); + returnValue = ret; + return ret; + } catch (Throwable t) { + thrown = t; + throw t; + } finally { + resultLatch.countDown(); + } + } + + /** + * After calling proceed(), this will wait until the call has + * completed and a result has been returned to the caller. + */ + public void waitForResult() throws InterruptedException { + resultLatch.await(); + } + + /** + * After the call has gone through, return any exception that + * was thrown, or null if no exception was thrown. + */ + public Throwable getThrown() { + return thrown; + } + + /** + * After the call has gone through, return the call's return value, + * or null in case it was void or an exception was thrown. + */ + public Object getReturnValue() { + return returnValue; } } From 7527e943e6c8ea909f22d9d66246ac6c8bc2d6a0 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 22 Feb 2012 20:37:28 +0000 Subject: [PATCH 158/177] HDFS-2972. Small optimization building incremental block report. Contributed by Todd Lipcon. 
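[Editor's note] The patch below replaces the DataNode's LinkedList of pending
received/deleted block notifications with a HashMap keyed by block ID, so
duplicate notifications for the same block collapse into one entry, and a
failed report can be re-queued without clobbering entries that were superseded
while the RPC was in flight. The sketch that follows is only an illustrative,
self-contained rendering of that idea; the class, field, and method names are
hypothetical stand-ins, not the actual BPServiceActor code.

import java.util.HashMap;
import java.util.Map;

class PendingIncrementalReportSketch {
  // Pending notifications, keyed by block ID; a newer event replaces an older one.
  private final Map<Long, String> pending = new HashMap<Long, String>();

  synchronized void addNotification(long blockId, String event) {
    pending.put(blockId, event);
  }

  // Snapshot and clear the queue for a single report RPC.
  synchronized Map<Long, String> drain() {
    Map<Long, String> batch = new HashMap<Long, String>(pending);
    pending.clear();
    return batch;
  }

  // If the RPC failed, put the batch back, but only for blocks that did not
  // receive a newer notification while the report was being sent.
  synchronized void requeueOnFailure(Map<Long, String> failedBatch) {
    for (Map.Entry<Long, String> e : failedBatch.entrySet()) {
      if (!pending.containsKey(e.getKey())) {
        pending.put(e.getKey(), e.getValue());
      }
    }
  }
}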
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1292497 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/datanode/BPServiceActor.java | 85 ++++++++++++------- 2 files changed, 56 insertions(+), 31 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 4893c54829f..d6cc04098db 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -222,3 +222,5 @@ HDFS-2952. NN should not start with upgrade option or with a pending an unfinali HDFS-2974. MiniDFSCluster does not delete standby NN name dirs during format. (atm) HDFS-2929. Stress test and fixes for block synchronization (todd) + +HDFS-2972. Small optimization building incremental block report (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index 95819304e88..982a5685033 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -24,7 +24,8 @@ import java.net.InetSocketAddress; import java.net.SocketTimeoutException; import java.net.URI; import java.util.Collection; -import java.util.LinkedList; +import java.util.HashMap; +import java.util.Map; import org.apache.commons.logging.Log; import org.apache.hadoop.classification.InterfaceAudience; @@ -54,6 +55,7 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.util.StringUtils; import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Maps; /** * A thread per active or standby namenode to perform: @@ -81,8 +83,16 @@ class BPServiceActor implements Runnable { DatanodeProtocolClientSideTranslatorPB bpNamenode; private long lastHeartbeat = 0; private volatile boolean initialized = false; - private final LinkedList receivedAndDeletedBlockList - = new LinkedList(); + + /** + * Between block reports (which happen on the order of once an hour) the + * DN reports smaller incremental changes to its block list. This map, + * keyed by block ID, contains the pending changes which have yet to be + * reported to the NN. Access should be synchronized on this object. 
+ */ + private final Map pendingIncrementalBR + = Maps.newHashMap(); + private volatile int pendingReceivedRequests = 0; private volatile boolean shouldServiceRun = true; private final DataNode dn; @@ -242,28 +252,39 @@ class BPServiceActor implements Runnable { // check if there are newly received blocks ReceivedDeletedBlockInfo[] receivedAndDeletedBlockArray = null; - int currentReceivedRequestsCounter; - synchronized (receivedAndDeletedBlockList) { - currentReceivedRequestsCounter = pendingReceivedRequests; - int numBlocks = receivedAndDeletedBlockList.size(); + synchronized (pendingIncrementalBR) { + int numBlocks = pendingIncrementalBR.size(); if (numBlocks > 0) { // // Send newly-received and deleted blockids to namenode // - receivedAndDeletedBlockArray = receivedAndDeletedBlockList - .toArray(new ReceivedDeletedBlockInfo[numBlocks]); + receivedAndDeletedBlockArray = pendingIncrementalBR + .values().toArray(new ReceivedDeletedBlockInfo[numBlocks]); } + pendingIncrementalBR.clear(); } if (receivedAndDeletedBlockArray != null) { StorageReceivedDeletedBlocks[] report = { new StorageReceivedDeletedBlocks( bpRegistration.getStorageID(), receivedAndDeletedBlockArray) }; - bpNamenode.blockReceivedAndDeleted(bpRegistration, bpos.getBlockPoolId(), - report); - synchronized (receivedAndDeletedBlockList) { - for (int i = 0; i < receivedAndDeletedBlockArray.length; i++) { - receivedAndDeletedBlockList.remove(receivedAndDeletedBlockArray[i]); + boolean success = false; + try { + bpNamenode.blockReceivedAndDeleted(bpRegistration, bpos.getBlockPoolId(), + report); + success = true; + } finally { + synchronized (pendingIncrementalBR) { + if (!success) { + // If we didn't succeed in sending the report, put all of the + // blocks back onto our queue, but only in the case where we didn't + // put something newer in the meantime. + for (ReceivedDeletedBlockInfo rdbi : receivedAndDeletedBlockArray) { + if (!pendingIncrementalBR.containsKey(rdbi.getBlock().getBlockId())) { + pendingIncrementalBR.put(rdbi.getBlock().getBlockId(), rdbi); + } + } + } + pendingReceivedRequests = pendingIncrementalBR.size(); } - pendingReceivedRequests -= currentReceivedRequestsCounter; } } } @@ -274,16 +295,18 @@ class BPServiceActor implements Runnable { * client? For now we don't. 
*/ void notifyNamenodeBlockImmediately(ReceivedDeletedBlockInfo bInfo) { - synchronized (receivedAndDeletedBlockList) { - receivedAndDeletedBlockList.add(bInfo); + synchronized (pendingIncrementalBR) { + pendingIncrementalBR.put( + bInfo.getBlock().getBlockId(), bInfo); pendingReceivedRequests++; - receivedAndDeletedBlockList.notifyAll(); + pendingIncrementalBR.notifyAll(); } } void notifyNamenodeDeletedBlock(ReceivedDeletedBlockInfo bInfo) { - synchronized (receivedAndDeletedBlockList) { - receivedAndDeletedBlockList.add(bInfo); + synchronized (pendingIncrementalBR) { + pendingIncrementalBR.put( + bInfo.getBlock().getBlockId(), bInfo); } } @@ -292,13 +315,13 @@ class BPServiceActor implements Runnable { */ @VisibleForTesting void triggerBlockReportForTests() throws IOException { - synchronized (receivedAndDeletedBlockList) { + synchronized (pendingIncrementalBR) { lastBlockReport = 0; lastHeartbeat = 0; - receivedAndDeletedBlockList.notifyAll(); + pendingIncrementalBR.notifyAll(); while (lastBlockReport == 0) { try { - receivedAndDeletedBlockList.wait(100); + pendingIncrementalBR.wait(100); } catch (InterruptedException e) { return; } @@ -308,12 +331,12 @@ class BPServiceActor implements Runnable { @VisibleForTesting void triggerHeartbeatForTests() throws IOException { - synchronized (receivedAndDeletedBlockList) { + synchronized (pendingIncrementalBR) { lastHeartbeat = 0; - receivedAndDeletedBlockList.notifyAll(); + pendingIncrementalBR.notifyAll(); while (lastHeartbeat == 0) { try { - receivedAndDeletedBlockList.wait(100); + pendingIncrementalBR.wait(100); } catch (InterruptedException e) { return; } @@ -323,13 +346,13 @@ class BPServiceActor implements Runnable { @VisibleForTesting void triggerDeletionReportForTests() throws IOException { - synchronized (receivedAndDeletedBlockList) { + synchronized (pendingIncrementalBR) { lastDeletedReport = 0; - receivedAndDeletedBlockList.notifyAll(); + pendingIncrementalBR.notifyAll(); while (lastDeletedReport == 0) { try { - receivedAndDeletedBlockList.wait(100); + pendingIncrementalBR.wait(100); } catch (InterruptedException e) { return; } @@ -527,10 +550,10 @@ class BPServiceActor implements Runnable { // long waitTime = dnConf.heartBeatInterval - (System.currentTimeMillis() - lastHeartbeat); - synchronized(receivedAndDeletedBlockList) { + synchronized(pendingIncrementalBR) { if (waitTime > 0 && pendingReceivedRequests == 0) { try { - receivedAndDeletedBlockList.wait(waitTime); + pendingIncrementalBR.wait(waitTime); } catch (InterruptedException ie) { LOG.warn("BPOfferService for " + this + " interrupted"); } From 90a14f89e178e78cdcb16aec217fab99ad89fbfa Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 23 Feb 2012 01:25:14 +0000 Subject: [PATCH 159/177] HDFS-2973. Re-enable NO_ACK optimization for block deletion. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1292611 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../server/blockmanagement/BlockManager.java | 9 +++++---- .../hdfs/server/namenode/ha/TestHASafeMode.java | 17 +++++++++++------ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index d6cc04098db..93ae86a37de 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -224,3 +224,5 @@ HDFS-2974. 
MiniDFSCluster does not delete standby NN name dirs during format. (a HDFS-2929. Stress test and fixes for block synchronization (todd) HDFS-2972. Small optimization building incremental block report (todd) + +HDFS-2973. Re-enable NO_ACK optimization for block deletion. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 5d2ce5cbafb..440ac396070 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -61,6 +61,7 @@ import org.apache.hadoop.hdfs.server.namenode.INodeFile; import org.apache.hadoop.hdfs.server.namenode.INodeFileUnderConstruction; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.Namesystem; +import org.apache.hadoop.hdfs.server.protocol.BlockCommand; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; @@ -2672,10 +2673,10 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block public void removeBlock(Block block) { assert namesystem.hasWriteLock(); - // TODO(HA): the following causes some problems for HA: - // the SBN doesn't get block deletions until the next - // BR... - // block.setNumBytes(BlockCommand.NO_ACK); + // No need to ACK blocks that are being removed entirely + // from the namespace, since the removal of the associated + // file already removes them from the block map below. + block.setNumBytes(BlockCommand.NO_ACK); addToInvalidates(block); corruptReplicas.removeFromCorruptReplicasMap(block); blocksMap.removeBlock(block); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java index e07338f8c82..8790d0f3315 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java @@ -311,8 +311,9 @@ public class TestHASafeMode { // It will initially have all of the blocks necessary. assertSafeMode(nn1, 10, 10); - // Delete those blocks while the SBN is in safe mode - this - // should reduce it back below the threshold + // Delete those blocks while the SBN is in safe mode. + // This doesn't affect the SBN, since deletions are not + // ACKed when due to block removals. banner("Removing the blocks without rolling the edit log"); fs.delete(new Path("/test"), true); BlockManagerTestUtil.computeAllPendingWork( @@ -323,8 +324,10 @@ public class TestHASafeMode { HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); - assertSafeMode(nn1, 0, 10); + assertSafeMode(nn1, 10, 10); + // When we catch up to active namespace, it will restore back + // to 0 blocks. 
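      // [Editor's note] This expectation follows from the BlockManager change
      // above: removeBlock() now tags the block with BlockCommand.NO_ACK, so
      // deletions caused by removing the whole file are not acknowledged back
      // to the standby as they happen; the standby only drops them once the
      // edit log is rolled and tailed, which the next step waits for.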
banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); @@ -372,8 +375,9 @@ public class TestHASafeMode { IOUtils.closeStream(stm); } - // Delete those blocks while the SBN is in safe mode - this - // should reduce it back below the threshold + // Delete those blocks while the SBN is in safe mode. + // This will not ACK the deletions to the SBN, so it won't + // notice until we roll the edit log. banner("Removing the blocks without rolling the edit log"); fs.delete(new Path("/test"), true); BlockManagerTestUtil.computeAllPendingWork( @@ -384,8 +388,9 @@ public class TestHASafeMode { HATestUtil.waitForDNDeletions(cluster); cluster.triggerDeletionReports(); - assertSafeMode(nn1, 0, 4); + assertSafeMode(nn1, 4, 4); + // When we roll the edit log, the deletions will go through. banner("Waiting for standby to catch up to active namespace"); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); From 8db31c5972873a4f919469fb3387454116a02869 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Thu, 23 Feb 2012 02:08:54 +0000 Subject: [PATCH 160/177] HDFS-2922. HA: close out operation categories. Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1292620 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/BackupNode.java | 3 ++ .../hadoop/hdfs/server/namenode/NameNode.java | 2 + .../server/namenode/NameNodeRpcServer.java | 37 +++++++++---------- .../hdfs/server/namenode/ha/StandbyState.java | 3 +- 5 files changed, 27 insertions(+), 20 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 93ae86a37de..baa7cec6e75 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -226,3 +226,5 @@ HDFS-2929. Stress test and fixes for block synchronization (todd) HDFS-2972. Small optimization building incremental block report (todd) HDFS-2973. Re-enable NO_ACK optimization for block deletion. (todd) + +HDFS-2922. 
HA: close out operation categories (eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index d6aa5e5a705..5e7de78d77b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -407,6 +407,9 @@ public class BackupNode extends NameNode { @Override // NameNode public void checkOperation(OperationCategory op) throws StandbyException { + if (op == OperationCategory.UNCHECKED) { + return; + } if (OperationCategory.JOURNAL != op && !(OperationCategory.READ == op && allowStaleStandbyReads)) { String msg = "Operation category " + op diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index caedb5bae35..a243971453c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -114,6 +114,8 @@ public class NameNode { * Categories of operations supported by the namenode. */ public static enum OperationCategory { + /** Operations that are state agnostic */ + UNCHECKED, /** Read operation that does not change the namespace state */ READ, /** Write operation that changes the namespace state */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 98d09101e8b..b6444ce65f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -353,9 +353,7 @@ class NameNodeRpcServer implements NamenodeProtocols { public void errorReport(NamenodeRegistration registration, int errorCode, String msg) throws IOException { - // nn.checkOperation(OperationCategory.WRITE); - // TODO: I dont think this should be checked - it's just for logging - // and dropping backups + namesystem.checkOperation(OperationCategory.UNCHECKED); verifyRequest(registration); LOG.info("Error report from " + registration + ": " + msg); if(errorCode == FATAL) @@ -707,8 +705,8 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public DatanodeInfo[] getDatanodeReport(DatanodeReportType type) - throws IOException { - // TODO(HA): decide on OperationCategory for this + throws IOException { + namesystem.checkOperation(OperationCategory.UNCHECKED); DatanodeInfo results[] = namesystem.datanodeReport(type); if (results == null ) { throw new IOException("Cannot find datanode report"); @@ -718,32 +716,32 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // ClientProtocol public boolean setSafeMode(SafeModeAction action) throws IOException { - // TODO:HA decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.UNCHECKED); return namesystem.setSafeMode(action); } + @Override // ClientProtocol - public boolean restoreFailedStorage(String arg) - throws AccessControlException { - // 
TODO:HA decide on OperationCategory for this + public boolean restoreFailedStorage(String arg) throws IOException { + namesystem.checkOperation(OperationCategory.UNCHECKED); return namesystem.restoreFailedStorage(arg); } @Override // ClientProtocol public void saveNamespace() throws IOException { - // TODO:HA decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.UNCHECKED); namesystem.saveNamespace(); } @Override // ClientProtocol public void refreshNodes() throws IOException { - // TODO:HA decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.UNCHECKED); namesystem.getBlockManager().getDatanodeManager().refreshNodes( new HdfsConfiguration()); } @Override // NamenodeProtocol - public long getTransactionID() { - // TODO:HA decide on OperationCategory for this + public long getTransactionID() throws IOException { + namesystem.checkOperation(OperationCategory.CHECKPOINT); return namesystem.getEditLog().getSyncTxId(); } @@ -755,28 +753,29 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override // NamenodeProtocol public RemoteEditLogManifest getEditLogManifest(long sinceTxId) throws IOException { - // TODO:HA decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.READ); return namesystem.getEditLog().getEditLogManifest(sinceTxId); } @Override // ClientProtocol public void finalizeUpgrade() throws IOException { - // TODO:HA decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.WRITE); namesystem.finalizeUpgrade(); } @Override // ClientProtocol public UpgradeStatusReport distributedUpgradeProgress(UpgradeAction action) throws IOException { - // TODO:HA decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.READ); return namesystem.distributedUpgradeProgress(action); } @Override // ClientProtocol public void metaSave(String filename) throws IOException { - // TODO:HA decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.UNCHECKED); namesystem.metaSave(filename); } + @Override // ClientProtocol public CorruptFileBlocks listCorruptFileBlocks(String path, String cookie) throws IOException { @@ -795,12 +794,12 @@ class NameNodeRpcServer implements NamenodeProtocols { /** * Tell all datanodes to use a new, non-persistent bandwidth value for * dfs.datanode.balance.bandwidthPerSec. - * @param bandwidth Blanacer bandwidth in bytes per second for all datanodes. + * @param bandwidth Balancer bandwidth in bytes per second for all datanodes. 
* @throws IOException */ @Override // ClientProtocol public void setBalancerBandwidth(long bandwidth) throws IOException { - // TODO:HA decide on OperationCategory for this + namesystem.checkOperation(OperationCategory.UNCHECKED); namesystem.getBlockManager().getDatanodeManager().setBalancerBandwidth(bandwidth); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java index 80f42e60fea..60e83713b86 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyState.java @@ -78,7 +78,8 @@ public class StandbyState extends HAState { @Override public void checkOperation(HAContext context, OperationCategory op) throws StandbyException { - if (op == OperationCategory.READ && context.allowStaleReads()) { + if (op == OperationCategory.UNCHECKED || + (op == OperationCategory.READ && context.allowStaleReads())) { return; } String msg = "Operation category " + op + " is not supported in state " From 2e9b8df963eae74e390c090be82142a7bacd3f51 Mon Sep 17 00:00:00 2001 From: Eli Collins Date: Thu, 23 Feb 2012 07:58:33 +0000 Subject: [PATCH 161/177] HDFS-2993. HA: BackupNode#checkOperation should permit CHECKPOINT operations. Contributed by Eli Collins git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1292688 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../org/apache/hadoop/hdfs/server/namenode/BackupNode.java | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index baa7cec6e75..fbdf1ecf360 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -228,3 +228,5 @@ HDFS-2972. Small optimization building incremental block report (todd) HDFS-2973. Re-enable NO_ACK optimization for block deletion. (todd) HDFS-2922. HA: close out operation categories (eli) + +HDFS-2993. HA: BackupNode#checkOperation should permit CHECKPOINT operations (eli) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index 5e7de78d77b..9ffd2085768 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -407,7 +407,8 @@ public class BackupNode extends NameNode { @Override // NameNode public void checkOperation(OperationCategory op) throws StandbyException { - if (op == OperationCategory.UNCHECKED) { + if (op == OperationCategory.UNCHECKED || + op == OperationCategory.CHECKPOINT) { return; } if (OperationCategory.JOURNAL != op && From 481f84597bf842df45b068cc24c328112e8bcf40 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Sat, 25 Feb 2012 00:03:26 +0000 Subject: [PATCH 162/177] HDFS-2904. Client support for getting delegation tokens. Contributed by Todd Lipcon. 
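[Editor's note] With this patch, a delegation token fetched through a logical
HA URI is stamped with a service of the form "ha-hdfs:<nameservice>"
(HdfsConstants.HA_DT_SERVICE_PREFIX) rather than a host:port pair, and
ConfiguredFailoverProxyProvider clones that token under each physical
NameNode's address so the IPC layer can still find it. The sketch below is a
stand-alone illustration of the service-name handling only: the nameservice
and hostname are made up, and the wrapper class is not part of the patch,
though the helper names mirror those added in HAUtil.

import java.net.URI;

class HaTokenServiceSketch {
  static final String HA_DT_SERVICE_PREFIX = "ha-hdfs:";

  // Service string recorded in a token obtained via a logical (HA) URI.
  static String buildTokenServiceForLogicalUri(URI uri) {
    return HA_DT_SERVICE_PREFIX + uri.getHost();
  }

  static boolean isTokenForLogicalUri(String service) {
    return service.startsWith(HA_DT_SERVICE_PREFIX);
  }

  public static void main(String[] args) throws Exception {
    URI logical = new URI("hdfs://mycluster");               // hypothetical nameservice
    String haService = buildTokenServiceForLogicalUri(logical);
    System.out.println(haService);                           // ha-hdfs:mycluster
    System.out.println(isTokenForLogicalUri(haService));     // true
    // The failover proxy provider would additionally register a clone of the
    // token under each NameNode's host:port service, e.g. "nn1.example.com:8020".
  }
}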
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1293486 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSClient.java | 90 ++++--- .../java/org/apache/hadoop/hdfs/HAUtil.java | 176 +++++++++++-- .../hadoop/hdfs/protocol/HdfsConstants.java | 8 + .../delegation/DelegationTokenSelector.java | 5 + .../server/balancer/NameNodeConnector.java | 24 +- .../ha/ConfiguredFailoverProxyProvider.java | 6 + .../ha/TestDelegationTokensWithHA.java | 234 ++++++++++++++++++ 8 files changed, 479 insertions(+), 66 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index fbdf1ecf360..e8e63bf7029 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -230,3 +230,5 @@ HDFS-2973. Re-enable NO_ACK optimization for block deletion. (todd) HDFS-2922. HA: close out operation categories (eli) HDFS-2993. HA: BackupNode#checkOperation should permit CHECKPOINT operations (eli) + +HDFS-2904. Client support for getting delegation tokens. (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index a80e34b2eb4..dcbc88f10c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -30,6 +30,7 @@ import java.net.NetworkInterface; import java.net.Socket; import java.net.SocketException; import java.net.URI; +import java.net.URISyntaxException; import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; @@ -60,6 +61,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.permission.FsPermission; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; + +import org.apache.hadoop.hdfs.HAUtil.ProxyAndInfo; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks; import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; @@ -107,6 +110,8 @@ import org.apache.hadoop.security.token.TokenRenewer; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.Progressable; +import com.google.common.base.Preconditions; + /******************************************************** * DFSClient can connect to a Hadoop Filesystem and * perform basic file tasks. 
It uses the ClientProtocol @@ -124,7 +129,9 @@ public class DFSClient implements java.io.Closeable { public static final long SERVER_DEFAULTS_VALIDITY_PERIOD = 60 * 60 * 1000L; // 1 hour static final int TCP_WINDOW_SIZE = 128 * 1024; // 128 KB final ClientProtocol namenode; - private final InetSocketAddress nnAddress; + /* The service used for delegation tokens */ + private Text dtService; + final UserGroupInformation ugi; volatile boolean clientRunning = true; private volatile FsServerDefaults serverDefaults; @@ -308,29 +315,22 @@ public class DFSClient implements java.io.Closeable { this.clientName = leaserenewer.getClientName(dfsClientConf.taskId); this.socketCache = new SocketCache(dfsClientConf.socketCacheCapacity); - ClientProtocol failoverNNProxy = (ClientProtocol) HAUtil - .createFailoverProxy(conf, nameNodeUri, ClientProtocol.class); - if (nameNodeUri != null && failoverNNProxy != null) { - this.namenode = failoverNNProxy; - nnAddress = null; - } else if (nameNodeUri != null && rpcNamenode == null) { - this.namenode = DFSUtil.createNamenode(NameNode.getAddress(nameNodeUri), conf); - - // TODO(HA): This doesn't really apply in the case of HA. Need to get smart - // about tokens in an HA setup, generally. - nnAddress = NameNode.getAddress(nameNodeUri); - } else if (nameNodeUri == null && rpcNamenode != null) { - //This case is used for testing. + + + if (rpcNamenode != null) { + // This case is used for testing. + Preconditions.checkArgument(nameNodeUri == null); this.namenode = rpcNamenode; - - // TODO(HA): This doesn't really apply in the case of HA. Need to get smart - // about tokens in an HA setup, generally. - nnAddress = null; + dtService = null; } else { - throw new IllegalArgumentException( - "Expecting exactly one of nameNodeUri and rpcNamenode being null: " - + "nameNodeUri=" + nameNodeUri + ", rpcNamenode=" + rpcNamenode); + Preconditions.checkArgument(nameNodeUri != null, + "null URI"); + ProxyAndInfo proxyInfo = + HAUtil.createProxy(conf, nameNodeUri, ClientProtocol.class); + this.dtService = proxyInfo.getDelegationTokenService(); + this.namenode = proxyInfo.getProxy(); } + // read directly from the block file if configured. 
this.shortCircuitLocalReads = conf.getBoolean( DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, @@ -523,11 +523,13 @@ public class DFSClient implements java.io.Closeable { */ public Token getDelegationToken(Text renewer) throws IOException { - Token result = + assert dtService != null; + Token token = namenode.getDelegationToken(renewer); - SecurityUtil.setTokenService(result, nnAddress); - LOG.info("Created " + DelegationTokenIdentifier.stringifyToken(result)); - return result; + token.setService(this.dtService); + + LOG.info("Created " + DelegationTokenIdentifier.stringifyToken(token)); + return token; } /** @@ -658,13 +660,8 @@ public class DFSClient implements java.io.Closeable { @Override public long renew(Token token, Configuration conf) throws IOException { Token delToken = - (Token) token; - LOG.info("Renewing " + - DelegationTokenIdentifier.stringifyToken(delToken)); - ClientProtocol nn = - DFSUtil.createNamenode - (SecurityUtil.getTokenServiceAddr(delToken), - conf, UserGroupInformation.getCurrentUser()); + (Token) token; + ClientProtocol nn = getNNProxy(delToken, conf); try { return nn.renewDelegationToken(delToken); } catch (RemoteException re) { @@ -680,9 +677,7 @@ public class DFSClient implements java.io.Closeable { (Token) token; LOG.info("Cancelling " + DelegationTokenIdentifier.stringifyToken(delToken)); - ClientProtocol nn = DFSUtil.createNamenode( - SecurityUtil.getTokenServiceAddr(delToken), conf, - UserGroupInformation.getCurrentUser()); + ClientProtocol nn = getNNProxy(delToken, conf); try { nn.cancelDelegationToken(delToken); } catch (RemoteException re) { @@ -690,6 +685,31 @@ public class DFSClient implements java.io.Closeable { AccessControlException.class); } } + + private static ClientProtocol getNNProxy( + Token token, Configuration conf) + throws IOException { + URI uri = HAUtil.getServiceUriFromToken(token); + if (HAUtil.isTokenForLogicalUri(token) && + !HAUtil.isLogicalUri(conf, uri)) { + // If the token is for a logical nameservice, but the configuration + // we have disagrees about that, we can't actually renew it. + // This can be the case in MR, for example, if the RM doesn't + // have all of the HA clusters configured in its configuration. + throw new IOException("Unable to map logical nameservice URI '" + + uri + "' to a NameNode. 
Local configuration does not have " + + "a failover proxy provider configured."); + } + + ProxyAndInfo info = + HAUtil.createProxy(conf, uri, ClientProtocol.class); + assert info.getDelegationTokenService().equals(token.getService()) : + "Returned service '" + info.getDelegationTokenService().toString() + + "' doesn't match expected service '" + + token.getService().toString() + "'"; + + return info.getProxy(); + } @Override public boolean isManaged(Token token) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 753cb3bf678..0a322140da8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.lang.reflect.Constructor; import java.net.InetSocketAddress; import java.net.URI; +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collection; import java.util.Map; @@ -31,11 +32,21 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSClient.Conf; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector; import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; +import org.apache.hadoop.io.Text; import org.apache.hadoop.io.retry.FailoverProxyProvider; import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryProxy; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import static org.apache.hadoop.hdfs.protocol.HdfsConstants.HA_DT_SERVICE_PREFIX; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; @@ -177,14 +188,14 @@ public class HAUtil { /** Creates the Failover proxy provider instance*/ @SuppressWarnings("unchecked") - public static FailoverProxyProvider createFailoverProxyProvider( - Configuration conf, Class> failoverProxyProviderClass, - Class xface, URI nameNodeUri) throws IOException { + private static FailoverProxyProvider createFailoverProxyProvider( + Configuration conf, Class> failoverProxyProviderClass, + Class xface, URI nameNodeUri) throws IOException { Preconditions.checkArgument( xface.isAssignableFrom(NamenodeProtocols.class), "Interface %s is not a NameNode protocol", xface); try { - Constructor> ctor = failoverProxyProviderClass + Constructor> ctor = failoverProxyProviderClass .getConstructor(Configuration.class, URI.class, Class.class); FailoverProxyProvider provider = ctor.newInstance(conf, nameNodeUri, xface); @@ -203,7 +214,7 @@ public class HAUtil { } /** Gets the configured Failover proxy provider's class */ - public static Class> getFailoverProxyProviderClass( + private static Class> getFailoverProxyProviderClass( Configuration conf, URI nameNodeUri, Class xface) throws IOException { if (nameNodeUri == null) { return null; @@ -238,24 +249,161 @@ public class HAUtil { } } } + + /** + 
* @return true if the given nameNodeUri appears to be a logical URI. + * This is the case if there is a failover proxy provider configured + * for it in the given configuration. + */ + public static boolean isLogicalUri( + Configuration conf, URI nameNodeUri) { + String host = nameNodeUri.getHost(); + String configKey = DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + + host; + return conf.get(configKey) != null; + } - /** Creates the namenode proxy with the passed Protocol */ + /** + * Creates the namenode proxy with the passed Protocol. + * @param conf the configuration containing the required IPC + * properties, client failover configurations, etc. + * @param nameNodeUri the URI pointing either to a specific NameNode + * or to a logical nameservice. + * @param xface the IPC interface which should be created + * @return an object containing both the proxy and the associated + * delegation token service it corresponds to + **/ @SuppressWarnings("unchecked") - public static Object createFailoverProxy(Configuration conf, URI nameNodeUri, - Class xface) throws IOException { - Class> failoverProxyProviderClass = HAUtil - .getFailoverProxyProviderClass(conf, nameNodeUri, xface); - if (failoverProxyProviderClass != null) { - FailoverProxyProvider failoverProxyProvider = HAUtil + public static ProxyAndInfo createProxy( + Configuration conf, URI nameNodeUri, + Class xface) throws IOException { + Class> failoverProxyProviderClass = + HAUtil.getFailoverProxyProviderClass(conf, nameNodeUri, xface); + + if (failoverProxyProviderClass == null) { + // Non-HA case + return createNonHAProxy(conf, nameNodeUri, xface); + } else { + // HA case + FailoverProxyProvider failoverProxyProvider = HAUtil .createFailoverProxyProvider(conf, failoverProxyProviderClass, xface, nameNodeUri); Conf config = new Conf(conf); - return RetryProxy.create(xface, failoverProxyProvider, RetryPolicies + T proxy = (T) RetryProxy.create(xface, failoverProxyProvider, RetryPolicies .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL, config.maxFailoverAttempts, config.failoverSleepBaseMillis, config.failoverSleepMaxMillis)); + + Text dtService = buildTokenServiceForLogicalUri(nameNodeUri); + return new ProxyAndInfo(proxy, dtService); } - return null; } + @SuppressWarnings("unchecked") + private static ProxyAndInfo createNonHAProxy( + Configuration conf, URI nameNodeUri, Class xface) throws IOException { + InetSocketAddress nnAddr = NameNode.getAddress(nameNodeUri); + Text dtService = SecurityUtil.buildTokenService(nnAddr); + + if (xface == ClientProtocol.class) { + T proxy = (T)DFSUtil.createNamenode(nnAddr, conf); + return new ProxyAndInfo(proxy, dtService); + } else if (xface == NamenodeProtocol.class) { + T proxy = (T) DFSUtil.createNNProxyWithNamenodeProtocol( + nnAddr, conf, UserGroupInformation.getCurrentUser()); + return new ProxyAndInfo(proxy, dtService); + } else { + throw new AssertionError("Unsupported proxy type: " + xface); + } + } + + /** + * Parse the HDFS URI out of the provided token. 
+ * @throws IOException if the token is invalid + */ + public static URI getServiceUriFromToken( + Token token) + throws IOException { + String tokStr = token.getService().toString(); + + if (tokStr.startsWith(HA_DT_SERVICE_PREFIX)) { + tokStr = tokStr.replaceFirst(HA_DT_SERVICE_PREFIX, ""); + } + + try { + return new URI(HdfsConstants.HDFS_URI_SCHEME + "://" + + tokStr); + } catch (URISyntaxException e) { + throw new IOException("Invalid token contents: '" + + tokStr + "'"); + } + } + + /** + * Get the service name used in the delegation token for the given logical + * HA service. + * @param uri the logical URI of the cluster + * @return the service name + */ + public static Text buildTokenServiceForLogicalUri(URI uri) { + return new Text(HA_DT_SERVICE_PREFIX + uri.getHost()); + } + + /** + * @return true if this token corresponds to a logical nameservice + * rather than a specific namenode. + */ + public static boolean isTokenForLogicalUri( + Token token) { + return token.getService().toString().startsWith(HA_DT_SERVICE_PREFIX); + } + + /** + * Locate a delegation token associated with the given HA cluster URI, and if + * one is found, clone it to also represent the underlying namenode address. + * @param ugi the UGI to modify + * @param haUri the logical URI for the cluster + * @param singleNNAddr one of the NNs in the cluster to which the token + * applies + */ + public static void cloneDelegationTokenForLogicalUri( + UserGroupInformation ugi, URI haUri, + InetSocketAddress singleNNAddr) { + Text haService = buildTokenServiceForLogicalUri(haUri); + Token haToken = + DelegationTokenSelector.selectHdfsDelegationToken(haService, ugi); + if (haToken == null) { + // no token + return; + } + Token specificToken = + new Token(haToken); + specificToken.setService(SecurityUtil.buildTokenService(singleNNAddr)); + ugi.addToken(specificToken); + LOG.debug("Mapped HA service delegation token for logical URI " + + haUri + " to namenode " + singleNNAddr); + } + + /** + * Wrapper for a client proxy as well as its associated service ID. + * This is simply used as a tuple-like return type for + * {@link HAUtil#createProxy(Configuration, URI, Class)}. + */ + public static class ProxyAndInfo { + private final PROXYTYPE proxy; + private final Text dtService; + + public ProxyAndInfo(PROXYTYPE proxy, Text dtService) { + this.proxy = proxy; + this.dtService = dtService; + } + + public PROXYTYPE getProxy() { + return proxy; + } + + public Text getDelegationTokenService() { + return dtService; + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java index 6b4835faccc..da64b9e7648 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/HdfsConstants.java @@ -99,6 +99,14 @@ public class HdfsConstants { */ public static final String HDFS_URI_SCHEME = "hdfs"; + /** + * A prefix put before the namenode URI inside the "service" field + * of a delgation token, indicating that the URI is a logical (HA) + * URI. + */ + public static final String HA_DT_SERVICE_PREFIX = "ha-hdfs:"; + + /** * Please see {@link LayoutVersion} on adding new layout version. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSelector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSelector.java index 1822b27a1cb..4f73b851645 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSelector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenSelector.java @@ -59,6 +59,11 @@ public class DelegationTokenSelector new InetSocketAddress(nnAddr.getHostName(), nnRpcPort)); return INSTANCE.selectToken(serviceName, ugi.getTokens()); } + + public static Token selectHdfsDelegationToken( + Text serviceName, UserGroupInformation ugi) { + return INSTANCE.selectToken(serviceName, ugi.getTokens()); + } public DelegationTokenSelector() { super(DelegationTokenIdentifier.HDFS_DELEGATION_KIND); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java index 04657715c2b..e45f721b75a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java @@ -31,7 +31,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; import org.apache.hadoop.hdfs.protocol.ClientProtocol; @@ -44,7 +43,6 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.RemoteException; -import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Daemon; @@ -76,21 +74,13 @@ class NameNodeConnector { Configuration conf) throws IOException { this.namenodeAddress = Lists.newArrayList(haNNs).get(0); URI nameNodeUri = NameNode.getUri(this.namenodeAddress); - NamenodeProtocol failoverNamenode = (NamenodeProtocol) HAUtil - .createFailoverProxy(conf, nameNodeUri, NamenodeProtocol.class); - if (null != failoverNamenode) { - this.namenode = failoverNamenode; - } else { - this.namenode = DFSUtil.createNNProxyWithNamenodeProtocol( - this.namenodeAddress, conf, UserGroupInformation.getCurrentUser()); - } - ClientProtocol failOverClient = (ClientProtocol) HAUtil - .createFailoverProxy(conf, nameNodeUri, ClientProtocol.class); - if (null != failOverClient) { - this.client = failOverClient; - } else { - this.client = DFSUtil.createNamenode(conf); - } + + this.namenode = + HAUtil.createProxy(conf, nameNodeUri, NamenodeProtocol.class) + .getProxy(); + this.client = + HAUtil.createProxy(conf, nameNodeUri, ClientProtocol.class) + .getProxy(); this.fs = FileSystem.get(nameNodeUri, conf); final NamespaceInfo namespaceinfo = namenode.versionRequest(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index 6f6f88f9e8e..79223a0455f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; @@ -95,6 +96,11 @@ public class ConfiguredFailoverProxyProvider implements for (InetSocketAddress address : addressesInNN.values()) { proxies.add(new AddressRpcProxyPair(address)); + + // The client may have a delegation token set for the logical + // URI of the cluster. Clone this token to apply to each of the + // underlying IPC addresses so that the IPC code can find it. + HAUtil.cloneDelegationTokenForLogicalUri(ugi, uri, address); } } catch (IOException e) { throw new RuntimeException(e); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java new file mode 100644 index 00000000000..561e4d61033 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDelegationTokensWithHA.java @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.*; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.URI; +import java.security.PrivilegedExceptionAction; +import java.util.Collection; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.HAUtil; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import com.google.common.base.Joiner; + +/** + * Test case for client support of delegation tokens in an HA cluster. + * See HDFS-2904 for more info. + **/ +public class TestDelegationTokensWithHA { + private static Configuration conf = new Configuration(); + private static final Log LOG = + LogFactory.getLog(TestDelegationTokensWithHA.class); + private static MiniDFSCluster cluster; + private static NameNode nn0; + private static NameNode nn1; + private static FileSystem fs; + private static DelegationTokenSecretManager dtSecretManager; + private static DistributedFileSystem dfs; + + @BeforeClass + public static void setupCluster() throws Exception { + conf.setBoolean( + DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); + conf.set("hadoop.security.auth_to_local", + "RULE:[2:$1@$0](JobTracker@.*FOO.COM)s/@.*//" + "DEFAULT"); + + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0) + .build(); + cluster.waitActive(); + + nn0 = cluster.getNameNode(0); + nn1 = cluster.getNameNode(1); + fs = HATestUtil.configureFailoverFs(cluster, conf); + dfs = (DistributedFileSystem)fs; + + cluster.transitionToActive(0); + dtSecretManager = NameNodeAdapter.getDtSecretManager( + nn0.getNamesystem()); + } + + @AfterClass + public static void shutdownCluster() throws IOException { + if (cluster != null) { + cluster.shutdown(); + } + } + + + @Test + public void testDelegationTokenDFSApi() throws Exception { + Token token = dfs.getDelegationToken("JobTracker"); + DelegationTokenIdentifier identifier = new DelegationTokenIdentifier(); + byte[] tokenId = token.getIdentifier(); + identifier.readFields(new DataInputStream( + new ByteArrayInputStream(tokenId))); + + // Ensure that it's present in the NN's secret manager and can + // be renewed directly from there. 
+ LOG.info("A valid token should have non-null password, " + + "and should be renewed successfully"); + assertTrue(null != dtSecretManager.retrievePassword(identifier)); + dtSecretManager.renewToken(token, "JobTracker"); + + // Use the client conf with the failover info present to check + // renewal. + Configuration clientConf = dfs.getConf(); + doRenewOrCancel(token, clientConf, TokenTestAction.RENEW); + + // Using a configuration that doesn't have the logical nameservice + // configured should result in a reasonable error message. + Configuration emptyConf = new Configuration(); + try { + doRenewOrCancel(token, emptyConf, TokenTestAction.RENEW); + fail("Did not throw trying to renew with an empty conf!"); + } catch (IOException ioe) { + GenericTestUtils.assertExceptionContains( + "Unable to map logical nameservice URI", ioe); + } + + + // Ensure that the token can be renewed again after a failover. + cluster.transitionToStandby(0); + cluster.transitionToActive(1); + doRenewOrCancel(token, clientConf, TokenTestAction.RENEW); + + doRenewOrCancel(token, clientConf, TokenTestAction.CANCEL); + } + + @SuppressWarnings("deprecation") + @Test + public void testDelegationTokenWithDoAs() throws Exception { + final Token token = + dfs.getDelegationToken("JobTracker"); + final UserGroupInformation longUgi = UserGroupInformation + .createRemoteUser("JobTracker/foo.com@FOO.COM"); + final UserGroupInformation shortUgi = UserGroupInformation + .createRemoteUser("JobTracker"); + longUgi.doAs(new PrivilegedExceptionAction() { + public Void run() throws Exception { + DistributedFileSystem dfs = (DistributedFileSystem) + HATestUtil.configureFailoverFs(cluster, conf); + // try renew with long name + dfs.renewDelegationToken(token); + return null; + } + }); + shortUgi.doAs(new PrivilegedExceptionAction() { + public Void run() throws Exception { + DistributedFileSystem dfs = (DistributedFileSystem) + HATestUtil.configureFailoverFs(cluster, conf); + dfs.renewDelegationToken(token); + return null; + } + }); + longUgi.doAs(new PrivilegedExceptionAction() { + public Void run() throws Exception { + DistributedFileSystem dfs = (DistributedFileSystem) + HATestUtil.configureFailoverFs(cluster, conf); + // try cancel with long name + dfs.cancelDelegationToken(token); + return null; + } + }); + } + + @Test + public void testHAUtilClonesDelegationTokens() throws Exception { + final Token token = + dfs.getDelegationToken("test"); + + UserGroupInformation ugi = UserGroupInformation.createRemoteUser("test"); + + URI haUri = new URI("hdfs://my-ha-uri/"); + token.setService(HAUtil.buildTokenServiceForLogicalUri(haUri)); + ugi.addToken(token); + HAUtil.cloneDelegationTokenForLogicalUri(ugi, haUri, nn0.getNameNodeAddress()); + HAUtil.cloneDelegationTokenForLogicalUri(ugi, haUri, nn1.getNameNodeAddress()); + + Collection> tokens = ugi.getTokens(); + assertEquals(3, tokens.size()); + + LOG.info("Tokens:\n" + Joiner.on("\n").join(tokens)); + + // check that the token selected for one of the physical IPC addresses + // matches the one we received + InetSocketAddress addr = nn0.getNameNodeAddress(); + Text ipcDtService = new Text( + addr.getAddress().getHostAddress() + ":" + addr.getPort()); + Token token2 = + DelegationTokenSelector.selectHdfsDelegationToken(ipcDtService, ugi); + assertNotNull(token2); + assertArrayEquals(token.getIdentifier(), token2.getIdentifier()); + assertArrayEquals(token.getPassword(), token2.getPassword()); + } + + enum TokenTestAction { + RENEW, CANCEL; + } + + private static void doRenewOrCancel( + final 
Token token, final Configuration conf, + final TokenTestAction action) + throws IOException, InterruptedException { + UserGroupInformation.createRemoteUser("JobTracker").doAs( + new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + switch (action) { + case RENEW: + token.renew(conf); + break; + case CANCEL: + token.cancel(conf); + break; + default: + fail("bad action:" + action); + } + return null; + } + }); + } +} From da9aa34bec7c68ba7f2988abb917b0a7aeb7f23f Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 28 Feb 2012 00:43:54 +0000 Subject: [PATCH 163/177] HDFS-3013. HA: NameNode format doesn't pick up dfs.namenode.name.dir.NameServiceId configuration. Contributed by Mingjie Lai. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1294425 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hadoop/hdfs/server/namenode/NameNode.java | 8 +++++++ .../TestValidateConfigurationSettings.java | 23 +++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index e8e63bf7029..0dc073f332f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -232,3 +232,5 @@ HDFS-2922. HA: close out operation categories (eli) HDFS-2993. HA: BackupNode#checkOperation should permit CHECKPOINT operations (eli) HDFS-2904. Client support for getting delegation tokens. (todd) + +HDFS-3013. HA: NameNode format doesn't pick up dfs.namenode.name.dir.NameServiceId configuration (Mingjie Lai via todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 5915183d493..5dc62560232 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -649,6 +649,10 @@ public class NameNode { private static boolean format(Configuration conf, boolean isConfirmationNeeded) throws IOException { + String nsId = DFSUtil.getNamenodeNameServiceId(conf); + String namenodeId = HAUtil.getNameNodeId(conf, nsId); + initializeGenericKeys(conf, nsId, namenodeId); + if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_DEFAULT)) { throw new IOException("The option " + DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY @@ -693,6 +697,10 @@ public class NameNode { private static boolean finalize(Configuration conf, boolean isConfirmationNeeded ) throws IOException { + String nsId = DFSUtil.getNamenodeNameServiceId(conf); + String namenodeId = HAUtil.getNameNodeId(conf, nsId); + initializeGenericKeys(conf, nsId, namenodeId); + FSNamesystem nsys = new FSNamesystem(conf, new FSImage(conf)); System.err.print( "\"finalize\" will remove the previous state of the files system.\n" diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestValidateConfigurationSettings.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestValidateConfigurationSettings.java index 397ad725ccb..53f4f966de2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestValidateConfigurationSettings.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestValidateConfigurationSettings.java @@ -19,10 +19,12 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.junit.Assert.*; import org.junit.Test; +import java.io.File; import java.io.IOException; import junit.framework.Assert; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -71,4 +73,25 @@ public class TestValidateConfigurationSettings { DFSTestUtil.formatNameNode(conf); NameNode nameNode = new NameNode(conf); // should be OK! } + + /** + * HDFS-3013: NameNode format command doesn't pick up + * dfs.namenode.name.dir.NameServiceId configuration. + */ + @Test + public void testGenericKeysForNameNodeFormat() + throws IOException { + Configuration conf = new HdfsConfiguration(); + FileSystem.setDefaultUri(conf, "hdfs://localhost:8070"); + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, "ns1"); + String nameDir = System.getProperty("java.io.tmpdir") + "/test.dfs.name"; + File dir = new File(nameDir); + if (dir.exists()) { + FileUtil.fullyDelete(dir); + } + conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY + ".ns1", nameDir); + DFSTestUtil.formatNameNode(conf); + NameNode nameNode = new NameNode(conf); + FileUtil.fullyDelete(dir); + } } From dcd2056e463b0f66039372c31c5319d5b368588f Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Tue, 28 Feb 2012 16:51:55 +0000 Subject: [PATCH 164/177] HADOOP-8116. RetriableCommand is using RetryPolicy incorrectly after HADOOP-7896. Contributed by Aaron T. Myers. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1294729 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.HDFS-1623.txt | 2 ++ .../apache/hadoop/tools/util/RetriableCommand.java | 12 ++++++++++-- .../apache/hadoop/tools/mapred/TestCopyMapper.java | 7 ++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt index c9dd46062f4..748ff939cee 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.HDFS-1623.txt @@ -49,3 +49,5 @@ core-default.xml file. (Uma Maheswara Rao G via atm) HADOOP-8041. Log a warning when a failover is first attempted (todd) HADOOP-8068. void methods can swallow exceptions when going through failover path (todd) + +HADOOP-8116. RetriableCommand is using RetryPolicy incorrectly after HADOOP-7896. 
(atm) diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java index 1d248f082a7..563372e0097 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/RetriableCommand.java @@ -22,7 +22,9 @@ package org.apache.hadoop.tools.util; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.io.retry.RetryPolicy.RetryAction; import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.util.ThreadUtil; import java.io.IOException; import java.util.concurrent.TimeUnit; @@ -80,7 +82,7 @@ public abstract class RetriableCommand { public Object execute(Object... arguments) throws Exception { Exception latestException; int counter = 0; - do { + while (true) { try { return doExecute(arguments); } catch(Exception exception) { @@ -88,7 +90,13 @@ public abstract class RetriableCommand { latestException = exception; } counter++; - } while (retryPolicy.shouldRetry(latestException, counter, 0, true).equals(RetryPolicy.RetryAction.RETRY)); + RetryAction action = retryPolicy.shouldRetry(latestException, counter, 0, true); + if (action.action == RetryPolicy.RetryAction.RetryDecision.RETRY) { + ThreadUtil.sleepAtLeastIgnoreInterrupts(action.delayMillis); + } else { + break; + } + } throw new IOException("Couldn't run retriable-command: " + description, latestException); diff --git a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java index e5ab0595c38..5ba5eb88673 100644 --- a/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java +++ b/hadoop-tools/hadoop-distcp/src/test/java/org/apache/hadoop/tools/mapred/TestCopyMapper.java @@ -545,7 +545,12 @@ public class TestCopyMapper { Assert.fail("Didn't expect the file to be copied"); } catch (AccessControlException ignore) { } catch (Exception e) { - if (e.getCause() == null || !(e.getCause() instanceof AccessControlException)) { + // We want to make sure the underlying cause of the exception is + // due to permissions error. The exception we're interested in is + // wrapped twice - once in RetriableCommand and again in CopyMapper + // itself. + if (e.getCause() == null || e.getCause().getCause() == null || + !(e.getCause().getCause() instanceof AccessControlException)) { throw new RuntimeException(e); } } From 1ab31b1715e9db498847725dadfb82b16f71143b Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Tue, 28 Feb 2012 18:38:40 +0000 Subject: [PATCH 165/177] HDFS-3019. Fix silent failure of TestEditLogJournalFailures. Contributed by Todd Lipcon. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1294772 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java | 1 + 2 files changed, 3 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 0dc073f332f..6d2de54f3d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -234,3 +234,5 @@ HDFS-2993. HA: BackupNode#checkOperation should permit CHECKPOINT operations (el HDFS-2904. Client support for getting delegation tokens. (todd) HDFS-3013. HA: NameNode format doesn't pick up dfs.namenode.name.dir.NameServiceId configuration (Mingjie Lai via todd) + +HDFS-3019. Fix silent failure of TestEditLogJournalFailures (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 5cd8be26a95..58f329f142e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -819,6 +819,7 @@ public class FSEditLog { @VisibleForTesting synchronized void setRuntimeForTesting(Runtime runtime) { this.runtime = runtime; + this.journalSet.setRuntimeForTesting(runtime); } /** From c69dfdd5e14af490790dff8227b11962ec816577 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Tue, 28 Feb 2012 20:09:18 +0000 Subject: [PATCH 166/177] HDFS-2958. Sweep for remaining proxy construction which doesn't go through failover path. 
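After this sweep, client code obtains NameNode proxies through the new NameNodeProxies class instead of constructing protocol translators by hand; createProxy chooses the HA (failover) or non-HA path depending on whether the URI names a configured logical nameservice. A minimal usage sketch under that assumption; the "mycluster" URI and its failover proxy provider configuration are illustrative and not part of this patch:

    import java.net.URI;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.NameNodeProxies;
    import org.apache.hadoop.hdfs.protocol.ClientProtocol;

    public class NameNodeProxyExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Illustrative logical URI; for the HA path to be taken it needs a
        // matching dfs.client.failover.proxy.provider.mycluster setting,
        // otherwise createProxy returns a plain non-HA proxy for the address.
        URI nameNodeUri = new URI("hdfs://mycluster");

        NameNodeProxies.ProxyAndInfo<ClientProtocol> proxyInfo =
            NameNodeProxies.createProxy(conf, nameNodeUri, ClientProtocol.class);

        // The proxy is used exactly as before; retries and failover are
        // handled underneath when the URI is a logical nameservice.
        ClientProtocol namenode = proxyInfo.getProxy();

        // Delegation token service associated with this proxy, e.g.
        // "ha-hdfs:mycluster" for a logical URI or "ip:port" otherwise.
        System.out.println(proxyInfo.getDelegationTokenService());
      }
    }
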
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1294811 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSClient.java | 9 +- .../java/org/apache/hadoop/hdfs/DFSUtil.java | 143 -------- .../java/org/apache/hadoop/hdfs/HAUtil.java | 149 -------- .../apache/hadoop/hdfs/NameNodeProxies.java | 332 ++++++++++++++++++ .../ClientNamenodeProtocolTranslatorPB.java | 24 -- ...appingsProtocolClientSideTranslatorPB.java | 19 +- .../JournalProtocolTranslatorPB.java | 12 +- .../NamenodeProtocolTranslatorPB.java | 14 - ...nPolicyProtocolClientSideTranslatorPB.java | 18 +- ...appingsProtocolClientSideTranslatorPB.java | 18 +- .../server/balancer/NameNodeConnector.java | 6 +- .../hdfs/server/namenode/BackupNode.java | 12 +- .../hdfs/server/namenode/DfsServlet.java | 5 +- .../namenode/EditLogBackupOutputStream.java | 17 +- .../server/namenode/SecondaryNameNode.java | 7 +- .../ha/ConfiguredFailoverProxyProvider.java | 17 +- .../apache/hadoop/hdfs/tools/DFSAdmin.java | 23 +- .../apache/hadoop/hdfs/tools/GetGroups.java | 13 +- .../org/apache/hadoop/hdfs/TestGetBlocks.java | 7 +- .../hadoop/hdfs/TestIsMethodSupported.java | 41 ++- .../apache/hadoop/hdfs/TestReplication.java | 3 +- .../hdfs/server/balancer/TestBalancer.java | 13 +- .../balancer/TestBalancerWithHANameNodes.java | 7 +- .../hdfs/server/namenode/ha/HATestUtil.java | 5 + .../namenode/ha/TestGetGroupsWithHA.java | 57 +++ 26 files changed, 507 insertions(+), 466 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestGetGroupsWithHA.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 6d2de54f3d5..18de5f63349 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -236,3 +236,5 @@ HDFS-2904. Client support for getting delegation tokens. (todd) HDFS-3013. HA: NameNode format doesn't pick up dfs.namenode.name.dir.NameServiceId configuration (Mingjie Lai via todd) HDFS-3019. Fix silent failure of TestEditLogJournalFailures (todd) + +HDFS-2958. Sweep for remaining proxy construction which doesn't go through failover path. 
(atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 2478bacdd38..f0dc8ceff2a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -62,7 +62,6 @@ import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.permission.FsPermission; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; -import org.apache.hadoop.hdfs.HAUtil.ProxyAndInfo; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.CorruptFileBlocks; import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; @@ -325,8 +324,8 @@ public class DFSClient implements java.io.Closeable { } else { Preconditions.checkArgument(nameNodeUri != null, "null URI"); - ProxyAndInfo proxyInfo = - HAUtil.createProxy(conf, nameNodeUri, ClientProtocol.class); + NameNodeProxies.ProxyAndInfo proxyInfo = + NameNodeProxies.createProxy(conf, nameNodeUri, ClientProtocol.class); this.dtService = proxyInfo.getDelegationTokenService(); this.namenode = proxyInfo.getProxy(); } @@ -694,8 +693,8 @@ public class DFSClient implements java.io.Closeable { "a failover proxy provider configured."); } - ProxyAndInfo info = - HAUtil.createProxy(conf, uri, ClientProtocol.class); + NameNodeProxies.ProxyAndInfo info = + NameNodeProxies.createProxy(conf, uri, ClientProtocol.class); assert info.getDelegationTokenService().equals(token.getService()) : "Returned service '" + info.getDelegationTokenService().toString() + "' doesn't match expected service '" + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index db9cf6960ed..f4a861089be 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -28,12 +28,10 @@ import java.security.SecureRandom; import java.util.Collection; import java.util.Collections; import java.util.Comparator; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Random; import java.util.StringTokenizer; -import java.util.concurrent.TimeUnit; import javax.net.SocketFactory; @@ -42,26 +40,15 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; -import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolTranslatorPB; -import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB; -import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB; -import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; -import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.server.namenode.NameNode; -import 
org.apache.hadoop.io.retry.RetryPolicies; -import org.apache.hadoop.io.retry.RetryPolicy; -import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NodeBase; @@ -801,26 +788,6 @@ public class DFSUtil { public static int roundBytesToGB(long bytes) { return Math.round((float)bytes/ 1024 / 1024 / 1024); } - - - /** Create a {@link NameNode} proxy */ - public static ClientProtocol createNamenode(Configuration conf) - throws IOException { - return createNamenode(NameNode.getAddress(conf), conf); - } - - /** Create a {@link NameNode} proxy */ - public static ClientProtocol createNamenode(InetSocketAddress nameNodeAddr, - Configuration conf) throws IOException { - return createNamenode(nameNodeAddr, conf, - UserGroupInformation.getCurrentUser()); - } - - /** Create a {@link NameNode} proxy */ - public static ClientProtocol createNamenode(InetSocketAddress nameNodeAddr, - Configuration conf, UserGroupInformation ugi) throws IOException { - return createNNProxyWithClientProtocol(nameNodeAddr, conf, ugi, true); - } /** Create a {@link ClientDatanodeProtocol} proxy */ public static ClientDatanodeProtocol createClientDatanodeProtocolProxy( @@ -845,116 +812,6 @@ public class DFSUtil { return new ClientDatanodeProtocolTranslatorPB(addr, ticket, conf, factory); } - /** - * Build a proxy connection to the namenode with NamenodeProtocol and set up - * the proxy with retry policy. - * @param address - namenode address - * @param conf - configuration - * @param ugi - User group information - * @return a proxy connection with NamenodeProtocol - * @throws - IOException - */ - public static NamenodeProtocolTranslatorPB createNNProxyWithNamenodeProtocol( - InetSocketAddress address, Configuration conf, UserGroupInformation ugi) - throws IOException { - return createNNProxyWithNamenodeProtocol(address, conf, ugi, true); - } - - /** - * Build a proxy connection to the namenode with NamenodeProtocol. - * @param address - namenode address - * @param conf - configuration - * @param ugi - User group information - * @param withRetries - indicates whether to create retry proxy or not - * @return a proxy connection with NamenodeProtocol - * @throws - IOException - */ - public static NamenodeProtocolTranslatorPB createNNProxyWithNamenodeProtocol( - InetSocketAddress address, Configuration conf, UserGroupInformation ugi, - boolean withRetries) throws IOException { - NamenodeProtocolPB proxy = (NamenodeProtocolPB) createNameNodeProxy( - address, conf, ugi, NamenodeProtocolPB.class); - if (withRetries) { // create the proxy with retries - RetryPolicy timeoutPolicy = RetryPolicies.exponentialBackoffRetry(5, 200, - TimeUnit.MILLISECONDS); - Map, RetryPolicy> exceptionToPolicyMap - = new HashMap, RetryPolicy>(); - RetryPolicy methodPolicy = RetryPolicies.retryByException(timeoutPolicy, - exceptionToPolicyMap); - Map methodNameToPolicyMap - = new HashMap(); - methodNameToPolicyMap.put("getBlocks", methodPolicy); - methodNameToPolicyMap.put("getAccessKeys", methodPolicy); - proxy = (NamenodeProtocolPB) RetryProxy.create(NamenodeProtocolPB.class, - proxy, methodNameToPolicyMap); - } - return new NamenodeProtocolTranslatorPB(proxy); - } - - /** - * Build a proxy connection to the namenode with ClientProtocol. 
- * @param address - namenode address - * @param conf - configuration - * @param ugi - User group information - * @param withRetries - indicates whether to create retry proxy or not - * @return a proxy connection with ClientProtocol - * @throws IOException - */ - public static ClientNamenodeProtocolTranslatorPB createNNProxyWithClientProtocol( - InetSocketAddress address, Configuration conf, UserGroupInformation ugi, - boolean withRetries) throws IOException { - ClientNamenodeProtocolPB proxy - = (ClientNamenodeProtocolPB) createNameNodeProxy(address, conf, ugi, - ClientNamenodeProtocolPB.class); - if (withRetries) { // create the proxy with retries - proxy = createNameNodeProxyWithRetries(proxy); - } - return new ClientNamenodeProtocolTranslatorPB(proxy); - } - - /** - * Creates the retry proxy by setting up the retry policy. - * @param proxy - non retry proxy connection - * @return a retry proxy connection - */ - public static ClientNamenodeProtocolPB createNameNodeProxyWithRetries( - ClientNamenodeProtocolPB proxy) { - RetryPolicy createPolicy = RetryPolicies - .retryUpToMaximumCountWithFixedSleep(5, - HdfsConstants.LEASE_SOFTLIMIT_PERIOD, TimeUnit.MILLISECONDS); - - Map, RetryPolicy> remoteExceptionToPolicyMap - = new HashMap, RetryPolicy>(); - remoteExceptionToPolicyMap.put(AlreadyBeingCreatedException.class, - createPolicy); - - Map, RetryPolicy> exceptionToPolicyMap - = new HashMap, RetryPolicy>(); - exceptionToPolicyMap.put(RemoteException.class, RetryPolicies - .retryByRemoteException(RetryPolicies.TRY_ONCE_THEN_FAIL, - remoteExceptionToPolicyMap)); - RetryPolicy methodPolicy = RetryPolicies.retryByException( - RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); - Map methodNameToPolicyMap - = new HashMap(); - - methodNameToPolicyMap.put("create", methodPolicy); - - ClientNamenodeProtocolPB retryProxy = (ClientNamenodeProtocolPB) RetryProxy - .create(ClientNamenodeProtocolPB.class, proxy, methodNameToPolicyMap); - return retryProxy; - } - - @SuppressWarnings("unchecked") - private static Object createNameNodeProxy(InetSocketAddress address, - Configuration conf, UserGroupInformation ugi, Class xface) - throws IOException { - RPC.setProtocolEngine(conf, xface, ProtobufRpcEngine.class); - Object proxy = RPC.getProxy(xface, RPC.getProtocolVersion(xface), address, - ugi, conf, NetUtils.getDefaultSocketFactory(conf)); - return proxy; - } - /** * Get nameservice Id for the {@link NameNode} based on namenode RPC address * matching the local node address. 
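HAUtil itself keeps only the delegation-token helpers for logical URIs: buildTokenServiceForLogicalUri stores the token under the logical host with the "ha-hdfs:" prefix, and cloneDelegationTokenForLogicalUri copies that token to the physical "ip:port" services so the RPC layer can still find it after a failover. A rough sketch of that flow, mirroring testHAUtilClonesDelegationTokens above; the cluster name, addresses and the blank stand-in token are placeholders, since a real token comes from the active NameNode:

    import java.net.InetSocketAddress;
    import java.net.URI;

    import org.apache.hadoop.hdfs.HAUtil;
    import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.security.UserGroupInformation;
    import org.apache.hadoop.security.token.Token;

    public class HATokenCloningSketch {
      public static void main(String[] args) throws Exception {
        URI logicalUri = new URI("hdfs://mycluster");
        Text haService = HAUtil.buildTokenServiceForLogicalUri(logicalUri);
        // haService is now "ha-hdfs:mycluster"

        // Placeholder token; in practice this is the delegation token fetched
        // from the active NameNode via the DistributedFileSystem.
        Token<DelegationTokenIdentifier> token =
            new Token<DelegationTokenIdentifier>();
        token.setKind(DelegationTokenIdentifier.HDFS_DELEGATION_KIND);
        token.setService(haService);

        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        ugi.addToken(token);

        // One clone per physical namenode address (addresses made up here),
        // so an RPC-layer lookup by "ip:port" resolves to the same credentials.
        HAUtil.cloneDelegationTokenForLogicalUri(
            ugi, logicalUri, new InetSocketAddress("127.0.0.1", 8020));
        HAUtil.cloneDelegationTokenForLogicalUri(
            ugi, logicalUri, new InetSocketAddress("127.0.0.1", 8021));
      }
    }
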
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 0a322140da8..30792984d17 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hdfs; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import java.io.IOException; -import java.lang.reflect.Constructor; import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; @@ -31,18 +30,11 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.DFSClient.Conf; -import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector; import org.apache.hadoop.hdfs.server.namenode.NameNode; -import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; -import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.retry.FailoverProxyProvider; -import org.apache.hadoop.io.retry.RetryPolicies; -import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; @@ -186,70 +178,6 @@ public class HAUtil { conf.setBoolean("dfs.ha.allow.stale.reads", val); } - /** Creates the Failover proxy provider instance*/ - @SuppressWarnings("unchecked") - private static FailoverProxyProvider createFailoverProxyProvider( - Configuration conf, Class> failoverProxyProviderClass, - Class xface, URI nameNodeUri) throws IOException { - Preconditions.checkArgument( - xface.isAssignableFrom(NamenodeProtocols.class), - "Interface %s is not a NameNode protocol", xface); - try { - Constructor> ctor = failoverProxyProviderClass - .getConstructor(Configuration.class, URI.class, Class.class); - FailoverProxyProvider provider = ctor.newInstance(conf, nameNodeUri, - xface); - return (FailoverProxyProvider) provider; - } catch (Exception e) { - String message = "Couldn't create proxy provider " + failoverProxyProviderClass; - if (LOG.isDebugEnabled()) { - LOG.debug(message, e); - } - if (e.getCause() instanceof IOException) { - throw (IOException) e.getCause(); - } else { - throw new IOException(message, e); - } - } - } - - /** Gets the configured Failover proxy provider's class */ - private static Class> getFailoverProxyProviderClass( - Configuration conf, URI nameNodeUri, Class xface) throws IOException { - if (nameNodeUri == null) { - return null; - } - String host = nameNodeUri.getHost(); - - String configKey = DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." - + host; - try { - @SuppressWarnings("unchecked") - Class> ret = (Class>) conf - .getClass(configKey, null, FailoverProxyProvider.class); - if (ret != null) { - // If we found a proxy provider, then this URI should be a logical NN. - // Given that, it shouldn't have a non-default port number. 
- int port = nameNodeUri.getPort(); - if (port > 0 && port != NameNode.DEFAULT_PORT) { - throw new IOException("Port " + port + " specified in URI " - + nameNodeUri + " but host '" + host - + "' is a logical (HA) namenode" - + " and does not use port information."); - } - } - return ret; - } catch (RuntimeException e) { - if (e.getCause() instanceof ClassNotFoundException) { - throw new IOException("Could not load failover proxy provider class " - + conf.get(configKey) + " which is configured for authority " - + nameNodeUri, e); - } else { - throw e; - } - } - } - /** * @return true if the given nameNodeUri appears to be a logical URI. * This is the case if there is a failover proxy provider configured @@ -263,60 +191,6 @@ public class HAUtil { return conf.get(configKey) != null; } - /** - * Creates the namenode proxy with the passed Protocol. - * @param conf the configuration containing the required IPC - * properties, client failover configurations, etc. - * @param nameNodeUri the URI pointing either to a specific NameNode - * or to a logical nameservice. - * @param xface the IPC interface which should be created - * @return an object containing both the proxy and the associated - * delegation token service it corresponds to - **/ - @SuppressWarnings("unchecked") - public static ProxyAndInfo createProxy( - Configuration conf, URI nameNodeUri, - Class xface) throws IOException { - Class> failoverProxyProviderClass = - HAUtil.getFailoverProxyProviderClass(conf, nameNodeUri, xface); - - if (failoverProxyProviderClass == null) { - // Non-HA case - return createNonHAProxy(conf, nameNodeUri, xface); - } else { - // HA case - FailoverProxyProvider failoverProxyProvider = HAUtil - .createFailoverProxyProvider(conf, failoverProxyProviderClass, xface, - nameNodeUri); - Conf config = new Conf(conf); - T proxy = (T) RetryProxy.create(xface, failoverProxyProvider, RetryPolicies - .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL, - config.maxFailoverAttempts, config.failoverSleepBaseMillis, - config.failoverSleepMaxMillis)); - - Text dtService = buildTokenServiceForLogicalUri(nameNodeUri); - return new ProxyAndInfo(proxy, dtService); - } - } - - @SuppressWarnings("unchecked") - private static ProxyAndInfo createNonHAProxy( - Configuration conf, URI nameNodeUri, Class xface) throws IOException { - InetSocketAddress nnAddr = NameNode.getAddress(nameNodeUri); - Text dtService = SecurityUtil.buildTokenService(nnAddr); - - if (xface == ClientProtocol.class) { - T proxy = (T)DFSUtil.createNamenode(nnAddr, conf); - return new ProxyAndInfo(proxy, dtService); - } else if (xface == NamenodeProtocol.class) { - T proxy = (T) DFSUtil.createNNProxyWithNamenodeProtocol( - nnAddr, conf, UserGroupInformation.getCurrentUser()); - return new ProxyAndInfo(proxy, dtService); - } else { - throw new AssertionError("Unsupported proxy type: " + xface); - } - } - /** * Parse the HDFS URI out of the provided token. * @throws IOException if the token is invalid @@ -383,27 +257,4 @@ public class HAUtil { LOG.debug("Mapped HA service delegation token for logical URI " + haUri + " to namenode " + singleNNAddr); } - - /** - * Wrapper for a client proxy as well as its associated service ID. - * This is simply used as a tuple-like return type for - * {@link HAUtil#createProxy(Configuration, URI, Class)}. 
- */ - public static class ProxyAndInfo { - private final PROXYTYPE proxy; - private final Text dtService; - - public ProxyAndInfo(PROXYTYPE proxy, Text dtService) { - this.proxy = proxy; - this.dtService = dtService; - } - - public PROXYTYPE getProxy() { - return proxy; - } - - public Text getDelegationTokenService() { - return dtService; - } - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java new file mode 100644 index 00000000000..d895734f332 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java @@ -0,0 +1,332 @@ +package org.apache.hadoop.hdfs; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX; + +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.net.InetSocketAddress; +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSClient.Conf; +import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB; +import org.apache.hadoop.hdfs.protocolPB.GetUserMappingsProtocolClientSideTranslatorPB; +import org.apache.hadoop.hdfs.protocolPB.GetUserMappingsProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.JournalProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.JournalProtocolTranslatorPB; +import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; +import org.apache.hadoop.hdfs.protocolPB.RefreshAuthorizationPolicyProtocolClientSideTranslatorPB; +import org.apache.hadoop.hdfs.protocolPB.RefreshAuthorizationPolicyProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.RefreshUserMappingsProtocolClientSideTranslatorPB; +import org.apache.hadoop.hdfs.protocolPB.RefreshUserMappingsProtocolPB; +import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; +import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; +import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.retry.FailoverProxyProvider; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.io.retry.RetryProxy; +import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.RefreshUserMappingsProtocol; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; +import org.apache.hadoop.tools.GetUserMappingsProtocol; + +import com.google.common.base.Preconditions; + +/** + * Create proxy objects to communicate with a remote NN. All remote access to an + * NN should be funneled through this class. 
Most of the time you'll want to use + * {@link NameNodeProxies#createProxy(Configuration, URI, Class)}, which will + * create either an HA- or non-HA-enabled client proxy as appropriate. + */ +public class NameNodeProxies { + + private static final Log LOG = LogFactory.getLog(NameNodeProxies.class); + + /** + * Wrapper for a client proxy as well as its associated service ID. + * This is simply used as a tuple-like return type for + * {@link createProxy} and {@link createNonHaProxy}. + */ + public static class ProxyAndInfo { + private final PROXYTYPE proxy; + private final Text dtService; + + public ProxyAndInfo(PROXYTYPE proxy, Text dtService) { + this.proxy = proxy; + this.dtService = dtService; + } + + public PROXYTYPE getProxy() { + return proxy; + } + + public Text getDelegationTokenService() { + return dtService; + } + } + + /** + * Creates the namenode proxy with the passed protocol. This will handle + * creation of either HA- or non-HA-enabled proxy objects, depending upon + * if the provided URI is a configured logical URI. + * + * @param conf the configuration containing the required IPC + * properties, client failover configurations, etc. + * @param nameNodeUri the URI pointing either to a specific NameNode + * or to a logical nameservice. + * @param xface the IPC interface which should be created + * @return an object containing both the proxy and the associated + * delegation token service it corresponds to + * @throws IOException if there is an error creating the proxy + **/ + @SuppressWarnings("unchecked") + public static ProxyAndInfo createProxy(Configuration conf, + URI nameNodeUri, Class xface) throws IOException { + Class> failoverProxyProviderClass = + getFailoverProxyProviderClass(conf, nameNodeUri, xface); + + if (failoverProxyProviderClass == null) { + // Non-HA case + return createNonHAProxy(conf, NameNode.getAddress(nameNodeUri), xface, + UserGroupInformation.getCurrentUser(), true); + } else { + // HA case + FailoverProxyProvider failoverProxyProvider = NameNodeProxies + .createFailoverProxyProvider(conf, failoverProxyProviderClass, xface, + nameNodeUri); + Conf config = new Conf(conf); + T proxy = (T) RetryProxy.create(xface, failoverProxyProvider, RetryPolicies + .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL, + config.maxFailoverAttempts, config.failoverSleepBaseMillis, + config.failoverSleepMaxMillis)); + + Text dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri); + return new ProxyAndInfo(proxy, dtService); + } + } + + /** + * Creates an explicitly non-HA-enabled proxy object. Most of the time you + * don't want to use this, and should instead use {@link createProxy}. 
+ * + * @param conf the configuration object + * @param nnAddr address of the remote NN to connect to + * @param xface the IPC interface which should be created + * @param ugi the user who is making the calls on the proxy object + * @param withRetries certain interfaces have a non-standard retry policy + * @return an object containing both the proxy and the associated + * delegation token service it corresponds to + * @throws IOException + */ + @SuppressWarnings("unchecked") + public static ProxyAndInfo createNonHAProxy( + Configuration conf, InetSocketAddress nnAddr, Class xface, + UserGroupInformation ugi, boolean withRetries) throws IOException { + Text dtService = SecurityUtil.buildTokenService(nnAddr); + + T proxy; + if (xface == ClientProtocol.class) { + proxy = (T) createNNProxyWithClientProtocol(nnAddr, conf, ugi, + withRetries); + } else if (xface == JournalProtocol.class) { + proxy = (T) createNNProxyWithJournalProtocol(nnAddr, conf, ugi); + } else if (xface == NamenodeProtocol.class) { + proxy = (T) createNNProxyWithNamenodeProtocol(nnAddr, conf, ugi, + withRetries); + } else if (xface == GetUserMappingsProtocol.class) { + proxy = (T) createNNProxyWithGetUserMappingsProtocol(nnAddr, conf, ugi); + } else if (xface == RefreshUserMappingsProtocol.class) { + proxy = (T) createNNProxyWithRefreshUserMappingsProtocol(nnAddr, conf, ugi); + } else if (xface == RefreshAuthorizationPolicyProtocol.class) { + proxy = (T) createNNProxyWithRefreshAuthorizationPolicyProtocol(nnAddr, + conf, ugi); + } else { + String message = "Upsupported protocol found when creating the proxy " + + "conection to NameNode: " + + ((xface != null) ? xface.getClass().getName() : xface); + LOG.error(message); + throw new IllegalStateException(message); + } + return new ProxyAndInfo(proxy, dtService); + } + + private static JournalProtocol createNNProxyWithJournalProtocol( + InetSocketAddress address, Configuration conf, UserGroupInformation ugi) + throws IOException { + JournalProtocolPB proxy = (JournalProtocolPB) createNameNodeProxy(address, + conf, ugi, JournalProtocolPB.class); + return new JournalProtocolTranslatorPB(proxy); + } + + private static RefreshAuthorizationPolicyProtocol + createNNProxyWithRefreshAuthorizationPolicyProtocol(InetSocketAddress address, + Configuration conf, UserGroupInformation ugi) throws IOException { + RefreshAuthorizationPolicyProtocolPB proxy = (RefreshAuthorizationPolicyProtocolPB) + createNameNodeProxy(address, conf, ugi, RefreshAuthorizationPolicyProtocolPB.class); + return new RefreshAuthorizationPolicyProtocolClientSideTranslatorPB(proxy); + } + + private static RefreshUserMappingsProtocol + createNNProxyWithRefreshUserMappingsProtocol(InetSocketAddress address, + Configuration conf, UserGroupInformation ugi) throws IOException { + RefreshUserMappingsProtocolPB proxy = (RefreshUserMappingsProtocolPB) + createNameNodeProxy(address, conf, ugi, RefreshUserMappingsProtocolPB.class); + return new RefreshUserMappingsProtocolClientSideTranslatorPB(proxy); + } + + private static GetUserMappingsProtocol createNNProxyWithGetUserMappingsProtocol( + InetSocketAddress address, Configuration conf, UserGroupInformation ugi) + throws IOException { + GetUserMappingsProtocolPB proxy = (GetUserMappingsProtocolPB) + createNameNodeProxy(address, conf, ugi, GetUserMappingsProtocolPB.class); + return new GetUserMappingsProtocolClientSideTranslatorPB(proxy); + } + + private static NamenodeProtocol createNNProxyWithNamenodeProtocol( + InetSocketAddress address, Configuration conf, 
UserGroupInformation ugi, + boolean withRetries) throws IOException { + NamenodeProtocolPB proxy = (NamenodeProtocolPB) createNameNodeProxy( + address, conf, ugi, NamenodeProtocolPB.class); + if (withRetries) { // create the proxy with retries + RetryPolicy timeoutPolicy = RetryPolicies.exponentialBackoffRetry(5, 200, + TimeUnit.MILLISECONDS); + Map, RetryPolicy> exceptionToPolicyMap + = new HashMap, RetryPolicy>(); + RetryPolicy methodPolicy = RetryPolicies.retryByException(timeoutPolicy, + exceptionToPolicyMap); + Map methodNameToPolicyMap + = new HashMap(); + methodNameToPolicyMap.put("getBlocks", methodPolicy); + methodNameToPolicyMap.put("getAccessKeys", methodPolicy); + proxy = (NamenodeProtocolPB) RetryProxy.create(NamenodeProtocolPB.class, + proxy, methodNameToPolicyMap); + } + return new NamenodeProtocolTranslatorPB(proxy); + } + + private static ClientProtocol createNNProxyWithClientProtocol( + InetSocketAddress address, Configuration conf, UserGroupInformation ugi, + boolean withRetries) throws IOException { + ClientNamenodeProtocolPB proxy = (ClientNamenodeProtocolPB) NameNodeProxies + .createNameNodeProxy(address, conf, ugi, ClientNamenodeProtocolPB.class); + if (withRetries) { // create the proxy with retries + RetryPolicy createPolicy = RetryPolicies + .retryUpToMaximumCountWithFixedSleep(5, + HdfsConstants.LEASE_SOFTLIMIT_PERIOD, TimeUnit.MILLISECONDS); + + Map, RetryPolicy> remoteExceptionToPolicyMap + = new HashMap, RetryPolicy>(); + remoteExceptionToPolicyMap.put(AlreadyBeingCreatedException.class, + createPolicy); + + Map, RetryPolicy> exceptionToPolicyMap + = new HashMap, RetryPolicy>(); + exceptionToPolicyMap.put(RemoteException.class, RetryPolicies + .retryByRemoteException(RetryPolicies.TRY_ONCE_THEN_FAIL, + remoteExceptionToPolicyMap)); + RetryPolicy methodPolicy = RetryPolicies.retryByException( + RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); + Map methodNameToPolicyMap + = new HashMap(); + + methodNameToPolicyMap.put("create", methodPolicy); + + proxy = (ClientNamenodeProtocolPB) RetryProxy + .create(ClientNamenodeProtocolPB.class, proxy, methodNameToPolicyMap); + } + return new ClientNamenodeProtocolTranslatorPB(proxy); + } + + @SuppressWarnings("unchecked") + private static Object createNameNodeProxy(InetSocketAddress address, + Configuration conf, UserGroupInformation ugi, Class xface) + throws IOException { + RPC.setProtocolEngine(conf, xface, ProtobufRpcEngine.class); + Object proxy = RPC.getProxy(xface, RPC.getProtocolVersion(xface), address, + ugi, conf, NetUtils.getDefaultSocketFactory(conf)); + return proxy; + } + + /** Gets the configured Failover proxy provider's class */ + private static Class> getFailoverProxyProviderClass( + Configuration conf, URI nameNodeUri, Class xface) throws IOException { + if (nameNodeUri == null) { + return null; + } + String host = nameNodeUri.getHost(); + + String configKey = DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + + host; + try { + @SuppressWarnings("unchecked") + Class> ret = (Class>) conf + .getClass(configKey, null, FailoverProxyProvider.class); + if (ret != null) { + // If we found a proxy provider, then this URI should be a logical NN. + // Given that, it shouldn't have a non-default port number. 
+ int port = nameNodeUri.getPort(); + if (port > 0 && port != NameNode.DEFAULT_PORT) { + throw new IOException("Port " + port + " specified in URI " + + nameNodeUri + " but host '" + host + + "' is a logical (HA) namenode" + + " and does not use port information."); + } + } + return ret; + } catch (RuntimeException e) { + if (e.getCause() instanceof ClassNotFoundException) { + throw new IOException("Could not load failover proxy provider class " + + conf.get(configKey) + " which is configured for authority " + + nameNodeUri, e); + } else { + throw e; + } + } + } + + /** Creates the Failover proxy provider instance*/ + @SuppressWarnings("unchecked") + private static FailoverProxyProvider createFailoverProxyProvider( + Configuration conf, Class> failoverProxyProviderClass, + Class xface, URI nameNodeUri) throws IOException { + Preconditions.checkArgument( + xface.isAssignableFrom(NamenodeProtocols.class), + "Interface %s is not a NameNode protocol", xface); + try { + Constructor> ctor = failoverProxyProviderClass + .getConstructor(Configuration.class, URI.class, Class.class); + FailoverProxyProvider provider = ctor.newInstance(conf, nameNodeUri, + xface); + return (FailoverProxyProvider) provider; + } catch (Exception e) { + String message = "Couldn't create proxy provider " + failoverProxyProviderClass; + if (LOG.isDebugEnabled()) { + LOG.debug(message, e); + } + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } else { + throw new IOException(message, e); + } + } + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index c6dc3e3a2bc..bd5815b1b51 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -20,12 +20,10 @@ package org.apache.hadoop.hdfs.protocolPB; import java.io.Closeable; import java.io.FileNotFoundException; import java.io.IOException; -import java.net.InetSocketAddress; import java.util.Arrays; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FileAlreadyExistsException; @@ -34,7 +32,6 @@ import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.Options.Rename; import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.LocatedBlock; @@ -59,15 +56,12 @@ import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.ipc.ProtocolTranslator; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; -import org.apache.hadoop.net.NetUtils; 
import org.apache.hadoop.security.AccessControlException; -import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.AbandonBlockRequestProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.AddBlockRequestProto; @@ -135,29 +129,11 @@ public class ClientNamenodeProtocolTranslatorPB implements ProtocolMetaInterface, ClientProtocol, Closeable, ProtocolTranslator { final private ClientNamenodeProtocolPB rpcProxy; - public ClientNamenodeProtocolTranslatorPB(InetSocketAddress nameNodeAddr, - Configuration conf, UserGroupInformation ugi) throws IOException { - this(getNamenodeRetryProxy(nameNodeAddr, conf, ugi)); - } - public ClientNamenodeProtocolTranslatorPB(ClientNamenodeProtocolPB proxy) throws IOException { rpcProxy = proxy; } - private static ClientNamenodeProtocolPB getNamenodeRetryProxy( - InetSocketAddress nameNodeAddr, Configuration conf, - UserGroupInformation ugi) throws IOException { - RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class, - ProtobufRpcEngine.class); - ClientNamenodeProtocolPB proxy = RPC.getProxy( - ClientNamenodeProtocolPB.class, RPC - .getProtocolVersion(ClientNamenodeProtocolPB.class), nameNodeAddr, - ugi, conf, NetUtils.getSocketFactory(conf, - ClientNamenodeProtocolPB.class)); - return DFSUtil.createNameNodeProxyWithRetries(proxy); - } - public void close() { RPC.stopProxy(rpcProxy); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/GetUserMappingsProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/GetUserMappingsProtocolClientSideTranslatorPB.java index a968bfb01a5..c5407172b7f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/GetUserMappingsProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/GetUserMappingsProtocolClientSideTranslatorPB.java @@ -20,22 +20,15 @@ package org.apache.hadoop.hdfs.protocolPB; import java.io.Closeable; import java.io.IOException; -import java.net.InetSocketAddress; - -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.proto.GetUserMappingsProtocolProtos.GetGroupsForUserRequestProto; import org.apache.hadoop.hdfs.protocol.proto.GetUserMappingsProtocolProtos.GetGroupsForUserResponseProto; import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; -import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.tools.GetUserMappingsProtocol; import com.google.protobuf.RpcController; @@ -47,16 +40,10 @@ public class GetUserMappingsProtocolClientSideTranslatorPB implements /** RpcController is not used and hence is set to null */ private final static RpcController NULL_CONTROLLER = null; private final GetUserMappingsProtocolPB rpcProxy; - + public GetUserMappingsProtocolClientSideTranslatorPB( - InetSocketAddress nameNodeAddr, UserGroupInformation ugi, - Configuration conf) throws 
IOException { - RPC.setProtocolEngine(conf, GetUserMappingsProtocolPB.class, - ProtobufRpcEngine.class); - rpcProxy = RPC.getProxy(GetUserMappingsProtocolPB.class, - RPC.getProtocolVersion(GetUserMappingsProtocolPB.class), - NameNode.getAddress(conf), ugi, conf, - NetUtils.getSocketFactory(conf, GetUserMappingsProtocol.class)); + GetUserMappingsProtocolPB rpcProxy) { + this.rpcProxy = rpcProxy; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolTranslatorPB.java index 89d3247cd65..76a4f1c5caf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/JournalProtocolTranslatorPB.java @@ -19,17 +19,14 @@ package org.apache.hadoop.hdfs.protocolPB; import java.io.Closeable; import java.io.IOException; -import java.net.InetSocketAddress; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.JournalRequestProto; import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.StartLogSegmentRequestProto; import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; -import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtobufHelper; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.ProtocolSignature; @@ -52,12 +49,9 @@ public class JournalProtocolTranslatorPB implements ProtocolMetaInterface, /** RpcController is not used and hence is set to null */ private final static RpcController NULL_CONTROLLER = null; private final JournalProtocolPB rpcProxy; - - public JournalProtocolTranslatorPB(InetSocketAddress nameNodeAddr, - Configuration conf) throws IOException { - RPC.setProtocolEngine(conf, JournalProtocolPB.class, ProtobufRpcEngine.class); - rpcProxy = RPC.getProxy(JournalProtocolPB.class, - RPC.getProtocolVersion(JournalProtocolPB.class), nameNodeAddr, conf); + + public JournalProtocolTranslatorPB(JournalProtocolPB rpcProxy) { + this.rpcProxy = rpcProxy; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java index 1f8a47d1b07..b8469ece796 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java @@ -19,11 +19,9 @@ package org.apache.hadoop.hdfs.protocolPB; import java.io.Closeable; import java.io.IOException; -import java.net.InetSocketAddress; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeCommandProto; @@ -47,14 +45,11 @@ 
import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; -import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.security.UserGroupInformation; import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; @@ -84,15 +79,6 @@ public class NamenodeProtocolTranslatorPB implements NamenodeProtocol, VersionRequestProto.newBuilder().build(); final private NamenodeProtocolPB rpcProxy; - - public NamenodeProtocolTranslatorPB(InetSocketAddress nameNodeAddr, - Configuration conf, UserGroupInformation ugi) throws IOException { - RPC.setProtocolEngine(conf, NamenodeProtocolPB.class, - ProtobufRpcEngine.class); - rpcProxy = RPC.getProxy(NamenodeProtocolPB.class, - RPC.getProtocolVersion(NamenodeProtocolPB.class), nameNodeAddr, ugi, - conf, NetUtils.getSocketFactory(conf, NamenodeProtocolPB.class)); - } public NamenodeProtocolTranslatorPB(NamenodeProtocolPB rpcProxy) { this.rpcProxy = rpcProxy; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolClientSideTranslatorPB.java index 22b2bcffabc..6e94ad20994 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshAuthorizationPolicyProtocolClientSideTranslatorPB.java @@ -20,21 +20,15 @@ package org.apache.hadoop.hdfs.protocolPB; import java.io.Closeable; import java.io.IOException; -import java.net.InetSocketAddress; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.proto.RefreshAuthorizationPolicyProtocolProtos.RefreshServiceAclRequestProto; import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; -import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; import com.google.protobuf.RpcController; @@ -46,16 +40,10 @@ public class RefreshAuthorizationPolicyProtocolClientSideTranslatorPB implements /** RpcController is not used and hence is set to null */ private final static RpcController NULL_CONTROLLER = null; private final RefreshAuthorizationPolicyProtocolPB rpcProxy; - + public RefreshAuthorizationPolicyProtocolClientSideTranslatorPB( - InetSocketAddress nameNodeAddr, UserGroupInformation ugi, - Configuration conf) throws IOException { - RPC.setProtocolEngine(conf, RefreshAuthorizationPolicyProtocolPB.class, - 
ProtobufRpcEngine.class); - rpcProxy = RPC.getProxy(RefreshAuthorizationPolicyProtocolPB.class, - RPC.getProtocolVersion(RefreshAuthorizationPolicyProtocolPB.class), - NameNode.getAddress(conf), ugi, conf, - NetUtils.getSocketFactory(conf, RefreshAuthorizationPolicyProtocol.class)); + RefreshAuthorizationPolicyProtocolPB rpcProxy) { + this.rpcProxy = rpcProxy; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolClientSideTranslatorPB.java index c11cf511f54..027cb3faae3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/RefreshUserMappingsProtocolClientSideTranslatorPB.java @@ -20,23 +20,17 @@ package org.apache.hadoop.hdfs.protocolPB; import java.io.Closeable; import java.io.IOException; -import java.net.InetSocketAddress; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.proto.RefreshUserMappingsProtocolProtos.RefreshSuperUserGroupsConfigurationRequestProto; import org.apache.hadoop.hdfs.protocol.proto.RefreshUserMappingsProtocolProtos.RefreshUserToGroupsMappingsRequestProto; import org.apache.hadoop.hdfs.protocolR23Compatible.ProtocolSignatureWritable; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.ipc.ProtobufHelper; -import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; -import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.RefreshUserMappingsProtocol; -import org.apache.hadoop.security.UserGroupInformation; import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; @@ -47,16 +41,10 @@ public class RefreshUserMappingsProtocolClientSideTranslatorPB implements /** RpcController is not used and hence is set to null */ private final static RpcController NULL_CONTROLLER = null; private final RefreshUserMappingsProtocolPB rpcProxy; - + public RefreshUserMappingsProtocolClientSideTranslatorPB( - InetSocketAddress nameNodeAddr, UserGroupInformation ugi, - Configuration conf) throws IOException { - RPC.setProtocolEngine(conf, RefreshUserMappingsProtocolPB.class, - ProtobufRpcEngine.class); - rpcProxy = RPC.getProxy(RefreshUserMappingsProtocolPB.class, - RPC.getProtocolVersion(RefreshUserMappingsProtocolPB.class), - NameNode.getAddress(conf), ugi, conf, - NetUtils.getSocketFactory(conf, RefreshUserMappingsProtocol.class)); + RefreshUserMappingsProtocolPB rpcProxy) { + this.rpcProxy = rpcProxy; } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java index e45f721b75a..eab6273c221 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java @@ -31,7 +31,7 @@ import 
org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.HAUtil; +import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -76,10 +76,10 @@ class NameNodeConnector { URI nameNodeUri = NameNode.getUri(this.namenodeAddress); this.namenode = - HAUtil.createProxy(conf, nameNodeUri, NamenodeProtocol.class) + NameNodeProxies.createProxy(conf, nameNodeUri, NamenodeProtocol.class) .getProxy(); this.client = - HAUtil.createProxy(conf, nameNodeUri, ClientProtocol.class) + NameNodeProxies.createProxy(conf, nameNodeUri, ClientProtocol.class) .getProxy(); this.fs = FileSystem.get(nameNodeUri, conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index 09adc0aa76b..11431af52a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -26,13 +26,13 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.JournalProtocolService; import org.apache.hadoop.hdfs.protocolPB.JournalProtocolPB; import org.apache.hadoop.hdfs.protocolPB.JournalProtocolServerSideTranslatorPB; -import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations; @@ -41,7 +41,6 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeCommand; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; -import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.StandbyException; import org.apache.hadoop.net.NetUtils; @@ -71,7 +70,7 @@ public class BackupNode extends NameNode { private static final String BN_SERVICE_RPC_ADDRESS_KEY = DFSConfigKeys.DFS_NAMENODE_BACKUP_SERVICE_RPC_ADDRESS_KEY; /** Name-node proxy */ - NamenodeProtocolTranslatorPB namenode; + NamenodeProtocol namenode; /** Name-node RPC address */ String nnRpcAddress; /** Name-node HTTP address */ @@ -192,7 +191,7 @@ public class BackupNode extends NameNode { } // Stop the RPC client if (namenode != null) { - IOUtils.cleanup(LOG, namenode); + RPC.stopProxy(namenode); } namenode = null; // Stop the checkpoint manager @@ -285,8 +284,9 @@ public class BackupNode extends NameNode { private NamespaceInfo handshake(Configuration conf) throws IOException { // connect to name node InetSocketAddress nnAddress = NameNode.getServiceAddress(conf, true); - 
this.namenode = new NamenodeProtocolTranslatorPB(nnAddress, conf, - UserGroupInformation.getCurrentUser()); + this.namenode = NameNodeProxies.createNonHAProxy(conf, nnAddress, + NamenodeProtocol.class, UserGroupInformation.getCurrentUser(), + true).getProxy(); this.nnRpcAddress = NetUtils.getHostPortString(nnAddress); this.nnHttpAddress = NetUtils.getHostPortString(super.getHttpServerAddress(conf)); // get version and id info from the name-node diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DfsServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DfsServlet.java index 6459ffd0e07..402dcdd0ac0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DfsServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/DfsServlet.java @@ -26,8 +26,8 @@ import javax.servlet.http.HttpServletRequest; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.ipc.RemoteException; @@ -77,7 +77,8 @@ abstract class DfsServlet extends HttpServlet { NameNodeHttpServer.getNameNodeAddressFromContext(context); Configuration conf = new HdfsConfiguration( NameNodeHttpServer.getConfFromContext(context)); - return DFSUtil.createNamenode(nnAddr, conf); + return NameNodeProxies.createProxy(conf, NameNode.getUri(nnAddr), + ClientProtocol.class).getProxy(); } protected UserGroupInformation getUGI(HttpServletRequest request, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java index 8c3ad2ecdbc..bdb4c5e7732 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EditLogBackupOutputStream.java @@ -22,12 +22,14 @@ import java.net.InetSocketAddress; import java.util.Arrays; import org.apache.hadoop.hdfs.HdfsConfiguration; -import org.apache.hadoop.hdfs.protocolPB.JournalProtocolTranslatorPB; +import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.server.common.Storage; +import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeRegistration; import org.apache.hadoop.io.DataOutputBuffer; -import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.UserGroupInformation; /** * An implementation of the abstract class {@link EditLogOutputStream}, @@ -40,7 +42,7 @@ import org.apache.hadoop.net.NetUtils; class EditLogBackupOutputStream extends EditLogOutputStream { static int DEFAULT_BUFFER_SIZE = 256; - private JournalProtocolTranslatorPB backupNode; // RPC proxy to backup node + private JournalProtocol backupNode; // RPC proxy to backup node private NamenodeRegistration bnRegistration; // backup node registration private NamenodeRegistration nnRegistration; // active node registration private 
EditsDoubleBuffer doubleBuf; @@ -55,8 +57,9 @@ class EditLogBackupOutputStream extends EditLogOutputStream { InetSocketAddress bnAddress = NetUtils.createSocketAddr(bnRegistration.getAddress()); try { - this.backupNode = - new JournalProtocolTranslatorPB(bnAddress, new HdfsConfiguration()); + this.backupNode = NameNodeProxies.createNonHAProxy(new HdfsConfiguration(), + bnAddress, JournalProtocol.class, UserGroupInformation.getCurrentUser(), + true).getProxy(); } catch(IOException e) { Storage.LOG.error("Error connecting to: " + bnAddress, e); throw e; @@ -93,14 +96,14 @@ class EditLogBackupOutputStream extends EditLogOutputStream { throw new IOException("BackupEditStream has " + size + " records still to be flushed and cannot be closed."); } - IOUtils.cleanup(Storage.LOG, backupNode); // stop the RPC threads + RPC.stopProxy(backupNode); // stop the RPC threads doubleBuf.close(); doubleBuf = null; } @Override public void abort() throws IOException { - IOUtils.cleanup(Storage.LOG, backupNode); + RPC.stopProxy(backupNode); doubleBuf = null; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java index 3846e806642..c1ce79e439b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/SecondaryNameNode.java @@ -46,10 +46,10 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; +import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.DFSUtil.ErrorSimulator; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException; import org.apache.hadoop.hdfs.server.common.JspHelper; @@ -212,8 +212,9 @@ public class SecondaryNameNode implements Runnable { nameNodeAddr = NameNode.getServiceAddress(conf, true); this.conf = conf; - this.namenode = new NamenodeProtocolTranslatorPB(nameNodeAddr, conf, - UserGroupInformation.getCurrentUser()); + this.namenode = NameNodeProxies.createNonHAProxy(conf, nameNodeAddr, + NamenodeProtocol.class, UserGroupInformation.getCurrentUser(), + true).getProxy(); // initialize checkpoint directories fsName = getInfoServer(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index 79223a0455f..a20880aad65 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -32,8 +32,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; -import org.apache.hadoop.hdfs.protocol.ClientProtocol; -import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; +import 
org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.io.retry.FailoverProxyProvider; import org.apache.hadoop.ipc.RPC; @@ -121,18 +120,8 @@ public class ConfiguredFailoverProxyProvider implements AddressRpcProxyPair current = proxies.get(currentProxyIndex); if (current.namenode == null) { try { - if (NamenodeProtocol.class.equals(xface)) { - current.namenode = DFSUtil.createNNProxyWithNamenodeProtocol( - current.address, conf, ugi, false); - } else if (ClientProtocol.class.equals(xface)) { - current.namenode = DFSUtil.createNNProxyWithClientProtocol( - current.address, conf, ugi, false); - } else { - throw new IllegalStateException( - "Upsupported protocol found when creating the proxy conection to NameNode. " - + ((xface != null) ? xface.getClass().getName() : xface) - + " is not supported by " + this.getClass().getName()); - } + current.namenode = NameNodeProxies.createNonHAProxy(conf, + current.address, xface, ugi, false).getProxy(); } catch (IOException e) { LOG.error("Failed to create RPC proxy to NameNode", e); throw new RuntimeException(e); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java index 1025880c9da..edbbb2250b8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java @@ -38,19 +38,20 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.UpgradeAction; -import org.apache.hadoop.hdfs.protocolPB.RefreshAuthorizationPolicyProtocolClientSideTranslatorPB; -import org.apache.hadoop.hdfs.protocolPB.RefreshUserMappingsProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.common.UpgradeStatusReport; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.RefreshUserMappingsProtocol; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.ToolRunner; @@ -791,9 +792,9 @@ public class DFSAdmin extends FsShell { conf.get(DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY, "")); // Create the client - RefreshAuthorizationPolicyProtocolClientSideTranslatorPB refreshProtocol = - new RefreshAuthorizationPolicyProtocolClientSideTranslatorPB( - NameNode.getAddress(conf), getUGI(), conf); + RefreshAuthorizationPolicyProtocol refreshProtocol = + NameNodeProxies.createProxy(conf, FileSystem.getDefaultUri(conf), + RefreshAuthorizationPolicyProtocol.class).getProxy(); // Refresh the authorization policy in-effect refreshProtocol.refreshServiceAcl(); @@ -817,9 +818,9 @@ public class DFSAdmin extends FsShell { conf.get(DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY, "")); // 
Create the client - RefreshUserMappingsProtocolClientSideTranslatorPB refreshProtocol = - new RefreshUserMappingsProtocolClientSideTranslatorPB( - NameNode.getAddress(conf), getUGI(), conf); + RefreshUserMappingsProtocol refreshProtocol = + NameNodeProxies.createProxy(conf, FileSystem.getDefaultUri(conf), + RefreshUserMappingsProtocol.class).getProxy(); // Refresh the user-to-groups mappings refreshProtocol.refreshUserToGroupsMappings(); @@ -844,9 +845,9 @@ public class DFSAdmin extends FsShell { conf.get(DFSConfigKeys.DFS_NAMENODE_USER_NAME_KEY, "")); // Create the client - RefreshUserMappingsProtocolClientSideTranslatorPB refreshProtocol = - new RefreshUserMappingsProtocolClientSideTranslatorPB( - NameNode.getAddress(conf), getUGI(), conf); + RefreshUserMappingsProtocol refreshProtocol = + NameNodeProxies.createProxy(conf, FileSystem.getDefaultUri(conf), + RefreshUserMappingsProtocol.class).getProxy(); // Refresh the user-to-groups mappings refreshProtocol.refreshSuperUserGroupsConfiguration(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetGroups.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetGroups.java index 5ad227d9e02..51612befff5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetGroups.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/GetGroups.java @@ -21,8 +21,11 @@ import java.io.IOException; import java.io.PrintStream; import java.net.InetSocketAddress; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.protocolPB.GetUserMappingsProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.security.UserGroupInformation; @@ -34,6 +37,7 @@ import org.apache.hadoop.util.ToolRunner; * HDFS implementation of a tool for getting the groups which a given user * belongs to. 
*/ +@InterfaceAudience.Private public class GetGroups extends GetGroupsBase { static{ @@ -41,11 +45,11 @@ public class GetGroups extends GetGroupsBase { } - GetGroups(Configuration conf) { + public GetGroups(Configuration conf) { super(conf); } - GetGroups(Configuration conf, PrintStream out) { + public GetGroups(Configuration conf, PrintStream out) { super(conf, out); } @@ -57,9 +61,8 @@ public class GetGroups extends GetGroupsBase { @Override protected GetUserMappingsProtocol getUgmProtocol() throws IOException { - return new GetUserMappingsProtocolClientSideTranslatorPB( - NameNode.getAddress(getConf()), UserGroupInformation.getCurrentUser(), - getConf()); + return NameNodeProxies.createProxy(getConf(), FileSystem.getDefaultUri(getConf()), + GetUserMappingsProtocol.class).getProxy(); } public static void main(String[] argv) throws Exception { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java index 8693885ec66..b0878d1eb8c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestGetBlocks.java @@ -25,7 +25,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.LocatedBlock; -import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.server.common.GenerationStamp; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; @@ -34,8 +33,6 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.ipc.RemoteException; -import org.apache.hadoop.security.UserGroupInformation; - import junit.framework.TestCase; /** * This class tests if block replacement request to data nodes work correctly. 
@@ -97,8 +94,8 @@ public class TestGetBlocks extends TestCase { // get RPC client to namenode InetSocketAddress addr = new InetSocketAddress("localhost", cluster.getNameNodePort()); - NamenodeProtocol namenode = new NamenodeProtocolTranslatorPB(addr, CONF, - UserGroupInformation.getCurrentUser()); + NamenodeProtocol namenode = NameNodeProxies.createProxy(CONF, + NameNode.getUri(addr), NamenodeProtocol.class).getProxy(); // get blocks of size fileLen from dataNodes[0] BlockWithLocations[] locs; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestIsMethodSupported.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestIsMethodSupported.java index 2c1fbb91f52..3e906655902 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestIsMethodSupported.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestIsMethodSupported.java @@ -22,6 +22,7 @@ import java.net.InetSocketAddress; import junit.framework.Assert; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB; @@ -31,8 +32,13 @@ import org.apache.hadoop.hdfs.protocolPB.JournalProtocolTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.RefreshAuthorizationPolicyProtocolClientSideTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.RefreshUserMappingsProtocolClientSideTranslatorPB; +import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; +import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.RefreshUserMappingsProtocol; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; +import org.apache.hadoop.tools.GetUserMappingsProtocol; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -66,8 +72,9 @@ public class TestIsMethodSupported { @Test public void testNamenodeProtocol() throws IOException { NamenodeProtocolTranslatorPB translator = - new NamenodeProtocolTranslatorPB(nnAddress, conf, - UserGroupInformation.getCurrentUser()); + (NamenodeProtocolTranslatorPB) NameNodeProxies.createNonHAProxy(conf, + nnAddress, NamenodeProtocol.class, UserGroupInformation.getCurrentUser(), + true).getProxy(); boolean exists = translator.isMethodSupported("rollEditLog"); Assert.assertTrue(exists); exists = translator.isMethodSupported("bogusMethod"); @@ -99,15 +106,17 @@ public class TestIsMethodSupported { @Test public void testClientNamenodeProtocol() throws IOException { ClientNamenodeProtocolTranslatorPB translator = - new ClientNamenodeProtocolTranslatorPB(nnAddress, conf, - UserGroupInformation.getCurrentUser()); + (ClientNamenodeProtocolTranslatorPB) NameNodeProxies.createNonHAProxy( + conf, nnAddress, ClientProtocol.class, + UserGroupInformation.getCurrentUser(), true).getProxy(); Assert.assertTrue(translator.isMethodSupported("mkdirs")); } @Test public void tesJournalProtocol() throws IOException { - JournalProtocolTranslatorPB translator = - new JournalProtocolTranslatorPB(nnAddress, conf); + JournalProtocolTranslatorPB translator = (JournalProtocolTranslatorPB) + NameNodeProxies.createNonHAProxy(conf, nnAddress, 
JournalProtocol.class, + UserGroupInformation.getCurrentUser(), true).getProxy(); //Nameode doesn't implement JournalProtocol Assert.assertFalse(translator.isMethodSupported("startLogSegment")); } @@ -130,24 +139,30 @@ public class TestIsMethodSupported { @Test public void testGetUserMappingsProtocol() throws IOException { GetUserMappingsProtocolClientSideTranslatorPB translator = - new GetUserMappingsProtocolClientSideTranslatorPB( - nnAddress, UserGroupInformation.getCurrentUser(), conf); + (GetUserMappingsProtocolClientSideTranslatorPB) + NameNodeProxies.createNonHAProxy(conf, nnAddress, + GetUserMappingsProtocol.class, UserGroupInformation.getCurrentUser(), + true).getProxy(); Assert.assertTrue(translator.isMethodSupported("getGroupsForUser")); } @Test public void testRefreshAuthorizationPolicyProtocol() throws IOException { - RefreshAuthorizationPolicyProtocolClientSideTranslatorPB translator = - new RefreshAuthorizationPolicyProtocolClientSideTranslatorPB( - nnAddress, UserGroupInformation.getCurrentUser(), conf); + RefreshAuthorizationPolicyProtocolClientSideTranslatorPB translator = + (RefreshAuthorizationPolicyProtocolClientSideTranslatorPB) + NameNodeProxies.createNonHAProxy(conf, nnAddress, + RefreshAuthorizationPolicyProtocol.class, + UserGroupInformation.getCurrentUser(), true).getProxy(); Assert.assertTrue(translator.isMethodSupported("refreshServiceAcl")); } @Test public void testRefreshUserMappingsProtocol() throws IOException { RefreshUserMappingsProtocolClientSideTranslatorPB translator = - new RefreshUserMappingsProtocolClientSideTranslatorPB( - nnAddress, UserGroupInformation.getCurrentUser(), conf); + (RefreshUserMappingsProtocolClientSideTranslatorPB) + NameNodeProxies.createNonHAProxy(conf, nnAddress, + RefreshUserMappingsProtocol.class, + UserGroupInformation.getCurrentUser(), true).getProxy(); Assert.assertTrue( translator.isMethodSupported("refreshUserToGroupsMappings")); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplication.java index a488b0a5cdc..e211d209773 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplication.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReplication.java @@ -75,7 +75,8 @@ public class TestReplication extends TestCase { private void checkFile(FileSystem fileSys, Path name, int repl) throws IOException { Configuration conf = fileSys.getConf(); - ClientProtocol namenode = DFSUtil.createNamenode(conf); + ClientProtocol namenode = NameNodeProxies.createProxy(conf, fileSys.getUri(), + ClientProtocol.class).getProxy(); waitForBlockReplication(name.toString(), namenode, Math.min(numDatanodes, repl), -1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java index 5de0113915f..2dddb1b6e08 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.NameNodeProxies; import 
org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; @@ -100,7 +101,8 @@ public class TestBalancer extends TestCase { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numNodes).build(); try { cluster.waitActive(); - client = DFSUtil.createNamenode(conf); + client = NameNodeProxies.createProxy(conf, cluster.getFileSystem(0).getUri(), + ClientProtocol.class).getProxy(); short replicationFactor = (short)(numNodes-1); long fileLen = size/replicationFactor; @@ -194,7 +196,8 @@ public class TestBalancer extends TestCase { .simulatedCapacities(capacities) .build(); cluster.waitActive(); - client = DFSUtil.createNamenode(conf); + client = NameNodeProxies.createProxy(conf, cluster.getFileSystem(0).getUri(), + ClientProtocol.class).getProxy(); for(int i = 0; i < blocksDN.length; i++) cluster.injectBlocks(i, Arrays.asList(blocksDN[i])); @@ -308,7 +311,8 @@ public class TestBalancer extends TestCase { .build(); try { cluster.waitActive(); - client = DFSUtil.createNamenode(conf); + client = NameNodeProxies.createProxy(conf, cluster.getFileSystem(0).getUri(), + ClientProtocol.class).getProxy(); long totalCapacity = sum(capacities); @@ -400,7 +404,8 @@ public class TestBalancer extends TestCase { .build(); try { cluster.waitActive(); - client = DFSUtil.createNamenode(conf); + client = NameNodeProxies.createProxy(conf, cluster.getFileSystem(0).getUri(), + ClientProtocol.class).getProxy(); long totalCapacity = sum(capacities); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java index 6764213e12d..e064534da42 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java @@ -23,10 +23,12 @@ import java.net.InetSocketAddress; import java.util.Map; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.server.namenode.NameNode; @@ -71,12 +73,13 @@ public class TestBalancerWithHANameNodes { cluster = new MiniDFSCluster.Builder(conf).nnTopology(simpleHATopology) .numDataNodes(capacities.length).racks(racks).simulatedCapacities( capacities).build(); + HATestUtil.setFailoverConfigurations(cluster, conf); try { cluster.waitActive(); cluster.transitionToActive(1); Thread.sleep(500); - client = DFSUtil.createNamenode(cluster.getNameNode(1) - .getNameNodeAddress(), conf); + client = NameNodeProxies.createProxy(conf, FileSystem.getDefaultUri(conf), + ClientProtocol.class).getProxy(); long totalCapacity = TestBalancer.sum(capacities); // fill up the cluster to be 30% full long totalUsedSpace = totalCapacity * 3 / 10; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java index 
b844b60b1bc..42b5612571a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -150,6 +150,11 @@ public abstract class HATestUtil { return fs; } + public static void setFailoverConfigurations(MiniDFSCluster cluster, + Configuration conf) { + setFailoverConfigurations(cluster, conf, getLogicalHostname(cluster)); + } + /** Sets the required configurations for performing failover of default namespace. */ public static void setFailoverConfigurations(MiniDFSCluster cluster, Configuration conf, String logicalName) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestGetGroupsWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestGetGroupsWithHA.java new file mode 100644 index 00000000000..e548817b6a7 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestGetGroupsWithHA.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import java.io.IOException; +import java.io.PrintStream; + +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.tools.GetGroups; +import org.apache.hadoop.tools.GetGroupsTestBase; +import org.apache.hadoop.util.Tool; +import org.junit.After; +import org.junit.Before; + +public class TestGetGroupsWithHA extends GetGroupsTestBase { + + private MiniDFSCluster cluster; + + @Before + public void setUpNameNode() throws IOException { + conf = new HdfsConfiguration(); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(0).build(); + HATestUtil.setFailoverConfigurations(cluster, conf); + } + + @After + public void tearDownNameNode() { + if (cluster != null) { + cluster.shutdown(); + } + } + + @Override + protected Tool getTool(PrintStream o) { + return new GetGroups(conf, o); + } + +} From 978a8050e28b2afb193a3e00d82a8475fa4d2428 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 29 Feb 2012 01:09:07 +0000 Subject: [PATCH 167/177] HDFS-2920. fix remaining TODO items. Contributed by Aaron T. Myers and Todd Lipcon. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1294923 13f79535-47bb-0310-9956-ffa450edef68 --- .../main/java/org/apache/hadoop/ipc/RPC.java | 6 ++ .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../org/apache/hadoop/hdfs/DFSClient.java | 15 +--- .../hadoop/hdfs/protocol/ClientProtocol.java | 4 +- .../ClientDatanodeProtocolTranslatorPB.java | 9 ++- .../server/blockmanagement/BlockManager.java | 1 - .../hdfs/server/datanode/BPOfferService.java | 14 +--- .../hdfs/server/datanode/BPServiceActor.java | 4 +- .../server/datanode/BlockPoolManager.java | 10 --- .../hadoop/hdfs/server/datanode/DataNode.java | 71 ++++++++++++------- .../datanode/UpgradeManagerDatanode.java | 2 +- .../datanode/UpgradeObjectDatanode.java | 11 +-- .../hdfs/server/namenode/FSDirectory.java | 17 ++--- .../hdfs/server/namenode/FSEditLogLoader.java | 4 +- .../hadoop/hdfs/server/namenode/FSImage.java | 6 -- .../hdfs/server/namenode/FSNamesystem.java | 2 - .../hadoop/hdfs/server/namenode/NameNode.java | 5 +- .../namenode/ha/StandbyCheckpointer.java | 2 +- .../apache/hadoop/hdfs/MiniDFSCluster.java | 6 +- .../server/datanode/TestBlockRecovery.java | 6 +- .../TestDataNodeMultipleRegistrations.java | 20 ++++-- .../server/datanode/TestRefreshNamenodes.java | 30 ++++---- .../namenode/ha/TestPipelinesFailover.java | 4 +- .../namenode/ha/TestStandbyCheckpoints.java | 3 - 24 files changed, 122 insertions(+), 132 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java index 069841b1c9b..eee364ccdee 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RPC.java @@ -580,6 +580,12 @@ public class RPC { * @param proxy the RPC proxy object to be stopped */ public static void stopProxy(Object proxy) { + if (proxy instanceof ProtocolTranslator) { + RPC.stopProxy(((ProtocolTranslator)proxy) + .getUnderlyingProxyObject()); + return; + } + InvocationHandler invocationHandler = null; try { invocationHandler = Proxy.getInvocationHandler(proxy); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 18de5f63349..42bdcf8a860 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -238,3 +238,5 @@ HDFS-3013. HA: NameNode format doesn't pick up dfs.namenode.name.dir.NameService HDFS-3019. Fix silent failure of TestEditLogJournalFailures (todd) HDFS-2958. Sweep for remaining proxy construction which doesn't go through failover path. (atm) + +HDFS-2920. fix remaining TODO items. (atm and todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index f0dc8ceff2a..83cd9a8a3ca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -418,22 +418,9 @@ public class DFSClient implements java.io.Closeable { /** * Close connections the Namenode. - * The namenode variable is either a rpcProxy passed by a test or - * created using the protocolTranslator which is closeable. - * If closeable then call close, else close using RPC.stopProxy(). 
*/ void closeConnectionToNamenode() { - if (namenode instanceof Closeable) { - try { - ((Closeable) namenode).close(); - return; - } catch (IOException e) { - // fall through - lets try the stopProxy - LOG.warn("Exception closing namenode, stopping the proxy"); - } - } else { - RPC.stopProxy(namenode); - } + RPC.stopProxy(namenode); } /** Abort and release resources held. Ignore all errors. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index 5ef00bfe9ca..641dfc11bb1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -694,7 +694,6 @@ public interface ClientProtocol extends VersionedProtocol { * * @throws IOException */ - //TODO(HA): Should this be @Idempotent? public void finalizeUpgrade() throws IOException; /** @@ -704,7 +703,6 @@ public interface ClientProtocol extends VersionedProtocol { * @return upgrade status information or null if no upgrades are in progress * @throws IOException */ - //TODO(HA): Should this be @Idempotent? public UpgradeStatusReport distributedUpgradeProgress(UpgradeAction action) throws IOException; @@ -737,7 +735,7 @@ public interface ClientProtocol extends VersionedProtocol { * @param bandwidth Blanacer bandwidth in bytes per second for this datanode. * @throws IOException */ - //TODO(HA): Should this be @Idempotent? + @Idempotent public void setBalancerBandwidth(long bandwidth) throws IOException; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java index d03f27060b5..bc32ab00a56 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientDatanodeProtocolTranslatorPB.java @@ -45,6 +45,7 @@ import org.apache.hadoop.ipc.ProtobufHelper; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.ProtocolMetaInterface; import org.apache.hadoop.ipc.ProtocolSignature; +import org.apache.hadoop.ipc.ProtocolTranslator; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RpcClientUtil; import org.apache.hadoop.ipc.RpcPayloadHeader.RpcKind; @@ -63,7 +64,8 @@ import com.google.protobuf.ServiceException; @InterfaceAudience.Private @InterfaceStability.Stable public class ClientDatanodeProtocolTranslatorPB implements - ProtocolMetaInterface, ClientDatanodeProtocol, Closeable { + ProtocolMetaInterface, ClientDatanodeProtocol, + ProtocolTranslator, Closeable { public static final Log LOG = LogFactory .getLog(ClientDatanodeProtocolTranslatorPB.class); @@ -211,4 +213,9 @@ public class ClientDatanodeProtocolTranslatorPB implements ClientDatanodeProtocolPB.class, RpcKind.RPC_PROTOCOL_BUFFER, RPC.getProtocolVersion(ClientDatanodeProtocolPB.class), methodName); } + + @Override + public Object getUnderlyingProxyObject() { + return rpcProxy; + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 9a76f1e43d7..cbae6f2246c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import java.io.IOException; import java.io.PrintWriter; -import java.io.StringWriter; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index c35b35f0647..aaba4fff2ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -383,7 +383,6 @@ class BPOfferService { bpServices.remove(actor); - // TODO: synchronization should be a little better here if (bpServices.isEmpty()) { dn.shutdownBlockPool(this); @@ -392,12 +391,6 @@ class BPOfferService { } } - @Deprecated - synchronized InetSocketAddress getNNSocketAddress() { - // TODO(HA) this doesn't make sense anymore - return bpServiceToActive.getNNSocketAddress(); - } - /** * Called by the DN to report an error to the NNs. */ @@ -432,11 +425,9 @@ class BPOfferService { } /** - * TODO: this is still used in a few places where we need to sort out - * what to do in HA! - * @return a proxy to the active NN + * @return a proxy to the active NN, or null if the BPOS has not + * acknowledged any NN as active yet. */ - @Deprecated synchronized DatanodeProtocolClientSideTranslatorPB getActiveNN() { if (bpServiceToActive != null) { return bpServiceToActive.bpNamenode; @@ -596,6 +587,7 @@ class BPOfferService { break; case DatanodeProtocol.DNA_SHUTDOWN: // TODO: DNA_SHUTDOWN appears to be unused - the NN never sends this command + // See HDFS-2987. throw new UnsupportedOperationException("Received unimplemented DNA_SHUTDOWN"); case DatanodeProtocol.DNA_REGISTER: // namenode requested a registration - at start or if NN lost contact diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index 982a5685033..75f32cbc04c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -538,8 +538,8 @@ class BPServiceActor implements Runnable { DatanodeCommand cmd = blockReport(); processCommand(new DatanodeCommand[]{ cmd }); - // Now safe to start scanning the block pool - // TODO(HA): this doesn't seem quite right + // Now safe to start scanning the block pool. + // If it has already been started, this is a no-op. 
if (dn.blockScanner != null) { dn.blockScanner.addBlockPool(bpos.getBlockPoolId()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java index 3176be20784..3355ee269a5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockPoolManager.java @@ -86,16 +86,6 @@ class BlockPoolManager { return bpByBlockPoolId.get(bpid); } - // TODO(HA) would be good to kill this - synchronized BPOfferService get(InetSocketAddress addr) { - for (BPOfferService bpos : offerServices) { - if (bpos.containsNN(addr)) { - return bpos; - } - } - return null; - } - synchronized void remove(BPOfferService t) { offerServices.remove(t); bpByBlockPoolId.remove(t.getBlockPoolId()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index acbcb032a8f..f13466a4abe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -565,6 +565,23 @@ public class DataNode extends Configured bpos.reportRemoteBadBlock(srcDataNode, block); } + /** + * Try to send an error report to the NNs associated with the given + * block pool. + * @param bpid the block pool ID + * @param errCode error code to send + * @param errMsg textual message to send + */ + void trySendErrorReport(String bpid, int errCode, String errMsg) { + BPOfferService bpos = blockPoolManager.get(bpid); + if (bpos == null) { + throw new IllegalArgumentException("Bad block pool: " + bpid); + } + bpos.trySendErrorReport(errCode, errMsg); + } + + + /** * Return the BPOfferService instance corresponding to the given block. * @param block @@ -874,7 +891,7 @@ public class DataNode extends Configured // TODO: all the BPs should have the same name as each other, they all come // from getName() here! and the use cases only are in tests where they just // call with getName(). So we could probably just make this method return - // the first BPOS's registration + // the first BPOS's registration. See HDFS-2609. BPOfferService [] bposArray = blockPoolManager.getAllNamenodeThreads(); for (BPOfferService bpos : bposArray) { if(bpos.bpRegistration.getName().equals(mName)) @@ -920,22 +937,6 @@ public class DataNode extends Configured throw new IOException(ie.getMessage()); } } - - /** - * get the name node address based on the block pool id - * @param bpid block pool ID - * @return namenode address corresponding to the bpid - */ - public InetSocketAddress getNameNodeAddr(String bpid) { - // TODO(HA) this function doesn't make sense! used by upgrade code - // Should it return just the active one or simply return the BPService. 
- BPOfferService bp = blockPoolManager.get(bpid); - if (bp != null) { - return bp.getNNSocketAddress(); - } - LOG.warn("No name node address found for block pool ID " + bpid); - return null; - } public InetSocketAddress getSelfAddr() { return selfAddr; @@ -1869,7 +1870,7 @@ public class DataNode extends Configured * @return Namenode corresponding to the bpid * @throws IOException */ - public DatanodeProtocolClientSideTranslatorPB getBPNamenode(String bpid) + public DatanodeProtocolClientSideTranslatorPB getActiveNamenodeForBP(String bpid) throws IOException { BPOfferService bpos = blockPoolManager.get(bpid); if (bpos == null) { @@ -1888,9 +1889,13 @@ public class DataNode extends Configured void syncBlock(RecoveringBlock rBlock, List syncList) throws IOException { ExtendedBlock block = rBlock.getBlock(); - DatanodeProtocolClientSideTranslatorPB nn = getBPNamenode(block - .getBlockPoolId()); - assert nn != null; + DatanodeProtocolClientSideTranslatorPB nn = + getActiveNamenodeForBP(block.getBlockPoolId()); + if (nn == null) { + throw new IOException( + "Unable to synchronize block " + rBlock + ", since this DN " + + " has not acknowledged any NN as active."); + } long recoveryId = rBlock.getNewGenerationStamp(); if (LOG.isDebugEnabled()) { @@ -2111,14 +2116,19 @@ public class DataNode extends Configured /** * Returned information is a JSON representation of a map with - * name node host name as the key and block pool Id as the value + * name node host name as the key and block pool Id as the value. + * Note that, if there are multiple NNs in an NA nameservice, + * a given block pool may be represented twice. */ @Override // DataNodeMXBean public String getNamenodeAddresses() { final Map info = new HashMap(); for (BPOfferService bpos : blockPoolManager.getAllNamenodeThreads()) { if (bpos != null) { - info.put(bpos.getNNSocketAddress().getHostName(), bpos.getBlockPoolId()); + for (BPServiceActor actor : bpos.getBPServiceActors()) { + info.put(actor.getNNSocketAddress().getHostName(), + bpos.getBlockPoolId()); + } } } return JSON.toString(info); @@ -2167,11 +2177,18 @@ public class DataNode extends Configured /** * @param addr rpc address of the namenode - * @return true - if BPOfferService corresponding to the namenode is alive + * @return true if the datanode is connected to a NameNode at the + * given address */ - public boolean isBPServiceAlive(InetSocketAddress addr) { - BPOfferService bp = blockPoolManager.get(addr); - return bp != null ? 
bp.isAlive() : false; + public boolean isConnectedToNN(InetSocketAddress addr) { + for (BPOfferService bpos : getAllBpOs()) { + for (BPServiceActor bpsa : bpos.getBPServiceActors()) { + if (addr.equals(bpsa.getNNSocketAddress())) { + return bpsa.isAlive(); + } + } + } + return false; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/UpgradeManagerDatanode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/UpgradeManagerDatanode.java index 478fb5660d0..9ada40fd5f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/UpgradeManagerDatanode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/UpgradeManagerDatanode.java @@ -92,7 +92,7 @@ class UpgradeManagerDatanode extends UpgradeManager { "UpgradeManagerDatanode.currentUpgrades is not null."; assert upgradeDaemon == null : "UpgradeManagerDatanode.upgradeDaemon is not null."; - DatanodeProtocol nn = dataNode.getBPNamenode(bpid); + DatanodeProtocol nn = dataNode.getActiveNamenodeForBP(bpid); nn.processUpgradeCommand(broadcastCommand); return true; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/UpgradeObjectDatanode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/UpgradeObjectDatanode.java index ddb1d6029f8..49d26212d09 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/UpgradeObjectDatanode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/UpgradeObjectDatanode.java @@ -45,7 +45,7 @@ public abstract class UpgradeObjectDatanode extends UpgradeObject implements Run } protected DatanodeProtocol getNamenode() throws IOException { - return dataNode.getBPNamenode(bpid); + return dataNode.getActiveNamenodeForBP(bpid); } void setDatanode(DataNode dataNode, String bpid) { @@ -92,14 +92,7 @@ public abstract class UpgradeObjectDatanode extends UpgradeObject implements Run + " Name-node version = " + nsInfo.getLayoutVersion() + "."; DataNode.LOG.fatal( errorMsg ); String bpid = nsInfo.getBlockPoolID(); - DatanodeProtocol nn = dataNode.getBPNamenode(bpid); - try { - nn.errorReport(dataNode.getDNRegistrationForBP(bpid), - DatanodeProtocol.NOTIFY, errorMsg); - } catch(SocketTimeoutException e) { // namenode is busy - DataNode.LOG.info("Problem connecting to server: " - + dataNode.getNameNodeAddr(nsInfo.getBlockPoolID())); - } + dataNode.trySendErrorReport(bpid, DatanodeProtocol.NOTIFY, errorMsg); throw new IOException(errorMsg); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 9c1eb25807e..f922c11bc95 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -282,18 +282,13 @@ public class FSDirectory implements Closeable { newNode = new INodeFile(permissions, 0, replication, modificationTime, atime, preferredBlockSize); } - writeLock(); // TODO: this is silly, considering the assert above! 
- try { - try { - newNode = addNode(path, newNode, UNKNOWN_DISK_SPACE); - } catch (IOException e) { - return null; - } - return newNode; - } finally { - writeUnlock(); - } + try { + newNode = addNode(path, newNode, UNKNOWN_DISK_SPACE); + } catch (IOException e) { + return null; + } + return newNode; } INodeDirectory addToParent(byte[] src, INodeDirectory parentINode, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index bf1ec992c4c..84d4ace2833 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -266,8 +266,8 @@ public class FSEditLogLoader { // Now close the file INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile; - // TODO: we could use removeLease(holder, path) here, but OP_CLOSE - // doesn't seem to serialize the holder... unclear why! + // One might expect that you could use removeLease(holder, path) here, + // but OP_CLOSE doesn't serialize the holder. So, remove by path. fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path); INodeFile newFile = ucFile.convertToInodeFile(); fsDir.replaceNode(addCloseOp.path, ucFile, newFile); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index adc3b46b7f5..7fb3d4bdfc5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -226,7 +226,6 @@ public class FSImage implements Closeable { } } - // TODO(HA): Have to figure out a story for the first 3 of these. // 3. Do transitions switch(startOpt) { case UPGRADE: @@ -261,7 +260,6 @@ public class FSImage implements Closeable { StorageState curState; try { curState = sd.analyzeStorage(startOpt, storage); - // TODO(HA): Fix this. String nameserviceId = DFSUtil.getNamenodeNameServiceId(conf); if (curState != StorageState.NORMAL && HAUtil.isHAEnabled(conf, nameserviceId)) { throw new IOException("Cannot start an HA namenode with name dirs " + @@ -637,8 +635,6 @@ public class FSImage implements Closeable { // update the txid for the edit log editLog.setNextTxId(storage.getMostRecentCheckpointTxId() + numLoaded + 1); - // TODO(HA): This should probably always return false when HA is enabled and - // we're coming up in standby state. return needToSave; } @@ -697,8 +693,6 @@ public class FSImage implements Closeable { } finally { FSEditLog.closeAllStreams(editStreams); // update the counts - // TODO(HA): this may be very slow -- we probably want to - // update them as we go for HA. 
target.dir.updateCountForINodeWithQuota(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 4d54701bbb4..bc40864a4ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -533,7 +533,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (!editLog.isOpenForWrite()) { // During startup, we're already open for write during initialization. - // TODO(HA): consider adding a startup state? editLog.initJournalsForWrite(); // May need to recover editLog.recoverUnclosedStreams(); @@ -912,7 +911,6 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } finally { // using finally to ensure we also wait for lease daemon try { - // TODO: these lines spew lots of warnings about "already stopped" logs, etc stopActiveServices(); stopStandbyServices(); if (dir != null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 5dc62560232..d07ed860d22 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -920,7 +920,7 @@ public class NameNode { if (!haEnabled) { return; // no-op, if HA is not enabled } - // TODO:HA implement health check + // TODO(HA): implement health check return; } @@ -963,7 +963,7 @@ public class NameNode { /** * Class used as expose {@link NameNode} as context to {@link HAState} * - * TODO:HA + * TODO(HA): * When entering and exiting state, on failing to start services, * appropriate action is needed todo either shutdown the node or recover * from failure. @@ -1005,7 +1005,6 @@ public class NameNode { @Override public void stopStandbyServices() throws IOException { - // TODO(HA): Are we guaranteed to be the only active here? if (namesystem != null) { namesystem.stopStandbyServices(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java index edfc53fb12c..036dd431ade 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/StandbyCheckpointer.java @@ -176,7 +176,7 @@ public class StandbyCheckpointer { public void cancelAndPreventCheckpoints() throws ServiceFailedException { try { thread.preventCheckpointsFor(PREVENT_AFTER_CANCEL_MS); - // TODO: there is a really narrow race here if we are just + // TODO(HA): there is a really narrow race here if we are just // about to start a checkpoint - this won't cancel it! 
namesystem.getFSImage().cancelSaveNamespace( "About to exit standby state"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 770d0f1066a..658282a0e72 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -1696,9 +1696,9 @@ public class MiniDFSCluster { // If a datanode failed to start, then do not wait for (DataNodeProperties dn : dataNodes) { // the datanode thread communicating with the namenode should be alive - if (!dn.datanode.isBPServiceAlive(addr)) { - LOG.warn("BPOfferService failed to start in datanode " + dn.datanode - + " for namenode at " + addr); + if (!dn.datanode.isConnectedToNN(addr)) { + LOG.warn("BPOfferService in datanode " + dn.datanode + + " failed to connect to namenode at " + addr); return false; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java index 2a75998d798..59a61cf2ea9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java @@ -461,7 +461,7 @@ public class TestBlockRecovery { initReplicaRecovery(any(RecoveringBlock.class)); Daemon d = spyDN.recoverBlocks(initRecoveringBlocks()); d.join(); - DatanodeProtocol dnP = dn.getBPNamenode(POOL_ID); + DatanodeProtocol dnP = dn.getActiveNamenodeForBP(POOL_ID); verify(dnP).commitBlockSynchronization( block, RECOVERY_ID, 0, true, true, DatanodeID.EMPTY_ARRAY); } @@ -518,7 +518,7 @@ public class TestBlockRecovery { } catch (IOException e) { e.getMessage().startsWith("Cannot recover "); } - DatanodeProtocol namenode = dn.getBPNamenode(POOL_ID); + DatanodeProtocol namenode = dn.getActiveNamenodeForBP(POOL_ID); verify(namenode, never()).commitBlockSynchronization( any(ExtendedBlock.class), anyLong(), anyLong(), anyBoolean(), anyBoolean(), any(DatanodeID[].class)); @@ -547,7 +547,7 @@ public class TestBlockRecovery { } catch (IOException e) { e.getMessage().startsWith("Cannot recover "); } - DatanodeProtocol namenode = dn.getBPNamenode(POOL_ID); + DatanodeProtocol namenode = dn.getActiveNamenodeForBP(POOL_ID); verify(namenode, never()).commitBlockSynchronization( any(ExtendedBlock.class), anyLong(), anyLong(), anyBoolean(), anyBoolean(), any(DatanodeID[].class)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java index a3e8ceb90f0..20a16c31669 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeMultipleRegistrations.java @@ -23,6 +23,8 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotSame; import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; @@ -99,15 
+101,15 @@ public class TestDataNodeMultipleRegistrations { BPOfferService bpos2 = dn.getAllBpOs()[1]; // The order of bpos is not guaranteed, so fix the order - if (bpos1.getNNSocketAddress().equals(nn2.getNameNodeAddress())) { + if (getNNSocketAddress(bpos1).equals(nn2.getNameNodeAddress())) { BPOfferService tmp = bpos1; bpos1 = bpos2; bpos2 = tmp; } - assertEquals("wrong nn address", bpos1.getNNSocketAddress(), + assertEquals("wrong nn address", getNNSocketAddress(bpos1), nn1.getNameNodeAddress()); - assertEquals("wrong nn address", bpos2.getNNSocketAddress(), + assertEquals("wrong nn address", getNNSocketAddress(bpos2), nn2.getNameNodeAddress()); assertEquals("wrong bpid", bpos1.getBlockPoolId(), bpid1); assertEquals("wrong bpid", bpos2.getBlockPoolId(), bpid2); @@ -121,6 +123,12 @@ public class TestDataNodeMultipleRegistrations { cluster.shutdown(); } } + + private static InetSocketAddress getNNSocketAddress(BPOfferService bpos) { + List actors = bpos.getBPServiceActors(); + assertEquals(1, actors.size()); + return actors.get(0).getNNSocketAddress(); + } /** * starts single nn and single dn and verifies registration and handshake @@ -154,14 +162,16 @@ public class TestDataNodeMultipleRegistrations { for (BPOfferService bpos : dn.getAllBpOs()) { LOG.info("reg: bpid=" + "; name=" + bpos.bpRegistration.name + "; sid=" - + bpos.bpRegistration.storageID + "; nna=" + bpos.getNNSocketAddress()); + + bpos.bpRegistration.storageID + "; nna=" + + getNNSocketAddress(bpos)); } // try block report BPOfferService bpos1 = dn.getAllBpOs()[0]; bpos1.triggerBlockReportForTests(); - assertEquals("wrong nn address", bpos1.getNNSocketAddress(), + assertEquals("wrong nn address", + getNNSocketAddress(bpos1), nn1.getNameNodeAddress()); assertEquals("wrong bpid", bpos1.getBlockPoolId(), bpid1); assertEquals("wrong cid", dn.getClusterId(), cid1); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java index cfa1d64c903..2d6f2103796 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestRefreshNamenodes.java @@ -22,15 +22,18 @@ import static org.junit.Assert.*; import java.io.IOException; import java.net.InetSocketAddress; +import java.util.Set; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf; import org.apache.hadoop.hdfs.MiniDFSNNTopology.NSConf; import org.junit.Test; +import com.google.common.base.Joiner; +import com.google.common.collect.Sets; + /** * Tests datanode refresh namenode list functionality. 
*/ @@ -65,21 +68,24 @@ public class TestRefreshNamenodes { cluster.addNameNode(conf, nnPort4); - BPOfferService[] bpoList = dn.getAllBpOs(); // Ensure a BPOfferService in the datanodes corresponds to // a namenode in the cluster + Set nnAddrsFromCluster = Sets.newHashSet(); for (int i = 0; i < 4; i++) { - InetSocketAddress addr = cluster.getNameNode(i).getNameNodeAddress(); - boolean found = false; - for (int j = 0; j < bpoList.length; j++) { - if (bpoList[j] != null && addr.equals(bpoList[j].getNNSocketAddress())) { - found = true; - bpoList[j] = null; // Erase the address that matched - break; - } - } - assertTrue("NameNode address " + addr + " is not found.", found); + assertTrue(nnAddrsFromCluster.add( + cluster.getNameNode(i).getNameNodeAddress())); } + + Set nnAddrsFromDN = Sets.newHashSet(); + for (BPOfferService bpos : dn.getAllBpOs()) { + for (BPServiceActor bpsa : bpos.getBPServiceActors()) { + assertTrue(nnAddrsFromDN.add(bpsa.getNNSocketAddress())); + } + } + + assertEquals("", + Joiner.on(",").join( + Sets.symmetricDifference(nnAddrsFromCluster, nnAddrsFromDN))); } finally { if (cluster != null) { cluster.shutdown(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java index 465987c6cb1..547ba72e493 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestPipelinesFailover.java @@ -179,7 +179,7 @@ public class TestPipelinesFailover { // write another block and a half AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF); - stm.hflush(); // TODO: see above + stm.hflush(); LOG.info("Failing back to NN 0"); cluster.transitionToStandby(0); @@ -188,7 +188,7 @@ public class TestPipelinesFailover { cluster.stopDataNode(1); AppendTestUtil.write(stm, BLOCK_AND_A_HALF*2, BLOCK_AND_A_HALF); - stm.hflush(); // TODO: see above + stm.hflush(); stm.close(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index a34d6bdfc21..5440c38cc22 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -127,9 +127,6 @@ public class TestStandbyCheckpoints { List dirs = Lists.newArrayList(); dirs.addAll(FSImageTestUtil.getNameNodeCurrentDirs(cluster, 0)); dirs.addAll(FSImageTestUtil.getNameNodeCurrentDirs(cluster, 1)); - // TODO: this failed once because it caught a ckpt file -- maybe - // this is possible if one of the NNs is really fast and the other is slow? - // need to loop this to suss out the race. FSImageTestUtil.assertParallelFilesAreIdentical(dirs, ImmutableSet.of()); } From 7b6b204924ec3d2aeb4c42c09456fbbefc3c7817 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 29 Feb 2012 21:57:56 +0000 Subject: [PATCH 168/177] HDFS-3027. Implement a simple NN health check. Contributed by Aaron T. Myers. 
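
For context, monitorHealth() on the NameNode now performs a real resource check and throws HealthCheckFailedException when the resource checker reports no resources available (for example, required name dirs low on disk space). Below is a minimal sketch of how a caller such as a failover controller might consume this check; the probe class and its error-handling policy are illustrative assumptions, while HAServiceProtocol.monitorHealth() and HealthCheckFailedException are the pieces this patch builds on.

    // Illustrative sketch only; the class below is not part of this patch.
    import java.io.IOException;
    import org.apache.hadoop.ha.HAServiceProtocol;
    import org.apache.hadoop.ha.HealthCheckFailedException;

    class NNHealthProbe {
      private final HAServiceProtocol target;

      NNHealthProbe(HAServiceProtocol target) {
        this.target = target;
      }

      /** Returns true if the NN reported itself healthy on this probe. */
      boolean probeOnce() {
        try {
          // Throws HealthCheckFailedException when the NN has no resources available.
          target.monitorHealth();
          return true;
        } catch (HealthCheckFailedException hcfe) {
          // NN is reachable but unhealthy, e.g. its required name dirs are low on space.
          return false;
        } catch (IOException ioe) {
          // RPC-level failure; treat an unreachable NN as unhealthy as well.
          return false;
        }
      }
    }
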
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1295300 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hdfs/server/namenode/FSNamesystem.java | 9 ++- .../hadoop/hdfs/server/namenode/NameNode.java | 7 +- .../namenode/NameNodeResourceChecker.java | 5 +- .../apache/hadoop/hdfs/MiniDFSCluster.java | 9 ++- .../server/namenode/ha/TestNNHealthCheck.java | 73 +++++++++++++++++++ 6 files changed, 97 insertions(+), 8 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 42bdcf8a860..4790bd7f66c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -240,3 +240,5 @@ HDFS-3019. Fix silent failure of TestEditLogJournalFailures (todd) HDFS-2958. Sweep for remaining proxy construction which doesn't go through failover path. (atm) HDFS-2920. fix remaining TODO items. (atm and todd) + +HDFS-3027. Implement a simple NN health check. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index bc40864a4ed..1fa7bcaafcb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -3082,7 +3082,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * * @return true if there were sufficient resources available, false otherwise. */ - private boolean nameNodeHasResourcesAvailable() { + boolean nameNodeHasResourcesAvailable() { return hasResourcesAvailable; } @@ -3090,7 +3090,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Perform resource checks and cache the results. 
* @throws IOException */ - private void checkAvailableResources() throws IOException { + void checkAvailableResources() { Preconditions.checkState(nnResourceChecker != null, "nnResourceChecker not initialized"); hasResourcesAvailable = nnResourceChecker.hasAvailableDiskSpace(); @@ -5188,4 +5188,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, public SafeModeInfo getSafeModeInfoForTests() { return safeMode; } + + @VisibleForTesting + public void setNNResourceChecker(NameNodeResourceChecker nnResourceChecker) { + this.nnResourceChecker = nnResourceChecker; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index d07ed860d22..b62f0d5d9ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -920,8 +920,11 @@ public class NameNode { if (!haEnabled) { return; // no-op, if HA is not enabled } - // TODO(HA): implement health check - return; + getNamesystem().checkAvailableResources(); + if (!getNamesystem().nameNodeHasResourcesAvailable()) { + throw new HealthCheckFailedException( + "The NameNode has no resources available"); + } } synchronized void transitionToActive() diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java index e4817c7b180..a024a5524a6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeResourceChecker.java @@ -46,7 +46,7 @@ import com.google.common.base.Predicate; * are added by default, and arbitrary extra volumes may be configured as well. */ @InterfaceAudience.Private -class NameNodeResourceChecker { +public class NameNodeResourceChecker { private static final Log LOG = LogFactory.getLog(NameNodeResourceChecker.class.getName()); // Space (in bytes) reserved per volume. @@ -176,8 +176,7 @@ class NameNodeResourceChecker { * least one redundant volume and all of the required volumes, false * otherwise. */ - boolean hasAvailableDiskSpace() - throws IOException { + public boolean hasAvailableDiskSpace() { return NameNodeResourcePolicy.areResourcesAvailable(volumes.values(), minimumRedundantVolumes); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index 658282a0e72..d69dc0a7da7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -1145,7 +1145,14 @@ public class MiniDFSCluster { */ public NamenodeProtocols getNameNodeRpc() { checkSingleNameNode(); - return getNameNode(0).getRpcServer(); + return getNameNodeRpc(0); + } + + /** + * Get an instance of the NameNode's RPC handler. 
+ */ + public NamenodeProtocols getNameNodeRpc(int nnIndex) { + return getNameNode(nnIndex).getRpcServer(); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java new file mode 100644 index 00000000000..ab2a8dd0614 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestNNHealthCheck.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode.ha; + +import static org.junit.Assert.fail; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.HealthCheckFailedException; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.server.namenode.NameNodeResourceChecker; +import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; +import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Test; +import org.mockito.Mockito; + +public class TestNNHealthCheck { + + @Test + public void testNNHealthCheck() throws IOException { + MiniDFSCluster cluster = null; + try { + Configuration conf = new Configuration(); + cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(0) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .build(); + + NameNodeResourceChecker mockResourceChecker = Mockito.mock( + NameNodeResourceChecker.class); + Mockito.doReturn(true).when(mockResourceChecker).hasAvailableDiskSpace(); + cluster.getNameNode(0).getNamesystem() + .setNNResourceChecker(mockResourceChecker); + + NamenodeProtocols rpc = cluster.getNameNodeRpc(0); + + // Should not throw error, which indicates healthy. + rpc.monitorHealth(); + + Mockito.doReturn(false).when(mockResourceChecker).hasAvailableDiskSpace(); + + try { + // Should throw error - NN is unhealthy. + rpc.monitorHealth(); + fail("Should not have succeeded in calling monitorHealth"); + } catch (HealthCheckFailedException hcfe) { + GenericTestUtils.assertExceptionContains( + "The NameNode has no resources available", hcfe); + } + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } +} From 01b17c40cf20df576fb18315c2019645d5050ddf Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Wed, 29 Feb 2012 23:42:52 +0000 Subject: [PATCH 169/177] HDFS-2979. Balancer should use logical uri for creating failover proxy with HA enabled. Contributed by Aaron T. Myers. 
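
With this change the Balancer iterates over one URI per nameservice rather than one socket address per NameNode. A rough sketch of the resolution rule is below; the nameservice IDs and host names are made-up examples, and the DFSConfigKeys constants and DFSUtil methods are the ones exercised by the tests in this patch.

    // Sketch: an HA nameservice resolves to its logical URI, a non-HA one to a host-based URI.
    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_FEDERATION_NAMESERVICES;
    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX;
    import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY;

    import java.net.URI;
    import java.util.Collection;

    import org.apache.hadoop.hdfs.DFSUtil;
    import org.apache.hadoop.hdfs.HdfsConfiguration;

    public class NsUriSketch {
      public static void main(String[] args) {
        HdfsConfiguration conf = new HdfsConfiguration();
        conf.set(DFS_FEDERATION_NAMESERVICES, "ha-ns,plain-ns");
        // HA nameservice: two NNs behind one logical name.
        conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ha-ns"), "nn1,nn2");
        conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "ha-ns", "nn1"),
            "nn1.example.com:8020");
        conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "ha-ns", "nn2"),
            "nn2.example.com:8020");
        // Non-HA nameservice: a single NN address.
        conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "plain-ns"),
            "nn3.example.com:8020");

        // Expect hdfs://ha-ns for the HA nameservice and
        // hdfs://nn3.example.com:8020 for the plain one.
        Collection<URI> uris = DFSUtil.getNsServiceRpcUris(conf);
        for (URI uri : uris) {
          System.out.println(uri);
        }
      }
    }

This is also why TestBalancerWithHANameNodes can now assert that the single returned URI equals the cluster's logical URI instead of comparing individual NN socket addresses.
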
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1295340 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 65 +++++++++++++++++++ .../hadoop/hdfs/server/balancer/Balancer.java | 15 ++--- .../server/balancer/NameNodeConnector.java | 14 ++-- .../hadoop/hdfs/server/namenode/NameNode.java | 16 +++-- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 49 +++++++++++++- .../hdfs/server/balancer/TestBalancer.java | 7 +- .../balancer/TestBalancerWithHANameNodes.java | 25 ++++--- .../TestBalancerWithMultipleNameNodes.java | 9 ++- .../hdfs/server/namenode/ha/HATestUtil.java | 7 ++ 10 files changed, 158 insertions(+), 51 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 4790bd7f66c..01553a7c452 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -242,3 +242,5 @@ HDFS-2958. Sweep for remaining proxy construction which doesn't go through failo HDFS-2920. fix remaining TODO items. (atm and todd) HDFS-3027. Implement a simple NN health check. (atm) + +HDFS-2979. Balancer should use logical uri for creating failover proxy with HA enabled. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index f4a861089be..e63ed0d26b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -28,9 +28,11 @@ import java.security.SecureRandom; import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.Set; import java.util.StringTokenizer; import javax.net.SocketFactory; @@ -43,6 +45,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolTranslatorPB; @@ -605,6 +608,68 @@ public class DFSUtil { "nnId=" + namenodeId + ";addr=" + addr + "]"; } } + + /** + * Get a URI for each configured nameservice. If a nameservice is + * HA-enabled, then the logical URI of the nameservice is returned. If the + * nameservice is not HA-enabled, then a URI corresponding to an RPC address + * of the single NN for that nameservice is returned, preferring the service + * RPC address over the client RPC address. + * + * @param conf configuration + * @return a collection of all configured NN URIs, preferring service + * addresses + */ + public static Collection getNsServiceRpcUris(Configuration conf) { + return getNameServiceUris(conf, + DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); + } + + /** + * Get a URI for each configured nameservice. If a nameservice is + * HA-enabled, then the logical URI of the nameservice is returned. 
If the + * nameservice is not HA-enabled, then a URI corresponding to the address of + * the single NN for that nameservice is returned. + * + * @param conf configuration + * @param keys configuration keys to try in order to get the URI for non-HA + * nameservices + * @return a collection of all configured NN URIs + */ + public static Collection getNameServiceUris(Configuration conf, + String... keys) { + Set ret = new HashSet(); + for (String nsId : getNameServiceIds(conf)) { + if (HAUtil.isHAEnabled(conf, nsId)) { + // Add the logical URI of the nameservice. + try { + ret.add(new URI(HdfsConstants.HDFS_URI_SCHEME + "://" + nsId)); + } catch (URISyntaxException ue) { + throw new IllegalArgumentException(ue); + } + } else { + // Add the URI corresponding to the address of the NN. + for (String key : keys) { + String addr = conf.get(concatSuffixes(key, nsId)); + if (addr != null) { + ret.add(createUri(HdfsConstants.HDFS_URI_SCHEME, + NetUtils.createSocketAddr(addr))); + break; + } + } + } + } + // Add the generic configuration keys. + for (String key : keys) { + String addr = conf.get(key); + if (addr != null) { + ret.add(createUri("hdfs", NetUtils.createSocketAddr(addr))); + break; + } + } + return ret; + } /** * Given the InetSocketAddress this method returns the nameservice Id diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java index bc7c13a9147..e808af623cc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java @@ -24,8 +24,8 @@ import java.io.BufferedOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; -import java.net.InetSocketAddress; import java.net.Socket; +import java.net.URI; import java.text.DateFormat; import java.util.ArrayList; import java.util.Arrays; @@ -39,7 +39,6 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -1380,8 +1379,7 @@ public class Balancer { * for each namenode, * execute a {@link Balancer} to work through all datanodes once. */ - static int run(Map> namenodes, - final Parameters p, + static int run(Collection namenodes, final Parameters p, Configuration conf) throws IOException, InterruptedException { final long sleeptime = 2000*conf.getLong( DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, @@ -1395,10 +1393,8 @@ public class Balancer { final List connectors = new ArrayList(namenodes.size()); try { - for(Entry> entry : - namenodes.entrySet()) { - connectors.add( - new NameNodeConnector(entry.getValue().values(), conf)); + for (URI uri : namenodes) { + connectors.add(new NameNodeConnector(uri, conf)); } boolean done = false; @@ -1480,8 +1476,7 @@ public class Balancer { try { checkReplicationPolicyCompatibility(conf); - final Map> namenodes = - DFSUtil.getNNServiceRpcAddresses(conf); + final Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); return Balancer.run(namenodes, parse(args), conf); } catch (IOException e) { System.out.println(e + ". 
Exiting ..."); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java index eab6273c221..c4208b79516 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java @@ -21,9 +21,7 @@ import java.io.DataOutputStream; import java.io.IOException; import java.io.OutputStream; import java.net.InetAddress; -import java.net.InetSocketAddress; import java.net.URI; -import java.util.Collection; import java.util.EnumSet; import org.apache.commons.logging.Log; @@ -38,7 +36,6 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.io.IOUtils; @@ -46,8 +43,6 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Daemon; -import com.google.common.collect.Lists; - /** * The class provides utilities for {@link Balancer} to access a NameNode */ @@ -56,7 +51,7 @@ class NameNodeConnector { private static final Log LOG = Balancer.LOG; private static final Path BALANCER_ID_PATH = new Path("/system/balancer.id"); - final InetSocketAddress namenodeAddress; + final URI nameNodeUri; final String blockpoolID; final NamenodeProtocol namenode; @@ -70,10 +65,9 @@ class NameNodeConnector { private BlockTokenSecretManager blockTokenSecretManager; private Daemon keyupdaterthread; // AccessKeyUpdater thread - NameNodeConnector(Collection haNNs, + NameNodeConnector(URI nameNodeUri, Configuration conf) throws IOException { - this.namenodeAddress = Lists.newArrayList(haNNs).get(0); - URI nameNodeUri = NameNode.getUri(this.namenodeAddress); + this.nameNodeUri = nameNodeUri; this.namenode = NameNodeProxies.createProxy(conf, nameNodeUri, NamenodeProtocol.class) @@ -186,7 +180,7 @@ class NameNodeConnector { @Override public String toString() { - return getClass().getSimpleName() + "[namenodeAddress=" + namenodeAddress + return getClass().getSimpleName() + "[namenodeUri=" + nameNodeUri + ", id=" + blockpoolID + "]"; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index b62f0d5d9ea..d0e657baab8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -526,21 +526,21 @@ public class NameNode { protected NameNode(Configuration conf, NamenodeRole role) throws IOException { - this.conf = conf; + this.conf = new Configuration(conf); this.role = role; - String nsId = getNameServiceId(conf); - String namenodeId = HAUtil.getNameNodeId(conf, nsId); - this.haEnabled = HAUtil.isHAEnabled(conf, nsId); + String nsId = getNameServiceId(this.conf); + String namenodeId = 
HAUtil.getNameNodeId(this.conf, nsId); + this.haEnabled = HAUtil.isHAEnabled(this.conf, nsId); if (!haEnabled) { state = ACTIVE_STATE; } else { state = STANDBY_STATE; } - this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); + this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(this.conf); this.haContext = createHAContext(); try { - initializeGenericKeys(conf, nsId, namenodeId); - initialize(conf); + initializeGenericKeys(this.conf, nsId, namenodeId); + initialize(this.conf); state.prepareToEnterState(haContext); state.enterState(haContext); } catch (IOException e) { @@ -651,6 +651,7 @@ public class NameNode { throws IOException { String nsId = DFSUtil.getNamenodeNameServiceId(conf); String namenodeId = HAUtil.getNameNodeId(conf, nsId); + conf = new Configuration(conf); initializeGenericKeys(conf, nsId, namenodeId); if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, @@ -697,6 +698,7 @@ public class NameNode { private static boolean finalize(Configuration conf, boolean isConfirmationNeeded ) throws IOException { + conf = new Configuration(conf); String nsId = DFSUtil.getNamenodeNameServiceId(conf); String namenodeId = HAUtil.getNameNodeId(conf, nsId); initializeGenericKeys(conf, nsId, namenodeId); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index e49bb107e20..a9b62c3aead 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -25,6 +25,8 @@ import static org.junit.Assert.*; import java.io.IOException; import java.net.InetSocketAddress; +import java.net.URI; +import java.net.URISyntaxException; import java.util.Arrays; import java.util.Collection; import java.util.Iterator; @@ -41,6 +43,8 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.CommonConfigurationKeys; + import static org.apache.hadoop.hdfs.DFSConfigKeys.*; public class TestDFSUtil { @@ -233,11 +237,12 @@ public class TestDFSUtil { * {@link DFSUtil#isDefaultNamenodeAddress(Configuration, InetSocketAddress, String...)} */ @Test - public void testSingleNamenode() { + public void testSingleNamenode() throws URISyntaxException { HdfsConfiguration conf = new HdfsConfiguration(); final String DEFAULT_ADDRESS = "localhost:9000"; final String NN2_ADDRESS = "localhost:9001"; conf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, DEFAULT_ADDRESS); + conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, DEFAULT_ADDRESS); InetSocketAddress testAddress1 = NetUtils.createSocketAddr(DEFAULT_ADDRESS); boolean isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress1, @@ -247,6 +252,10 @@ public class TestDFSUtil { isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress2, DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); assertFalse(isDefault); + + Collection uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_RPC_ADDRESS_KEY); + assertEquals(1, uris.size()); + assertTrue(uris.contains(new URI("hdfs://" + DEFAULT_ADDRESS))); } /** Tests to ensure default namenode is used as fallback */ @@ -407,13 +416,14 @@ public class TestDFSUtil { } @Test - public void testHANameNodesWithFederation() { + public void testHANameNodesWithFederation() throws URISyntaxException { 
HdfsConfiguration conf = new HdfsConfiguration(); final String NS1_NN1_HOST = "ns1-nn1.example.com:8020"; final String NS1_NN2_HOST = "ns1-nn2.example.com:8020"; final String NS2_NN1_HOST = "ns2-nn1.example.com:8020"; final String NS2_NN2_HOST = "ns2-nn2.example.com:8020"; + conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "hdfs://ns1"); // Two nameservices, each with two NNs. conf.set(DFS_FEDERATION_NAMESERVICES, "ns1,ns2"); @@ -460,6 +470,11 @@ public class TestDFSUtil { // Ditto for nameservice IDs, if multiple are defined assertEquals(null, DFSUtil.getNamenodeNameServiceId(conf)); assertEquals(null, DFSUtil.getSecondaryNameServiceId(conf)); + + Collection uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_RPC_ADDRESS_KEY); + assertEquals(2, uris.size()); + assertTrue(uris.contains(new URI("hdfs://ns1"))); + assertTrue(uris.contains(new URI("hdfs://ns2"))); } @Test @@ -509,4 +524,34 @@ public class TestDFSUtil { assertEquals("127.0.0.1:12345", DFSUtil.substituteForWildcardAddress("127.0.0.1:12345", "foo")); } + + @Test + public void testGetNNUris() throws Exception { + HdfsConfiguration conf = new HdfsConfiguration(); + + final String NS1_NN1_HOST = "ns1-nn1.example.com:8020"; + final String NS1_NN2_HOST = "ns1-nn1.example.com:8020"; + final String NS2_NN_HOST = "ns2-nn.example.com:8020"; + final String NN_HOST = "nn.example.com:8020"; + + conf.set(DFS_FEDERATION_NAMESERVICES, "ns1,ns2"); + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns1"),"nn1,nn2"); + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn1"), NS1_NN1_HOST); + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn2"), NS1_NN2_HOST); + + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, "ns2"), + NS2_NN_HOST); + + conf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, "hdfs://" + NN_HOST); + + Collection uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_RPC_ADDRESS_KEY, + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY); + + assertEquals(3, uris.size()); + assertTrue(uris.contains(new URI("hdfs://ns1"))); + assertTrue(uris.contains(new URI("hdfs://" + NS2_NN_HOST))); + assertTrue(uris.contains(new URI("hdfs://" + NN_HOST))); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java index 2dddb1b6e08..81b03a568e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java @@ -18,11 +18,11 @@ package org.apache.hadoop.hdfs.server.balancer; import java.io.IOException; -import java.net.InetSocketAddress; +import java.net.URI; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; -import java.util.Map; import java.util.Random; import java.util.concurrent.TimeoutException; @@ -338,8 +338,7 @@ public class TestBalancer extends TestCase { waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); // start rebalancing - Map> namenodes = - DFSUtil.getNNServiceRpcAddresses(conf); + Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf); assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java index e064534da42..9a0001fd09f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java @@ -18,9 +18,10 @@ package org.apache.hadoop.hdfs.server.balancer; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; -import java.net.InetSocketAddress; -import java.util.Map; +import java.net.URI; +import java.util.Collection; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -67,12 +68,12 @@ public class TestBalancerWithHANameNodes { int numOfDatanodes = capacities.length; NNConf nn1Conf = new MiniDFSNNTopology.NNConf("nn1"); nn1Conf.setIpcPort(NameNode.DEFAULT_PORT); - MiniDFSNNTopology simpleHATopology = new MiniDFSNNTopology() - .addNameservice(new MiniDFSNNTopology.NSConf(null).addNN(nn1Conf) - .addNN(new MiniDFSNNTopology.NNConf("nn2"))); - cluster = new MiniDFSCluster.Builder(conf).nnTopology(simpleHATopology) - .numDataNodes(capacities.length).racks(racks).simulatedCapacities( - capacities).build(); + cluster = new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(capacities.length) + .racks(racks) + .simulatedCapacities(capacities) + .build(); HATestUtil.setFailoverConfigurations(cluster, conf); try { cluster.waitActive(); @@ -89,14 +90,12 @@ public class TestBalancerWithHANameNodes { // start up an empty node with the same capacity and on the same rack cluster.startDataNodes(conf, 1, true, null, new String[] { newNodeRack }, new long[] { newNodeCapacity }); - - HATestUtil.setFailoverConfigurations(cluster, conf, NameNode.getUri( - cluster.getNameNode(0).getNameNodeAddress()).getHost()); totalCapacity += newNodeCapacity; TestBalancer.waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); - Map> namenodes = DFSUtil - .getNNServiceRpcAddresses(conf); + Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); + assertEquals(1, namenodes.size()); + assertTrue(namenodes.contains(HATestUtil.getLogicalUri(cluster))); final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf); assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java index 0245615a487..333d23ad9bc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java @@ -18,10 +18,10 @@ package org.apache.hadoop.hdfs.server.balancer; import java.io.IOException; -import java.net.InetSocketAddress; +import java.net.URI; import java.util.Arrays; +import java.util.Collection; import java.util.List; -import java.util.Map; import java.util.Random; import org.apache.commons.logging.Log; @@ -40,8 +40,8 @@ import org.apache.hadoop.hdfs.protocol.Block; 
import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.LeaseManager; import org.apache.hadoop.hdfs.server.namenode.NameNode; @@ -157,8 +157,7 @@ public class TestBalancerWithMultipleNameNodes { LOG.info("BALANCER 1"); // start rebalancing - final Map> namenodes = - DFSUtil.getNNServiceRpcAddresses(s.conf); + final Collection namenodes = DFSUtil.getNsServiceRpcUris(s.conf); final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, s.conf); Assert.assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java index 42b5612571a..bf919cea7f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; @@ -188,6 +189,12 @@ public abstract class HATestUtil { return String.format(LOGICAL_HOSTNAME, cluster.getInstanceId()); } + public static URI getLogicalUri(MiniDFSCluster cluster) + throws URISyntaxException { + return new URI(HdfsConstants.HDFS_URI_SCHEME + "://" + + getLogicalHostname(cluster)); + } + public static void waitForCheckpoint(MiniDFSCluster cluster, int nnIdx, List txids) throws InterruptedException { long start = System.currentTimeMillis(); From 1d3a0c95573d6c5f63ba92e332fc93d9b166efe5 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 1 Mar 2012 00:08:13 +0000 Subject: [PATCH 170/177] Fix two more issues from trunk merge just committed -- removal of VersionedProtocol git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1295349 13f79535-47bb-0310-9956-ffa450edef68 --- .../HAServiceProtocolClientSideTranslatorPB.java | 13 ------------- .../apache/hadoop/ha/TestFailoverController.java | 12 ------------ 2 files changed, 25 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java index 4a67aa61410..3bf4f6f0133 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/protocolPB/HAServiceProtocolClientSideTranslatorPB.java @@ -71,19 +71,6 @@ public class HAServiceProtocolClientSideTranslatorPB implements RPC.getProtocolVersion(HAServiceProtocolPB.class), addr, conf); } - 
@Override - public long getProtocolVersion(String protocol, long clientVersion) - throws IOException { - return rpcProxy.getProtocolVersion(protocol, clientVersion); - } - - @Override - public ProtocolSignature getProtocolSignature(String protocol, - long clientVersion, int clientMethodsHash) throws IOException { - return rpcProxy.getProtocolSignature(protocol, clientVersion, - clientMethodsHash); - } - @Override public void monitorHealth() throws IOException { try { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java index 1e206b4c3b8..9e2cc75e9d1 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java @@ -50,18 +50,6 @@ public class TestFailoverController { this.state = state; } - @Override - public long getProtocolVersion(String protocol, long clientVersion) - throws IOException { - return 0; - } - - @Override - public ProtocolSignature getProtocolSignature(String protocol, - long clientVersion, int clientMethodsHash) throws IOException { - return null; - } - @Override public void monitorHealth() throws HealthCheckFailedException, IOException { // Do nothing From 30cffeb388f9065f0c5ce5fa53e127940a8917b6 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 1 Mar 2012 00:37:09 +0000 Subject: [PATCH 171/177] HDFS-3023. Optimize entries in edits log for persistBlocks call. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1295356 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../hadoop/hdfs/protocol/LayoutVersion.java | 5 +- .../hdfs/server/namenode/FSDirectory.java | 2 +- .../hdfs/server/namenode/FSEditLog.java | 7 + .../hdfs/server/namenode/FSEditLogLoader.java | 68 +- .../hdfs/server/namenode/FSEditLogOp.java | 87 ++- .../server/namenode/FSEditLogOpCodes.java | 3 +- .../server/namenode/FSImageSerialization.java | 44 ++ .../offlineEditsViewer/EditsElement.java | 2 + .../EditsLoaderCurrent.java | 24 +- .../src/test/resources/editsStored | Bin 2572 -> 3330 bytes .../src/test/resources/editsStored.xml | 661 +++++++++++++----- 12 files changed, 698 insertions(+), 207 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 01553a7c452..5cd34af391b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -244,3 +244,5 @@ HDFS-2920. fix remaining TODO items. (atm and todd) HDFS-3027. Implement a simple NN health check. (atm) HDFS-2979. Balancer should use logical uri for creating failover proxy with HA enabled. (atm) + +HDFS-3023. Optimize entries in edits log for persistBlocks call. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java index 729748f3026..3680ee54aa8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/LayoutVersion.java @@ -91,7 +91,10 @@ public class LayoutVersion { STORED_TXIDS(-37, "Transaction IDs are stored in edits log and image files"), TXID_BASED_LAYOUT(-38, "File names in NN Storage are based on transaction IDs"), EDITLOG_OP_OPTIMIZATION(-39, - "Use LongWritable and ShortWritable directly instead of ArrayWritable of UTF8"); + "Use LongWritable and ShortWritable directly instead of ArrayWritable of UTF8"), + OPTIMIZE_PERSIST_BLOCKS(-40, + "Serialize block lists with delta-encoded variable length ints, " + + "add OP_UPDATE_BLOCKS"); final int lv; final int ancestorLV; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 2f9b2b7e3f6..ab0f4c4dddd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -369,7 +369,7 @@ public class FSDirectory implements Closeable { writeLock(); try { - fsImage.getEditLog().logOpenFile(path, file); + fsImage.getEditLog().logUpdateBlocks(path, file); if(NameNode.stateChangeLog.isDebugEnabled()) { NameNode.stateChangeLog.debug("DIR* FSDirectory.persistBlocks: " +path+" with "+ file.getBlocks().length diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index a62d304f5bf..3572226d8c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -626,6 +626,13 @@ public class FSEditLog { logEdit(op); } + public void logUpdateBlocks(String path, INodeFileUnderConstruction file) { + UpdateBlocksOp op = UpdateBlocksOp.getInstance() + .setPath(path) + .setBlocks(file.getBlocks()); + logEdit(op); + } + /** * Add create directory record to edit log */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 91558353ccc..7c241071504 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.AddCloseOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.BlockListUpdatingOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.CancelDelegationTokenOp; 
import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ClearNSQuotaOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.ConcatDeleteOp; @@ -55,6 +56,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetQuotaOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SetReplicationOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.SymlinkOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.TimesOp; +import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateBlocksOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.UpdateMasterKeyOp; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.util.Holder; @@ -242,6 +244,10 @@ public class FSEditLogLoader { // Fall-through for case 2. // Regardless of whether it's a new file or an updated file, // update the block list. + + // Update the salient file attributes. + newFile.setAccessTime(addCloseOp.atime); + newFile.setModificationTimeForce(addCloseOp.mtime); updateBlocks(fsDir, addCloseOp, newFile); break; } @@ -283,6 +289,24 @@ public class FSEditLogLoader { } break; } + case OP_UPDATE_BLOCKS: { + UpdateBlocksOp updateOp = (UpdateBlocksOp)op; + if (FSNamesystem.LOG.isDebugEnabled()) { + FSNamesystem.LOG.debug(op.opCode + ": " + updateOp.path + + " numblocks : " + updateOp.blocks.length); + } + INodeFile oldFile = getINodeFile(fsDir, updateOp.path); + if (oldFile == null) { + throw new IOException( + "Operation trying to update blocks in non-existent file " + + updateOp.path); + } + + // Update in-memory data structures + updateBlocks(fsDir, updateOp, oldFile); + break; + } + case OP_SET_REPLICATION: { SetReplicationOp setReplicationOp = (SetReplicationOp)op; short replication = fsNamesys.getBlockManager().adjustReplication( @@ -472,32 +496,29 @@ public class FSEditLogLoader { * Update in-memory data structures with new block information. * @throws IOException */ - private void updateBlocks(FSDirectory fsDir, AddCloseOp addCloseOp, + private void updateBlocks(FSDirectory fsDir, BlockListUpdatingOp op, INodeFile file) throws IOException { - - // Update the salient file attributes. - file.setAccessTime(addCloseOp.atime); - file.setModificationTimeForce(addCloseOp.mtime); - // Update its block list BlockInfo[] oldBlocks = file.getBlocks(); + Block[] newBlocks = op.getBlocks(); + String path = op.getPath(); // Are we only updating the last block's gen stamp. 
- boolean isGenStampUpdate = oldBlocks.length == addCloseOp.blocks.length; + boolean isGenStampUpdate = oldBlocks.length == newBlocks.length; // First, update blocks in common - for (int i = 0; i < oldBlocks.length && i < addCloseOp.blocks.length; i++) { + for (int i = 0; i < oldBlocks.length && i < newBlocks.length; i++) { BlockInfo oldBlock = oldBlocks[i]; - Block newBlock = addCloseOp.blocks[i]; + Block newBlock = newBlocks[i]; - boolean isLastBlock = i == addCloseOp.blocks.length - 1; + boolean isLastBlock = i == newBlocks.length - 1; if (oldBlock.getBlockId() != newBlock.getBlockId() || (oldBlock.getGenerationStamp() != newBlock.getGenerationStamp() && !(isGenStampUpdate && isLastBlock))) { throw new IOException("Mismatched block IDs or generation stamps, " + "attempting to replace block " + oldBlock + " with " + newBlock + - " as block # " + i + "/" + addCloseOp.blocks.length + " of " + - addCloseOp.path); + " as block # " + i + "/" + newBlocks.length + " of " + + path); } oldBlock.setNumBytes(newBlock.getNumBytes()); @@ -506,7 +527,7 @@ public class FSEditLogLoader { oldBlock.setGenerationStamp(newBlock.getGenerationStamp()); if (oldBlock instanceof BlockInfoUnderConstruction && - (!isLastBlock || addCloseOp.opCode == FSEditLogOpCodes.OP_CLOSE)) { + (!isLastBlock || op.shouldCompleteLastBlock())) { changeMade = true; fsNamesys.getBlockManager().forceCompleteBlock( (INodeFileUnderConstruction)file, @@ -520,24 +541,27 @@ public class FSEditLogLoader { } } - if (addCloseOp.blocks.length < oldBlocks.length) { + if (newBlocks.length < oldBlocks.length) { // We're removing a block from the file, e.g. abandonBlock(...) if (!file.isUnderConstruction()) { throw new IOException("Trying to remove a block from file " + - addCloseOp.path + " which is not under construction."); + path + " which is not under construction."); } - if (addCloseOp.blocks.length != oldBlocks.length - 1) { + if (newBlocks.length != oldBlocks.length - 1) { throw new IOException("Trying to remove more than one block from file " - + addCloseOp.path); + + path); } - fsDir.unprotectedRemoveBlock(addCloseOp.path, + fsDir.unprotectedRemoveBlock(path, (INodeFileUnderConstruction)file, oldBlocks[oldBlocks.length - 1]); - } else if (addCloseOp.blocks.length > oldBlocks.length) { + } else if (newBlocks.length > oldBlocks.length) { // We're adding blocks - for (int i = oldBlocks.length; i < addCloseOp.blocks.length; i++) { - Block newBlock = addCloseOp.blocks[i]; + for (int i = oldBlocks.length; i < newBlocks.length; i++) { + Block newBlock = newBlocks[i]; BlockInfo newBI; - if (addCloseOp.opCode == FSEditLogOpCodes.OP_ADD){ + if (!op.shouldCompleteLastBlock()) { + // TODO: shouldn't this only be true for the last block? + // what about an old-version fsync() where fsync isn't called + // until several blocks in? 
newBI = new BlockInfoUnderConstruction( newBlock, file.getReplication()); } else { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index f075770c33c..949554dbda0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -101,6 +101,7 @@ public abstract class FSEditLogOp { new LogSegmentOp(OP_START_LOG_SEGMENT)); instances.put(OP_END_LOG_SEGMENT, new LogSegmentOp(OP_END_LOG_SEGMENT)); + instances.put(OP_UPDATE_BLOCKS, new UpdateBlocksOp()); return instances; } }; @@ -128,8 +129,14 @@ public abstract class FSEditLogOp { abstract void writeFields(DataOutputStream out) throws IOException; + static interface BlockListUpdatingOp { + Block[] getBlocks(); + String getPath(); + boolean shouldCompleteLastBlock(); + } + @SuppressWarnings("unchecked") - static abstract class AddCloseOp extends FSEditLogOp { + static abstract class AddCloseOp extends FSEditLogOp implements BlockListUpdatingOp { int length; String path; short replication; @@ -151,6 +158,10 @@ public abstract class FSEditLogOp { this.path = path; return (T)this; } + + public String getPath() { + return path; + } T setReplication(short replication) { this.replication = replication; @@ -176,6 +187,10 @@ public abstract class FSEditLogOp { this.blocks = blocks; return (T)this; } + + public Block[] getBlocks() { + return blocks; + } T setPermissionStatus(PermissionStatus permissions) { this.permissions = permissions; @@ -347,6 +362,10 @@ public abstract class FSEditLogOp { return (AddOp)opInstances.get().get(OP_ADD); } + public boolean shouldCompleteLastBlock() { + return false; + } + @Override public String toString() { StringBuilder builder = new StringBuilder(); @@ -365,6 +384,10 @@ public abstract class FSEditLogOp { return (CloseOp)opInstances.get().get(OP_CLOSE); } + public boolean shouldCompleteLastBlock() { + return true; + } + @Override public String toString() { StringBuilder builder = new StringBuilder(); @@ -373,6 +396,68 @@ public abstract class FSEditLogOp { return builder.toString(); } } + + static class UpdateBlocksOp extends FSEditLogOp implements BlockListUpdatingOp { + String path; + Block[] blocks; + + private UpdateBlocksOp() { + super(OP_UPDATE_BLOCKS); + } + + static UpdateBlocksOp getInstance() { + return (UpdateBlocksOp)opInstances.get() + .get(OP_UPDATE_BLOCKS); + } + + + UpdateBlocksOp setPath(String path) { + this.path = path; + return this; + } + + public String getPath() { + return path; + } + + UpdateBlocksOp setBlocks(Block[] blocks) { + this.blocks = blocks; + return this; + } + + public Block[] getBlocks() { + return blocks; + } + + @Override + void writeFields(DataOutputStream out) throws IOException { + FSImageSerialization.writeString(path, out); + FSImageSerialization.writeCompactBlockArray(blocks, out); + } + + @Override + void readFields(DataInputStream in, int logVersion) throws IOException { + path = FSImageSerialization.readString(in); + this.blocks = FSImageSerialization.readCompactBlockArray( + in, logVersion); + } + + @Override + public boolean shouldCompleteLastBlock() { + return false; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("UpdateBlocksOp [path=") + .append(path) + .append(", blocks=") + 
.append(Arrays.toString(blocks)) + .append("]"); + return sb.toString(); + } + } static class SetReplicationOp extends FSEditLogOp { String path; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java index 220c267f085..1f809c12b26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java @@ -55,7 +55,8 @@ public enum FSEditLogOpCodes { OP_UPDATE_MASTER_KEY ((byte) 21), OP_REASSIGN_LEASE ((byte) 22), OP_END_LOG_SEGMENT ((byte) 23), - OP_START_LOG_SEGMENT ((byte) 24); + OP_START_LOG_SEGMENT ((byte) 24), + OP_UPDATE_BLOCKS ((byte) 25); private byte opCode; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java index 5b480305b02..f5084339e8d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java @@ -40,6 +40,7 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.ShortWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableUtils; /** * Static utility functions for serializing various pieces of data in the correct @@ -277,6 +278,49 @@ public class FSImageSerialization { ustr.getLength(), (byte) Path.SEPARATOR_CHAR); } + + /** + * Write an array of blocks as compactly as possible. This uses + * delta-encoding for the generation stamp and size, following + * the principle that genstamp increases relatively slowly, + * and size is equal for all but the last block of a file. + */ + public static void writeCompactBlockArray( + Block[] blocks, DataOutputStream out) throws IOException { + WritableUtils.writeVInt(out, blocks.length); + Block prev = null; + for (Block b : blocks) { + long szDelta = b.getNumBytes() - + (prev != null ? prev.getNumBytes() : 0); + long gsDelta = b.getGenerationStamp() - + (prev != null ? prev.getGenerationStamp() : 0); + out.writeLong(b.getBlockId()); // blockid is random + WritableUtils.writeVLong(out, szDelta); + WritableUtils.writeVLong(out, gsDelta); + prev = b; + } + } + + public static Block[] readCompactBlockArray( + DataInputStream in, int logVersion) throws IOException { + int num = WritableUtils.readVInt(in); + if (num < 0) { + throw new IOException("Invalid block array length: " + num); + } + Block prev = null; + Block[] ret = new Block[num]; + for (int i = 0; i < num; i++) { + long id = in.readLong(); + long sz = WritableUtils.readVLong(in) + + ((prev != null) ? prev.getNumBytes() : 0); + long gs = WritableUtils.readVLong(in) + + ((prev != null) ? prev.getGenerationStamp() : 0); + ret[i] = new Block(id, sz, gs); + prev = ret[i]; + } + return ret; + } + /** * DatanodeImage is used to store persistent information * about datanodes into the fsImage. 
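For reference, a minimal sketch of how the compact block array helpers introduced above could be exercised on their own. Only writeCompactBlockArray, readCompactBlockArray and Block come from this patch; the demo class, the stream setup and the sample block values are illustrative assumptions, and -40 is the OPTIMIZE_PERSIST_BLOCKS layout version added earlier in this change.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;

public class CompactBlockArrayDemo {
  public static void main(String[] args) throws IOException {
    // The common case the delta encoding targets: all blocks but the last share a size,
    // and generation stamps grow slowly, so the vlong deltas stay small.
    Block[] blocks = new Block[] {
        new Block(1001L, 512L, 2000L),   // hypothetical block id / numBytes / genstamp
        new Block(1002L, 512L, 2000L),
        new Block(1003L, 128L, 2001L)
    };

    // Serialize: a vint count, then per block a raw long id plus vlong size/genstamp deltas.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    FSImageSerialization.writeCompactBlockArray(blocks, new DataOutputStream(bytes));

    // Deserialize and verify the round trip.
    Block[] back = FSImageSerialization.readCompactBlockArray(
        new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())), -40);
    for (Block b : back) {
      System.out.println(b.getBlockId() + " " + b.getNumBytes() + " " + b.getGenerationStamp());
    }
  }
}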
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/EditsElement.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/EditsElement.java index a01083065dc..3a460e021d5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/EditsElement.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/EditsElement.java @@ -48,6 +48,8 @@ public enum EditsElement { BLOCK_ID, BLOCK_NUM_BYTES, BLOCK_GENERATION_STAMP, + BLOCK_DELTA_NUM_BYTES, // delta-encoded relative to previous block + BLOCK_DELTA_GEN_STAMP, // delta-encoded relative to previous block PERMISSION_STATUS, FS_PERMISSIONS, CLIENT_NAME, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/EditsLoaderCurrent.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/EditsLoaderCurrent.java index f1da4c61759..d34bff92d75 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/EditsLoaderCurrent.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/EditsLoaderCurrent.java @@ -41,7 +41,7 @@ import static org.apache.hadoop.hdfs.tools.offlineEditsViewer.Tokenizer.VIntToke class EditsLoaderCurrent implements EditsLoader { private static int[] supportedVersions = { -18, -19, -20, -21, -22, -23, -24, - -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39}; + -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40}; private EditsVisitor v; private int editsVersion = 0; @@ -150,6 +150,25 @@ class EditsLoaderCurrent implements EditsLoader { } } + private void visit_OP_UPDATE_BLOCKS() throws IOException { + visitTxId(); + v.visitStringUTF8(EditsElement.PATH); + VIntToken numBlocksToken = v.visitVInt(EditsElement.NUMBLOCKS); + for (int i = 0; i < numBlocksToken.value; i++) { + v.visitEnclosingElement(EditsElement.BLOCK); + + v.visitLong(EditsElement.BLOCK_ID); + if (i == 0) { + v.visitVLong(EditsElement.BLOCK_NUM_BYTES); + v.visitVLong(EditsElement.BLOCK_GENERATION_STAMP); + } else { + v.visitVLong(EditsElement.BLOCK_DELTA_NUM_BYTES); + v.visitVLong(EditsElement.BLOCK_DELTA_GEN_STAMP); + } + v.leaveEnclosingElement(); + } + } + /** * Visit OP_RENAME_OLD */ @@ -521,6 +540,9 @@ class EditsLoaderCurrent implements EditsLoader { case OP_START_LOG_SEGMENT: // 24 visit_OP_BEGIN_LOG_SEGMENT(); break; + case OP_UPDATE_BLOCKS: // 25 + visit_OP_UPDATE_BLOCKS(); + break; default: { throw new IOException("Unknown op code " + editsOpCode); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored index 0101672b9d31cfa960c098c4541d468343f4d98f..5099ce21b3026331ccda23d2fe289b6d7b427f69 100644 GIT binary patch literal 3330 zcmb`JZBSHI7{|}u3+yf{h=|B)U+V&FeSyn@gK0C6 zp@ytcV8oP60b`?$^vRhZY?9WOnWnMw1HDi)4TBUysJw|Z+r4|va~bYlh!33m;oN)9 zd4A9TJm)#*9*@VcYP*SVul&)H_u2h^x7kZqJ?U%pYoi4qApgKBfoE{`7F1 zUm&sY!!`X^;q9tv(uVxeD&2t+D`PaXjLFF$3Lz@_>fY|0`I;Xp^N#m(IxLpJipnyE zJ;*vL%cu?%lbQYUPHPEccN+8Z^LA(Kc_}NiXlIsDW6&ohCh3zC42ecPQtAyU+63;= z2X@7VD2elcMSQ=qJl_M_8(L1Tx5z~(6`qvMQN>sY?bTE63o1MhgcL|(glSeQ%cTiX zi9&Ri5|%ML9c;DHR?58-5CgSOI7X+_f9is8lbZh9f79F_1d3E5oe=KV36rb&vSQ&* zP{*H#aIad#w{>~GW1l~+A}NG|#3-sfO(w_MQSib}$`ccjenvq$S1;^D3OjQ+|FE6m 
zf_w4jn{z>AScqsCqT*BAD)_|k*7nNCj9@Z!jnqQ%ee|%0l+XN>-K)v)P!U-o4~}p{ zDRg>XIkwb+;*Vc*zhCLk1s){?E`u$xUzqtnD5WSqhdsv%+JYQC)4lDgmC{W<6& zd!D}A=6^d!jUAzINnO-O{S6r<$M(CCi@y6+eQP5y)?+hz^xDLC#lVP=WXOQwlJS&l zy;nx}IW*tYQ6B1s*IXfceIh9T-sjk4i<+h8b< z?dY?1LO#TZ2||GXl?bqa$6|z35LAoW?}1xU`+eV3sEdCs4CeY2nd^hJG_?)CBNQuA zst2V#&0=#In0r%U;@!;OPnxt&5H&1@uyoS(pCg3)kSH13wiR%@J0h4A<7$t1JqYQC)4 zo;un$HvsCAF`J%|YipYzkGC$Nj(Vi3+7USTRu#9sW{#=dE-*@2u2FMvkq+E%Bkmv8 z94IOZ#~yTS%LZ2J;2VY9ir`Z5WkO3?u`6($}#ia$QMd?NPr3H*j7?fPxf}L|RQ}asV{rvrW9Rq?~T|%8*<8{r< zEKJP}O^xCW88{7%%=HX_&~RdTzeicrim2gS^7bAfTU;S(KVwl3!FApPLP2qZn^t z$iQfT5pZwazp`1tA___~zhagDb02Cn12>RCAfhZxP;D|{pjt#_3r@|1MHCMaF0mlq zB{s_sNOD|ni`oK7#ILuj?4|!TfV>p=L6O02!6i5?_*7 zl%86GmT%1rjgcb(#HCttcvkXq7c4n|${peyWQmeKksQQq!0_X2PTY*=5KqGL9`ox> z&(9s^Sk)1RF24M$LgkMomM!Sw82M_6ma&=|EIh>^%9-CR>v+)*VKXp@0~xFgd??{* zfEJYI1}G&Wh)cDg{Bhf21}rE+1wBEx7@H$UE0SB79j-|#p3W_hfp`rbif_JnSTe^- zghFjWh#NR0UgV1C`i8C^GZZg*SM!g{0-;%G{jH zylm`tAl2{Y=Aa@EUflz^7%u-GH2v61P-zANa?onuD?cfuC^0!3Si%Au91A!j)&i*` zqIcFZGC|BUG%+(VH8(dmHp{I~hL&Xv@+9iEFrAig;Wwzz0|5oXx`Pr$ogj)C7!-jF zRt6D$V6&#UI5Ry@HwVZ_)h$Uah8C7a7G@@vhL&a~<|d$UMd8BICUas*E@~S`7}3VD zFgG+bH#9IeL2A^L?y~2G7Bvh?3|JLocM`7$(BOE##N1TB{FGFXF`&W;1e8IX{{RJ} BRQ><} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml index 65fe23a0222..acc34bb2733 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored.xml @@ -1,34 +1,34 @@ - -38 + -40 24 1 - 1504643968 + -2045328303 21 2 1 - 1304751257518 + 1331096884634 3 - 2FhO + o0v1 - -174778556 + -1521490291 21 3 2 - 1304751257521 + 1331096884637 3 - 77-r + 3WMF - 1565957291 + 65546244 10 @@ -42,11 +42,10 @@ 0 5 - 5 /file_create 1 - 1304060057562 - 1304060057562 + 1330405685834 + 1330405685834 512 0 @@ -54,20 +53,19 @@ supergroup 420 - DFSClient_NONMAPREDUCE_-66857152_1 + DFSClient_NONMAPREDUCE_-2143415023_1 127.0.0.1 - -1854451489 + 179250704 9 6 - 5 /file_create 1 - 1304060057572 - 1304060057562 + 1330405685848 + 1330405685834 512 0 @@ -76,44 +74,41 @@ 420 - 617592855 + -584136658 1 7 - 3 /file_create /file_moved - 1304060057575 + 1330405685852 - 367100554 + -1983534581 2 8 - 2 /file_moved - 1304060057577 + 1330405685857 - 1048346698 + -97648053 3 9 - 3 /directory_mkdir - 1304060057581 - 0 + 1330405685861 + 1330405685861 todd supergroup 493 - 1207240248 + -146811985 10 @@ -127,11 +122,10 @@ 0 11 - 5 /file_create 1 - 1304060057584 - 1304060057584 + 1330405685866 + 1330405685866 512 0 @@ -139,20 +133,19 @@ supergroup 420 - DFSClient_NONMAPREDUCE_-66857152_1 + DFSClient_NONMAPREDUCE_-2143415023_1 127.0.0.1 - 1796314473 + 806955943 9 12 - 5 /file_create 1 - 1304060057588 - 1304060057584 + 1330405685868 + 1330405685866 512 0 @@ -161,7 +154,7 @@ 420 - 1017626905 + 641893387 4 @@ -170,7 +163,7 @@ /file_create 1 - 1842610087 + 24198146 7 @@ -195,12 +188,11 @@ 13 16 - 3 /file_create 1285195527000 1285195527000 - 1428793678 + 1853168961 14 @@ -216,13 +208,12 @@ 15 18 - 3 /file_create /file_moved - 1304060057605 + 1330405685882 AA - -1155144192 + -1235158297 10 @@ -236,11 +227,10 @@ 0 20 - 5 /file_concat_target 1 - 1304060057613 - 1304060057613 + 1330405685889 + 1330405685889 512 0 @@ -248,125 +238,141 @@ supergroup 420 - DFSClient_NONMAPREDUCE_-66857152_1 + DFSClient_NONMAPREDUCE_-2143415023_1 127.0.0.1 - -428545606 - - - 9 - - 21 - 5 - /file_concat_target - 1 - 1304060057694 - 1304060057613 - 512 - 3 - - 3459038074990663911 - 512 - 
1003 - - - -5555244278278879146 - 512 - 1003 - - - -6344128791846831740 - 512 - 1003 - - - todd - supergroup - 420 - - - 707995174 + -981119572 10 - 22 + 21 1004 - -1500977009 + -1627007926 - 0 + 25 + + 22 + /file_concat_target + 1 + + -7144805496741076283 + 0 + 1004 + + + -1131701615 + + + 10 23 - 5 - /file_concat_0 - 1 - 1304060057701 - 1304060057701 - 512 - 0 - - todd - supergroup - 420 - - DFSClient_NONMAPREDUCE_-66857152_1 - 127.0.0.1 + 1005 - -119850856 + -957035430 - 9 + 25 24 - 5 - /file_concat_0 - 1 - 1304060057737 - 1304060057701 - 512 - 3 + /file_concat_target + 2 - 4671949296381030428 + -7144805496741076283 512 1004 - -844362243522407159 - 512 - 1004 + -4125931756867080767 + -512 + 1 - - 3476886462779656950 - 512 - 1004 - - - todd - supergroup - 420 - - -766805874 + -932985519 10 25 - 1005 + 1006 - 238426056 + -1757460878 + + + 25 + + 26 + /file_concat_target + 3 + + -7144805496741076283 + 512 + 1004 + + + -4125931756867080767 + 0 + 1 + + + 1562413691487277050 + -512 + 1 + + + -154090859 + + + 9 + + 27 + /file_concat_target + 1 + 1330405685978 + 1330405685889 + 512 + 3 + + -7144805496741076283 + 512 + 1004 + + + -4125931756867080767 + 512 + 1005 + + + 1562413691487277050 + 512 + 1006 + + + todd + supergroup + 420 + + + -292633850 + + + 10 + + 28 + 1007 + + -1431358549 0 - 26 - 5 - /file_concat_1 + 29 + /file_concat_0 1 - 1304060057742 - 1304060057742 + 1330405685983 + 1330405685983 512 0 @@ -374,36 +380,116 @@ supergroup 420 - DFSClient_NONMAPREDUCE_-66857152_1 + DFSClient_NONMAPREDUCE_-2143415023_1 127.0.0.1 - 1156254705 + -318194869 + + + 10 + + 30 + 1008 + + 156309208 + + + 25 + + 31 + /file_concat_0 + 1 + + 6084289468290363112 + 0 + 1008 + + + -596016492 + + + 10 + + 32 + 1009 + + -1734001394 + + + 25 + + 33 + /file_concat_0 + 2 + + 6084289468290363112 + 512 + 1008 + + + -4219431127125026105 + -512 + 1 + + + 1352178323 + + + 10 + + 34 + 1010 + + 794444850 + + + 25 + + 35 + /file_concat_0 + 3 + + 6084289468290363112 + 512 + 1008 + + + -4219431127125026105 + 0 + 1 + + + -1765119074945211374 + -512 + 1 + + + -1530696539 9 - 27 - 5 - /file_concat_1 + 36 + /file_concat_0 1 - 1304060057764 - 1304060057742 + 1330405686013 + 1330405685983 512 3 - -754893470864399741 + 6084289468290363112 512 - 1005 + 1008 - 1820875380010181049 + -4219431127125026105 512 - 1005 + 1009 - 8266387560744259971 + -1765119074945211374 512 - 1005 + 1010 todd @@ -411,121 +497,336 @@ 420 - -654780301 + -2043978220 + + + 10 + + 37 + 1011 + + 1010571629 + + + 0 + + 38 + /file_concat_1 + 1 + 1330405686017 + 1330405686017 + 512 + 0 + + todd + supergroup + 420 + + DFSClient_NONMAPREDUCE_-2143415023_1 + 127.0.0.1 + + -501297097 + + + 10 + + 39 + 1012 + + -1934711736 + + + 25 + + 40 + /file_concat_1 + 1 + + -7448471719302683860 + 0 + 1012 + + + -1853122907 + + + 10 + + 41 + 1013 + + 862670668 + + + 25 + + 42 + /file_concat_1 + 2 + + -7448471719302683860 + 512 + 1012 + + + -8051065559769974521 + -512 + 1 + + + -1169706939 + + + 10 + + 43 + 1014 + + -2070661520 + + + 25 + + 44 + /file_concat_1 + 3 + + -7448471719302683860 + 512 + 1012 + + + -8051065559769974521 + 0 + 1 + + + 3808670437711973616 + -512 + 1 + + + -1568093815 + + + 9 + + 45 + /file_concat_1 + 1 + 1330405686042 + 1330405686017 + 512 + 3 + + -7448471719302683860 + 512 + 1012 + + + -8051065559769974521 + 512 + 1013 + + + 3808670437711973616 + 512 + 1014 + + + todd + supergroup + 420 + + + -1640101896 16 - 28 - 4 + 46 /file_concat_target + 2 /file_concat_0 /file_concat_1 - 1304060057767 + 1330405686046 - 1273279541 + 2122891157 17 - 29 - 4 + 47 
/file_symlink /file_concat_target - 1304060057770 - 1304060057770 + 1330405686051 + 1330405686051 todd supergroup 511 - 1385678569 + -585385283 18 - 30 + 48 0 todd JobTracker - 1304060057773 - 1304664857773 + 1330405686056 + 1331010486056 1 2 - 1304146457773 + 1330492086056 - 913145699 + 791321007 19 - 31 + 49 0 todd JobTracker - 1304060057773 - 1304664857773 + 1330405686056 + 1331010486056 1 2 - 1304146457785 + 1330492086075 - -1772039941 + 649714969 20 - 32 + 50 0 todd JobTracker - 1304060057773 - 1304664857773 + 1330405686056 + 1331010486056 1 2 - 1382094146 + 1190872628 + + + 10 + + 51 + 1015 + + -460593521 0 - 33 - 5 - /reassign-lease-test + 52 + /hard-lease-recovery-test 1 - 1286491964741 - 1286491964741 + 1330405686084 + 1330405686084 512 0 - atm + todd supergroup 420 - DFSClient_871171074 + DFSClient_NONMAPREDUCE_-2143415023_1 127.0.0.1 - 1975140107 + 2093219037 + + + 10 + + 53 + 1016 + + 120488596 + + + 25 + + 54 + /hard-lease-recovery-test + 1 + + -357061736603024522 + 0 + 1016 + + + 2098840974 + + + 25 + + 55 + /hard-lease-recovery-test + 1 + + -357061736603024522 + 0 + 1016 + + + -1794222801 + + + 10 + + 56 + 1017 + + -2123999915 22 - 34 - DFSClient_871171074 - /reassign-lease-test + 57 + DFSClient_NONMAPREDUCE_-2143415023_1 + /hard-lease-recovery-test HDFS_NameNode - 1975140107 + -1841690515 + + + 9 + + 58 + /hard-lease-recovery-test + 1 + 1330405688726 + 1330405686084 + 512 + 1 + + -357061736603024522 + 11 + 1017 + + + todd + supergroup + 420 + + + -218102037 23 - 35 + 59 - 1975140107 + -1616653774 -1 From 9318ff425019b9f88e154a80f3aeb23e6c69cb69 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 1 Mar 2012 07:03:40 +0000 Subject: [PATCH 172/177] Revert commit of HDFS-2979. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1295435 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 - .../java/org/apache/hadoop/hdfs/DFSUtil.java | 65 ------------------- .../hadoop/hdfs/server/balancer/Balancer.java | 15 +++-- .../server/balancer/NameNodeConnector.java | 14 ++-- .../hadoop/hdfs/server/namenode/NameNode.java | 16 ++--- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 49 +------------- .../hdfs/server/balancer/TestBalancer.java | 7 +- .../balancer/TestBalancerWithHANameNodes.java | 25 +++---- .../TestBalancerWithMultipleNameNodes.java | 9 +-- .../hdfs/server/namenode/ha/HATestUtil.java | 7 -- 10 files changed, 51 insertions(+), 158 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 5cd34af391b..9ded5e8efc0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -243,6 +243,4 @@ HDFS-2920. fix remaining TODO items. (atm and todd) HDFS-3027. Implement a simple NN health check. (atm) -HDFS-2979. Balancer should use logical uri for creating failover proxy with HA enabled. (atm) - HDFS-3023. Optimize entries in edits log for persistBlocks call. 
(todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index e63ed0d26b6..f4a861089be 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -28,11 +28,9 @@ import java.security.SecureRandom; import java.util.Collection; import java.util.Collections; import java.util.Comparator; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; -import java.util.Set; import java.util.StringTokenizer; import javax.net.SocketFactory; @@ -45,7 +43,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolTranslatorPB; @@ -608,68 +605,6 @@ public class DFSUtil { "nnId=" + namenodeId + ";addr=" + addr + "]"; } } - - /** - * Get a URI for each configured nameservice. If a nameservice is - * HA-enabled, then the logical URI of the nameservice is returned. If the - * nameservice is not HA-enabled, then a URI corresponding to an RPC address - * of the single NN for that nameservice is returned, preferring the service - * RPC address over the client RPC address. - * - * @param conf configuration - * @return a collection of all configured NN URIs, preferring service - * addresses - */ - public static Collection getNsServiceRpcUris(Configuration conf) { - return getNameServiceUris(conf, - DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, - DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); - } - - /** - * Get a URI for each configured nameservice. If a nameservice is - * HA-enabled, then the logical URI of the nameservice is returned. If the - * nameservice is not HA-enabled, then a URI corresponding to the address of - * the single NN for that nameservice is returned. - * - * @param conf configuration - * @param keys configuration keys to try in order to get the URI for non-HA - * nameservices - * @return a collection of all configured NN URIs - */ - public static Collection getNameServiceUris(Configuration conf, - String... keys) { - Set ret = new HashSet(); - for (String nsId : getNameServiceIds(conf)) { - if (HAUtil.isHAEnabled(conf, nsId)) { - // Add the logical URI of the nameservice. - try { - ret.add(new URI(HdfsConstants.HDFS_URI_SCHEME + "://" + nsId)); - } catch (URISyntaxException ue) { - throw new IllegalArgumentException(ue); - } - } else { - // Add the URI corresponding to the address of the NN. - for (String key : keys) { - String addr = conf.get(concatSuffixes(key, nsId)); - if (addr != null) { - ret.add(createUri(HdfsConstants.HDFS_URI_SCHEME, - NetUtils.createSocketAddr(addr))); - break; - } - } - } - } - // Add the generic configuration keys. 
- for (String key : keys) { - String addr = conf.get(key); - if (addr != null) { - ret.add(createUri("hdfs", NetUtils.createSocketAddr(addr))); - break; - } - } - return ret; - } /** * Given the InetSocketAddress this method returns the nameservice Id diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java index e808af623cc..bc7c13a9147 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java @@ -24,8 +24,8 @@ import java.io.BufferedOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import java.net.InetSocketAddress; import java.net.Socket; -import java.net.URI; import java.text.DateFormat; import java.util.ArrayList; import java.util.Arrays; @@ -39,6 +39,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -1379,7 +1380,8 @@ public class Balancer { * for each namenode, * execute a {@link Balancer} to work through all datanodes once. */ - static int run(Collection namenodes, final Parameters p, + static int run(Map> namenodes, + final Parameters p, Configuration conf) throws IOException, InterruptedException { final long sleeptime = 2000*conf.getLong( DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, @@ -1393,8 +1395,10 @@ public class Balancer { final List connectors = new ArrayList(namenodes.size()); try { - for (URI uri : namenodes) { - connectors.add(new NameNodeConnector(uri, conf)); + for(Entry> entry : + namenodes.entrySet()) { + connectors.add( + new NameNodeConnector(entry.getValue().values(), conf)); } boolean done = false; @@ -1476,7 +1480,8 @@ public class Balancer { try { checkReplicationPolicyCompatibility(conf); - final Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); + final Map> namenodes = + DFSUtil.getNNServiceRpcAddresses(conf); return Balancer.run(namenodes, parse(args), conf); } catch (IOException e) { System.out.println(e + ". 
Exiting ..."); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java index c4208b79516..eab6273c221 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java @@ -21,7 +21,9 @@ import java.io.DataOutputStream; import java.io.IOException; import java.io.OutputStream; import java.net.InetAddress; +import java.net.InetSocketAddress; import java.net.URI; +import java.util.Collection; import java.util.EnumSet; import org.apache.commons.logging.Log; @@ -36,6 +38,7 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; +import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.io.IOUtils; @@ -43,6 +46,8 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Daemon; +import com.google.common.collect.Lists; + /** * The class provides utilities for {@link Balancer} to access a NameNode */ @@ -51,7 +56,7 @@ class NameNodeConnector { private static final Log LOG = Balancer.LOG; private static final Path BALANCER_ID_PATH = new Path("/system/balancer.id"); - final URI nameNodeUri; + final InetSocketAddress namenodeAddress; final String blockpoolID; final NamenodeProtocol namenode; @@ -65,9 +70,10 @@ class NameNodeConnector { private BlockTokenSecretManager blockTokenSecretManager; private Daemon keyupdaterthread; // AccessKeyUpdater thread - NameNodeConnector(URI nameNodeUri, + NameNodeConnector(Collection haNNs, Configuration conf) throws IOException { - this.nameNodeUri = nameNodeUri; + this.namenodeAddress = Lists.newArrayList(haNNs).get(0); + URI nameNodeUri = NameNode.getUri(this.namenodeAddress); this.namenode = NameNodeProxies.createProxy(conf, nameNodeUri, NamenodeProtocol.class) @@ -180,7 +186,7 @@ class NameNodeConnector { @Override public String toString() { - return getClass().getSimpleName() + "[namenodeUri=" + nameNodeUri + return getClass().getSimpleName() + "[namenodeAddress=" + namenodeAddress + ", id=" + blockpoolID + "]"; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index d0e657baab8..b62f0d5d9ea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -526,21 +526,21 @@ public class NameNode { protected NameNode(Configuration conf, NamenodeRole role) throws IOException { - this.conf = new Configuration(conf); + this.conf = conf; this.role = role; - String nsId = getNameServiceId(this.conf); - String namenodeId = HAUtil.getNameNodeId(this.conf, nsId); - this.haEnabled = HAUtil.isHAEnabled(this.conf, nsId); + String nsId = getNameServiceId(conf); + String namenodeId = 
HAUtil.getNameNodeId(conf, nsId); + this.haEnabled = HAUtil.isHAEnabled(conf, nsId); if (!haEnabled) { state = ACTIVE_STATE; } else { state = STANDBY_STATE; } - this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(this.conf); + this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); this.haContext = createHAContext(); try { - initializeGenericKeys(this.conf, nsId, namenodeId); - initialize(this.conf); + initializeGenericKeys(conf, nsId, namenodeId); + initialize(conf); state.prepareToEnterState(haContext); state.enterState(haContext); } catch (IOException e) { @@ -651,7 +651,6 @@ public class NameNode { throws IOException { String nsId = DFSUtil.getNamenodeNameServiceId(conf); String namenodeId = HAUtil.getNameNodeId(conf, nsId); - conf = new Configuration(conf); initializeGenericKeys(conf, nsId, namenodeId); if (!conf.getBoolean(DFS_NAMENODE_SUPPORT_ALLOW_FORMAT_KEY, @@ -698,7 +697,6 @@ public class NameNode { private static boolean finalize(Configuration conf, boolean isConfirmationNeeded ) throws IOException { - conf = new Configuration(conf); String nsId = DFSUtil.getNamenodeNameServiceId(conf); String namenodeId = HAUtil.getNameNodeId(conf, nsId); initializeGenericKeys(conf, nsId, namenodeId); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index a9b62c3aead..e49bb107e20 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -25,8 +25,6 @@ import static org.junit.Assert.*; import java.io.IOException; import java.net.InetSocketAddress; -import java.net.URI; -import java.net.URISyntaxException; import java.util.Arrays; import java.util.Collection; import java.util.Iterator; @@ -43,8 +41,6 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.CommonConfigurationKeys; - import static org.apache.hadoop.hdfs.DFSConfigKeys.*; public class TestDFSUtil { @@ -237,12 +233,11 @@ public class TestDFSUtil { * {@link DFSUtil#isDefaultNamenodeAddress(Configuration, InetSocketAddress, String...)} */ @Test - public void testSingleNamenode() throws URISyntaxException { + public void testSingleNamenode() { HdfsConfiguration conf = new HdfsConfiguration(); final String DEFAULT_ADDRESS = "localhost:9000"; final String NN2_ADDRESS = "localhost:9001"; conf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, DEFAULT_ADDRESS); - conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, DEFAULT_ADDRESS); InetSocketAddress testAddress1 = NetUtils.createSocketAddr(DEFAULT_ADDRESS); boolean isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress1, @@ -252,10 +247,6 @@ public class TestDFSUtil { isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress2, DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); assertFalse(isDefault); - - Collection uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_RPC_ADDRESS_KEY); - assertEquals(1, uris.size()); - assertTrue(uris.contains(new URI("hdfs://" + DEFAULT_ADDRESS))); } /** Tests to ensure default namenode is used as fallback */ @@ -416,14 +407,13 @@ public class TestDFSUtil { } @Test - public void testHANameNodesWithFederation() throws URISyntaxException { + public void testHANameNodesWithFederation() { 
HdfsConfiguration conf = new HdfsConfiguration(); final String NS1_NN1_HOST = "ns1-nn1.example.com:8020"; final String NS1_NN2_HOST = "ns1-nn2.example.com:8020"; final String NS2_NN1_HOST = "ns2-nn1.example.com:8020"; final String NS2_NN2_HOST = "ns2-nn2.example.com:8020"; - conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "hdfs://ns1"); // Two nameservices, each with two NNs. conf.set(DFS_FEDERATION_NAMESERVICES, "ns1,ns2"); @@ -470,11 +460,6 @@ public class TestDFSUtil { // Ditto for nameservice IDs, if multiple are defined assertEquals(null, DFSUtil.getNamenodeNameServiceId(conf)); assertEquals(null, DFSUtil.getSecondaryNameServiceId(conf)); - - Collection uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_RPC_ADDRESS_KEY); - assertEquals(2, uris.size()); - assertTrue(uris.contains(new URI("hdfs://ns1"))); - assertTrue(uris.contains(new URI("hdfs://ns2"))); } @Test @@ -524,34 +509,4 @@ public class TestDFSUtil { assertEquals("127.0.0.1:12345", DFSUtil.substituteForWildcardAddress("127.0.0.1:12345", "foo")); } - - @Test - public void testGetNNUris() throws Exception { - HdfsConfiguration conf = new HdfsConfiguration(); - - final String NS1_NN1_HOST = "ns1-nn1.example.com:8020"; - final String NS1_NN2_HOST = "ns1-nn1.example.com:8020"; - final String NS2_NN_HOST = "ns2-nn.example.com:8020"; - final String NN_HOST = "nn.example.com:8020"; - - conf.set(DFS_FEDERATION_NAMESERVICES, "ns1,ns2"); - conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns1"),"nn1,nn2"); - conf.set(DFSUtil.addKeySuffixes( - DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn1"), NS1_NN1_HOST); - conf.set(DFSUtil.addKeySuffixes( - DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn2"), NS1_NN2_HOST); - - conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, "ns2"), - NS2_NN_HOST); - - conf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, "hdfs://" + NN_HOST); - - Collection uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_RPC_ADDRESS_KEY, - DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY); - - assertEquals(3, uris.size()); - assertTrue(uris.contains(new URI("hdfs://ns1"))); - assertTrue(uris.contains(new URI("hdfs://" + NS2_NN_HOST))); - assertTrue(uris.contains(new URI("hdfs://" + NN_HOST))); - } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java index 81b03a568e2..2dddb1b6e08 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java @@ -18,11 +18,11 @@ package org.apache.hadoop.hdfs.server.balancer; import java.io.IOException; -import java.net.URI; +import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.Random; import java.util.concurrent.TimeoutException; @@ -338,7 +338,8 @@ public class TestBalancer extends TestCase { waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); // start rebalancing - Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); + Map> namenodes = + DFSUtil.getNNServiceRpcAddresses(conf); final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf); assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java index 9a0001fd09f..e064534da42 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java @@ -18,10 +18,9 @@ package org.apache.hadoop.hdfs.server.balancer; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import java.net.URI; -import java.util.Collection; +import java.net.InetSocketAddress; +import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -68,12 +67,12 @@ public class TestBalancerWithHANameNodes { int numOfDatanodes = capacities.length; NNConf nn1Conf = new MiniDFSNNTopology.NNConf("nn1"); nn1Conf.setIpcPort(NameNode.DEFAULT_PORT); - cluster = new MiniDFSCluster.Builder(conf) - .nnTopology(MiniDFSNNTopology.simpleHATopology()) - .numDataNodes(capacities.length) - .racks(racks) - .simulatedCapacities(capacities) - .build(); + MiniDFSNNTopology simpleHATopology = new MiniDFSNNTopology() + .addNameservice(new MiniDFSNNTopology.NSConf(null).addNN(nn1Conf) + .addNN(new MiniDFSNNTopology.NNConf("nn2"))); + cluster = new MiniDFSCluster.Builder(conf).nnTopology(simpleHATopology) + .numDataNodes(capacities.length).racks(racks).simulatedCapacities( + capacities).build(); HATestUtil.setFailoverConfigurations(cluster, conf); try { cluster.waitActive(); @@ -90,12 +89,14 @@ public class TestBalancerWithHANameNodes { // start up an empty node with the same capacity and on the same rack cluster.startDataNodes(conf, 1, true, null, new String[] { newNodeRack }, new long[] { newNodeCapacity }); + + HATestUtil.setFailoverConfigurations(cluster, conf, NameNode.getUri( + cluster.getNameNode(0).getNameNodeAddress()).getHost()); totalCapacity += newNodeCapacity; TestBalancer.waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); - Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); - assertEquals(1, namenodes.size()); - assertTrue(namenodes.contains(HATestUtil.getLogicalUri(cluster))); + Map> namenodes = DFSUtil + .getNNServiceRpcAddresses(conf); final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf); assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java index 333d23ad9bc..0245615a487 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java @@ -18,10 +18,10 @@ package org.apache.hadoop.hdfs.server.balancer; import java.io.IOException; -import java.net.URI; +import java.net.InetSocketAddress; import java.util.Arrays; -import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.Random; import org.apache.commons.logging.Log; @@ -40,8 +40,8 @@ import org.apache.hadoop.hdfs.protocol.Block; 
import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.LeaseManager; import org.apache.hadoop.hdfs.server.namenode.NameNode; @@ -157,7 +157,8 @@ public class TestBalancerWithMultipleNameNodes { LOG.info("BALANCER 1"); // start rebalancing - final Collection namenodes = DFSUtil.getNsServiceRpcUris(s.conf); + final Map> namenodes = + DFSUtil.getNNServiceRpcAddresses(s.conf); final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, s.conf); Assert.assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java index bf919cea7f8..42b5612571a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -35,7 +35,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; @@ -189,12 +188,6 @@ public abstract class HATestUtil { return String.format(LOGICAL_HOSTNAME, cluster.getInstanceId()); } - public static URI getLogicalUri(MiniDFSCluster cluster) - throws URISyntaxException { - return new URI(HdfsConstants.HDFS_URI_SCHEME + "://" + - getLogicalHostname(cluster)); - } - public static void waitForCheckpoint(MiniDFSCluster cluster, int nnIdx, List txids) throws InterruptedException { long start = System.currentTimeMillis(); From 0d7de507a5d6e9292aeabe357fab6976f81f5be4 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Thu, 1 Mar 2012 08:50:10 +0000 Subject: [PATCH 173/177] HDFS-2979. Balancer should use logical uri for creating failover proxy with HA enabled. Contributed by Aaron T. Myers. 
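To show what the restored behavior looks like from the caller's side, a minimal sketch of resolving nameservice URIs from a mixed HA / non-HA configuration, mirroring the TestDFSUtil assertions in this patch. Host names and ports are placeholders; only DFSUtil.addKeySuffixes, DFSUtil.getNsServiceRpcUris and the DFSConfigKeys constants are taken from the code below.

import static org.apache.hadoop.hdfs.DFSConfigKeys.*;

import java.net.URI;
import java.util.Collection;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class NsServiceRpcUrisDemo {
  public static void main(String[] args) {
    HdfsConfiguration conf = new HdfsConfiguration();

    // ns1 is HA-enabled (two NNs), so only its logical URI hdfs://ns1 should be returned.
    conf.set(DFS_FEDERATION_NAMESERVICES, "ns1,ns2");
    conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns1"), "nn1,nn2");
    conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn1"),
        "ns1-nn1.example.com:8020");
    conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn2"),
        "ns1-nn2.example.com:8020");

    // ns2 is a plain nameservice; its service RPC address is used directly.
    conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, "ns2"),
        "ns2-nn.example.com:8020");

    // Expected to contain hdfs://ns1 and hdfs://ns2-nn.example.com:8020 -- the
    // collection the Balancer now iterates over, one NameNodeConnector per URI.
    Collection<URI> uris = DFSUtil.getNsServiceRpcUris(conf);
    System.out.println(uris);
  }
}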
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1295473 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 65 +++++++++++++++++++ .../hadoop/hdfs/server/balancer/Balancer.java | 15 ++--- .../server/balancer/NameNodeConnector.java | 14 ++-- .../org/apache/hadoop/hdfs/DFSTestUtil.java | 24 +++++++ .../apache/hadoop/hdfs/MiniDFSCluster.java | 20 ++++-- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 49 +++++++++++++- .../hdfs/server/balancer/TestBalancer.java | 7 +- .../balancer/TestBalancerWithHANameNodes.java | 26 ++++---- .../TestBalancerWithMultipleNameNodes.java | 17 +++-- .../hdfs/server/namenode/ha/HATestUtil.java | 7 ++ 11 files changed, 196 insertions(+), 50 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 9ded5e8efc0..a0d3c1785b8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -244,3 +244,5 @@ HDFS-2920. fix remaining TODO items. (atm and todd) HDFS-3027. Implement a simple NN health check. (atm) HDFS-3023. Optimize entries in edits log for persistBlocks call. (todd) + +HDFS-2979. Balancer should use logical uri for creating failover proxy with HA enabled. (atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index f4a861089be..e63ed0d26b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -28,9 +28,11 @@ import java.security.SecureRandom; import java.util.Collection; import java.util.Collections; import java.util.Comparator; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Random; +import java.util.Set; import java.util.StringTokenizer; import javax.net.SocketFactory; @@ -43,6 +45,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocolPB.ClientDatanodeProtocolTranslatorPB; @@ -605,6 +608,68 @@ public class DFSUtil { "nnId=" + namenodeId + ";addr=" + addr + "]"; } } + + /** + * Get a URI for each configured nameservice. If a nameservice is + * HA-enabled, then the logical URI of the nameservice is returned. If the + * nameservice is not HA-enabled, then a URI corresponding to an RPC address + * of the single NN for that nameservice is returned, preferring the service + * RPC address over the client RPC address. + * + * @param conf configuration + * @return a collection of all configured NN URIs, preferring service + * addresses + */ + public static Collection getNsServiceRpcUris(Configuration conf) { + return getNameServiceUris(conf, + DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, + DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY); + } + + /** + * Get a URI for each configured nameservice. If a nameservice is + * HA-enabled, then the logical URI of the nameservice is returned. 
If the + * nameservice is not HA-enabled, then a URI corresponding to the address of + * the single NN for that nameservice is returned. + * + * @param conf configuration + * @param keys configuration keys to try in order to get the URI for non-HA + * nameservices + * @return a collection of all configured NN URIs + */ + public static Collection getNameServiceUris(Configuration conf, + String... keys) { + Set ret = new HashSet(); + for (String nsId : getNameServiceIds(conf)) { + if (HAUtil.isHAEnabled(conf, nsId)) { + // Add the logical URI of the nameservice. + try { + ret.add(new URI(HdfsConstants.HDFS_URI_SCHEME + "://" + nsId)); + } catch (URISyntaxException ue) { + throw new IllegalArgumentException(ue); + } + } else { + // Add the URI corresponding to the address of the NN. + for (String key : keys) { + String addr = conf.get(concatSuffixes(key, nsId)); + if (addr != null) { + ret.add(createUri(HdfsConstants.HDFS_URI_SCHEME, + NetUtils.createSocketAddr(addr))); + break; + } + } + } + } + // Add the generic configuration keys. + for (String key : keys) { + String addr = conf.get(key); + if (addr != null) { + ret.add(createUri("hdfs", NetUtils.createSocketAddr(addr))); + break; + } + } + return ret; + } /** * Given the InetSocketAddress this method returns the nameservice Id diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java index bc7c13a9147..e808af623cc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java @@ -24,8 +24,8 @@ import java.io.BufferedOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; -import java.net.InetSocketAddress; import java.net.Socket; +import java.net.URI; import java.text.DateFormat; import java.util.ArrayList; import java.util.Arrays; @@ -39,7 +39,6 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Map.Entry; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -1380,8 +1379,7 @@ public class Balancer { * for each namenode, * execute a {@link Balancer} to work through all datanodes once. */ - static int run(Map> namenodes, - final Parameters p, + static int run(Collection namenodes, final Parameters p, Configuration conf) throws IOException, InterruptedException { final long sleeptime = 2000*conf.getLong( DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, @@ -1395,10 +1393,8 @@ public class Balancer { final List connectors = new ArrayList(namenodes.size()); try { - for(Entry> entry : - namenodes.entrySet()) { - connectors.add( - new NameNodeConnector(entry.getValue().values(), conf)); + for (URI uri : namenodes) { + connectors.add(new NameNodeConnector(uri, conf)); } boolean done = false; @@ -1480,8 +1476,7 @@ public class Balancer { try { checkReplicationPolicyCompatibility(conf); - final Map> namenodes = - DFSUtil.getNNServiceRpcAddresses(conf); + final Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); return Balancer.run(namenodes, parse(args), conf); } catch (IOException e) { System.out.println(e + ". 
Exiting ..."); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java index eab6273c221..c4208b79516 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/NameNodeConnector.java @@ -21,9 +21,7 @@ import java.io.DataOutputStream; import java.io.IOException; import java.io.OutputStream; import java.net.InetAddress; -import java.net.InetSocketAddress; import java.net.URI; -import java.util.Collection; import java.util.EnumSet; import org.apache.commons.logging.Log; @@ -38,7 +36,6 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.BlockTokenSecretManager; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; -import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.io.IOUtils; @@ -46,8 +43,6 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Daemon; -import com.google.common.collect.Lists; - /** * The class provides utilities for {@link Balancer} to access a NameNode */ @@ -56,7 +51,7 @@ class NameNodeConnector { private static final Log LOG = Balancer.LOG; private static final Path BALANCER_ID_PATH = new Path("/system/balancer.id"); - final InetSocketAddress namenodeAddress; + final URI nameNodeUri; final String blockpoolID; final NamenodeProtocol namenode; @@ -70,10 +65,9 @@ class NameNodeConnector { private BlockTokenSecretManager blockTokenSecretManager; private Daemon keyupdaterthread; // AccessKeyUpdater thread - NameNodeConnector(Collection haNNs, + NameNodeConnector(URI nameNodeUri, Configuration conf) throws IOException { - this.namenodeAddress = Lists.newArrayList(haNNs).get(0); - URI nameNodeUri = NameNode.getUri(this.namenodeAddress); + this.nameNodeUri = nameNodeUri; this.namenode = NameNodeProxies.createProxy(conf, nameNodeUri, NamenodeProtocol.class) @@ -186,7 +180,7 @@ class NameNodeConnector { @Override public String toString() { - return getClass().getSimpleName() + "[namenodeAddress=" + namenodeAddress + return getClass().getSimpleName() + "[namenodeUri=" + nameNodeUri + ", id=" + blockpoolID + "]"; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index 409dd375257..7854f95f884 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hdfs; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY; import static org.junit.Assert.assertEquals; import java.io.BufferedOutputStream; @@ -38,9 +40,11 @@ import java.net.URLConnection; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import 
java.util.List; import java.util.Map; import java.util.Random; +import java.util.Set; import java.util.concurrent.TimeoutException; import org.apache.hadoop.conf.Configuration; @@ -52,6 +56,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem.Statistics; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSClient.DFSDataInputStream; +import org.apache.hadoop.hdfs.MiniDFSCluster.NameNodeInfo; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsConstants; @@ -74,6 +79,8 @@ import org.apache.hadoop.security.ShellBasedUnixGroupsMapping; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; +import com.google.common.base.Joiner; + /** Utilities for HDFS tests */ public class DFSTestUtil { @@ -681,4 +688,21 @@ public class DFSTestUtil { return BlockOpResponseProto.parseDelimitedFrom(in); } + + public static void setFederatedConfiguration(MiniDFSCluster cluster, + Configuration conf) { + Set nameservices = new HashSet(); + for (NameNodeInfo info : cluster.getNameNodeInfos()) { + assert info.nameserviceId != null; + nameservices.add(info.nameserviceId); + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, + info.nameserviceId), DFSUtil.createUri(HdfsConstants.HDFS_URI_SCHEME, + info.nameNode.getNameNodeAddress()).toString()); + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, + info.nameserviceId), DFSUtil.createUri(HdfsConstants.HDFS_URI_SCHEME, + info.nameNode.getNameNodeAddress()).toString()); + } + conf.set(DFSConfigKeys.DFS_FEDERATION_NAMESERVICES, Joiner.on(",") + .join(nameservices)); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index d69dc0a7da7..584446ac13c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -316,8 +316,13 @@ public class MiniDFSCluster { static class NameNodeInfo { final NameNode nameNode; final Configuration conf; - NameNodeInfo(NameNode nn, Configuration conf) { + final String nameserviceId; + final String nnId; + NameNodeInfo(NameNode nn, String nameserviceId, String nnId, + Configuration conf) { this.nameNode = nn; + this.nameserviceId = nameserviceId; + this.nnId = nnId; this.conf = conf; } } @@ -674,6 +679,10 @@ public class MiniDFSCluster { return fileAsURI(new File(baseDir, "shared-edits-" + minNN + "-through-" + maxNN)); } + + public NameNodeInfo[] getNameNodeInfos() { + return this.nameNodes; + } private void initNameNodeConf(Configuration conf, String nameserviceId, String nnId, @@ -763,7 +772,8 @@ public class MiniDFSCluster { .getHostPortString(nn.getHttpAddress())); DFSUtil.setGenericConf(conf, nameserviceId, nnId, DFS_NAMENODE_HTTP_ADDRESS_KEY); - nameNodes[nnIndex] = new NameNodeInfo(nn, new Configuration(conf)); + nameNodes[nnIndex] = new NameNodeInfo(nn, nameserviceId, nnId, + new Configuration(conf)); } /** @@ -1264,7 +1274,7 @@ public class MiniDFSCluster { nn.stop(); nn.join(); Configuration conf = nameNodes[nnIndex].conf; - nameNodes[nnIndex] = new NameNodeInfo(null, conf); + nameNodes[nnIndex] = new NameNodeInfo(null, null, null, conf); } } @@ -1307,10 +1317,12 @@ public class MiniDFSCluster { */ public synchronized void 
restartNameNode(int nnIndex, boolean waitActive) throws IOException { + String nameserviceId = nameNodes[nnIndex].nameserviceId; + String nnId = nameNodes[nnIndex].nnId; Configuration conf = nameNodes[nnIndex].conf; shutdownNameNode(nnIndex); NameNode nn = NameNode.createNameNode(new String[] {}, conf); - nameNodes[nnIndex] = new NameNodeInfo(nn, conf); + nameNodes[nnIndex] = new NameNodeInfo(nn, nameserviceId, nnId, conf); if (waitActive) { waitClusterUp(); LOG.info("Restarted the namenode"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index e49bb107e20..a9b62c3aead 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -25,6 +25,8 @@ import static org.junit.Assert.*; import java.io.IOException; import java.net.InetSocketAddress; +import java.net.URI; +import java.net.URISyntaxException; import java.util.Arrays; import java.util.Collection; import java.util.Iterator; @@ -41,6 +43,8 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.CommonConfigurationKeys; + import static org.apache.hadoop.hdfs.DFSConfigKeys.*; public class TestDFSUtil { @@ -233,11 +237,12 @@ public class TestDFSUtil { * {@link DFSUtil#isDefaultNamenodeAddress(Configuration, InetSocketAddress, String...)} */ @Test - public void testSingleNamenode() { + public void testSingleNamenode() throws URISyntaxException { HdfsConfiguration conf = new HdfsConfiguration(); final String DEFAULT_ADDRESS = "localhost:9000"; final String NN2_ADDRESS = "localhost:9001"; conf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, DEFAULT_ADDRESS); + conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, DEFAULT_ADDRESS); InetSocketAddress testAddress1 = NetUtils.createSocketAddr(DEFAULT_ADDRESS); boolean isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress1, @@ -247,6 +252,10 @@ public class TestDFSUtil { isDefault = DFSUtil.isDefaultNamenodeAddress(conf, testAddress2, DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); assertFalse(isDefault); + + Collection uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_RPC_ADDRESS_KEY); + assertEquals(1, uris.size()); + assertTrue(uris.contains(new URI("hdfs://" + DEFAULT_ADDRESS))); } /** Tests to ensure default namenode is used as fallback */ @@ -407,13 +416,14 @@ public class TestDFSUtil { } @Test - public void testHANameNodesWithFederation() { + public void testHANameNodesWithFederation() throws URISyntaxException { HdfsConfiguration conf = new HdfsConfiguration(); final String NS1_NN1_HOST = "ns1-nn1.example.com:8020"; final String NS1_NN2_HOST = "ns1-nn2.example.com:8020"; final String NS2_NN1_HOST = "ns2-nn1.example.com:8020"; final String NS2_NN2_HOST = "ns2-nn2.example.com:8020"; + conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "hdfs://ns1"); // Two nameservices, each with two NNs. 
conf.set(DFS_FEDERATION_NAMESERVICES, "ns1,ns2"); @@ -460,6 +470,11 @@ public class TestDFSUtil { // Ditto for nameservice IDs, if multiple are defined assertEquals(null, DFSUtil.getNamenodeNameServiceId(conf)); assertEquals(null, DFSUtil.getSecondaryNameServiceId(conf)); + + Collection uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_RPC_ADDRESS_KEY); + assertEquals(2, uris.size()); + assertTrue(uris.contains(new URI("hdfs://ns1"))); + assertTrue(uris.contains(new URI("hdfs://ns2"))); } @Test @@ -509,4 +524,34 @@ public class TestDFSUtil { assertEquals("127.0.0.1:12345", DFSUtil.substituteForWildcardAddress("127.0.0.1:12345", "foo")); } + + @Test + public void testGetNNUris() throws Exception { + HdfsConfiguration conf = new HdfsConfiguration(); + + final String NS1_NN1_HOST = "ns1-nn1.example.com:8020"; + final String NS1_NN2_HOST = "ns1-nn1.example.com:8020"; + final String NS2_NN_HOST = "ns2-nn.example.com:8020"; + final String NN_HOST = "nn.example.com:8020"; + + conf.set(DFS_FEDERATION_NAMESERVICES, "ns1,ns2"); + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns1"),"nn1,nn2"); + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn1"), NS1_NN1_HOST); + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn2"), NS1_NN2_HOST); + + conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, "ns2"), + NS2_NN_HOST); + + conf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, "hdfs://" + NN_HOST); + + Collection uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_RPC_ADDRESS_KEY, + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY); + + assertEquals(3, uris.size()); + assertTrue(uris.contains(new URI("hdfs://ns1"))); + assertTrue(uris.contains(new URI("hdfs://" + NS2_NN_HOST))); + assertTrue(uris.contains(new URI("hdfs://" + NN_HOST))); + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java index 2dddb1b6e08..81b03a568e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java @@ -18,11 +18,11 @@ package org.apache.hadoop.hdfs.server.balancer; import java.io.IOException; -import java.net.InetSocketAddress; +import java.net.URI; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.List; -import java.util.Map; import java.util.Random; import java.util.concurrent.TimeoutException; @@ -338,8 +338,7 @@ public class TestBalancer extends TestCase { waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); // start rebalancing - Map> namenodes = - DFSUtil.getNNServiceRpcAddresses(conf); + Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf); assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java index e064534da42..9d13a2b6190 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithHANameNodes.java @@ -18,9 +18,10 @@ package org.apache.hadoop.hdfs.server.balancer; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; -import java.net.InetSocketAddress; -import java.util.Map; +import java.net.URI; +import java.util.Collection; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -67,12 +68,13 @@ public class TestBalancerWithHANameNodes { int numOfDatanodes = capacities.length; NNConf nn1Conf = new MiniDFSNNTopology.NNConf("nn1"); nn1Conf.setIpcPort(NameNode.DEFAULT_PORT); - MiniDFSNNTopology simpleHATopology = new MiniDFSNNTopology() - .addNameservice(new MiniDFSNNTopology.NSConf(null).addNN(nn1Conf) - .addNN(new MiniDFSNNTopology.NNConf("nn2"))); - cluster = new MiniDFSCluster.Builder(conf).nnTopology(simpleHATopology) - .numDataNodes(capacities.length).racks(racks).simulatedCapacities( - capacities).build(); + Configuration copiedConf = new Configuration(conf); + cluster = new MiniDFSCluster.Builder(copiedConf) + .nnTopology(MiniDFSNNTopology.simpleHATopology()) + .numDataNodes(capacities.length) + .racks(racks) + .simulatedCapacities(capacities) + .build(); HATestUtil.setFailoverConfigurations(cluster, conf); try { cluster.waitActive(); @@ -89,14 +91,12 @@ public class TestBalancerWithHANameNodes { // start up an empty node with the same capacity and on the same rack cluster.startDataNodes(conf, 1, true, null, new String[] { newNodeRack }, new long[] { newNodeCapacity }); - - HATestUtil.setFailoverConfigurations(cluster, conf, NameNode.getUri( - cluster.getNameNode(0).getNameNodeAddress()).getHost()); totalCapacity += newNodeCapacity; TestBalancer.waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); - Map> namenodes = DFSUtil - .getNNServiceRpcAddresses(conf); + Collection namenodes = DFSUtil.getNsServiceRpcUris(conf); + assertEquals(1, namenodes.size()); + assertTrue(namenodes.contains(HATestUtil.getLogicalUri(cluster))); final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, conf); assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java index 0245615a487..b130e027b04 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java @@ -18,10 +18,10 @@ package org.apache.hadoop.hdfs.server.balancer; import java.io.IOException; -import java.net.InetSocketAddress; +import java.net.URI; import java.util.Arrays; +import java.util.Collection; import java.util.List; -import java.util.Map; import java.util.Random; import org.apache.commons.logging.Log; @@ -40,8 +40,8 @@ import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import 
org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.LeaseManager; import org.apache.hadoop.hdfs.server.namenode.NameNode; @@ -157,8 +157,7 @@ public class TestBalancerWithMultipleNameNodes { LOG.info("BALANCER 1"); // start rebalancing - final Map> namenodes = - DFSUtil.getNNServiceRpcAddresses(s.conf); + final Collection namenodes = DFSUtil.getNsServiceRpcUris(s.conf); final int r = Balancer.run(namenodes, Balancer.Parameters.DEFALUT, s.conf); Assert.assertEquals(Balancer.ReturnStatus.SUCCESS.code, r); @@ -252,7 +251,8 @@ public class TestBalancerWithMultipleNameNodes { final ExtendedBlock[][] blocks; { LOG.info("UNEVEN 1"); - final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + final MiniDFSCluster cluster = new MiniDFSCluster + .Builder(new Configuration(conf)) .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(2)) .numDataNodes(nDataNodes) .racks(racks) @@ -261,6 +261,7 @@ public class TestBalancerWithMultipleNameNodes { LOG.info("UNEVEN 2"); try { cluster.waitActive(); + DFSTestUtil.setFederatedConfiguration(cluster, conf); LOG.info("UNEVEN 3"); final Suite s = new Suite(cluster, nNameNodes, nDataNodes, conf); blocks = generateBlocks(s, usedSpacePerNN); @@ -327,13 +328,15 @@ public class TestBalancerWithMultipleNameNodes { Assert.assertEquals(nDataNodes, racks.length); LOG.info("RUN_TEST -1"); - final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + final MiniDFSCluster cluster = new MiniDFSCluster + .Builder(new Configuration(conf)) .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(nNameNodes)) .numDataNodes(nDataNodes) .racks(racks) .simulatedCapacities(capacities) .build(); LOG.info("RUN_TEST 0"); + DFSTestUtil.setFederatedConfiguration(cluster, conf); try { cluster.waitActive(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java index 42b5612571a..bf919cea7f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/HATestUtil.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; @@ -188,6 +189,12 @@ public abstract class HATestUtil { return String.format(LOGICAL_HOSTNAME, cluster.getInstanceId()); } + public static URI getLogicalUri(MiniDFSCluster cluster) + throws URISyntaxException { + return new URI(HdfsConstants.HDFS_URI_SCHEME + "://" + + getLogicalHostname(cluster)); + } + public static void waitForCheckpoint(MiniDFSCluster cluster, int nnIdx, List txids) throws InterruptedException { long start = System.currentTimeMillis(); From 6e5e1e54bcd416080da404be8ade658d7b81c4c9 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 1 Mar 2012 18:23:19 +0000 Subject: [PATCH 174/177] Amend HDFS-3023. Add new layout version number to ImageLoaderCurrent to fix TestOfflineImageViewer. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1295721 13f79535-47bb-0310-9956-ffa450edef68 --- .../hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java index 8960cbce31a..fdc9892e1a1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/ImageLoaderCurrent.java @@ -122,7 +122,8 @@ class ImageLoaderCurrent implements ImageLoader { protected final DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm"); private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23, - -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39}; + -24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, + -40}; private int imageVersion = 0; /* (non-Javadoc) From 55832c6c96a22f68b505f858485083a350da09a6 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Thu, 1 Mar 2012 18:57:58 +0000 Subject: [PATCH 175/177] HDFS-3035. Fix failure of TestFileAppendRestart due to OP_UPDATE_BLOCKS. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1295740 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../hadoop/hdfs/TestFileAppendRestart.java | 21 +++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index a0d3c1785b8..19551ba4aca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -246,3 +246,5 @@ HDFS-3027. Implement a simple NN health check. (atm) HDFS-3023. Optimize entries in edits log for persistBlocks call. (todd) HDFS-2979. Balancer should use logical uri for creating failover proxy with HA enabled. (atm) + +HDFS-3035. 
Fix failure of TestFileAppendRestart due to OP_UPDATE_BLOCKS (todd) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java index 816332d0a75..e10eab8c57d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileAppendRestart.java @@ -99,27 +99,30 @@ public class TestFileAppendRestart { counts = FSImageTestUtil.countEditLogOpTypes(editLog); // OP_ADD to create file - // OP_ADD for first block + // OP_UPDATE_BLOCKS for first block // OP_CLOSE to close file // OP_ADD to reopen file - // OP_ADD for second block + // OP_UPDATE_BLOCKS for second block // OP_CLOSE to close file - assertEquals(4, (int)counts.get(FSEditLogOpCodes.OP_ADD).held); + assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_ADD).held); + assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_UPDATE_BLOCKS).held); assertEquals(2, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held); Path p2 = new Path("/not-block-boundaries"); writeAndAppend(fs, p2, BLOCK_SIZE/2, BLOCK_SIZE); counts = FSImageTestUtil.countEditLogOpTypes(editLog); // OP_ADD to create file - // OP_ADD for first block + // OP_UPDATE_BLOCKS for first block // OP_CLOSE to close file // OP_ADD to re-establish the lease - // OP_ADD from the updatePipeline call (increments genstamp of last block) - // OP_ADD at the start of the second block + // OP_UPDATE_BLOCKS from the updatePipeline call (increments genstamp of last block) + // OP_UPDATE_BLOCKS at the start of the second block // OP_CLOSE to close file - // Total: 5 OP_ADDs and 2 OP_CLOSEs in addition to the ones above - assertEquals(9, (int)counts.get(FSEditLogOpCodes.OP_ADD).held); - assertEquals(4, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held); + // Total: 2 OP_ADDs, 3 OP_UPDATE_BLOCKS, and 2 OP_CLOSEs in addition + // to the ones above + assertEquals(2+2, (int)counts.get(FSEditLogOpCodes.OP_ADD).held); + assertEquals(2+3, (int)counts.get(FSEditLogOpCodes.OP_UPDATE_BLOCKS).held); + assertEquals(2+2, (int)counts.get(FSEditLogOpCodes.OP_CLOSE).held); cluster.restartNameNode(); From 7be4e5bd222c6f1c40f88ee8b24b1587e157a87e Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Fri, 2 Mar 2012 01:32:49 +0000 Subject: [PATCH 176/177] HDFS-3039. Address findbugs and javadoc warnings on branch. Contributed by Todd Lipcon. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1296017 13f79535-47bb-0310-9956-ffa450edef68 --- .../dev-support/findbugsExcludeFile.xml | 6 +++- .../hadoop/ha/ActiveStandbyElector.java | 2 +- .../java/org/apache/hadoop/ha/HAAdmin.java | 3 +- .../apache/hadoop/ha/SshFenceByTcpPort.java | 2 +- .../org/apache/hadoop/util/ThreadUtil.java | 2 +- .../hadoop-hdfs/CHANGES.HDFS-1623.txt | 2 ++ .../dev-support/findbugsExcludeFile.xml | 8 +++++ .../java/org/apache/hadoop/hdfs/HAUtil.java | 3 +- .../apache/hadoop/hdfs/NameNodeProxies.java | 9 ++--- .../server/blockmanagement/BlockManager.java | 2 +- .../hdfs/server/datanode/BPOfferService.java | 4 +-- .../hdfs/server/namenode/FSEditLog.java | 35 +++++++++---------- .../hdfs/server/namenode/FSNamesystem.java | 4 +-- .../server/namenode/FileJournalManager.java | 8 +++-- .../server/namenode/NamenodeJspHelper.java | 4 +-- 15 files changed, 55 insertions(+), 39 deletions(-) diff --git a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml index 3624c99871f..855b0284537 100644 --- a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml +++ b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml @@ -278,8 +278,12 @@ - + + + + + diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java index e91c4ce9926..7da2d3e1bfd 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -234,7 +234,7 @@ public class ActiveStandbyElector implements Watcher, StringCallback, /** * Exception thrown when there is no active leader */ - public class ActiveNotFoundException extends Exception { + public static class ActiveNotFoundException extends Exception { private static final long serialVersionUID = 3505396722342846462L; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java index dedbebb58b5..3350692d683 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAAdmin.java @@ -262,8 +262,7 @@ public abstract class HAAdmin extends Configured implements Tool { return -1; } - int i = 0; - String cmd = argv[i++]; + String cmd = argv[0]; if (!cmd.startsWith("-")) { errOut.println("Bad command '" + cmd + "': expected command starting with '-'"); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java index 88404b92fd4..cec731cf20b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/SshFenceByTcpPort.java @@ -76,7 +76,7 @@ public class SshFenceByTcpPort extends Configured if (argStr != null) { // Use a dummy service when checking the arguments defined // in the configuration are parseable. 
- Args args = new Args(new InetSocketAddress("localhost", 8020), argStr); + new Args(new InetSocketAddress("localhost", 8020), argStr); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java index 535ac341223..6e4dfafdf73 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ThreadUtil.java @@ -30,7 +30,7 @@ public class ThreadUtil { /** * Cause the current thread to sleep as close as possible to the provided * number of milliseconds. This method will log and ignore any - * {@link InterrupedException} encountered. + * {@link InterruptedException} encountered. * * @param millis the number of milliseconds for the current thread to sleep */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt index 19551ba4aca..3e59df7433d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.HDFS-1623.txt @@ -248,3 +248,5 @@ HDFS-3023. Optimize entries in edits log for persistBlocks call. (todd) HDFS-2979. Balancer should use logical uri for creating failover proxy with HA enabled. (atm) HDFS-3035. Fix failure of TestFileAppendRestart due to OP_UPDATE_BLOCKS (todd) + +HDFS-3039. Address findbugs and javadoc warnings on branch. (todd via atm) diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml index 9b5d6df12e4..5590055539f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml @@ -247,4 +247,12 @@ + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 30792984d17..34e9d2e9dd5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -99,7 +99,8 @@ public class HAUtil { nsId, null, DFSUtil.LOCAL_ADDRESS_MATCHER); if (suffixes == null) { String msg = "Configuration " + DFS_NAMENODE_RPC_ADDRESS_KEY + - " must be suffixed with" + namenodeId + " for HA configuration."; + " must be suffixed with nameservice and namenode ID for HA " + + "configuration."; throw new HadoopIllegalArgumentException(msg); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java index d895734f332..650c313c0ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java @@ -63,7 +63,8 @@ public class NameNodeProxies { /** * Wrapper for a client proxy as well as its associated service ID. * This is simply used as a tuple-like return type for - * {@link createProxy} and {@link createNonHaProxy}. + * {@link NameNodeProxies#createProxy} and + * {@link NameNodeProxies#createNonHAProxy}. 
*/ public static class ProxyAndInfo { private final PROXYTYPE proxy; @@ -125,7 +126,7 @@ public class NameNodeProxies { /** * Creates an explicitly non-HA-enabled proxy object. Most of the time you - * don't want to use this, and should instead use {@link createProxy}. + * don't want to use this, and should instead use {@link NameNodeProxies#createProxy}. * * @param conf the configuration object * @param nnAddr address of the remote NN to connect to @@ -160,8 +161,8 @@ public class NameNodeProxies { conf, ugi); } else { String message = "Upsupported protocol found when creating the proxy " + - "conection to NameNode: " + - ((xface != null) ? xface.getClass().getName() : xface); + "connection to NameNode: " + + ((xface != null) ? xface.getClass().getName() : "null"); LOG.error(message); throw new IllegalStateException(message); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index cbae6f2246c..1c9b2aad4f6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -1918,7 +1918,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block int numCurrentReplica = countLiveNodes(storedBlock); if (storedBlock.getBlockUCState() == BlockUCState.COMMITTED && numCurrentReplica >= minReplication) { - storedBlock = completeBlock(storedBlock.getINode(), storedBlock, false); + completeBlock(storedBlock.getINode(), storedBlock, false); } else if (storedBlock.isComplete()) { // check whether safe replication is reached for the block // only complete blocks are counted towards that. 
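Several of the findbugs warnings addressed by HDFS-3039 in the hunks that follow (BPOfferService, FSEditLog, FSNamesystem) are of the "inconsistent synchronization" kind: a field written under the object lock on some paths but read without it on others, with the branch's fix being to synchronize the remaining accessors. A minimal compile-only sketch of that pattern and its fix is below; the class and field names are hypothetical and only illustrate the warning, not code from the branch.

```java
// Hypothetical illustration of the IS2_INCONSISTENT_SYNC pattern findbugs
// reports, and the fix applied in this patch: every access to the guarded
// field must hold the same lock.
class RegistrationHolder {
  // Written only while holding "this"...
  private String registrationId;

  synchronized void register(String id) {
    this.registrationId = id;
  }

  // BEFORE: unsynchronized read -> inconsistent-synchronization warning.
  // String getRegistrationId() { return registrationId; }

  // AFTER: synchronized getter, analogous to the change made to
  // BPOfferService.getNamespaceInfo() and createRegistration() below.
  synchronized String getRegistrationId() {
    return registrationId;
  }
}
```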
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index aaba4fff2ad..27567b543fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -173,7 +173,7 @@ class BPOfferService { } } - NamespaceInfo getNamespaceInfo() { + synchronized NamespaceInfo getNamespaceInfo() { return bpNSInfo; } @@ -366,7 +366,7 @@ class BPOfferService { } } - DatanodeRegistration createRegistration() { + synchronized DatanodeRegistration createRegistration() { Preconditions.checkState(bpNSInfo != null, "getRegistration() can only be called after initial handshake"); return dn.createBPRegistration(bpNSInfo); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 3572226d8c5..7c630d70db6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -188,7 +188,7 @@ public class FSEditLog { this.sharedEditsDirs = FSNamesystem.getSharedEditsDirs(conf); } - public void initJournalsForWrite() { + public synchronized void initJournalsForWrite() { Preconditions.checkState(state == State.UNINITIALIZED || state == State.CLOSED, "Unexpected state: %s", state); @@ -196,7 +196,7 @@ public class FSEditLog { state = State.BETWEEN_LOG_SEGMENTS; } - public void initSharedJournalsForRead() { + public synchronized void initSharedJournalsForRead() { if (state == State.OPEN_FOR_READING) { LOG.warn("Initializing shared journals for READ, already open for READ", new Exception()); @@ -209,7 +209,7 @@ public class FSEditLog { state = State.OPEN_FOR_READING; } - private void initJournals(List dirs) { + private synchronized void initJournals(List dirs) { int minimumRedundantJournals = conf.getInt( DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_MINIMUM_KEY, DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_MINIMUM_DEFAULT); @@ -808,7 +808,7 @@ public class FSEditLog { * Used only by unit tests. */ @VisibleForTesting - List getJournals() { + synchronized List getJournals() { return journalSet.getAllJournalStreams(); } @@ -816,7 +816,7 @@ public class FSEditLog { * Used only by tests. */ @VisibleForTesting - public JournalSet getJournalSet() { + synchronized public JournalSet getJournalSet() { return journalSet; } @@ -950,17 +950,14 @@ public class FSEditLog { /** * Archive any log files that are older than the given txid. */ - public void purgeLogsOlderThan(final long minTxIdToKeep) { - synchronized (this) { - // synchronized to prevent findbugs warning about inconsistent - // synchronization. This will be JIT-ed out if asserts are - // off. 
- assert curSegmentTxId == HdfsConstants.INVALID_TXID || // on format this is no-op - minTxIdToKeep <= curSegmentTxId : - "cannot purge logs older than txid " + minTxIdToKeep + - " when current segment starts at " + curSegmentTxId; - } + public synchronized void purgeLogsOlderThan(final long minTxIdToKeep) { + assert curSegmentTxId == HdfsConstants.INVALID_TXID || // on format this is no-op + minTxIdToKeep <= curSegmentTxId : + "cannot purge logs older than txid " + minTxIdToKeep + + " when current segment starts at " + curSegmentTxId; + // This could be improved to not need synchronization. But currently, + // journalSet is not threadsafe, so we need to synchronize this method. try { journalSet.purgeLogsOlderThan(minTxIdToKeep); } catch (IOException ex) { @@ -992,8 +989,8 @@ public class FSEditLog { // sets the initial capacity of the flush buffer. - public void setOutputBufferCapacity(int size) { - journalSet.setOutputBufferCapacity(size); + synchronized void setOutputBufferCapacity(int size) { + journalSet.setOutputBufferCapacity(size); } /** @@ -1069,7 +1066,7 @@ public class FSEditLog { /** * Run recovery on all journals to recover any unclosed segments */ - void recoverUnclosedStreams() { + synchronized void recoverUnclosedStreams() { Preconditions.checkState( state == State.BETWEEN_LOG_SEGMENTS, "May not recover segments - wrong state: %s", state); @@ -1092,7 +1089,7 @@ public class FSEditLog { * @param toAtLeast the selected streams must contain this transaction * @param inProgessOk set to true if in-progress streams are OK */ - public Collection selectInputStreams(long fromTxId, + public synchronized Collection selectInputStreams(long fromTxId, long toAtLeastTxId, boolean inProgressOk) throws IOException { List streams = new ArrayList(); EditLogInputStream stream = journalSet.getInputStream(fromTxId, inProgressOk); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 228a340f0c8..f22f8088251 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -494,7 +494,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, nnResourceChecker = new NameNodeResourceChecker(conf); checkAvailableResources(); assert safeMode != null && - !safeMode.initializedReplQueues; + !safeMode.isPopulatingReplQueues(); setBlockTotal(); blockManager.activate(conf); this.nnrmthread = new Daemon(new NameNodeResourceMonitor()); @@ -3801,7 +3801,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } - private void adjustBlockTotals(int deltaSafe, int deltaTotal) { + private synchronized void adjustBlockTotals(int deltaSafe, int deltaTotal) { if (!shouldIncrementallyTrackBlocks) { return; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java index eaaf65b5fc2..603dd000909 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FileJournalManager.java @@ -310,7 +310,9 @@ class FileJournalManager implements 
JournalManager { // file, but before writing anything to it. Safe to delete it. if (elf.getFile().length() == 0) { LOG.info("Deleting zero-length edit log file " + elf); - elf.getFile().delete(); + if (!elf.getFile().delete()) { + throw new IOException("Unable to delete file " + elf.getFile()); + } continue; } @@ -328,7 +330,9 @@ class FileJournalManager implements JournalManager { // delete the file. if (elf.getNumTransactions() == 0) { LOG.info("Deleting edit log file with zero transactions " + elf); - elf.getFile().delete(); + if (!elf.getFile().delete()) { + throw new IOException("Unable to delete " + elf.getFile()); + } continue; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java index 6b4701e0164..44c07510ba2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java @@ -315,10 +315,10 @@ class NamenodeJspHelper { // since the standby namenode doesn't compute replication queues String underReplicatedBlocks = ""; if (nn.getServiceState() == HAServiceState.ACTIVE) { - underReplicatedBlocks = new String(rowTxt() + underReplicatedBlocks = rowTxt() + colTxt("Excludes missing blocks.") + "Number of Under-Replicated Blocks" + colTxt() + ":" + colTxt() - + fsn.getBlockManager().getUnderReplicatedNotMissingBlocks()); + + fsn.getBlockManager().getUnderReplicatedNotMissingBlocks(); } out.print("
<div id=\"dfstable\"> <table>
        \n" + rowTxt() + colTxt() + "Configured Capacity" + colTxt() + ":" + colTxt() From ee486e8e43f4e45c6d0ea0b9814c88dbe3eebba2 Mon Sep 17 00:00:00 2001 From: Aaron Myers Date: Fri, 2 Mar 2012 23:09:07 +0000 Subject: [PATCH 177/177] Merge trunk into HA branch. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-1623@1296510 13f79535-47bb-0310-9956-ffa450edef68
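Taken together, the DFSUtil and Balancer changes earlier in this series (HDFS-2979) make tools resolve nameservices to URIs rather than socket addresses: an HA-enabled nameservice collapses to a single logical hdfs://nsId URI, a non-HA nameservice contributes the address found under the first matching key, and the generic (unsuffixed) keys act as a fallback. Below is a minimal sketch, modeled on TestDFSUtil#testGetNNUris, of how DFSUtil.getNameServiceUris behaves after this branch; the host names are illustrative only and not part of the patches.

```java
import java.net.URI;
import java.util.Collection;

import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;

import static org.apache.hadoop.hdfs.DFSConfigKeys.*;

public class NameServiceUriExample {
  public static void main(String[] args) {
    HdfsConfiguration conf = new HdfsConfiguration();

    // ns1 is HA (two NNs behind a logical name); ns2 is a plain federated
    // nameservice with a single service RPC address.
    conf.set(DFS_FEDERATION_NAMESERVICES, "ns1,ns2");
    conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns1"), "nn1,nn2");
    conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn1"),
        "ns1-nn1.example.com:8020");
    conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn2"),
        "ns1-nn2.example.com:8020");
    conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, "ns2"),
        "ns2-nn.example.com:8020");

    // Expected result: two URIs -- the logical hdfs://ns1 for the HA
    // nameservice and hdfs://ns2-nn.example.com:8020 for the non-HA one.
    Collection<URI> uris = DFSUtil.getNameServiceUris(conf,
        DFS_NAMENODE_RPC_ADDRESS_KEY, DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY);
    for (URI uri : uris) {
      System.out.println(uri);
    }
  }
}
```

A collection like this is exactly what the reworked Balancer entry point consumes: it obtains the URIs via DFSUtil.getNsServiceRpcUris(conf) and opens one NameNodeConnector per URI, so an HA nameservice is balanced through its logical URI and failover proxy rather than a single NameNode address.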