Merge r1438306 through r1440221 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1440222 13f79535-47bb-0310-9956-ffa450edef68
2013-01-29 23:53:59 +00:00 · 2013-01-29 23:53:59 +00:00 · f352b4ce0c
commit f352b4ce0c
parent 6f4fe09687 3e0bc281c8
44 changed files with 770 additions and 664 deletions
--- a/BUILDING.txt
+++ b/BUILDING.txt
@ -6,7 +6,6 @@ Requirements:
 * Unix System
 * JDK 1.6
 * Maven 3.0
-* Forrest 0.8 (if generating docs)
 * Findbugs 1.3.9 (if running findbugs)
 * ProtocolBuffer 2.4.1+ (for MapReduce and HDFS)
 * CMake 2.6 or newer (if compiling native code)
--- a/dev-support/relnotes.py
+++ b/dev-support/relnotes.py
@ -155,7 +155,7 @@ def __init__(self, versions):
    end=1
    count=100
    while (at < end):
-      params = urllib.urlencode({'jql': "project in (HADOOP,HDFS,MAPREDUCE,YARN) and fixVersion in ('"+"' , '".join(versions)+"') and resolution = Fixed", 'startAt':at+1, 'maxResults':count})
+      params = urllib.urlencode({'jql': "project in (HADOOP,HDFS,MAPREDUCE,YARN) and fixVersion in ('"+"' , '".join(versions)+"') and resolution = Fixed", 'startAt':at, 'maxResults':count})
      resp = urllib.urlopen("https://issues.apache.org/jira/rest/api/2/search?%s"%params)
      data = json.loads(resp.read())
      if (data.has_key('errorMessages')):
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@ -149,9 +149,6 @@ Trunk (Unreleased)
    HADOOP-8924. Add maven plugin alternative to shell script to save
    package-info.java. (Chris Nauroth via suresh)

-    HADOOP-9245. mvn clean without running mvn install before fails.
-    (Karthik Kambatla via suresh)
-
  BUG FIXES

    HADOOP-8419. Fixed GzipCode NPE reset for IBM JDK. (Yu Li via eyang)
@ -322,12 +319,23 @@ Trunk (Unreleased)
    HADOOP-9202. test-patch.sh fails during mvn eclipse:eclipse if patch adds
    a new module to the build (Chris Nauroth via bobby)

+    HADOOP-9245. mvn clean without running mvn install before fails.
+    (Karthik Kambatla via suresh)
+
+    HADOOP-9249. hadoop-maven-plugins version-info goal causes build failure
+    when running with Clover. (Chris Nauroth via suresh)
+
  OPTIMIZATIONS

    HADOOP-7761. Improve the performance of raw comparisons. (todd)

    HADOOP-8589 ViewFs tests fail when tests and home dirs are nested (sanjay Radia)

+    HADOOP-9246 Execution phase for hadoop-maven-plugin should be
+    process-resources (Karthik Kambatla and Chris Nauroth via jlowe)
+
+    HADOOP-9190. packaging docs is broken. (Andy Isaacson via atm)
+
 Release 2.0.3-alpha - Unreleased 

  INCOMPATIBLE CHANGES
@ -575,6 +583,9 @@ Release 2.0.3-alpha - Unreleased
    HADOOP-9215. when using cmake-2.6, libhadoop.so doesn't get created
    (only libhadoop.so.1.0.0) (Colin Patrick McCabe via todd)

+    HADOOP-8857. hadoop.http.authentication.signature.secret.file docs 
+    should not state that secret is randomly generated. (tucu)
+
 Release 2.0.2-alpha - 2012-09-07 

  INCOMPATIBLE CHANGES
@ -1293,6 +1304,10 @@ Release 0.23.6 - UNRELEASED
    HADOOP-9242. Duplicate surefire plugin config in hadoop-common.
    (Andrey Klochkov via suresh)

+    HADOOP-9247. Parametrize Clover "generateXxx" properties to make them
+    re-definable via -D in mvn calls. (Ivan A. Veselovsky via suresh)
+
+
  OPTIMIZATIONS

  BUG FIXES
@ -1310,6 +1325,8 @@ Release 0.23.6 - UNRELEASED

    HADOOP-9097. Maven RAT plugin is not checking all source files (tgraves)

+    HADOOP-9255. relnotes.py missing last jira (tgraves)
+
 Release 0.23.5 - 2012-11-28


--- a/hadoop-common-project/hadoop-common/pom.xml
+++ b/hadoop-common-project/hadoop-common/pom.xml
@ -274,7 +274,7 @@
        <executions>
          <execution>
            <id>version-info</id>
-            <phase>compile</phase>
+            <phase>generate-resources</phase>
            <goals>
              <goal>version-info</goal>
            </goals>
--- a/hadoop-common-project/hadoop-common/src/main/docs/forrest.properties
+++ b/hadoop-common-project/hadoop-common/src/main/docs/forrest.properties
@ -1,112 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-##############
-# Properties used by forrest.build.xml for building the website
-# These are the defaults, un-comment them if you need to change them.
-##############
-
-# Prints out a summary of Forrest settings for this project
-#forrest.echo=true 
-
-# Project name (used to name .war file)
-#project.name=my-project
-
-# Specifies name of Forrest skin to use
-#project.skin=tigris
-#project.skin=pelt
-
-# comma separated list, file:// is supported
-#forrest.skins.descriptors=http://forrest.apache.org/skins/skins.xml,file:///c:/myskins/skins.xml
-
-##############
-# behavioural properties
-#project.menu-scheme=tab_attributes
-#project.menu-scheme=directories
-
-##############
-# layout properties
-
-# Properties that can be set to override the default locations
-#
-# Parent properties must be set. This usually means uncommenting
-# project.content-dir if any other property using it is uncommented
-
-#project.status=status.xml
-#project.content-dir=src/documentation
-#project.raw-content-dir=${project.content-dir}/content
-#project.conf-dir=${project.content-dir}/conf
-#project.sitemap-dir=${project.content-dir}
-#project.xdocs-dir=${project.content-dir}/content/xdocs
-#project.resources-dir=${project.content-dir}/resources
-#project.stylesheets-dir=${project.resources-dir}/stylesheets
-#project.images-dir=${project.resources-dir}/images
-#project.schema-dir=${project.resources-dir}/schema
-#project.skins-dir=${project.content-dir}/skins
-#project.skinconf=${project.content-dir}/skinconf.xml
-#project.lib-dir=${project.content-dir}/lib
-#project.classes-dir=${project.content-dir}/classes
-#project.translations-dir=${project.content-dir}/translations
-
-##############
-# validation properties
-
-# This set of properties determine if validation is performed
-# Values are inherited unless overridden.
-# e.g. if forrest.validate=false then all others are false unless set to true.
-#forrest.validate=true
-#forrest.validate.xdocs=${forrest.validate}
-#forrest.validate.skinconf=${forrest.validate}
-# Workaround (HADOOP-7072) for http://issues.apache.org/jira/browse/FOR-984
-# Remove when forrest-0.9 is available
-forrest.validate.sitemap=false
-forrest.validate.stylesheets=false
-# End of forrest-0.8 + JDK6 workaround
-#forrest.validate.skins=${forrest.validate}
-forrest.validate.skins.stylesheets=false
-
-# *.failonerror=(true|false) - stop when an XML file is invalid
-#forrest.validate.failonerror=true
-
-# *.excludes=(pattern) - comma-separated list of path patterns to not validate
-# e.g.
-#forrest.validate.xdocs.excludes=samples/subdir/**, samples/faq.xml
-#forrest.validate.xdocs.excludes=
-
-
-##############
-# General Forrest properties
-
-# The URL to start crawling from
-#project.start-uri=linkmap.html
-# Set logging level for messages printed to the console
-# (DEBUG, INFO, WARN, ERROR, FATAL_ERROR)
-#project.debuglevel=ERROR
-# Max memory to allocate to Java
-#forrest.maxmemory=64m
-# Any other arguments to pass to the JVM. For example, to run on an X-less
-# server, set to -Djava.awt.headless=true
-#forrest.jvmargs=
-# The bugtracking URL - the issue number will be appended
-#project.bugtracking-url=http://issues.apache.org/bugzilla/show_bug.cgi?id=
-#project.bugtracking-url=http://issues.apache.org/jira/browse/
-# The issues list as rss
-#project.issues-rss-url=
-#I18n Property only works for the "forrest run" target.
-#project.i18n=true
-project.configfile=${project.home}/src/documentation/conf/cli.xconf
-
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@ -890,7 +890,6 @@
  <value>${user.home}/hadoop-http-auth-signature-secret</value>
  <description>
    The signature secret for signing the authentication tokens.
-    If not set a random secret is generated at startup time.
    The same secret should be used for JT/NN/DN/TT configurations.
  </description>
 </property>
--- a/hadoop-common-project/hadoop-common/src/site/apt/HttpAuthentication.apt.vm
+++ b/hadoop-common-project/hadoop-common/src/site/apt/HttpAuthentication.apt.vm
@ -64,10 +64,9 @@ Authentication for Hadoop HTTP web-consoles
   The default value is <<<36000>>>.

   <<<hadoop.http.authentication.signature.secret.file>>>: The signature secret
-   file for signing the authentication tokens. If not set a random secret is
-   generated at startup time. The same secret should be used for all nodes
-   in the cluster, JobTracker, NameNode, DataNode and TastTracker. The
-   default value is <<<${user.home}/hadoop-http-auth-signature-secret>>>.
+   file for signing the authentication tokens. The same secret should be used 
+   for all nodes in the cluster, JobTracker, NameNode, DataNode and TaskTracker. 
+   The default value is <<<${user.home}/hadoop-http-auth-signature-secret>>>.
   IMPORTANT: This file should be readable only by the Unix user running the
   daemons.

--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@ -497,6 +497,8 @@ Release 2.0.3-alpha - Unreleased
    HDFS-4403. DFSClient can infer checksum type when not provided by reading
    first byte (todd)

+    HDFS-4259. Improve pipeline DN replacement failure message (harsh)
+
  OPTIMIZATIONS

    HDFS-3429. DataNode reads checksums even if client does not need them (todd)
@ -728,6 +730,9 @@ Release 2.0.3-alpha - Unreleased
    HDFS-4359. Slow RPC responses from NN can prevent metrics collection on
    DNs. (liang xie via atm)

+    HDFS-4444. Add space between total transaction time and number of
+    transactions in FSEditLog#printStatistics. (Stephen Chu via suresh)
+
  BREAKDOWN OF HDFS-3077 SUBTASKS

    HDFS-3077. Quorum-based protocol for reading and writing edit logs.
@ -1377,6 +1382,9 @@ Release 2.0.2-alpha - 2012-09-07
    HDFS-3944. Httpfs resolveAuthority() is not resolving host correctly. (tucu)

    HDFS-3972. Trash emptier fails in secure HA cluster. (todd via eli)
+
+    HDFS-4443. Remove a trailing '`' character from the HTML code generated by
+    NamenodeJspHelper.generateNodeData(..).  (Christian Rohling via szetszwo)
 
  BREAKDOWN OF HDFS-3042 SUBTASKS

@ -2225,6 +2233,8 @@ Release 0.23.7 - UNRELEASED

  BUG FIXES

+    HDFS-4288. NN accepts incremental BR as IBR in safemode (daryn via kihwal)
+
 Release 0.23.6 - UNRELEASED

  INCOMPATIBLE CHANGES
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/forrest.properties
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/docs/forrest.properties
@ -1,112 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-##############
-# Properties used by forrest.build.xml for building the website
-# These are the defaults, un-comment them if you need to change them.
-##############
-
-# Prints out a summary of Forrest settings for this project
-#forrest.echo=true 
-
-# Project name (used to name .war file)
-#project.name=my-project
-
-# Specifies name of Forrest skin to use
-#project.skin=tigris
-#project.skin=pelt
-
-# comma separated list, file:// is supported
-#forrest.skins.descriptors=http://forrest.apache.org/skins/skins.xml,file:///c:/myskins/skins.xml
-
-##############
-# behavioural properties
-#project.menu-scheme=tab_attributes
-#project.menu-scheme=directories
-
-##############
-# layout properties
-
-# Properties that can be set to override the default locations
-#
-# Parent properties must be set. This usually means uncommenting
-# project.content-dir if any other property using it is uncommented
-
-#project.status=status.xml
-#project.content-dir=src/documentation
-#project.raw-content-dir=${project.content-dir}/content
-#project.conf-dir=${project.content-dir}/conf
-#project.sitemap-dir=${project.content-dir}
-#project.xdocs-dir=${project.content-dir}/content/xdocs
-#project.resources-dir=${project.content-dir}/resources
-#project.stylesheets-dir=${project.resources-dir}/stylesheets
-#project.images-dir=${project.resources-dir}/images
-#project.schema-dir=${project.resources-dir}/schema
-#project.skins-dir=${project.content-dir}/skins
-#project.skinconf=${project.content-dir}/skinconf.xml
-#project.lib-dir=${project.content-dir}/lib
-#project.classes-dir=${project.content-dir}/classes
-#project.translations-dir=${project.content-dir}/translations
-
-##############
-# validation properties
-
-# This set of properties determine if validation is performed
-# Values are inherited unless overridden.
-# e.g. if forrest.validate=false then all others are false unless set to true.
-#forrest.validate=true
-#forrest.validate.xdocs=${forrest.validate}
-#forrest.validate.skinconf=${forrest.validate}
-# Workaround (HADOOP-7072) for http://issues.apache.org/jira/browse/FOR-984
-# Remove when forrest-0.9 is available
-forrest.validate.sitemap=false
-forrest.validate.stylesheets=false
-# End of forrest-0.8 + JDK6 workaround
-#forrest.validate.skins=${forrest.validate}
-forrest.validate.skins.stylesheets=false
-
-# *.failonerror=(true|false) - stop when an XML file is invalid
-#forrest.validate.failonerror=true
-
-# *.excludes=(pattern) - comma-separated list of path patterns to not validate
-# e.g.
-#forrest.validate.xdocs.excludes=samples/subdir/**, samples/faq.xml
-#forrest.validate.xdocs.excludes=
-
-
-##############
-# General Forrest properties
-
-# The URL to start crawling from
-#project.start-uri=linkmap.html
-# Set logging level for messages printed to the console
-# (DEBUG, INFO, WARN, ERROR, FATAL_ERROR)
-#project.debuglevel=ERROR
-# Max memory to allocate to Java
-#forrest.maxmemory=64m
-# Any other arguments to pass to the JVM. For example, to run on an X-less
-# server, set to -Djava.awt.headless=true
-#forrest.jvmargs=
-# The bugtracking URL - the issue number will be appended
-#project.bugtracking-url=http://issues.apache.org/bugzilla/show_bug.cgi?id=
-#project.bugtracking-url=http://issues.apache.org/jira/browse/
-# The issues list as rss
-#project.issues-rss-url=
-#I18n Property only works for the "forrest run" target.
-#project.i18n=true
-project.configfile=${project.home}/src/documentation/conf/cli.xconf
-
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java
@ -786,13 +786,18 @@ private void setHflush() {
    private int findNewDatanode(final DatanodeInfo[] original
        ) throws IOException {
      if (nodes.length != original.length + 1) {
-        throw new IOException("Failed to add a datanode.  "
-            + "User may turn off this feature by setting "
-            + DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_POLICY_KEY
-            + " in configuration, where the current policy is "
-            + dfsClient.dtpReplaceDatanodeOnFailure
-            + ".  (Nodes: current=" + Arrays.asList(nodes)
-            + ", original=" + Arrays.asList(original) + ")");
+        throw new IOException(
+            new StringBuilder()
+            .append("Failed to replace a bad datanode on the existing pipeline ")
+            .append("due to no more good datanodes being available to try. ")
+            .append("(Nodes: current=").append(Arrays.asList(nodes))
+            .append(", original=").append(Arrays.asList(original)).append("). ")
+            .append("The current failed datanode replacement policy is ")
+            .append(dfsClient.dtpReplaceDatanodeOnFailure).append(", and ")
+            .append("a client may configure this via '")
+            .append(DFSConfigKeys.DFS_CLIENT_WRITE_REPLACE_DATANODE_ON_FAILURE_POLICY_KEY)
+            .append("' in its configuration.")
+            .toString());
      }
      for(int i = 0; i < nodes.length; i++) {
        int j = 0;
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java
@ -88,11 +88,7 @@ public void setBlockCollection(BlockCollection bc) {
  DatanodeDescriptor getDatanode(int index) {
    assert this.triplets != null : "BlockInfo is not initialized";
    assert index >= 0 && index*3 < triplets.length : "Index is out of bound";
-    DatanodeDescriptor node = (DatanodeDescriptor)triplets[index*3];
-    assert node == null || 
-        DatanodeDescriptor.class.getName().equals(node.getClass().getName()) : 
-              "DatanodeDescriptor is expected at " + index*3;
-    return node;
+    return (DatanodeDescriptor)triplets[index*3];
  }

  private BlockInfo getPrevious(int index) {
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@ -62,6 +62,7 @@
 import org.apache.hadoop.hdfs.server.namenode.FSClusterStats;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
 import org.apache.hadoop.hdfs.server.namenode.Namesystem;
+import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics;
 import org.apache.hadoop.hdfs.server.protocol.BlockCommand;
 import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
 import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations;
@ -1576,7 +1577,10 @@ public void processReport(final DatanodeID nodeID, final String poolId,
    }

    // Log the block report processing stats from Namenode perspective
-    NameNode.getNameNodeMetrics().addBlockReport((int) (endTime - startTime));
+    final NameNodeMetrics metrics = NameNode.getNameNodeMetrics();
+    if (metrics != null) {
+      metrics.addBlockReport((int) (endTime - startTime));
+    }
    blockLog.info("BLOCK* processReport: from "
        + nodeID + ", blocks: " + newReport.getNumberOfBlocks()
        + ", processing time: " + (endTime - startTime) + " msecs");
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
@ -547,6 +547,7 @@ public int getVolumeFailures() {
  @Override
  public void updateRegInfo(DatanodeID nodeReg) {
    super.updateRegInfo(nodeReg);
+    firstBlockReport = true; // must re-process IBR after re-registration
  }

  /**
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@ -419,7 +419,7 @@ boolean isDatanodeDead(DatanodeDescriptor node) {
  }

  /** Add a datanode. */
-  private void addDatanode(final DatanodeDescriptor node) {
+  void addDatanode(final DatanodeDescriptor node) {
    // To keep host2DatanodeMap consistent with datanodeMap,
    // remove  from host2DatanodeMap the datanodeDescriptor removed
    // from datanodeMap before adding node to host2DatanodeMap.
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java
@ -646,7 +646,7 @@ private void printStatistics(boolean force) {
    buf.append(numTransactions);
    buf.append(" Total time for transactions(ms): ");
    buf.append(totalTimeTransactions);
-    buf.append("Number of transactions batched in Syncs: ");
+    buf.append(" Number of transactions batched in Syncs: ");
    buf.append(numTransactionsBatchedInSync);
    buf.append(" Number of syncs: ");
    buf.append(editLogStream.getNumSync());
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java
@ -592,7 +592,7 @@ void generateNodeData(JspWriter out, DatanodeDescriptor d, String suffix,
          + "<td class=\"pcused\">"
          + ServletUtil.percentageGraph((int) Double.parseDouble(percentUsed),
              100) 
-          + "<td align=\"right\" class=\"pcremaining`\">"
+          + "<td align=\"right\" class=\"pcremaining\">"
          + percentRemaining 
          + "<td title=" + "\"blocks scheduled : "
          + d.getBlocksScheduled() + "\" class=\"blocks\">" + d.numBlocks()+"\n"
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
@ -34,13 +34,16 @@
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor.BlockTargetPair;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
+import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.net.NetworkTopology;
 import org.junit.Before;
 import org.junit.Test;
 import org.mockito.Mockito;
+import static org.mockito.Mockito.*;

 import com.google.common.base.Joiner;
 import com.google.common.collect.ImmutableList;
@ -485,4 +488,70 @@ public void testHighestPriReplSrcChosenDespiteMaxReplLimit() throws Exception {
            new NumberReplicas(),
            UnderReplicatedBlocks.QUEUE_HIGHEST_PRIORITY));
  }
+
+  @Test
+  public void testSafeModeIBR() throws Exception {
+    DatanodeDescriptor node = spy(nodes.get(0));
+    node.setStorageID("dummy-storage");
+    node.isAlive = true;
+
+    DatanodeRegistration nodeReg =
+        new DatanodeRegistration(node, null, null, "");
+
+    // pretend to be in safemode
+    doReturn(true).when(fsn).isInStartupSafeMode();
+    
+    // register new node
+    bm.getDatanodeManager().registerDatanode(nodeReg);
+    bm.getDatanodeManager().addDatanode(node); // swap in spy    
+    assertEquals(node, bm.getDatanodeManager().getDatanode(node));
+    assertTrue(node.isFirstBlockReport());
+    // send block report, should be processed
+    reset(node);
+    bm.processReport(node, "pool", new BlockListAsLongs(null, null));
+    verify(node).receivedBlockReport();
+    assertFalse(node.isFirstBlockReport());
+    // send block report again, should NOT be processed
+    reset(node);
+    bm.processReport(node, "pool", new BlockListAsLongs(null, null));
+    verify(node, never()).receivedBlockReport();
+    assertFalse(node.isFirstBlockReport());
+
+    // re-register as if node restarted, should update existing node
+    bm.getDatanodeManager().removeDatanode(node);
+    reset(node);
+    bm.getDatanodeManager().registerDatanode(nodeReg);
+    verify(node).updateRegInfo(nodeReg);
+    assertTrue(node.isFirstBlockReport()); // ready for report again
+    // send block report, should be processed after restart
+    reset(node);
+    bm.processReport(node, "pool", new BlockListAsLongs(null, null));
+    verify(node).receivedBlockReport();
+    assertFalse(node.isFirstBlockReport());
+  }
+  
+  @Test
+  public void testSafeModeIBRAfterIncremental() throws Exception {
+    DatanodeDescriptor node = spy(nodes.get(0));
+    node.setStorageID("dummy-storage");
+    node.isAlive = true;
+
+    DatanodeRegistration nodeReg =
+        new DatanodeRegistration(node, null, null, "");
+
+    // pretend to be in safemode
+    doReturn(true).when(fsn).isInStartupSafeMode();
+
+    // register new node
+    bm.getDatanodeManager().registerDatanode(nodeReg);
+    bm.getDatanodeManager().addDatanode(node); // swap in spy    
+    assertEquals(node, bm.getDatanodeManager().getDatanode(node));
+    assertTrue(node.isFirstBlockReport());
+    // send block report while pretending to already have blocks
+    reset(node);
+    doReturn(1).when(node).numBlocks();
+    bm.processReport(node, "pool", new BlockListAsLongs(null, null));
+    verify(node).receivedBlockReport();
+    assertFalse(node.isFirstBlockReport());
+  }
 }
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@ -11,16 +11,9 @@ Trunk (Unreleased)
    MAPREDUCE-2669. Add new examples for Mean, Median, and Standard Deviation.
    (Plamen Jeliazkov via shv)

-    MAPREDUCE-4049. Experimental api to allow for alternate shuffle plugins.
-    (Avner BenHanoch via acmurthy) 
-
-    MAPREDUCE-4807. Allow MapOutputBuffer to be pluggable. (masokan via tucu)
-
    MAPREDUCE-4887. Add RehashPartitioner, to smooth distributions
    with poor implementations of Object#hashCode().  (Radim Kolar via cutting)

-    MAPREDUCE-4808. Refactor MapOutput and MergeManager to facilitate reuse by Shuffle implementations. (masokan via tucu)
-
  IMPROVEMENTS

    MAPREDUCE-3787. [Gridmix] Optimize job monitoring and STRESS mode for
@ -78,9 +71,6 @@ Trunk (Unreleased)
    MAPREDUCE-4735. Make arguments in TestDFSIO case insensitive.
    (Brandon Li via suresh)

-    MAPREDUCE-4809. Change visibility of classes for pluggable sort changes. 
-    (masokan via tucu)
-
  BUG FIXES

    MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant.
@ -180,6 +170,14 @@ Release 2.0.3-alpha - Unreleased
    MAPREDUCE-4810. Added new admin command options for MR AM. (Jerry Chen via
    vinodkv)

+    MAPREDUCE-4049. Experimental api to allow for alternate shuffle plugins.
+    (Avner BenHanoch via acmurthy) 
+
+    MAPREDUCE-4807. Allow MapOutputBuffer to be pluggable. (masokan via tucu)
+
+    MAPREDUCE-4808. Refactor MapOutput and MergeManager to facilitate reuse 
+    by Shuffle implementations. (masokan via tucu)
+
  IMPROVEMENTS

    MAPREDUCE-3678. The Map tasks logs should have the value of input
@ -211,6 +209,12 @@ Release 2.0.3-alpha - Unreleased

    MAPREDUCE-4949. Enable multiple pi jobs to run in parallel. (sandyr via tucu)

+    MAPREDUCE-4809. Change visibility of classes for pluggable sort changes. 
+    (masokan via tucu)
+
+    MAPREDUCE-4838. Add additional fields like Locality, Avataar to the
+    JobHistory logs. (Zhijie Shen via sseth)
+
  OPTIMIZATIONS

  BUG FIXES
@ -268,6 +272,9 @@ Release 2.0.3-alpha - Unreleased
    MAPREDUCE-4948. Fix a failing unit test TestYARNRunner.testHistoryServerToken.
    (Junping Du via sseth)

+    MAPREDUCE-4803. Remove duplicate copy of TestIndexCache. (Mariappan Asokan
+    via sseth)
+
    MAPREDUCE-2264. Job status exceeds 100% in some cases. 
    (devaraj.k and sandyr via tucu)

--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java
@ -28,6 +28,7 @@
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReadWriteLock;
@ -1192,6 +1193,39 @@ private int getBlockSize() {
    }
  }
  */
+  /**
+    * Get the workflow adjacencies from the job conf.
+    * The string returned is of the form "key"="value" "key"="value" ...
+    */
+  private static String getWorkflowAdjacencies(Configuration conf) {
+    int prefixLen = MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING.length();
+    Map<String,String> adjacencies = 
+        conf.getValByRegex(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_PATTERN);
+    if (adjacencies.isEmpty()) {
+      return "";
+    }
+    int size = 0;
+    for (Entry<String,String> entry : adjacencies.entrySet()) {
+      int keyLen = entry.getKey().length();
+      size += keyLen - prefixLen;
+      size += entry.getValue().length() + 6;
+    }
+    StringBuilder sb = new StringBuilder(size);
+    for (Entry<String,String> entry : adjacencies.entrySet()) {
+      int keyLen = entry.getKey().length();
+      sb.append("\"");
+      sb.append(escapeString(entry.getKey().substring(prefixLen, keyLen)));
+      sb.append("\"=\"");
+      sb.append(escapeString(entry.getValue()));
+      sb.append("\" ");
+    }
+    return sb.toString();
+  }
+  
+  public static String escapeString(String data) {
+    return StringUtils.escapeString(data, StringUtils.ESCAPE_CHAR,
+        new char[] {'"', '=', '.'});
+  }

  public static class InitTransition 
      implements MultipleArcTransition<JobImpl, JobEvent, JobStateInternal> {
@ -1217,7 +1251,11 @@ public JobStateInternal transition(JobImpl job, JobEvent event) {
            job.conf.get(MRJobConfig.USER_NAME, "mapred"),
            job.appSubmitTime,
            job.remoteJobConfFile.toString(),
-            job.jobACLs, job.queueName);
+            job.jobACLs, job.queueName,
+            job.conf.get(MRJobConfig.WORKFLOW_ID, ""),
+            job.conf.get(MRJobConfig.WORKFLOW_NAME, ""),
+            job.conf.get(MRJobConfig.WORKFLOW_NODE_NAME, ""),
+            getWorkflowAdjacencies(job.conf));
        job.eventHandler.handle(new JobHistoryEvent(job.jobId, jse));
        //TODO JH Verify jobACLs, UserName via UGI?

--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
@ -66,6 +66,8 @@
 import org.apache.hadoop.mapreduce.jobhistory.TaskAttemptUnsuccessfulCompletionEvent;
 import org.apache.hadoop.mapreduce.security.TokenCache;
 import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;
+import org.apache.hadoop.mapreduce.v2.api.records.Avataar;
+import org.apache.hadoop.mapreduce.v2.api.records.Locality;
 import org.apache.hadoop.mapreduce.v2.api.records.Phase;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport;
@ -156,7 +158,8 @@ public abstract class TaskAttemptImpl implements
  private final org.apache.hadoop.mapred.JobID oldJobId;
  private final TaskAttemptListener taskAttemptListener;
  private final Resource resourceCapability;
-  private final String[] dataLocalHosts;
+  protected Set<String> dataLocalHosts;
+  protected Set<String> dataLocalRacks;
  private final List<String> diagnostics = new ArrayList<String>();
  private final Lock readLock;
  private final Lock writeLock;
@ -175,6 +178,8 @@ public abstract class TaskAttemptImpl implements
  private int shufflePort = -1;
  private String trackerName;
  private int httpPort;
+  private Locality locality;
+  private Avataar avataar;

  private static final CleanupContainerTransition CLEANUP_CONTAINER_TRANSITION =
    new CleanupContainerTransition();
@ -532,8 +537,16 @@ public TaskAttemptImpl(TaskId taskId, int i,
        getMemoryRequired(conf, taskId.getTaskType()));
    this.resourceCapability.setVirtualCores(
        getCpuRequired(conf, taskId.getTaskType()));
-    this.dataLocalHosts = dataLocalHosts;
+
+    this.dataLocalHosts = resolveHosts(dataLocalHosts);
    RackResolver.init(conf);
+    this.dataLocalRacks = new HashSet<String>(); 
+    for (String host : this.dataLocalHosts) {
+      this.dataLocalRacks.add(RackResolver.resolve(host).getNetworkLocation());
+    }
+
+    locality = Locality.OFF_SWITCH;
+    avataar = Avataar.VIRGIN;

    // This "this leak" is okay because the retained pointer is in an
    //  instance variable.
@ -1032,6 +1045,23 @@ public TaskAttemptStateInternal getInternalState() {
    }
  }

+  /**
+   * @return the {@link Locality} currently recorded for this task attempt
+   */
+  public Locality getLocality() {
+    return locality;
+  }
+  
+  /**
+   * Overwrites the locality recorded for this task attempt.
+   *
+   * @param locality the new locality value
+   */
+  public void setLocality(Locality locality) {
+    this.locality = locality;
+  }
+
+  /**
+   * @return the {@link Avataar} currently recorded for this task attempt
+   */
+  public Avataar getAvataar()
+  {
+    return avataar;
+  }
+  
+  /**
+   * Overwrites the avataar recorded for this task attempt.
+   *
+   * @param avataar the new avataar value
+   */
+  public void setAvataar(Avataar avataar) {
+    this.avataar = avataar;
+  }
+  
  private static TaskAttemptState getExternalState(
      TaskAttemptStateInternal smState) {
    switch (smState) {
@ -1232,25 +1262,27 @@ public void transition(TaskAttemptImpl taskAttempt,
                taskAttempt.attemptId, 
                taskAttempt.resourceCapability));
      } else {
-        Set<String> racks = new HashSet<String>(); 
-        for (String host : taskAttempt.dataLocalHosts) {
-          racks.add(RackResolver.resolve(host).getNetworkLocation());
-        }
        taskAttempt.eventHandler.handle(new ContainerRequestEvent(
-            taskAttempt.attemptId, taskAttempt.resourceCapability, taskAttempt
-                .resolveHosts(taskAttempt.dataLocalHosts), racks
-                .toArray(new String[racks.size()])));
+            taskAttempt.attemptId, taskAttempt.resourceCapability,
+            taskAttempt.dataLocalHosts.toArray(
+                new String[taskAttempt.dataLocalHosts.size()]),
+            taskAttempt.dataLocalRacks.toArray(
+                new String[taskAttempt.dataLocalRacks.size()])));
      }
    }
  }

-  protected String[] resolveHosts(String[] src) {
-    String[] result = new String[src.length];
-    for (int i = 0; i < src.length; i++) {
-      if (isIP(src[i])) {
-        result[i] = resolveHost(src[i]);
-      } else {
-        result[i] = src[i];
+  protected Set<String> resolveHosts(String[] src) {
+    Set<String> result = new HashSet<String>();
+    if (src != null) {
+      for (int i = 0; i < src.length; i++) {
+        if (src[i] == null) {
+          continue;
+        } else if (isIP(src[i])) {
+          result.add(resolveHost(src[i]));
+        } else {
+          result.add(src[i]);
+        }
      }
    }
    return result;
@ -1300,6 +1332,20 @@ public void transition(final TaskAttemptImpl taskAttempt,
          taskAttempt.remoteTask.isMapTask(), taskAttempt.containerID.getId());
      taskAttempt.taskAttemptListener.registerPendingTask(
          taskAttempt.remoteTask, taskAttempt.jvmID);
+
+      taskAttempt.locality = Locality.OFF_SWITCH;
+      if (taskAttempt.dataLocalHosts.size() > 0) {
+        String cHost = taskAttempt.resolveHost(
+            taskAttempt.containerNodeId.getHost());
+        if (taskAttempt.dataLocalHosts.contains(cHost)) {
+          taskAttempt.locality = Locality.NODE_LOCAL;
+        }
+      }
+      if (taskAttempt.locality == Locality.OFF_SWITCH) {
+        if (taskAttempt.dataLocalRacks.contains(taskAttempt.nodeRackName)) {
+          taskAttempt.locality = Locality.RACK_LOCAL;
+        }
+      }
      
      //launch the container
      //create the container object to be launched for a given Task attempt
@ -1376,7 +1422,7 @@ public void transition(TaskAttemptImpl taskAttempt,
            taskAttempt.attemptId.getTaskId().getJobId(), tauce));
      } else {
        LOG.debug("Not generating HistoryFinish event since start event not " +
-        		"generated for taskAttempt: " + taskAttempt.getID());
+            "generated for taskAttempt: " + taskAttempt.getID());
      }
    }
  }
@ -1421,7 +1467,8 @@ public void transition(TaskAttemptImpl taskAttempt,
            TypeConverter.fromYarn(taskAttempt.attemptId.getTaskId().getTaskType()),
            taskAttempt.launchTime,
            nodeHttpInetAddr.getHostName(), nodeHttpInetAddr.getPort(),
-            taskAttempt.shufflePort, taskAttempt.containerID);
+            taskAttempt.shufflePort, taskAttempt.containerID,
+            taskAttempt.locality.toString(), taskAttempt.avataar.toString());
      taskAttempt.eventHandler.handle
          (new JobHistoryEvent(taskAttempt.attemptId.getTaskId().getJobId(), tase));
      taskAttempt.eventHandler.handle
@ -1510,7 +1557,7 @@ public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) {
        // handling failed map/reduce events.
      }else {
        LOG.debug("Not generating HistoryFinish event since start event not " +
-        		"generated for taskAttempt: " + taskAttempt.getID());
+            "generated for taskAttempt: " + taskAttempt.getID());
      }
      taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
          taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED));
@ -1580,7 +1627,7 @@ public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) {
            taskAttempt.attemptId.getTaskId().getJobId(), tauce));
      }else {
        LOG.debug("Not generating HistoryFinish event since start event not " +
-        		"generated for taskAttempt: " + taskAttempt.getID());
+            "generated for taskAttempt: " + taskAttempt.getID());
      }
      taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
          taskAttempt.attemptId, TaskEventType.T_ATTEMPT_FAILED));
@ -1648,7 +1695,7 @@ public void transition(TaskAttemptImpl taskAttempt,
            taskAttempt.attemptId.getTaskId().getJobId(), tauce));
      }else {
        LOG.debug("Not generating HistoryFinish event since start event not " +
-        		"generated for taskAttempt: " + taskAttempt.getID());
+            "generated for taskAttempt: " + taskAttempt.getID());
      }
 //      taskAttempt.logAttemptFinishedEvent(TaskAttemptStateInternal.KILLED); Not logging Map/Reduce attempts in case of failure.
      taskAttempt.eventHandler.handle(new TaskTAttemptEvent(
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskImpl.java
@ -46,6 +46,7 @@
 import org.apache.hadoop.mapreduce.jobhistory.TaskFinishedEvent;
 import org.apache.hadoop.mapreduce.jobhistory.TaskStartedEvent;
 import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;
+import org.apache.hadoop.mapreduce.v2.api.records.Avataar;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEvent;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptCompletionEventStatus;
@ -594,8 +595,9 @@ protected TaskAttempt getSuccessfulAttempt() {
  }

  // This is always called in the Write Lock
-  private void addAndScheduleAttempt() {
+  private void addAndScheduleAttempt(Avataar avataar) {
    TaskAttempt attempt = createAttempt();
+    ((TaskAttemptImpl) attempt).setAvataar(avataar);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Created attempt " + attempt.getID());
    }
@ -749,7 +751,7 @@ private static class InitialScheduleTransition

    @Override
    public void transition(TaskImpl task, TaskEvent event) {
-      task.addAndScheduleAttempt();
+      task.addAndScheduleAttempt(Avataar.VIRGIN);
      task.scheduledTime = task.clock.getTime();
      TaskStartedEvent tse = new TaskStartedEvent(
          TypeConverter.fromYarn(task.taskId), task.getLaunchTime(),
@ -772,7 +774,7 @@ private static class RedundantScheduleTransition
    @Override
    public void transition(TaskImpl task, TaskEvent event) {
      LOG.info("Scheduling a redundant attempt for task " + task.taskId);
-      task.addAndScheduleAttempt();
+      task.addAndScheduleAttempt(Avataar.SPECULATIVE);
    }
  }

@ -849,7 +851,7 @@ public void transition(TaskImpl task, TaskEvent event) {
      task.finishedAttempts.add(taskAttemptId);
      task.inProgressAttempts.remove(taskAttemptId);
      if (task.successfulAttempt == null) {
-        task.addAndScheduleAttempt();
+        task.addAndScheduleAttempt(Avataar.VIRGIN);
      }
    }
  }
@ -937,7 +939,7 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) {
        task.inProgressAttempts.remove(taskAttemptId);
        if (task.inProgressAttempts.size() == 0
            && task.successfulAttempt == null) {
-          task.addAndScheduleAttempt();
+          task.addAndScheduleAttempt(Avataar.VIRGIN);
        }
      } else {
        task.handleTaskAttemptCompletion(
@ -1053,7 +1055,7 @@ public TaskStateInternal transition(TaskImpl task, TaskEvent event) {
      // from the map splitInfo. So the bad node might be sent as a location
      // to the RM. But the RM would ignore that just like it would ignore
      // currently pending container requests affinitized to bad nodes.
-      task.addAndScheduleAttempt();
+      task.addAndScheduleAttempt(Avataar.VIRGIN);
      return TaskStateInternal.SCHEDULED;
    }
  }
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java
@ -67,7 +67,6 @@
 import org.apache.hadoop.yarn.api.records.NodeReport;
 import org.apache.hadoop.yarn.api.records.NodeState;
 import org.apache.hadoop.yarn.api.records.Priority;
-import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
 import org.apache.hadoop.yarn.util.RackResolver;

--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestJobImpl.java
@ -33,6 +33,9 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.JobACL;
 import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.jobhistory.EventType;
+import org.apache.hadoop.mapreduce.jobhistory.JobHistoryEvent;
+import org.apache.hadoop.mapreduce.jobhistory.JobSubmittedEvent;
 import org.apache.hadoop.mapreduce.JobID;
 import org.apache.hadoop.mapreduce.JobStatus.State;
 import org.apache.hadoop.mapreduce.MRConfig;
@ -66,6 +69,7 @@
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.event.AsyncDispatcher;
 import org.apache.hadoop.yarn.event.Dispatcher;
+import org.apache.hadoop.yarn.event.Event;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.state.StateMachine;
 import org.apache.hadoop.yarn.state.StateMachineFactory;
@ -105,6 +109,13 @@ public void testJobNoTasks() {
    Configuration conf = new Configuration();
    conf.setInt(MRJobConfig.NUM_REDUCES, 0);
    conf.set(MRJobConfig.MR_AM_STAGING_DIR, stagingDir);
+    conf.set(MRJobConfig.WORKFLOW_ID, "testId");
+    conf.set(MRJobConfig.WORKFLOW_NAME, "testName");
+    conf.set(MRJobConfig.WORKFLOW_NODE_NAME, "testNodeName");
+    conf.set(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING + "key1", "value1");
+    conf.set(MRJobConfig.WORKFLOW_ADJACENCY_PREFIX_STRING + "key2", "value2");
+    
+ 
    AsyncDispatcher dispatcher = new AsyncDispatcher();
    dispatcher.init(conf);
    dispatcher.start();
@ -114,6 +125,9 @@ public void testJobNoTasks() {
    commitHandler.init(conf);
    commitHandler.start();

+    JobSubmittedEventHandler jseHandler = new JobSubmittedEventHandler("testId",
+        "testName", "testNodeName", "\"key2\"=\"value2\" \"key1\"=\"value1\" ");
+    dispatcher.register(EventType.class, jseHandler);
    JobImpl job = createStubbedJob(conf, dispatcher, 0);
    job.handle(new JobEvent(job.getID(), JobEventType.JOB_INIT));
    assertJobState(job, JobStateInternal.INITED);
@ -121,6 +135,11 @@ public void testJobNoTasks() {
    assertJobState(job, JobStateInternal.SUCCEEDED);
    dispatcher.stop();
    commitHandler.stop();
+    try {
+      Assert.assertTrue(jseHandler.getAssertValue());
+    } catch (InterruptedException e) {
+      Assert.fail("Workflow related attributes are not tested properly");
+    }
  }

  @Test(timeout=20000)
@ -614,6 +633,67 @@ private static void assertJobState(JobImpl job, JobStateInternal state) {
    Assert.assertEquals(state, job.getInternalState());
  }

+  /**
+   * Test event handler that compares the workflow attributes carried by a
+   * {@link JobSubmittedEvent} with the values it was constructed with.
+   * Events of any other type are ignored. The outcome of the comparison is
+   * handed to the waiting test thread through {@link #getAssertValue()}.
+   */
+  private static class JobSubmittedEventHandler implements
+      EventHandler<JobHistoryEvent> {
+
+    // Expected values; fixed at construction time.
+    private final String workflowId;
+    private final String workflowName;
+    private final String workflowNodeName;
+    private final String workflowAdjacencies;
+
+    // Stays null until a JOB_SUBMITTED event has been checked.
+    // Only read and written while holding this object's monitor.
+    private Boolean assertBoolean;
+
+    /**
+     * @param workflowId expected workflow id
+     * @param workflowName expected workflow name
+     * @param workflowNodeName expected workflow node name
+     * @param workflowAdjacencies expected workflow adjacencies string
+     */
+    public JobSubmittedEventHandler(String workflowId, String workflowName,
+        String workflowNodeName, String workflowAdjacencies) {
+      this.workflowId = workflowId;
+      this.workflowName = workflowName;
+      this.workflowNodeName = workflowNodeName;
+      this.workflowAdjacencies = workflowAdjacencies;
+    }
+
+    /**
+     * Checks a JOB_SUBMITTED event against the expected workflow values and
+     * records whether all four attributes matched.
+     */
+    @Override
+    public void handle(JobHistoryEvent jhEvent) {
+      if (jhEvent.getType() != EventType.JOB_SUBMITTED) {
+        return;
+      }
+      JobSubmittedEvent jsEvent = (JobSubmittedEvent) jhEvent.getHistoryEvent();
+      setAssertValue(workflowId.equals(jsEvent.getWorkflowId())
+          && workflowName.equals(jsEvent.getWorkflowName())
+          && workflowNodeName.equals(jsEvent.getWorkflowNodeName())
+          && workflowAdjacencies.equals(jsEvent.getWorkflowAdjacencies()));
+    }
+
+    private synchronized void setAssertValue(boolean bool) {
+      assertBoolean = bool;
+      // notifyAll rather than notify so that every thread blocked in
+      // getAssertValue() is released, not just an arbitrary one.
+      notifyAll();
+    }
+
+    /**
+     * Blocks until a JOB_SUBMITTED event has been checked.
+     *
+     * @return true if all workflow attributes of the event matched
+     * @throws InterruptedException if interrupted while waiting
+     */
+    public synchronized boolean getAssertValue() throws InterruptedException {
+      while (assertBoolean == null) {
+        wait();
+      }
+      return assertBoolean;
+    }
+
+  }
+
  private static class StubbedJob extends JobImpl {
    //override the init transition
    private final InitTransition initTransition;
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java
@ -48,6 +48,7 @@
 import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.api.records.JobState;
+import org.apache.hadoop.mapreduce.v2.api.records.Locality;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptReport;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState;
@ -157,6 +158,7 @@ public void testHostResolveAttempt() throws Exception {
        createMapTaskAttemptImplForTest(eventHandler, splitInfo);
    TaskAttemptImpl spyTa = spy(mockTaskAttempt);
    when(spyTa.resolveHost(hosts[0])).thenReturn("host1");
+    spyTa.dataLocalHosts = spyTa.resolveHosts(splitInfo.getLocations());

    TaskAttemptEvent mockTAEvent = mock(TaskAttemptEvent.class);
    rct.transition(spyTa, mockTAEvent);
@ -360,6 +362,8 @@ public void testLaunchFailedWhileKilling() throws Exception {
    taImpl.handle(new TaskAttemptEvent(attemptId,
        TaskAttemptEventType.TA_CONTAINER_LAUNCH_FAILED));
    assertFalse(eventHandler.internalError);
+    assertEquals("Task attempt is not assigned on the local node", 
+        Locality.NODE_LOCAL, taImpl.getLocality());
  }

  @Test
@ -398,7 +402,7 @@ public void testContainerCleanedWhileRunning() throws Exception {
          mock(Token.class), new Credentials(),
          new SystemClock(), appCtx);

-    NodeId nid = BuilderUtils.newNodeId("127.0.0.1", 0);
+    NodeId nid = BuilderUtils.newNodeId("127.0.0.2", 0);
    ContainerId contId = BuilderUtils.newContainerId(appAttemptId, 3);
    Container container = mock(Container.class);
    when(container.getId()).thenReturn(contId);
@ -416,6 +420,8 @@ public void testContainerCleanedWhileRunning() throws Exception {
        TaskAttemptEventType.TA_CONTAINER_CLEANED));
    assertFalse("InternalError occurred trying to handle TA_CONTAINER_CLEANED",
        eventHandler.internalError);
+    assertEquals("Task attempt is not assigned on the local rack",
+        Locality.RACK_LOCAL, taImpl.getLocality());
  }

  @Test
@ -439,7 +445,7 @@ public void testContainerCleanedWhileCommitting() throws Exception {
    jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");

    TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
-    when(splits.getLocations()).thenReturn(new String[] {"127.0.0.1"});
+    when(splits.getLocations()).thenReturn(new String[] {});

    AppContext appCtx = mock(AppContext.class);
    ClusterInfo clusterInfo = mock(ClusterInfo.class);
@ -475,6 +481,8 @@ public void testContainerCleanedWhileCommitting() throws Exception {
        TaskAttemptEventType.TA_CONTAINER_CLEANED));
    assertFalse("InternalError occurred trying to handle TA_CONTAINER_CLEANED",
        eventHandler.internalError);
+    assertEquals("Task attempt is assigned locally", Locality.OFF_SWITCH,
+        taImpl.getLocality());
  }

  @Test
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskImpl.java
@ -38,6 +38,7 @@
 import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;
 import org.apache.hadoop.mapreduce.security.token.JobTokenIdentifier;
 import org.apache.hadoop.mapreduce.split.JobSplit.TaskSplitMetaInfo;
+import org.apache.hadoop.mapreduce.v2.api.records.Avataar;
 import org.apache.hadoop.mapreduce.v2.api.records.JobId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
 import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState;
@ -46,10 +47,12 @@
 import org.apache.hadoop.mapreduce.v2.api.records.TaskType;
 import org.apache.hadoop.mapreduce.v2.app.AppContext;
 import org.apache.hadoop.mapreduce.v2.app.TaskAttemptListener;
+import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt;
 import org.apache.hadoop.mapreduce.v2.app.job.TaskStateInternal;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
 import org.apache.hadoop.mapreduce.v2.app.job.event.TaskTAttemptEvent;
+import org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl;
 import org.apache.hadoop.mapreduce.v2.app.metrics.MRAppMetrics;
 import org.apache.hadoop.security.Credentials;
 import org.apache.hadoop.security.token.Token;
@ -254,6 +257,7 @@ private void scheduleTaskAttempt(TaskId taskId) {
    mockTask.handle(new TaskEvent(taskId, 
        TaskEventType.T_SCHEDULE));
    assertTaskScheduledState();
+    assertTaskAttemptAvataar(Avataar.VIRGIN);
  }
  
  private void killTask(TaskId taskId) {
@ -338,6 +342,19 @@ private void assertTaskKillWaitState() {
  private void assertTaskSucceededState() {
    assertEquals(TaskState.SUCCEEDED, mockTask.getState());
  }
+
+  /**
+   * Fails the test unless at least one attempt of the task under test
+   * carries the given {@link Avataar}.
+   *
+   * @param avataar the avataar that some task attempt must have
+   */
+  private void assertTaskAttemptAvataar(Avataar avataar) {
+    for (TaskAttempt taskAttempt : mockTask.getAttempts().values()) {
+      if (((TaskAttemptImpl) taskAttempt).getAvataar() == avataar) {
+        return;
+      }
+    }
+    // Leading space keeps the message readable ("... virgin task attempt").
+    fail("There is no " + (avataar == Avataar.VIRGIN ? "virgin" : "speculative")
+        + " task attempt");
+  }
  
  @Test
  public void testInit() {
@ -516,6 +533,9 @@ private void runSpeculativeTaskAttemptSucceeds(
    
    // The task should still be in the succeeded state
    assertTaskSucceededState();
+    
+    // The task should contain a speculative task attempt
+    assertTaskAttemptAvataar(Avataar.SPECULATIVE);
  }
  
  @Test
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/Avataar.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/Avataar.java
@ -0,0 +1,24 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.v2.api.records;
+
+/**
+ * Tag recorded on a task attempt and written, as a string, into the
+ * task-attempt-started history event.
+ */
+public enum Avataar {
+  // Value a task attempt starts with; also passed by TaskImpl for the
+  // initial attempt and for attempts rescheduled after a failure or kill.
+  VIRGIN,
+  // Passed by TaskImpl when it schedules a redundant attempt for a task.
+  SPECULATIVE
+}
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/Locality.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/api/records/Locality.java
@ -0,0 +1,25 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.mapreduce.v2.api.records;
+
+/**
+ * Placement of a task attempt's container relative to the attempt's
+ * data-local hosts and racks. Written, as a string, into the
+ * task-attempt-started history event.
+ */
+public enum Locality {
+  // The container's host is one of the attempt's data-local hosts.
+  NODE_LOCAL,
+  // Not node-local, but the node's rack is one of the data-local racks.
+  RACK_LOCAL,
+  // Neither of the above; also the value an attempt starts with.
+  OFF_SWITCH
+}
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/avro/Events.avpr
@ -91,7 +91,11 @@
                                    "values": "string"
                                   }
          },
-          {"name": "jobQueueName", "type": "string"}
+          {"name": "jobQueueName", "type": "string"},
+          {"name": "workflowId", "type": "string"},
+          {"name": "workflowName", "type": "string"},
+          {"name": "workflowNodeName", "type": "string"},
+          {"name": "workflowAdjacencies", "type": "string"}
      ]
     },

@ -191,7 +195,9 @@
          {"name": "trackerName", "type": "string"},
          {"name": "httpPort", "type": "int"},
          {"name": "shufflePort", "type": "int"},
-          {"name": "containerId", "type": "string"}
+          {"name": "containerId", "type": "string"},
+          {"name": "locality", "type": "string"},
+          {"name": "avataar", "type": "string"}
      ]
     },

--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
@ -647,5 +647,18 @@ public interface MRJobConfig {
      "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*",
      "$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*",
  };
+
+  /** Configuration key for the workflow id recorded at job submission. */
+  public static final String WORKFLOW_ID = "mapreduce.workflow.id";
+  
+  /** Configuration key for the workflow name recorded at job submission. */
+  public static final String WORKFLOW_NAME = "mapreduce.workflow.name";
+  
+  /** Configuration key for the workflow node name recorded at job submission. */
+  public static final String WORKFLOW_NODE_NAME =
+      "mapreduce.workflow.node.name";
+  
+  /**
+   * Common prefix of the configuration keys that carry workflow adjacency
+   * entries; the remainder of the key follows the trailing dot.
+   */
+  public static final String WORKFLOW_ADJACENCY_PREFIX_STRING =
+      "mapreduce.workflow.adjacency.";
+  
+  /**
+   * Regular expression matching keys that begin with
+   * "mapreduce.workflow.adjacency." followed by at least one character.
+   */
+  public static final String WORKFLOW_ADJACENCY_PREFIX_PATTERN =
+      "^mapreduce\\.workflow\\.adjacency\\..+";
  
 }
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSubmittedEvent.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobSubmittedEvent.java
@ -52,6 +52,29 @@ public class JobSubmittedEvent implements HistoryEvent {
  public JobSubmittedEvent(JobID id, String jobName, String userName,
      long submitTime, String jobConfPath,
      Map<JobACL, AccessControlList> jobACLs, String jobQueueName) {
+    this(id, jobName, userName, submitTime, jobConfPath, jobACLs,
+        jobQueueName, "", "", "", "");
+  }
+
+  /**
+   * Create an event to record job submission
+   * @param id The job Id of the job
+   * @param jobName Name of the job
+   * @param userName Name of the user who submitted the job
+   * @param submitTime Time of submission
+   * @param jobConfPath Path of the Job Configuration file
+   * @param jobACLs The configured acls for the job.
+   * @param jobQueueName The job-queue to which this job was submitted to
+   * @param workflowId The Id of the workflow
+   * @param workflowName The name of the workflow
+   * @param workflowNodeName The node name of the workflow
+   * @param workflowAdjacencies The adjacencies of the workflow
+   */
+  public JobSubmittedEvent(JobID id, String jobName, String userName,
+      long submitTime, String jobConfPath,
+      Map<JobACL, AccessControlList> jobACLs, String jobQueueName,
+      String workflowId, String workflowName, String workflowNodeName,
+      String workflowAdjacencies) {
    datum.jobid = new Utf8(id.toString());
    datum.jobName = new Utf8(jobName);
    datum.userName = new Utf8(userName);
@ -66,6 +89,18 @@ public JobSubmittedEvent(JobID id, String jobName, String userName,
    if (jobQueueName != null) {
      datum.jobQueueName = new Utf8(jobQueueName);
    }
+    if (workflowId != null) {
+      datum.workflowId = new Utf8(workflowId);
+    }
+    if (workflowName != null) {
+      datum.workflowName = new Utf8(workflowName);
+    }
+    if (workflowNodeName != null) {
+      datum.workflowNodeName = new Utf8(workflowNodeName);
+    }
+    if (workflowAdjacencies != null) {
+      datum.workflowAdjacencies = new Utf8(workflowAdjacencies);
+    }
  }

  JobSubmittedEvent() {}
@ -105,6 +140,34 @@ public Map<JobACL, AccessControlList> getJobAcls() {
    }
    return jobAcls;
  }
+  /**
+   * Get the id of the workflow.
+   * @return the workflow id, or null if none is set on this event
+   */
+  public String getWorkflowId() {
+    if (datum.workflowId != null) {
+      return datum.workflowId.toString();
+    }
+    return null;
+  }
+  /**
+   * Get the name of the workflow.
+   * @return the workflow name, or null if none is set on this event
+   */
+  public String getWorkflowName() {
+    if (datum.workflowName != null) {
+      return datum.workflowName.toString();
+    }
+    return null;
+  }
+  /**
+   * Get the node name of the workflow.
+   * @return the workflow node name, or null if none is set on this event
+   */
+  public String getWorkflowNodeName() {
+    if (datum.workflowNodeName != null) {
+      return datum.workflowNodeName.toString();
+    }
+    return null;
+  }
+  /**
+   * Get the adjacencies of the workflow.
+   * @return the workflow adjacencies string, or null if none is set on
+   *         this event
+   */
+  public String getWorkflowAdjacencies() {
+    if (datum.workflowAdjacencies != null) {
+      return datum.workflowAdjacencies.toString();
+    }
+    return null;
+  }
  /** Get the event type */
  public EventType getEventType() { return EventType.JOB_SUBMITTED; }

--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/TaskAttemptStartedEvent.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/TaskAttemptStartedEvent.java
@ -46,10 +46,13 @@ public class TaskAttemptStartedEvent implements HistoryEvent {
   * @param httpPort The port number of the tracker
   * @param shufflePort The shuffle port number of the container
   * @param containerId The containerId for the task attempt.
+   * @param locality The locality of the task attempt
+   * @param avataar The avataar of the task attempt
   */
  public TaskAttemptStartedEvent( TaskAttemptID attemptId,  
      TaskType taskType, long startTime, String trackerName,
-      int httpPort, int shufflePort, ContainerId containerId) {
+      int httpPort, int shufflePort, ContainerId containerId,
+      String locality, String avataar) {
    datum.attemptId = new Utf8(attemptId.toString());
    datum.taskid = new Utf8(attemptId.getTaskID().toString());
    datum.startTime = startTime;
@ -58,14 +61,21 @@ public TaskAttemptStartedEvent( TaskAttemptID attemptId,
    datum.httpPort = httpPort;
    datum.shufflePort = shufflePort;
    datum.containerId = new Utf8(containerId.toString());
+    if (locality != null) {
+      datum.locality = new Utf8(locality);
+    }
+    if (avataar != null) {
+      datum.avataar = new Utf8(avataar);
+    }
  }

  // TODO Remove after MrV1 is removed.
  // Using a dummy containerId to prevent jobHistory parse failures.
  public TaskAttemptStartedEvent(TaskAttemptID attemptId, TaskType taskType,
-      long startTime, String trackerName, int httpPort, int shufflePort) {
+      long startTime, String trackerName, int httpPort, int shufflePort,
+      String locality, String avataar) {
    this(attemptId, taskType, startTime, trackerName, httpPort, shufflePort,
-        ConverterUtils.toContainerId("container_-1_-1_-1_-1"));
+        ConverterUtils.toContainerId("container_-1_-1_-1_-1"), locality, avataar);
  }

  TaskAttemptStartedEvent() {}
@ -105,4 +115,19 @@ public EventType getEventType() {
  public ContainerId getContainerId() {
    return ConverterUtils.toContainerId(datum.containerId.toString());
  }
+  /** Get the locality */
+  public String getLocality() {
+    if (datum.locality != null) {
+      return datum.locality.toString();
+    }
+    return null;
+  }
+  /** Get the avataar */
+  public String getAvataar() {
+    if (datum.avataar != null) {
+      return datum.avataar.toString();
+    }
+    return null;
+  }
+
 }
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java
@ -522,7 +522,7 @@ public void merge(List<CompressAwarePath> inputs) throws IOException {
      
      // 1. Prepare the list of files to be merged. 
      for (CompressAwarePath file : inputs) {
-        approxOutputSize += localFS.getFileStatus(file.getPath()).getLen();
+        approxOutputSize += localFS.getFileStatus(file).getLen();
      }

      // add the checksum length
@ -753,12 +753,12 @@ private RawKeyValueIterator finalMerge(JobConf job, FileSystem fs,
    CompressAwarePath[] onDisk = onDiskMapOutputs.toArray(
        new CompressAwarePath[onDiskMapOutputs.size()]);
    for (CompressAwarePath file : onDisk) {
-      long fileLength = fs.getFileStatus(file.getPath()).getLen();
+      long fileLength = fs.getFileStatus(file).getLen();
      onDiskBytes += fileLength;
      rawBytes += (file.getRawDataLength() > 0) ? file.getRawDataLength() : fileLength;

      LOG.debug("Disk file: " + file + " Length is " + fileLength);
-      diskSegments.add(new Segment<K, V>(job, fs, file.getPath(), codec, keepInputs,
+      diskSegments.add(new Segment<K, V>(job, fs, file, codec, keepInputs,
                                         (file.toString().endsWith(
                                             Task.MERGED_OUTPUT_PREFIX) ?
                                          null : mergedMapOutputsCounter), file.getRawDataLength()
@ -806,23 +806,26 @@ public int compare(Segment<K, V> o1, Segment<K, V> o2) {
  
  }

-  static class CompressAwarePath
-  {
+  static class CompressAwarePath extends Path {
    private long rawDataLength;

-    private Path path;
-
    public CompressAwarePath(Path path, long rawDataLength) {
-      this.path = path;
+      super(path.toUri());
      this.rawDataLength = rawDataLength;
    }

    public long getRawDataLength() {
      return rawDataLength;
    }
-
-    public Path getPath() {
-      return path;
+    
+    @Override
+    public boolean equals(Object other) {
+      return super.equals(other);
+    }
+    
+    @Override
+    public int hashCode() {
+      return super.hashCode();
    }
  }
 }
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMerger.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMerger.java
@ -15,36 +15,156 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.hadoop.mapred;
+package org.apache.hadoop.mapreduce.task.reduce;

 import static org.mockito.Matchers.any;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 import static org.mockito.Mockito.doAnswer;

+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;

 import junit.framework.Assert;

 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalDirAllocator;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DataInputBuffer;
 import org.apache.hadoop.io.RawComparator;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.Counters.Counter;
 import org.apache.hadoop.mapred.IFile.Reader;
+import org.apache.hadoop.mapred.IFile;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MROutputFiles;
+import org.apache.hadoop.mapred.Merger;
 import org.apache.hadoop.mapred.Merger.Segment;
+import org.apache.hadoop.mapred.RawKeyValueIterator;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.MRConfig;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.TaskID;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl;
 import org.apache.hadoop.util.Progress;
 import org.apache.hadoop.util.Progressable;
+import org.junit.After;
+import org.junit.Before;
 import org.junit.Test;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;

 public class TestMerger {

+  private Configuration conf;
+  private JobConf jobConf;
+  private FileSystem fs;
+  
+  @Before
+  public void setup() throws IOException {
+    conf = new Configuration();
+    jobConf = new JobConf();
+    fs = FileSystem.getLocal(conf);
+  }
+  
+  @After
+  public void cleanup() throws IOException {    
+    fs.delete(new Path(jobConf.getLocalDirs()[0]), true);
+  }
+  
+  @Test
+  public void testInMemoryMerger() throws IOException {
+    JobID jobId = new JobID("a", 0);
+    TaskAttemptID reduceId = new TaskAttemptID(
+        new TaskID(jobId, TaskType.REDUCE, 0), 0);
+    TaskAttemptID mapId1 = new TaskAttemptID(
+        new TaskID(jobId, TaskType.MAP, 1), 0);
+    TaskAttemptID mapId2 = new TaskAttemptID(
+        new TaskID(jobId, TaskType.MAP, 2), 0);
+    
+    LocalDirAllocator lda = new LocalDirAllocator(MRConfig.LOCAL_DIR);
+    
+    MergeManagerImpl<Text, Text> mergeManager = new MergeManagerImpl<Text, Text>(
+        reduceId, jobConf, fs, lda, Reporter.NULL, null, null, null, null, null,
+        null, null, new Progress(), new MROutputFiles());
+    
+    // write two map outputs whose keys interleave, so the merge must combine both
+    Map<String, String> map1 = new TreeMap<String, String>();
+    map1.put("apple", "disgusting");
+    map1.put("carrot", "delicious");
+    Map<String, String> map2 = new TreeMap<String, String>();
+    map2.put("banana", "pretty good");
+    byte[] mapOutputBytes1 = writeMapOutput(conf, map1);
+    byte[] mapOutputBytes2 = writeMapOutput(conf, map2);
+    InMemoryMapOutput<Text, Text> mapOutput1 = new InMemoryMapOutput<Text, Text>(
+        conf, mapId1, mergeManager, mapOutputBytes1.length, null, true);
+    InMemoryMapOutput<Text, Text> mapOutput2 = new InMemoryMapOutput<Text, Text>(
+        conf, mapId2, mergeManager, mapOutputBytes2.length, null, true);
+    System.arraycopy(mapOutputBytes1, 0, mapOutput1.getMemory(), 0,
+        mapOutputBytes1.length);
+    System.arraycopy(mapOutputBytes2, 0, mapOutput2.getMemory(), 0,
+        mapOutputBytes2.length);
+    
+    // create the in-memory merger and merge both outputs; one on-disk file is expected
+    MergeThread<InMemoryMapOutput<Text, Text>, Text, Text> inMemoryMerger =
+        mergeManager.createInMemoryMerger();
+    List<InMemoryMapOutput<Text, Text>> mapOutputs =
+        new ArrayList<InMemoryMapOutput<Text, Text>>();
+    mapOutputs.add(mapOutput1);
+    mapOutputs.add(mapOutput2);
+    
+    inMemoryMerger.merge(mapOutputs);
+    
+    Assert.assertEquals(1, mergeManager.onDiskMapOutputs.size());
+    Path outPath = mergeManager.onDiskMapOutputs.iterator().next();
+    
+    List<String> keys = new ArrayList<String>();
+    List<String> values = new ArrayList<String>();
+    readOnDiskMapOutput(conf, fs, outPath, keys, values);
+    Assert.assertEquals(Arrays.asList("apple", "banana", "carrot"), keys);
+    Assert.assertEquals(Arrays.asList("disgusting", "pretty good", "delicious"), values);
+  }
+  
+  private byte[] writeMapOutput(Configuration conf, Map<String, String> keysToValues)
+      throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    FSDataOutputStream fsdos = new FSDataOutputStream(baos, null);
+    IFile.Writer<Text, Text> writer = new IFile.Writer<Text, Text>(conf, fsdos,
+        Text.class, Text.class, null, null);
+    for (String key : keysToValues.keySet()) {
+      String value = keysToValues.get(key);
+      writer.append(new Text(key), new Text(value));
+    }
+    writer.close();
+    return baos.toByteArray();
+  }
+  
+  private void readOnDiskMapOutput(Configuration conf, FileSystem fs, Path path,
+      List<String> keys, List<String> values) throws IOException {
+    IFile.Reader<Text, Text> reader = new IFile.Reader<Text, Text>(conf, fs,
+        path, null, null);
+    DataInputBuffer keyBuff = new DataInputBuffer();
+    DataInputBuffer valueBuff = new DataInputBuffer();
+    Text key = new Text();
+    Text value = new Text();
+    while (reader.nextRawKey(keyBuff)) {
+      key.readFields(keyBuff);
+      keys.add(key.toString());
+      reader.nextRawValue(valueBuff);
+      value.readFields(valueBuff);
+      values.add(value.toString());
+    }
+  }
+  
  @Test
  public void testCompressed() throws IOException {
    testMergeShouldReturnProperProgress(getCompressedSegments());
@ -58,9 +178,6 @@ public void testUncompressed() throws IOException {
  @SuppressWarnings( { "deprecation", "unchecked" })
  public void testMergeShouldReturnProperProgress(
      List<Segment<Text, Text>> segments) throws IOException {
-    Configuration conf = new Configuration();
-    JobConf jobConf = new JobConf();
-    FileSystem fs = FileSystem.getLocal(conf);
    Path tmpDir = new Path("localpath");
    Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
    Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
@ -87,7 +204,6 @@ private List<Segment<Text, Text>> getUncompressedSegments() throws IOException {
    List<Segment<Text, Text>> segments = new ArrayList<Segment<Text, Text>>();
    for (int i = 1; i < 1; i++) {
      segments.add(getUncompressedSegment(i));
-      System.out.println("adding segment");
    }
    return segments;
  }
@ -96,7 +212,6 @@ private List<Segment<Text, Text>> getCompressedSegments() throws IOException {
    List<Segment<Text, Text>> segments = new ArrayList<Segment<Text, Text>>();
    for (int i = 1; i < 1; i++) {
      segments.add(getCompressedSegment(i));
-      System.out.println("adding segment");
    }
    return segments;
  }
@ -133,7 +248,7 @@ public Boolean answer(InvocationOnMock invocation) {
        if (i++ == 2) {
          return false;
        }
-        key.reset(("Segement Key " + segmentName + i).getBytes(), 20);
+        key.reset(("Segment Key " + segmentName + i).getBytes(), 20);
        return true;
      }
    };
@ -149,7 +264,7 @@ public Void answer(InvocationOnMock invocation) {
        if (i++ == 2) {
          return null;
        }
-        key.reset(("Segement Value " + segmentName + i).getBytes(), 20);
+        key.reset(("Segment Value " + segmentName + i).getBytes(), 20);
        return null;
      }
    };
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestIndexCache.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestIndexCache.java
@ -1,324 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.mapred;
-
-import java.io.DataOutputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.Random;
-import java.util.zip.CRC32;
-import java.util.zip.CheckedOutputStream;
-
-import org.apache.hadoop.fs.ChecksumException;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.mapreduce.server.tasktracker.TTConfig;
-
-import junit.framework.TestCase;
-
-public class TestIndexCache extends TestCase {
-  private JobConf conf;
-  private FileSystem fs;
-  private Path p;
-
-  @Override
-  public void setUp() throws IOException {
-    conf = new JobConf();
-    fs = FileSystem.getLocal(conf).getRaw();
-    p =  new Path(System.getProperty("test.build.data", "/tmp"),
-        "cache").makeQualified(fs.getUri(), fs.getWorkingDirectory());
-  }
-
-  public void testLRCPolicy() throws Exception {
-    Random r = new Random();
-    long seed = r.nextLong();
-    r.setSeed(seed);
-    System.out.println("seed: " + seed);
-    fs.delete(p, true);
-    conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
-    final int partsPerMap = 1000;
-    final int bytesPerFile = partsPerMap * 24;
-    IndexCache cache = new IndexCache(conf);
-
-    // fill cache
-    int totalsize = bytesPerFile;
-    for (; totalsize < 1024 * 1024; totalsize += bytesPerFile) {
-      Path f = new Path(p, Integer.toString(totalsize, 36));
-      writeFile(fs, f, totalsize, partsPerMap);
-      IndexRecord rec = cache.getIndexInformation(
-        Integer.toString(totalsize, 36), r.nextInt(partsPerMap), f,
-        UserGroupInformation.getCurrentUser().getShortUserName());
-      checkRecord(rec, totalsize);
-    }
-
-    // delete files, ensure cache retains all elem
-    for (FileStatus stat : fs.listStatus(p)) {
-      fs.delete(stat.getPath(),true);
-    }
-    for (int i = bytesPerFile; i < 1024 * 1024; i += bytesPerFile) {
-      Path f = new Path(p, Integer.toString(i, 36));
-      IndexRecord rec = cache.getIndexInformation(Integer.toString(i, 36),
-        r.nextInt(partsPerMap), f,
-        UserGroupInformation.getCurrentUser().getShortUserName());
-      checkRecord(rec, i);
-    }
-
-    // push oldest (bytesPerFile) out of cache
-    Path f = new Path(p, Integer.toString(totalsize, 36));
-    writeFile(fs, f, totalsize, partsPerMap);
-    cache.getIndexInformation(Integer.toString(totalsize, 36),
-        r.nextInt(partsPerMap), f,
-        UserGroupInformation.getCurrentUser().getShortUserName());
-    fs.delete(f, false);
-
-    // oldest fails to read, or error
-    boolean fnf = false;
-    try {
-      cache.getIndexInformation(Integer.toString(bytesPerFile, 36),
-        r.nextInt(partsPerMap), new Path(p, Integer.toString(bytesPerFile)),
-        UserGroupInformation.getCurrentUser().getShortUserName());
-    } catch (IOException e) {
-      if (e.getCause() == null ||
-          !(e.getCause()  instanceof FileNotFoundException)) {
-        throw e;
-      }
-      else {
-        fnf = true;
-      }
-    }
-    if (!fnf)
-      fail("Failed to push out last entry");
-    // should find all the other entries
-    for (int i = bytesPerFile << 1; i < 1024 * 1024; i += bytesPerFile) {
-      IndexRecord rec = cache.getIndexInformation(Integer.toString(i, 36),
-          r.nextInt(partsPerMap), new Path(p, Integer.toString(i, 36)),
-          UserGroupInformation.getCurrentUser().getShortUserName());
-      checkRecord(rec, i);
-    }
-    IndexRecord rec = cache.getIndexInformation(Integer.toString(totalsize, 36),
-      r.nextInt(partsPerMap), f,
-      UserGroupInformation.getCurrentUser().getShortUserName());
-
-    checkRecord(rec, totalsize);
-  }
-
-  public void testBadIndex() throws Exception {
-    final int parts = 30;
-    fs.delete(p, true);
-    conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
-    IndexCache cache = new IndexCache(conf);
-
-    Path f = new Path(p, "badindex");
-    FSDataOutputStream out = fs.create(f, false);
-    CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
-    DataOutputStream dout = new DataOutputStream(iout);
-    for (int i = 0; i < parts; ++i) {
-      for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
-        if (0 == (i % 3)) {
-          dout.writeLong(i);
-        } else {
-          out.writeLong(i);
-        }
-      }
-    }
-    out.writeLong(iout.getChecksum().getValue());
-    dout.close();
-    try {
-      cache.getIndexInformation("badindex", 7, f,
-        UserGroupInformation.getCurrentUser().getShortUserName());
-      fail("Did not detect bad checksum");
-    } catch (IOException e) {
-      if (!(e.getCause() instanceof ChecksumException)) {
-        throw e;
-      }
-    }
-  }
-
-  public void testInvalidReduceNumberOrLength() throws Exception {
-    fs.delete(p, true);
-    conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
-    final int partsPerMap = 1000;
-    final int bytesPerFile = partsPerMap * 24;
-    IndexCache cache = new IndexCache(conf);
-
-    // fill cache
-    Path feq = new Path(p, "invalidReduceOrPartsPerMap");
-    writeFile(fs, feq, bytesPerFile, partsPerMap);
-
-    // Number of reducers should always be less than partsPerMap as reducer
-    // numbers start from 0 and there cannot be more reducer than parts
-
-    try {
-      // Number of reducers equal to partsPerMap
-      cache.getIndexInformation("reduceEqualPartsPerMap", 
-               partsPerMap, // reduce number == partsPerMap
-               feq, UserGroupInformation.getCurrentUser().getShortUserName());
-      fail("Number of reducers equal to partsPerMap did not fail");
-    } catch (Exception e) {
-      if (!(e instanceof IOException)) {
-        throw e;
-      }
-    }
-
-    try {
-      // Number of reducers more than partsPerMap
-      cache.getIndexInformation(
-      "reduceMorePartsPerMap", 
-      partsPerMap + 1, // reduce number > partsPerMap
-      feq, UserGroupInformation.getCurrentUser().getShortUserName());
-      fail("Number of reducers more than partsPerMap did not fail");
-    } catch (Exception e) {
-      if (!(e instanceof IOException)) {
-        throw e;
-      }
-    }
-  }
-
-  public void testRemoveMap() throws Exception {
-    // This test case use two thread to call getIndexInformation and 
-    // removeMap concurrently, in order to construct race condition.
-    // This test case may not repeatable. But on my macbook this test 
-    // fails with probability of 100% on code before MAPREDUCE-2541,
-    // so it is repeatable in practice.
-    fs.delete(p, true);
-    conf.setInt(TTConfig.TT_INDEX_CACHE, 10);
-    // Make a big file so removeMapThread almost surely runs faster than 
-    // getInfoThread 
-    final int partsPerMap = 100000;
-    final int bytesPerFile = partsPerMap * 24;
-    final IndexCache cache = new IndexCache(conf);
-
-    final Path big = new Path(p, "bigIndex");
-    final String user = 
-      UserGroupInformation.getCurrentUser().getShortUserName();
-    writeFile(fs, big, bytesPerFile, partsPerMap);
-    
-    // run multiple times
-    for (int i = 0; i < 20; ++i) {
-      Thread getInfoThread = new Thread() {
-        @Override
-        public void run() {
-          try {
-            cache.getIndexInformation("bigIndex", partsPerMap, big, user);
-          } catch (Exception e) {
-            // should not be here
-          }
-        }
-      };
-      Thread removeMapThread = new Thread() {
-        @Override
-        public void run() {
-          cache.removeMap("bigIndex");
-        }
-      };
-      if (i%2==0) {
-        getInfoThread.start();
-        removeMapThread.start();        
-      } else {
-        removeMapThread.start();        
-        getInfoThread.start();
-      }
-      getInfoThread.join();
-      removeMapThread.join();
-      assertEquals(true, cache.checkTotalMemoryUsed());
-    }      
-  }
-  
-  public void testCreateRace() throws Exception {
-    fs.delete(p, true);
-    conf.setInt(TTConfig.TT_INDEX_CACHE, 1);
-    final int partsPerMap = 1000;
-    final int bytesPerFile = partsPerMap * 24;
-    final IndexCache cache = new IndexCache(conf);
-    
-    final Path racy = new Path(p, "racyIndex");
-    final String user =  
-      UserGroupInformation.getCurrentUser().getShortUserName();
-    writeFile(fs, racy, bytesPerFile, partsPerMap);
-
-    // run multiple instances
-    Thread[] getInfoThreads = new Thread[50];
-    for (int i = 0; i < 50; i++) {
-      getInfoThreads[i] = new Thread() {
-        @Override
-        public void run() {
-          try {
-            cache.getIndexInformation("racyIndex", partsPerMap, racy, user);
-            cache.removeMap("racyIndex");
-          } catch (Exception e) {
-            // should not be here
-          }
-        }
-      };
-    }
-
-    for (int i = 0; i < 50; i++) {
-      getInfoThreads[i].start();
-    }
-
-    final Thread mainTestThread = Thread.currentThread();
-
-    Thread timeoutThread = new Thread() {
-      @Override
-      public void run() {
-        try {
-          Thread.sleep(15000);
-          mainTestThread.interrupt();
-        } catch (InterruptedException ie) {
-          // we are done;
-        }
-      }
-    };
-
-    for (int i = 0; i < 50; i++) {
-      try {
-        getInfoThreads[i].join();
-      } catch (InterruptedException ie) {
-        // we haven't finished in time. Potential deadlock/race.
-        fail("Unexpectedly long delay during concurrent cache entry creations");
-      }
-    }
-    // stop the timeoutThread. If we get interrupted before stopping, there
-    // must be something wrong, although it wasn't a deadlock. No need to
-    // catch and swallow.
-    timeoutThread.interrupt();
-  }
-
-  private static void checkRecord(IndexRecord rec, long fill) {
-    assertEquals(fill, rec.startOffset);
-    assertEquals(fill, rec.rawLength);
-    assertEquals(fill, rec.partLength);
-  }
-
-  private static void writeFile(FileSystem fs, Path f, long fill, int parts)
-      throws IOException {
-    FSDataOutputStream out = fs.create(f, false);
-    CheckedOutputStream iout = new CheckedOutputStream(out, new CRC32());
-    DataOutputStream dout = new DataOutputStream(iout);
-    for (int i = 0; i < parts; ++i) {
-      for (int j = 0; j < MapTask.MAP_OUTPUT_INDEX_RECORD_LENGTH / 8; ++j) {
-        dout.writeLong(fill);
-      }
-    }
-    out.writeLong(iout.getChecksum().getValue());
-    dout.close();
-  }
-}
--- a/hadoop-maven-plugins/pom.xml
+++ b/hadoop-maven-plugins/pom.xml
@ -71,6 +71,17 @@
          </execution>
        </executions>
      </plugin>
+      <!--
+      Skip Clover instrumentation for this module to prevent error finding Clover
+      classes during plugin execution when running a build with Clover enabled.
+      -->
+      <plugin>
+        <groupId>com.atlassian.maven.plugins</groupId>
+        <artifactId>maven-clover2-plugin</artifactId>
+        <configuration>
+          <skip>true</skip>
+        </configuration>
+      </plugin>
    </plugins>
  </build>
 </project>
--- a/hadoop-maven-plugins/src/main/java/org/apache/hadoop/maven/plugin/versioninfo/VersionInfoMojo.java
+++ b/hadoop-maven-plugins/src/main/java/org/apache/hadoop/maven/plugin/versioninfo/VersionInfoMojo.java
@ -46,7 +46,7 @@
 * build.  The version information includes build time, SCM URI, SCM branch, SCM
 * commit, and an MD5 checksum of the contents of the files in the codebase.
 */
-@Mojo(name="version-info", defaultPhase=LifecyclePhase.INITIALIZE)
+@Mojo(name="version-info")
 public class VersionInfoMojo extends AbstractMojo {

  @Parameter(defaultValue="${project}")
--- a/hadoop-project-dist/pom.xml
+++ b/hadoop-project-dist/pom.xml
@ -247,13 +247,6 @@
                    </copy>

                    <!-- Docs -->
-                    <exec dir="${project.build.directory}/docs-src"
-                          executable="${env.FORREST_HOME}/bin/forrest"
-                          failonerror="true">
-                    </exec>
-                    <copy todir="${project.build.directory}/site">
-                      <fileset dir="${project.build.directory}/docs-src/build/site"/>
-                    </copy>
                    <copy file="${project.build.directory}/docs-src/releasenotes.html"
                          todir="${project.build.directory}/site"/>
                    <style basedir="${basedir}/src/main/resources"
--- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Hadoop20JHParser.java
+++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Hadoop20JHParser.java
@ -67,6 +67,11 @@ public Hadoop20JHParser(InputStream input) throws IOException {
    reader = new LineReader(input);
  }

+  public Hadoop20JHParser(LineReader reader) throws IOException {
+    super();
+    this.reader = reader;
+  }
+
  Map<String, HistoryEventEmitter> liveEmitters =
      new HashMap<String, HistoryEventEmitter>();
  Queue<HistoryEvent> remainingEvents = new LinkedList<HistoryEvent>();
--- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java
+++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Job20LineHistoryEventEmitter.java
@ -76,6 +76,23 @@ HistoryEvent maybeEmitEvent(ParsedLine line, String jobIDName,
      }
      String jobName = line.get("JOBNAME");
      String jobQueueName = line.get("JOB_QUEUE");// could be null
+      String workflowId = line.get("WORKFLOW_ID");
+      if (workflowId == null) {
+        workflowId = "";
+      }
+      String workflowName = line.get("WORKFLOW_NAME");
+      if (workflowName == null) {
+        workflowName = "";
+      }
+      String workflowNodeName = line.get("WORKFLOW_NODE_NAME");
+      if (workflowNodeName == null) {
+        workflowNodeName = "";
+      }
+      String workflowAdjacencies = line.get("WORKFLOW_ADJACENCIES");
+      if (workflowAdjacencies == null) {
+        workflowAdjacencies = "";
+      }
+      

      if (submitTime != null) {
        Job20LineHistoryEventEmitter that =
@ -86,7 +103,8 @@ HistoryEvent maybeEmitEvent(ParsedLine line, String jobIDName,
        Map<JobACL, AccessControlList> jobACLs =
          new HashMap<JobACL, AccessControlList>();
        return new JobSubmittedEvent(jobID, jobName, user,
-            that.originalSubmitTime, jobConf, jobACLs, jobQueueName);
+            that.originalSubmitTime, jobConf, jobACLs, jobQueueName,
+            workflowId, workflowName, workflowNodeName, workflowAdjacencies);
      }

      return null;
--- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/TaskAttempt20LineEventEmitter.java
+++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/TaskAttempt20LineEventEmitter.java
@ -65,6 +65,14 @@ HistoryEvent maybeEmitEvent(ParsedLine line, String taskAttemptIDName,
      String taskType = line.get("TASK_TYPE");
      String trackerName = line.get("TRACKER_NAME");
      String httpPort = line.get("HTTP_PORT");
+      String locality = line.get("LOCALITY");
+      if (locality == null) {
+        locality = "";
+      }
+      String avataar = line.get("AVATAAR");
+      if (avataar == null) {
+        avataar = "";
+      }

      if (startTime != null && taskType != null) {
        TaskAttempt20LineEventEmitter that =
@ -79,7 +87,8 @@ HistoryEvent maybeEmitEvent(ParsedLine line, String taskAttemptIDName,
                .parseInt(httpPort);

        return new TaskAttemptStartedEvent(taskAttemptID,
-            that.originalTaskType, that.originalStartTime, trackerName, port, -1);
+            that.originalTaskType, that.originalStartTime, trackerName, port, -1,
+            locality, avataar);
      }

      return null;
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@ -274,6 +274,9 @@ Release 0.23.7 - UNRELEASED

  IMPROVEMENTS

+    YARN-133. Update web services docs for RM clusterMetrics (Ravi Prakash via
+    kihwal)
+
  OPTIMIZATIONS

  BUG FIXES
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml
@ -93,7 +93,7 @@
        <executions>
          <execution>
            <id>version-info</id>
-            <phase>compile</phase>
+            <phase>generate-resources</phase>
            <goals>
              <goal>version-info</goal>
            </goals>
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/apt/ResourceManagerRest.apt.vm
@ -177,6 +177,16 @@ ResourceManager REST API's.
 *---------------+--------------+-------------------------------+
 | appsSubmitted | int | The number of applications submitted |
 *---------------+--------------+-------------------------------+
+| appsCompleted | int | The number of applications completed |
+*---------------+--------------+-------------------------------+
+| appsPending | int | The number of applications pending |
+*---------------+--------------+-------------------------------+
+| appsRunning | int | The number of applications running |
+*---------------+--------------+-------------------------------+
+| appsFailed | int | The number of applications failed |
+*---------------+--------------+-------------------------------+
+| appsKilled | int | The number of applications killed |
+*---------------+--------------+-------------------------------+
 | reservedMB    | long         | The amount of memory reserved in MB |
 *---------------+--------------+-------------------------------+
 | availableMB   | long         | The amount of memory available in MB |
@ -187,6 +197,10 @@ ResourceManager REST API's.
 *---------------+--------------+-------------------------------+
 | containersAllocated | int | The number of containers allocated |
 *---------------+--------------+-------------------------------+
+| containersReserved | int | The number of containers reserved |
+*---------------+--------------+-------------------------------+
+| containersPending | int | The number of containers pending |
+*---------------+--------------+-------------------------------+
 | totalNodes | int | The total number of nodes |
 *---------------+--------------+-------------------------------+
 | activeNodes | int | The number of active nodes |
@ -223,20 +237,26 @@ ResourceManager REST API's.

 +---+
  {
-    "clusterMetrics":
-    {
-      "appsSubmitted":4,
+    "clusterMetrics":{
+      "appsSubmitted":0,
+      "appsCompleted":0,
+      "appsPending":0,
+      "appsRunning":0,
+      "appsFailed":0,
+      "appsKilled":0,
      "reservedMB":0,
-      "availableMB":8192,
+      "availableMB":17408,
      "allocatedMB":0,
-      "totalMB":8192,
      "containersAllocated":0,
+      "containersReserved":0,
+      "containersPending":0,
+      "totalMB":17408,
      "totalNodes":1,
-      "activeNodes":1,
      "lostNodes":0,
      "unhealthyNodes":0,
      "decommissionedNodes":0,
-      "rebootedNodes":0
+      "rebootedNodes":0,
+      "activeNodes":1
    }
  }
 +---+
@ -264,18 +284,25 @@ ResourceManager REST API's.
 +---+
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <clusterMetrics>
-  <appsSubmitted>4</appsSubmitted>
+  <appsSubmitted>0</appsSubmitted>
+  <appsCompleted>0</appsCompleted>
+  <appsPending>0</appsPending>
+  <appsRunning>0</appsRunning>
+  <appsFailed>0</appsFailed>
+  <appsKilled>0</appsKilled>
  <reservedMB>0</reservedMB>
-  <availableMB>8192</availableMB>
+  <availableMB>17408</availableMB>
  <allocatedMB>0</allocatedMB>
  <containersAllocated>0</containersAllocated>
-  <totalMB>8192</totalMB>
+  <containersReserved>0</containersReserved>
+  <containersPending>0</containersPending>
+  <totalMB>17408</totalMB>
  <totalNodes>1</totalNodes>
-  <activeNodes>1</activeNodes>
  <lostNodes>0</lostNodes>
  <unhealthyNodes>0</unhealthyNodes>
  <decommissionedNodes>0</decommissionedNodes>
  <rebootedNodes>0</rebootedNodes>
+  <activeNodes>1</activeNodes>
 </clusterMetrics>
 +---+

--- a/pom.xml
+++ b/pom.xml
@ -517,6 +517,18 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
      <properties>
        <cloverLicenseLocation>${user.home}/.clover.license</cloverLicenseLocation>
        <cloverDatabase>${project.build.directory}/clover/hadoop-coverage.db</cloverDatabase>
+        <!-- NB: These additional properties exist so that the Clover settings
+             can be redefined with "-Dk=v" Maven options.
+             For some reason the expressions declared in the Clover
+             docs, such as "${maven.clover.generateHtml}", do not work that way.
+             However, the properties below are confirmed to work: e.g.
+             -DcloverGenHtml=false switches off HTML generation.
+             The default values provided here correspond exactly to the Clover defaults, so
+             the behavior is 100% backwards compatible. -->
+        <cloverAlwaysReport>true</cloverAlwaysReport>
+        <cloverGenHtml>true</cloverGenHtml>
+        <cloverGenXml>true</cloverGenXml>
+        <cloverGenHistorical>false</cloverGenHistorical>
      </properties>
      <build>
        <plugins>
@ -530,8 +542,10 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
              <cloverDatabase>${cloverDatabase}</cloverDatabase>
              <targetPercentage>50%</targetPercentage>
              <outputDirectory>${project.build.directory}/clover</outputDirectory>
-              <generateHtml>true</generateHtml>
-              <generateXml>true</generateXml>
+              <alwaysReport>${cloverAlwaysReport}</alwaysReport>
+              <generateHtml>${cloverGenHtml}</generateHtml>
+              <generateXml>${cloverGenXml}</generateXml>
+              <generateHistorical>${cloverGenHistorical}</generateHistorical>
            </configuration>
            <executions>
              <execution>