Merging r1526971 through r1527683 from trunk to branch HDFS-2832

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2832@1527697 13f79535-47bb-0310-9956-ffa450edef68
Arpit Agarwal 2013-09-30 18:28:07 +00:00
commit 6331ff024c
166 changed files with 42615 additions and 276 deletions

View File

@ -0,0 +1,45 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly>
<id>hadoop-sls</id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>${basedir}/src/main/bin</directory>
<outputDirectory>sls/bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/html</directory>
<outputDirectory>sls/html</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/sample-conf</directory>
<outputDirectory>sls/sample-conf</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/data</directory>
<outputDirectory>sls/sample-data</outputDirectory>
</fileSet>
</fileSets>
</assembly>

View File

@ -93,6 +93,17 @@
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-sls/target</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sources</outputDirectory>
<includes>
<include>*-sources.jar</include>
</includes>
</fileSet>
<fileSet>
<directory>../hadoop-sls/target/hadoop-sls-${project.version}/sls</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sls</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>

View File

@ -339,12 +339,18 @@ Release 2.3.0 - UNRELEASED
HADOOP-9998. Provide methods to clear only part of the DNSToSwitchMapping.
(Junping Du via Colin Patrick McCabe)
HADOOP-10006. Compilation failure in trunk for
o.a.h.fs.swift.util.JSONUtil (Junping Du via stevel)
OPTIMIZATIONS
HADOOP-9748. Reduce blocking on UGI.ensureInitialized (daryn)
BUG FIXES
HADOOP-9964. Fix deadlocks in TestHttpServer by synchronize
ReflectionUtils.printThreadInfo. (Junping Du via llu)
HADOOP-9582. Non-existent file to "hadoop fs -conf" doesn't throw error
(Ashwin Shankar via jlowe)

View File

@ -0,0 +1,113 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.io.ByteBufferPool;
import com.google.common.base.Preconditions;
@InterfaceAudience.Private
@InterfaceStability.Evolving
public final class ByteBufferUtil {
/**
* Determine if a stream can do a byte buffer read via read(ByteBuffer buf)
*/
private static boolean streamHasByteBufferRead(InputStream stream) {
if (!(stream instanceof ByteBufferReadable)) {
return false;
}
if (!(stream instanceof FSDataInputStream)) {
return true;
}
return ((FSDataInputStream)stream).getWrappedStream()
instanceof ByteBufferReadable;
}
/**
* Perform a fallback read.
*/
public static ByteBuffer fallbackRead(
InputStream stream, ByteBufferPool bufferPool, int maxLength)
throws IOException {
if (bufferPool == null) {
throw new UnsupportedOperationException("zero-copy reads " +
"were not available, and you did not provide a fallback " +
"ByteBufferPool.");
}
boolean useDirect = streamHasByteBufferRead(stream);
ByteBuffer buffer = bufferPool.getBuffer(useDirect, maxLength);
if (buffer == null) {
throw new UnsupportedOperationException("zero-copy reads " +
"were not available, and the ByteBufferPool did not provide " +
"us with " + (useDirect ? "a direct" : "an indirect") +
"buffer.");
}
Preconditions.checkState(buffer.capacity() > 0);
Preconditions.checkState(buffer.isDirect() == useDirect);
maxLength = Math.min(maxLength, buffer.capacity());
boolean success = false;
try {
if (useDirect) {
buffer.clear();
buffer.limit(maxLength);
ByteBufferReadable readable = (ByteBufferReadable)stream;
int totalRead = 0;
while (true) {
if (totalRead >= maxLength) {
success = true;
break;
}
int nRead = readable.read(buffer);
if (nRead < 0) {
if (totalRead > 0) {
success = true;
}
break;
}
totalRead += nRead;
}
buffer.flip();
} else {
buffer.clear();
int nRead = stream.read(buffer.array(),
buffer.arrayOffset(), maxLength);
if (nRead >= 0) {
buffer.limit(nRead);
success = true;
}
}
} finally {
if (!success) {
// If we got an error while reading, or if we are at EOF, we
// don't need the buffer any more. We can give it back to the
// bufferPool.
bufferPool.putBuffer(buffer);
buffer = null;
}
}
return buffer;
}
}
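For orientation, here is a minimal, hypothetical caller of ByteBufferUtil.fallbackRead; the FileInputStream source and the 64 KB request size are illustrative assumptions, not part of this change.

import java.io.FileInputStream;
import java.io.InputStream;
import java.nio.ByteBuffer;
import org.apache.hadoop.fs.ByteBufferUtil;
import org.apache.hadoop.io.ByteBufferPool;
import org.apache.hadoop.io.ElasticByteBufferPool;

public class FallbackReadExample {
  public static void main(String[] args) throws Exception {
    ByteBufferPool pool = new ElasticByteBufferPool();
    InputStream in = new FileInputStream(args[0]);
    try {
      ByteBuffer buf;
      // fallbackRead returns null only at EOF; otherwise the buffer comes back
      // with position 0 and its limit set to the number of bytes read.
      while ((buf = ByteBufferUtil.fallbackRead(in, pool, 64 * 1024)) != null) {
        System.out.println("read " + buf.remaining() + " bytes");
        pool.putBuffer(buf);   // hand the buffer back so the pool can reuse it
      }
    } finally {
      in.close();
    }
  }
}

Because FileInputStream is not ByteBufferReadable, this exercises the heap-buffer branch of fallbackRead; a stream whose wrapped source is ByteBufferReadable would take the direct-buffer branch instead.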

View File

@ -1,4 +1,5 @@
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -19,17 +20,29 @@
import java.io.*;
import java.nio.ByteBuffer;
import java.util.EnumSet;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.io.ByteBufferPool;
import org.apache.hadoop.fs.ByteBufferUtil;
import org.apache.hadoop.util.IdentityHashStore;
/** Utility that wraps a {@link FSInputStream} in a {@link DataInputStream}
* and buffers input through a {@link BufferedInputStream}. */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class FSDataInputStream extends DataInputStream
implements Seekable, PositionedReadable, Closeable,
ByteBufferReadable, HasFileDescriptor, CanSetDropBehind, CanSetReadahead {
implements Seekable, PositionedReadable, Closeable,
ByteBufferReadable, HasFileDescriptor, CanSetDropBehind, CanSetReadahead,
HasEnhancedByteBufferAccess {
/**
* Map ByteBuffers that we have handed out to readers to ByteBufferPool
* objects
*/
private final IdentityHashStore<ByteBuffer, ByteBufferPool>
extendedReadBuffers
= new IdentityHashStore<ByteBuffer, ByteBufferPool>(0);
public FSDataInputStream(InputStream in)
throws IOException {
@ -167,4 +180,45 @@ public void setDropBehind(Boolean dropBehind)
"support setting the drop-behind caching setting.");
}
}
@Override
public ByteBuffer read(ByteBufferPool bufferPool, int maxLength,
EnumSet<ReadOption> opts)
throws IOException, UnsupportedOperationException {
try {
return ((HasEnhancedByteBufferAccess)in).read(bufferPool,
maxLength, opts);
}
catch (ClassCastException e) {
ByteBuffer buffer = ByteBufferUtil.
fallbackRead(this, bufferPool, maxLength);
if (buffer != null) {
extendedReadBuffers.put(buffer, bufferPool);
}
return buffer;
}
}
private static final EnumSet<ReadOption> EMPTY_READ_OPTIONS_SET =
EnumSet.noneOf(ReadOption.class);
final public ByteBuffer read(ByteBufferPool bufferPool, int maxLength)
throws IOException, UnsupportedOperationException {
return read(bufferPool, maxLength, EMPTY_READ_OPTIONS_SET);
}
@Override
public void releaseBuffer(ByteBuffer buffer) {
try {
((HasEnhancedByteBufferAccess)in).releaseBuffer(buffer);
}
catch (ClassCastException e) {
ByteBufferPool bufferPool = extendedReadBuffers.remove( buffer);
if (bufferPool == null) {
throw new IllegalArgumentException("tried to release a buffer " +
"that was not created by this stream.");
}
bufferPool.putBuffer(buffer);
}
}
}

View File

@ -18,9 +18,11 @@
package org.apache.hadoop.fs;
import java.io.*;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.ZeroCopyUnavailableException;
/****************************************************************
* FSInputStream is a generic old InputStream with a little bit

View File

@ -0,0 +1,79 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.EnumSet;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.io.ByteBufferPool;
/**
* FSDataInputStreams implement this interface to provide enhanced
* byte buffer access. Usually this takes the form of mmap support.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface HasEnhancedByteBufferAccess {
/**
* Get a ByteBuffer containing file data.
*
* This ByteBuffer may come from the stream itself, via a call like mmap,
* or it may come from the ByteBufferFactory which is passed in as an
* argument.
*
* @param factory
* If this is non-null, it will be used to create a fallback
* ByteBuffer when the stream itself cannot create one.
* @param maxLength
* The maximum length of buffer to return. We may return a buffer
* which is shorter than this.
* @param opts
* Options to use when reading.
*
* @return
* We will return null on EOF (and only on EOF).
* Otherwise, we will return a direct ByteBuffer containing at
* least one byte. You must free this ByteBuffer when you are
* done with it by calling releaseBuffer on it.
* The buffer will continue to be readable until it is released
* in this manner. However, the input stream's close method may
* warn about unclosed buffers.
* @throws
* IOException: if there was an error reading.
* UnsupportedOperationException: if factory was null, and we
* needed an external byte buffer. UnsupportedOperationException
* will never be thrown unless the factory argument is null.
*/
public ByteBuffer read(ByteBufferPool factory, int maxLength,
EnumSet<ReadOption> opts)
throws IOException, UnsupportedOperationException;
/**
* Release a ByteBuffer which was created by the enhanced ByteBuffer read
* function. You must not continue using the ByteBuffer after calling this
* function.
*
* @param buffer
* The ByteBuffer to release.
*/
public void releaseBuffer(ByteBuffer buffer);
}
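To make the contract above concrete, the sketch below shows the intended consumer-side calling pattern through FSDataInputStream, which implements this interface in this change; the command-line path and the 1 MB request size are assumptions for illustration.

import java.nio.ByteBuffer;
import java.util.EnumSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.ReadOption;
import org.apache.hadoop.io.ByteBufferPool;
import org.apache.hadoop.io.ElasticByteBufferPool;

public class EnhancedReadExample {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    ByteBufferPool pool = new ElasticByteBufferPool();
    FSDataInputStream in = fs.open(new Path(args[0]));
    try {
      ByteBuffer buf;
      // read() returns null only at EOF; every non-null buffer must be handed
      // back with releaseBuffer() once the caller is done with it.
      while ((buf = in.read(pool, 1024 * 1024,
          EnumSet.noneOf(ReadOption.class))) != null) {
        System.out.println("got " + buf.remaining() + " bytes");
        in.releaseBuffer(buf);
      }
    } finally {
      in.close();
    }
  }
}

If the wrapped stream does not implement this interface, FSDataInputStream falls back to ByteBufferUtil.fallbackRead, so the same loop works against any FileSystem; per the javadoc above, the stream's close method may warn about buffers that were never released.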

View File

@ -0,0 +1,34 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* Options that can be used when reading from a FileSystem.
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
public enum ReadOption {
/**
* Skip checksums when reading. This option may be useful when reading a file
* format that has built-in checksums, or for testing purposes.
*/
SKIP_CHECKSUMS,
}

View File

@ -0,0 +1,36 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.IOException;
public class ZeroCopyUnavailableException extends IOException {
private static final long serialVersionUID = 0L;
public ZeroCopyUnavailableException(String message) {
super(message);
}
public ZeroCopyUnavailableException(String message, Exception e) {
super(message, e);
}
public ZeroCopyUnavailableException(Exception e) {
super(e);
}
}

View File

@ -0,0 +1,48 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@InterfaceAudience.Public
@InterfaceStability.Stable
public interface ByteBufferPool {
/**
* Get a new ByteBuffer. The pool can provide this by removing a
* buffer from its internal cache, or by allocating a new buffer.
*
* @param direct Whether the buffer should be direct.
* @param length The minimum length the buffer will have.
* @return A new ByteBuffer. The buffer will be direct if and only if
* {@code direct} is true. Its capacity can be less than what was
* requested, but must be at least 1 byte.
*/
ByteBuffer getBuffer(boolean direct, int length);
/**
* Release a buffer back to the pool.
* The pool may choose to put this buffer into its cache.
*
* @param buffer a direct bytebuffer
*/
void putBuffer(ByteBuffer buffer);
}
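A minimal, non-caching implementation that satisfies this contract might look like the sketch below; it is illustrative only, and the ElasticByteBufferPool added in this same change is the pooling implementation intended for real use.

import java.nio.ByteBuffer;
import org.apache.hadoop.io.ByteBufferPool;

// Simplest conforming pool: allocate on demand, discard on return.
public class NonCachingByteBufferPool implements ByteBufferPool {
  @Override
  public ByteBuffer getBuffer(boolean direct, int length) {
    return direct ? ByteBuffer.allocateDirect(length)
                  : ByteBuffer.allocate(length);
  }

  @Override
  public void putBuffer(ByteBuffer buffer) {
    // Nothing to cache; the buffer is simply left for the garbage collector.
  }
}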

View File

@ -0,0 +1,118 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.io;
import com.google.common.collect.ComparisonChain;
import org.apache.commons.lang.builder.HashCodeBuilder;
import java.nio.ByteBuffer;
import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* This is a simple ByteBufferPool which just creates ByteBuffers as needed.
* It also caches ByteBuffers after they're released. It will always return
* the smallest cached buffer with at least the capacity you request.
* No attempt is made to limit the maximum cache size.
*/
@InterfaceAudience.Public
@InterfaceStability.Stable
public final class ElasticByteBufferPool implements ByteBufferPool {
private static final class Key implements Comparable<Key> {
private final int capacity;
private final long insertionTime;
Key(int capacity, long insertionTime) {
this.capacity = capacity;
this.insertionTime = insertionTime;
}
@Override
public int compareTo(Key other) {
return ComparisonChain.start().
compare(capacity, other.capacity).
compare(insertionTime, other.insertionTime).
result();
}
@Override
public boolean equals(Object rhs) {
if (rhs == null) {
return false;
}
try {
Key o = (Key)rhs;
return (compareTo(o) == 0);
} catch (ClassCastException e) {
return false;
}
}
@Override
public int hashCode() {
return new HashCodeBuilder().
append(capacity).
append(insertionTime).
toHashCode();
}
}
private final TreeMap<Key, ByteBuffer> buffers =
new TreeMap<Key, ByteBuffer>();
private final TreeMap<Key, ByteBuffer> directBuffers =
new TreeMap<Key, ByteBuffer>();
private final TreeMap<Key, ByteBuffer> getBufferTree(boolean direct) {
return direct ? directBuffers : buffers;
}
@Override
public synchronized ByteBuffer getBuffer(boolean direct, int length) {
TreeMap<Key, ByteBuffer> tree = getBufferTree(direct);
Map.Entry<Key, ByteBuffer> entry =
tree.ceilingEntry(new Key(length, 0));
if (entry == null) {
return direct ? ByteBuffer.allocateDirect(length) :
ByteBuffer.allocate(length);
}
tree.remove(entry.getKey());
return entry.getValue();
}
@Override
public synchronized void putBuffer(ByteBuffer buffer) {
TreeMap<Key, ByteBuffer> tree = getBufferTree(buffer.isDirect());
while (true) {
Key key = new Key(buffer.capacity(), System.nanoTime());
if (!tree.containsKey(key)) {
tree.put(key, buffer);
return;
}
// Buffers are indexed by (capacity, time).
// If our key is not unique on the first try, we try again, since the
// time will be different. Since we use nanoseconds, it's pretty
// unlikely that we'll loop even once, unless the system clock has a
// poor granularity.
}
}
}
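A small, hypothetical round trip showing the reuse behaviour described in the class comment: a returned buffer is cached and handed out again for the next request it can satisfy.

import java.nio.ByteBuffer;
import org.apache.hadoop.io.ElasticByteBufferPool;

public class ElasticPoolExample {
  public static void main(String[] args) {
    ElasticByteBufferPool pool = new ElasticByteBufferPool();
    ByteBuffer first = pool.getBuffer(false, 4096);   // freshly allocated
    pool.putBuffer(first);                            // cached on return
    // The pool returns the smallest cached buffer with sufficient capacity,
    // so this 1 KB request is served by the cached 4 KB buffer.
    ByteBuffer second = pool.getBuffer(false, 1024);
    System.out.println(first == second);              // prints true
  }
}

Note that putBuffer does not clear the buffer, so a caller should clear() a reused buffer before writing into it.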

View File

@ -0,0 +1,197 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import com.google.common.base.Preconditions;
/**
* The IdentityHashStore stores (key, value) mappings in an array.
* It is similar to java.util.Hashtable, but much more lightweight.
* Neither inserting nor removing an element ever leads to any garbage
* getting created (assuming the array doesn't need to be enlarged).
*
* Unlike Hashtable, it compares keys using
* {@link System#identityHashCode(Object)} and the identity operator.
* This is useful for types like ByteBuffer which have expensive hashCode
* and equals operators.
*
* We use linear probing to resolve collisions. This avoids the overhead of
* linked-list data structures. It also means that it is
* expensive to attempt to remove an element that isn't there, since we
* have to look at the entire array to be sure that it doesn't exist.
*
* @param <K> The key type to use.
* @param <V> The value type to use.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
@SuppressWarnings("unchecked")
public final class IdentityHashStore<K, V> {
/**
* Even elements are keys; odd elements are values.
* The array has 4 * capacity slots: two slots per entry at a 0.50 load factor.
*/
private Object buffer[];
private int numInserted = 0;
private int capacity;
/**
* The default maxCapacity value to use.
*/
private static final int DEFAULT_MAX_CAPACITY = 2;
public IdentityHashStore(int capacity) {
Preconditions.checkArgument(capacity >= 0);
if (capacity == 0) {
this.capacity = 0;
this.buffer = null;
} else {
// Round the capacity we need up to a power of 2.
realloc((int)Math.pow(2,
Math.ceil(Math.log(capacity) / Math.log(2))));
}
}
private void realloc(int newCapacity) {
Preconditions.checkArgument(newCapacity > 0);
Object prevBuffer[] = buffer;
this.capacity = newCapacity;
// Each element takes two array slots -- one for the key,
// and another for the value. We also want a load factor
// of 0.50. Combine those together and you get 4 * newCapacity.
this.buffer = new Object[4 * newCapacity];
this.numInserted = 0;
if (prevBuffer != null) {
for (int i = 0; i < prevBuffer.length; i += 2) {
if (prevBuffer[i] != null) {
putInternal(prevBuffer[i], prevBuffer[i + 1]);
}
}
}
}
private void putInternal(Object k, Object v) {
int hash = System.identityHashCode(k);
final int numEntries = buffer.length / 2;
int index = hash % numEntries;
while (true) {
if (buffer[2 * index] == null) {
buffer[2 * index] = k;
buffer[1 + (2 * index)] = v;
numInserted++;
return;
}
index = (index + 1) % numEntries;
}
}
/**
* Add a new (key, value) mapping.
*
* Inserting a new (key, value) never overwrites a previous one.
* In other words, you can insert the same key multiple times and it will
* lead to multiple entries.
*/
public void put(K k, V v) {
Preconditions.checkNotNull(k);
if (buffer == null) {
realloc(DEFAULT_MAX_CAPACITY);
} else if (numInserted + 1 > capacity) {
realloc(capacity * 2);
}
putInternal(k, v);
}
private int getElementIndex(K k) {
if (buffer == null) {
return -1;
}
final int numEntries = buffer.length / 2;
int hash = System.identityHashCode(k);
int index = hash % numEntries;
int firstIndex = index;
do {
if (buffer[2 * index] == k) {
return index;
}
index = (index + 1) % numEntries;
} while (index != firstIndex);
return -1;
}
/**
* Retrieve a value associated with a given key.
*/
public V get(K k) {
int index = getElementIndex(k);
if (index < 0) {
return null;
}
return (V)buffer[1 + (2 * index)];
}
/**
* Retrieve a value associated with a given key, and delete the
* relevant entry.
*/
public V remove(K k) {
int index = getElementIndex(k);
if (index < 0) {
return null;
}
V val = (V)buffer[1 + (2 * index)];
buffer[2 * index] = null;
buffer[1 + (2 * index)] = null;
numInserted--;
return val;
}
public boolean isEmpty() {
return numInserted == 0;
}
public int numElements() {
return numInserted;
}
public int capacity() {
return capacity;
}
public interface Visitor<K, V> {
void accept(K k, V v);
}
/**
* Visit all key, value pairs in the IdentityHashStore.
*/
public void visitAll(Visitor<K, V> visitor) {
int length = buffer == null ? 0 : buffer.length;
for (int i = 0; i < length; i += 2) {
if (buffer[i] != null) {
visitor.accept((K)buffer[i], (V)buffer[i + 1]);
}
}
}
}
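A short, hypothetical demonstration of the identity-based lookup semantics described in the class comment; IdentityHashStore is a private utility, so this is for explanation only.

import org.apache.hadoop.util.IdentityHashStore;

public class IdentityHashStoreExample {
  public static void main(String[] args) {
    IdentityHashStore<String, Integer> store =
        new IdentityHashStore<String, Integer>(1);
    String key = new String("buffer");
    String equalButDistinct = new String("buffer");
    store.put(key, 1);
    // Lookups use reference identity, not equals(): an equal-but-distinct
    // key does not match, while the original reference does.
    System.out.println(store.get(equalButDistinct));  // prints null
    System.out.println(store.get(key));               // prints 1
    System.out.println(store.remove(key));            // prints 1
    System.out.println(store.isEmpty());              // prints true
  }
}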

View File

@ -154,7 +154,7 @@ private static String getTaskName(long id, String name) {
* @param stream the stream to print to
* @param title a string title for the stack trace
*/
public static void printThreadInfo(PrintWriter stream,
public synchronized static void printThreadInfo(PrintWriter stream,
String title) {
final int STACK_DEPTH = 20;
boolean contention = threadBean.isThreadContentionMonitoringEnabled();

View File

@ -0,0 +1,159 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.util;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import junit.framework.Assert;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.IdentityHashStore;
import org.apache.hadoop.util.IdentityHashStore.Visitor;
import org.junit.Test;
public class TestIdentityHashStore {
private static final Log LOG = LogFactory.getLog(TestIdentityHashStore.class.getName());
private static class Key {
private final String name;
Key(String name) {
this.name = name;
}
@Override
public int hashCode() {
throw new RuntimeException("should not be used!");
}
@Override
public boolean equals(Object o) {
if (!(o instanceof Key)) {
return false;
}
Key other = (Key)o;
return name.equals(other.name);
}
}
@Test(timeout=60000)
public void testStartingWithZeroCapacity() {
IdentityHashStore<Key, Integer> store =
new IdentityHashStore<Key, Integer>(0);
store.visitAll(new Visitor<Key, Integer>() {
@Override
public void accept(Key k, Integer v) {
Assert.fail("found key " + k + " in empty IdentityHashStore.");
}
});
Assert.assertTrue(store.isEmpty());
final Key key1 = new Key("key1");
Integer value1 = new Integer(100);
store.put(key1, value1);
Assert.assertTrue(!store.isEmpty());
Assert.assertEquals(value1, store.get(key1));
store.visitAll(new Visitor<Key, Integer>() {
@Override
public void accept(Key k, Integer v) {
Assert.assertEquals(key1, k);
}
});
Assert.assertEquals(value1, store.remove(key1));
Assert.assertTrue(store.isEmpty());
}
@Test(timeout=60000)
public void testDuplicateInserts() {
IdentityHashStore<Key, Integer> store =
new IdentityHashStore<Key, Integer>(4);
store.visitAll(new Visitor<Key, Integer>() {
@Override
public void accept(Key k, Integer v) {
Assert.fail("found key " + k + " in empty IdentityHashStore.");
}
});
Assert.assertTrue(store.isEmpty());
Key key1 = new Key("key1");
Integer value1 = new Integer(100);
Integer value2 = new Integer(200);
Integer value3 = new Integer(300);
store.put(key1, value1);
Key equalToKey1 = new Key("key1");
// IdentityHashStore compares by object equality, not equals()
Assert.assertNull(store.get(equalToKey1));
Assert.assertTrue(!store.isEmpty());
Assert.assertEquals(value1, store.get(key1));
store.put(key1, value2);
store.put(key1, value3);
final List<Integer> allValues = new LinkedList<Integer>();
store.visitAll(new Visitor<Key, Integer>() {
@Override
public void accept(Key k, Integer v) {
allValues.add(v);
}
});
Assert.assertEquals(3, allValues.size());
for (int i = 0; i < 3; i++) {
Integer value = store.remove(key1);
Assert.assertTrue(allValues.remove(value));
}
Assert.assertNull(store.remove(key1));
Assert.assertTrue(store.isEmpty());
}
@Test(timeout=60000)
public void testAdditionsAndRemovals() {
IdentityHashStore<Key, Integer> store =
new IdentityHashStore<Key, Integer>(0);
final int NUM_KEYS = 1000;
LOG.debug("generating " + NUM_KEYS + " keys");
final List<Key> keys = new ArrayList<Key>(NUM_KEYS);
for (int i = 0; i < NUM_KEYS; i++) {
keys.add(new Key("key " + i));
}
for (int i = 0; i < NUM_KEYS; i++) {
store.put(keys.get(i), i);
}
store.visitAll(new Visitor<Key, Integer>() {
@Override
public void accept(Key k, Integer v) {
Assert.assertTrue(keys.contains(k));
}
});
for (int i = 0; i < NUM_KEYS; i++) {
Assert.assertEquals(Integer.valueOf(i),
store.remove(keys.get(i)));
}
store.visitAll(new Visitor<Key, Integer>() {
@Override
public void accept(Key k, Integer v) {
Assert.fail("expected all entries to be removed");
}
});
Assert.assertTrue("expected the store to be " +
"empty, but found " + store.numElements() + " elements.",
store.isEmpty());
Assert.assertEquals(1024, store.capacity());
}
}

View File

@ -19,6 +19,7 @@
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.util.concurrent.ExecutionException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -27,59 +28,81 @@
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.security.UserGroupInformation;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;
/**
* A cache that saves DFSClient objects for different users.
*/
public class DFSClientCache {
static final Log LOG = LogFactory.getLog(DFSClientCache.class);
private final LruCache<String, DFSClient> lruTable;
class DFSClientCache {
private static final Log LOG = LogFactory.getLog(DFSClientCache.class);
/**
* Cache that maps User id to corresponding DFSClient.
*/
@VisibleForTesting
final LoadingCache<String, DFSClient> clientCache;
final static int DEFAULT_DFS_CLIENT_CACHE_SIZE = 256;
private final Configuration config;
public DFSClientCache(Configuration config) {
// By default, keep 256 DFSClient instance for 256 active users
this(config, 256);
DFSClientCache(Configuration config) {
this(config, DEFAULT_DFS_CLIENT_CACHE_SIZE);
}
public DFSClientCache(Configuration config, int size) {
lruTable = new LruCache<String, DFSClient>(size);
DFSClientCache(Configuration config, int clientCache) {
this.config = config;
this.clientCache = CacheBuilder.newBuilder()
.maximumSize(clientCache)
.removalListener(clientRemovalListener())
.build(clientLoader());
}
public void put(String uname, DFSClient client) {
lruTable.put(uname, client);
private CacheLoader<String, DFSClient> clientLoader() {
return new CacheLoader<String, DFSClient>() {
@Override
public DFSClient load(String userName) throws Exception {
UserGroupInformation ugi = UserGroupInformation
.createRemoteUser(userName);
// Guava requires that the CacheLoader never return null.
return ugi.doAs(new PrivilegedExceptionAction<DFSClient>() {
public DFSClient run() throws IOException {
return new DFSClient(NameNode.getAddress(config), config);
}
});
}
};
}
synchronized public DFSClient get(String uname) {
DFSClient client = lruTable.get(uname);
if (client != null) {
return client;
}
// Not in table, create one.
try {
UserGroupInformation ugi = UserGroupInformation.createRemoteUser(uname);
client = ugi.doAs(new PrivilegedExceptionAction<DFSClient>() {
public DFSClient run() throws IOException {
return new DFSClient(NameNode.getAddress(config), config);
private RemovalListener<String, DFSClient> clientRemovalListener() {
return new RemovalListener<String, DFSClient>() {
@Override
public void onRemoval(RemovalNotification<String, DFSClient> notification) {
DFSClient client = notification.getValue();
try {
client.close();
} catch (IOException e) {
LOG.warn(String.format(
"IOException when closing the DFSClient(%s), cause: %s", client,
e));
}
});
} catch (IOException e) {
LOG.error("Create DFSClient failed for user:" + uname);
e.printStackTrace();
}
};
}
} catch (InterruptedException e) {
e.printStackTrace();
DFSClient get(String userName) {
DFSClient client = null;
try {
client = clientCache.get(userName);
} catch (ExecutionException e) {
LOG.error("Failed to create DFSClient for user:" + userName + " Cause:"
+ e);
}
// Add new entry
lruTable.put(uname, client);
return client;
}
public int usedSize() {
return lruTable.usedSize();
}
public boolean containsKey(String key) {
return lruTable.containsKey(key);
}
}
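The rewrite above replaces the hand-rolled LRU table with Guava's LoadingCache. As background, a self-contained sketch of that pattern, with a toy value type standing in for DFSClient and an eviction hook standing in for the close() call, might look like this (class name and values are illustrative assumptions).

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.RemovalListener;
import com.google.common.cache.RemovalNotification;

public class LoadingCacheExample {
  public static void main(String[] args) throws Exception {
    LoadingCache<String, StringBuilder> cache = CacheBuilder.newBuilder()
        .maximumSize(1)                        // keep at most one entry
        .removalListener(new RemovalListener<String, StringBuilder>() {
          @Override
          public void onRemoval(
              RemovalNotification<String, StringBuilder> notification) {
            // DFSClientCache closes the evicted DFSClient at this point.
            System.out.println("evicted " + notification.getKey());
          }
        })
        .build(new CacheLoader<String, StringBuilder>() {
          @Override
          public StringBuilder load(String user) {
            return new StringBuilder(user);    // must never return null
          }
        });
    cache.get("alice");                        // loaded on demand
    cache.get("bob");                          // exceeds maximumSize; "alice" is evicted
  }
}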

View File

@ -1,60 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.nfs.nfs3;
import java.util.LinkedHashMap;
import java.util.Map;
/**
* A thread-safe LRU table.
*/
public class LruCache<K, V> {
private final int maxSize;
private final LinkedHashMap<K, V> map;
private static final float hashTableLoadFactor = 0.75f;
public LruCache(int maxSize) {
this.maxSize = maxSize;
int hashTableCapacity = (int) Math.ceil(maxSize / hashTableLoadFactor) + 1;
map = new LinkedHashMap<K, V>(hashTableCapacity, hashTableLoadFactor, true) {
private static final long serialVersionUID = 1L;
@Override
protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
return size() > LruCache.this.maxSize;
}
};
}
// The found entry becomes the most recently used.
public synchronized V get(K key) {
return map.get(key);
}
public synchronized void put(K key, V value) {
map.put(key, value);
}
public synchronized int usedSize() {
return map.size();
}
public synchronized boolean containsKey(K key) {
return map.containsKey(key);
}
}

View File

@ -422,7 +422,7 @@ private void processOverWrite(DFSClient dfsClient, WRITE3Request request,
LOG.warn("Haven't noticed any partial overwrite for a sequential file"
+ " write requests. Treat it as a real random write, no support.");
response = new WRITE3Response(Nfs3Status.NFS3ERR_INVAL, wccData, 0,
WriteStableHow.UNSTABLE, 0);
WriteStableHow.UNSTABLE, Nfs3Constant.WRITE_COMMIT_VERF);
} else {
if (LOG.isDebugEnabled()) {
LOG.debug("Process perfectOverWrite");
@ -559,7 +559,7 @@ private WRITE3Response processPerfectOverWrite(DFSClient dfsClient,
if (comparator.compare(readbuffer, 0, readCount, data, 0, count) != 0) {
LOG.info("Perfect overwrite has different content");
response = new WRITE3Response(Nfs3Status.NFS3ERR_INVAL, wccData, 0,
stableHow, 0);
stableHow, Nfs3Constant.WRITE_COMMIT_VERF);
} else {
LOG.info("Perfect overwrite has same content,"
+ " updating the mtime, then return success");
@ -571,12 +571,12 @@ private WRITE3Response processPerfectOverWrite(DFSClient dfsClient,
LOG.info("Got error when processing perfect overwrite, path=" + path
+ " error:" + e);
return new WRITE3Response(Nfs3Status.NFS3ERR_IO, wccData, 0, stableHow,
0);
Nfs3Constant.WRITE_COMMIT_VERF);
}
wccData.setPostOpAttr(postOpAttr);
response = new WRITE3Response(Nfs3Status.NFS3_OK, wccData, count,
stableHow, 0);
stableHow, Nfs3Constant.WRITE_COMMIT_VERF);
}
return response;
}

View File

@ -17,41 +17,44 @@
*/
package org.apache.hadoop.hdfs.nfs.nfs3;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DFSClient;
import org.junit.Test;
import org.mockito.Mockito;
public class TestDFSClientCache {
@Test
public void testLruTable() throws IOException {
DFSClientCache cache = new DFSClientCache(new Configuration(), 3);
DFSClient client = Mockito.mock(DFSClient.class);
cache.put("a", client);
assertTrue(cache.containsKey("a"));
public void testEviction() throws IOException {
Configuration conf = new Configuration();
conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "hdfs://localhost");
cache.put("b", client);
cache.put("c", client);
cache.put("d", client);
assertTrue(cache.usedSize() == 3);
assertFalse(cache.containsKey("a"));
// Only one entry will be in the cache
final int MAX_CACHE_SIZE = 2;
// Cache should have d,c,b in LRU order
assertTrue(cache.containsKey("b"));
// Do a lookup to make b the most recently used
assertTrue(cache.get("b") != null);
DFSClientCache cache = new DFSClientCache(conf, MAX_CACHE_SIZE);
cache.put("e", client);
assertTrue(cache.usedSize() == 3);
// c should be replaced with e, and cache has e,b,d
assertFalse(cache.containsKey("c"));
assertTrue(cache.containsKey("e"));
assertTrue(cache.containsKey("b"));
assertTrue(cache.containsKey("d"));
DFSClient c1 = cache.get("test1");
assertTrue(cache.get("test1").toString().contains("ugi=test1"));
assertEquals(c1, cache.get("test1"));
assertFalse(isDfsClientClose(c1));
cache.get("test2");
assertTrue(isDfsClientClose(c1));
assertEquals(MAX_CACHE_SIZE - 1, cache.clientCache.size());
}
private static boolean isDfsClientClose(DFSClient c) {
try {
c.exists("");
} catch (IOException e) {
return e.getMessage().equals("Filesystem closed");
}
return false;
}
}

View File

@ -250,6 +250,9 @@ Release 2.3.0 - UNRELEASED
HDFS-5122. Support failover and retry in WebHdfsFileSystem for NN HA.
(Haohui Mai via jing9)
HDFS-4953. Enable HDFS local reads via mmap.
(Colin Patrick McCabe via wang).
IMPROVEMENTS
HDFS-4657. Limit the number of blocks logged by the NN after a block
@ -291,6 +294,12 @@ Release 2.3.0 - UNRELEASED
HDFS-5240. Separate formatting from logging in the audit logger API (daryn)
HDFS-5191. Revisit zero-copy API in FSDataInputStream to make it more
intuitive. (Contributed by Colin Patrick McCabe)
HDFS-5260. Merge zero-copy memory-mapped HDFS client reads to trunk and
branch-2. (cnauroth)
OPTIMIZATIONS
HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn)
@ -316,6 +325,8 @@ Release 2.3.0 - UNRELEASED
HDFS-5031. BlockScanner scans the block multiple times. (Vinay via Arpit
Agarwal)
HDFS-5266. ElasticByteBufferPool#Key does not implement equals. (cnauroth)
Release 2.2.0 - UNRELEASED
INCOMPATIBLE CHANGES
@ -336,11 +347,14 @@ Release 2.1.2 - UNRELEASED
IMPROVEMENTS
OPTIMIZATIONS
HDFS-5246. Make Hadoop nfs server port and mount daemon port
configurable. (Jinghui Wang via brandonli)
HDFS-5256. Use guava LoadingCache to implement DFSClientCache. (Haohui Mai
via brandonli)
OPTIMIZATIONS
BUG FIXES
HDFS-5139. Remove redundant -R option from setrep.
@ -354,6 +368,11 @@ Release 2.1.2 - UNRELEASED
HDFS-5186. TestFileJournalManager fails on Windows due to file handle leaks.
(Chuan Liu via cnauroth)
HDFS-5268. NFS write commit verifier is not set in a few places (brandonli)
HDFS-5265. Namenode fails to start when dfs.https.port is unspecified.
(Haohui Mai via jing9)
Release 2.1.1-beta - 2013-09-23
INCOMPATIBLE CHANGES

View File

@ -330,4 +330,14 @@
<Method name="setDirInternal" />
<Bug pattern="DM_STRING_CTOR" />
</Match>
<Match>
<Class name="org.apache.hadoop.hdfs.client.ClientMmapManager" />
<Method name="create" />
<Bug pattern="UL_UNRELEASED_LOCK_EXCEPTION_PATH" />
</Match>
<Match>
<Class name="org.apache.hadoop.hdfs.client.ClientMmapManager" />
<Method name="create" />
<Bug pattern="UL_UNRELEASED_LOCK" />
</Match>
</FindBugsFilter>

View File

@ -142,6 +142,7 @@ target_link_libraries(test_native_mini_dfs
)
add_executable(test_libhdfs_threaded
main/native/libhdfs/expect.c
main/native/libhdfs/test_libhdfs_threaded.c
)
target_link_libraries(test_libhdfs_threaded
@ -150,6 +151,16 @@ target_link_libraries(test_libhdfs_threaded
pthread
)
add_executable(test_libhdfs_zerocopy
main/native/libhdfs/expect.c
main/native/libhdfs/test/test_libhdfs_zerocopy.c
)
target_link_libraries(test_libhdfs_zerocopy
hdfs
native_mini_dfs
pthread
)
IF(REQUIRE_LIBWEBHDFS)
add_subdirectory(contrib/libwebhdfs)
ENDIF(REQUIRE_LIBWEBHDFS)

View File

@ -20,12 +20,16 @@
import java.io.IOException;
import org.apache.hadoop.fs.ByteBufferReadable;
import org.apache.hadoop.hdfs.client.ClientMmap;
import org.apache.hadoop.hdfs.client.ClientMmapManager;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
/**
* A BlockReader is responsible for reading a single block
* from a single datanode.
*/
public interface BlockReader extends ByteBufferReadable {
/* same interface as inputStream java.io.InputStream#read()
* used by DFSInputStream#read()
@ -81,4 +85,14 @@ public interface BlockReader extends ByteBufferReadable {
* All short-circuit reads are also local.
*/
boolean isShortCircuit();
/**
* Get a ClientMmap object for this BlockReader.
*
* @param curBlock The current block.
* @return The ClientMmap object, or null if mmap is not
* supported.
*/
ClientMmap getClientMmap(LocatedBlock curBlock,
ClientMmapManager mmapManager);
}

View File

@ -22,11 +22,15 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.conf.Configuration;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hdfs.client.ClientMmap;
import org.apache.hadoop.hdfs.client.ClientMmapManager;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
import org.apache.hadoop.hdfs.util.DirectBufferPool;
import org.apache.hadoop.io.IOUtils;
@ -87,6 +91,8 @@ class BlockReaderLocal implements BlockReader {
private final ExtendedBlock block;
private final FileInputStreamCache fisCache;
private ClientMmap clientMmap;
private boolean mmapDisabled;
private static int getSlowReadBufferNumChunks(int bufSize,
int bytesPerChecksum) {
@ -113,6 +119,8 @@ public BlockReaderLocal(DFSClient.Conf conf, String filename,
this.datanodeID = datanodeID;
this.block = block;
this.fisCache = fisCache;
this.clientMmap = null;
this.mmapDisabled = false;
// read and handle the common header here. For now just a version
checksumIn.getChannel().position(0);
@ -487,6 +495,10 @@ public synchronized long skip(long n) throws IOException {
@Override
public synchronized void close() throws IOException {
if (clientMmap != null) {
clientMmap.unref();
clientMmap = null;
}
if (fisCache != null) {
if (LOG.isDebugEnabled()) {
LOG.debug("putting FileInputStream for " + filename +
@ -534,4 +546,30 @@ public boolean isLocal() {
public boolean isShortCircuit() {
return true;
}
@Override
public ClientMmap getClientMmap(LocatedBlock curBlock,
ClientMmapManager mmapManager) {
if (clientMmap == null) {
if (mmapDisabled) {
return null;
}
try {
clientMmap = mmapManager.fetch(datanodeID, block, dataIn);
if (clientMmap == null) {
mmapDisabled = true;
return null;
}
} catch (InterruptedException e) {
LOG.error("Interrupted while setting up mmap for " + filename, e);
Thread.currentThread().interrupt();
return null;
} catch (IOException e) {
LOG.error("unable to set up mmap for " + filename, e);
mmapDisabled = true;
return null;
}
}
return clientMmap;
}
}

View File

@ -28,6 +28,8 @@
import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.hadoop.hdfs.client.ClientMmap;
import org.apache.hadoop.hdfs.client.ClientMmapManager;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@ -35,6 +37,7 @@
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
import org.apache.hadoop.hdfs.util.DirectBufferPool;
@ -701,4 +704,10 @@ public boolean isLocal() {
public boolean isShortCircuit() {
return true;
}
@Override
public ClientMmap getClientMmap(LocatedBlock curBlock,
ClientMmapManager mmapManager) {
return null;
}
}

View File

@ -104,6 +104,7 @@
import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.fs.VolumeId;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.client.ClientMmapManager;
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
@ -206,7 +207,43 @@ public class DFSClient implements java.io.Closeable {
private boolean shouldUseLegacyBlockReaderLocal;
private final CachingStrategy defaultReadCachingStrategy;
private final CachingStrategy defaultWriteCachingStrategy;
private ClientMmapManager mmapManager;
private static final ClientMmapManagerFactory MMAP_MANAGER_FACTORY =
new ClientMmapManagerFactory();
private static final class ClientMmapManagerFactory {
private ClientMmapManager mmapManager = null;
/**
* Tracks the number of users of mmapManager.
*/
private int refcnt = 0;
synchronized ClientMmapManager get(Configuration conf) {
if (refcnt++ == 0) {
mmapManager = ClientMmapManager.fromConf(conf);
} else {
String mismatches = mmapManager.verifyConfigurationMatches(conf);
if (!mismatches.isEmpty()) {
LOG.warn("The ClientMmapManager settings you specified " +
"have been ignored because another thread created the " +
"ClientMmapManager first. " + mismatches);
}
}
return mmapManager;
}
synchronized void unref(ClientMmapManager mmapManager) {
if (this.mmapManager != mmapManager) {
throw new IllegalArgumentException();
}
if (--refcnt == 0) {
IOUtils.cleanup(LOG, mmapManager);
mmapManager = null;
}
}
}
/**
* DFSClient configuration
*/
@ -534,6 +571,7 @@ public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode,
new CachingStrategy(readDropBehind, readahead);
this.defaultWriteCachingStrategy =
new CachingStrategy(writeDropBehind, readahead);
this.mmapManager = MMAP_MANAGER_FACTORY.get(conf);
}
/**
@ -738,9 +776,12 @@ void closeConnectionToNamenode() {
/** Abort and release resources held. Ignore all errors. */
void abort() {
if (mmapManager != null) {
MMAP_MANAGER_FACTORY.unref(mmapManager);
mmapManager = null;
}
clientRunning = false;
closeAllFilesBeingWritten(true);
try {
// remove reference to this client and stop the renewer,
// if there is no more clients under the renewer.
@ -784,6 +825,10 @@ private void closeAllFilesBeingWritten(final boolean abort) {
*/
@Override
public synchronized void close() throws IOException {
if (mmapManager != null) {
MMAP_MANAGER_FACTORY.unref(mmapManager);
mmapManager = null;
}
if(clientRunning) {
closeAllFilesBeingWritten(false);
clientRunning = false;
@ -2496,4 +2541,9 @@ public CachingStrategy getDefaultReadCachingStrategy() {
public CachingStrategy getDefaultWriteCachingStrategy() {
return defaultWriteCachingStrategy;
}
@VisibleForTesting
public ClientMmapManager getMmapManager() {
return mmapManager;
}
}

View File

@ -376,6 +376,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_DEFAULT = 1024 * 1024;
public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = "dfs.client.domain.socket.data.traffic";
public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = false;
public static final String DFS_CLIENT_MMAP_CACHE_SIZE = "dfs.client.mmap.cache.size";
public static final int DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT = 1024;
public static final String DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS = "dfs.client.mmap.cache.timeout.ms";
public static final long DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT = 15 * 60 * 1000;
public static final String DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT = "dfs.client.mmap.cache.timeout.ms";
public static final int DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT_DEFAULT = 4;
// property for fsimage compression
public static final String DFS_IMAGE_COMPRESS_KEY = "dfs.image.compress";
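A hedged example of tuning the new mmap cache keys from client code; the chosen values are arbitrary illustrations, not recommendations.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class MmapCacheConfigExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Cache up to 256 client mmaps instead of the default 1024.
    conf.setInt(DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE, 256);
    // Expire unused mmaps after 5 minutes instead of the default 15.
    conf.setLong(DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS, 5 * 60 * 1000L);
    System.out.println(conf.get(DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE));
  }
}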

View File

@ -24,6 +24,7 @@
import java.nio.ByteBuffer;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@ -36,11 +37,15 @@
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.ByteBufferReadable;
import org.apache.hadoop.fs.ByteBufferUtil;
import org.apache.hadoop.fs.CanSetDropBehind;
import org.apache.hadoop.fs.CanSetReadahead;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.HasEnhancedByteBufferAccess;
import org.apache.hadoop.fs.ReadOption;
import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.hdfs.client.ClientMmap;
import org.apache.hadoop.hdfs.net.DomainPeer;
import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.net.TcpPeerServer;
@ -54,12 +59,14 @@
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException;
import org.apache.hadoop.io.ByteBufferPool;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.net.unix.DomainSocket;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.IdentityHashStore;
import com.google.common.annotations.VisibleForTesting;
@ -69,7 +76,8 @@
****************************************************************/
@InterfaceAudience.Private
public class DFSInputStream extends FSInputStream
implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead {
implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead,
HasEnhancedByteBufferAccess {
@VisibleForTesting
static boolean tcpReadsDisabledForTesting = false;
private final PeerCache peerCache;
@ -87,17 +95,28 @@ public class DFSInputStream extends FSInputStream
private CachingStrategy cachingStrategy;
private final ReadStatistics readStatistics = new ReadStatistics();
/**
* Track the ByteBuffers that we have handed out to readers.
*
* The value type can be either ByteBufferPool or ClientMmap, depending on
* whether this is a memory-mapped buffer or not.
*/
private final IdentityHashStore<ByteBuffer, Object>
extendedReadBuffers = new IdentityHashStore<ByteBuffer, Object>(0);
public static class ReadStatistics {
public ReadStatistics() {
this.totalBytesRead = 0;
this.totalLocalBytesRead = 0;
this.totalShortCircuitBytesRead = 0;
this.totalZeroCopyBytesRead = 0;
}
public ReadStatistics(ReadStatistics rhs) {
this.totalBytesRead = rhs.getTotalBytesRead();
this.totalLocalBytesRead = rhs.getTotalLocalBytesRead();
this.totalShortCircuitBytesRead = rhs.getTotalShortCircuitBytesRead();
this.totalZeroCopyBytesRead = rhs.getTotalZeroCopyBytesRead();
}
/**
@ -123,6 +142,13 @@ public long getTotalLocalBytesRead() {
public long getTotalShortCircuitBytesRead() {
return totalShortCircuitBytesRead;
}
/**
* @return The total number of zero-copy bytes read.
*/
public long getTotalZeroCopyBytesRead() {
return totalZeroCopyBytesRead;
}
/**
* @return The total number of bytes read which were not local.
@ -145,12 +171,21 @@ void addShortCircuitBytes(long amt) {
this.totalLocalBytesRead += amt;
this.totalShortCircuitBytesRead += amt;
}
void addZeroCopyBytes(long amt) {
this.totalBytesRead += amt;
this.totalLocalBytesRead += amt;
this.totalShortCircuitBytesRead += amt;
this.totalZeroCopyBytesRead += amt;
}
private long totalBytesRead;
private long totalLocalBytesRead;
private long totalShortCircuitBytesRead;
private long totalZeroCopyBytesRead;
}
private final FileInputStreamCache fileInputStreamCache;
@ -587,6 +622,20 @@ public synchronized void close() throws IOException {
}
dfsClient.checkOpen();
if (!extendedReadBuffers.isEmpty()) {
final StringBuilder builder = new StringBuilder();
extendedReadBuffers.visitAll(new IdentityHashStore.Visitor<ByteBuffer, Object>() {
private String prefix = "";
@Override
public void accept(ByteBuffer k, Object v) {
builder.append(prefix).append(k);
prefix = ", ";
}
});
DFSClient.LOG.warn("closing file " + src + ", but there are still " +
"unreleased ByteBuffers allocated by read(). " +
"Please release " + builder.toString() + ".");
}
if (blockReader != null) {
blockReader.close();
blockReader = null;
@ -1393,4 +1442,100 @@ public synchronized void setDropBehind(Boolean dropBehind)
this.cachingStrategy.setDropBehind(dropBehind);
closeCurrentBlockReader();
}
@Override
public synchronized ByteBuffer read(ByteBufferPool bufferPool,
int maxLength, EnumSet<ReadOption> opts)
throws IOException, UnsupportedOperationException {
assert(maxLength > 0);
if (((blockReader == null) || (blockEnd == -1)) &&
(pos < getFileLength())) {
/*
* If we don't have a blockReader, or the one we have has no more bytes
* left to read, we call seekToBlockSource to get a new blockReader and
* recalculate blockEnd. Note that we assume we're not at EOF here
* (we check this above).
*/
if ((!seekToBlockSource(pos)) || (blockReader == null)) {
throw new IOException("failed to allocate new BlockReader " +
"at position " + pos);
}
}
boolean canSkipChecksums = opts.contains(ReadOption.SKIP_CHECKSUMS);
if (canSkipChecksums) {
ByteBuffer buffer = tryReadZeroCopy(maxLength);
if (buffer != null) {
return buffer;
}
}
ByteBuffer buffer = ByteBufferUtil.
fallbackRead(this, bufferPool, maxLength);
if (buffer != null) {
extendedReadBuffers.put(buffer, bufferPool);
}
return buffer;
}
private synchronized ByteBuffer tryReadZeroCopy(int maxLength)
throws IOException {
// Java ByteBuffers can't be longer than 2 GB, because they use
// 4-byte signed integers to represent capacity, etc.
// So we can't mmap the parts of the block higher than the 2 GB offset.
// FIXME: we could work around this with multiple memory maps.
// See HDFS-5101.
long blockEnd32 = Math.min(Integer.MAX_VALUE, blockEnd);
long curPos = pos;
long blockLeft = blockEnd32 - curPos + 1;
if (blockLeft <= 0) {
if (DFSClient.LOG.isDebugEnabled()) {
DFSClient.LOG.debug("unable to perform a zero-copy read from offset " +
curPos + " of " + src + "; blockLeft = " + blockLeft +
"; blockEnd32 = " + blockEnd32 + ", blockEnd = " + blockEnd +
"; maxLength = " + maxLength);
}
return null;
}
int length = Math.min((int)blockLeft, maxLength);
long blockStartInFile = currentLocatedBlock.getStartOffset();
long blockPos = curPos - blockStartInFile;
long limit = blockPos + length;
ClientMmap clientMmap =
blockReader.getClientMmap(currentLocatedBlock,
dfsClient.getMmapManager());
if (clientMmap == null) {
if (DFSClient.LOG.isDebugEnabled()) {
DFSClient.LOG.debug("unable to perform a zero-copy read from offset " +
curPos + " of " + src + "; BlockReader#getClientMmap returned " +
"null.");
}
return null;
}
seek(pos + length);
ByteBuffer buffer = clientMmap.getMappedByteBuffer().asReadOnlyBuffer();
buffer.position((int)blockPos);
buffer.limit((int)limit);
clientMmap.ref();
extendedReadBuffers.put(buffer, clientMmap);
readStatistics.addZeroCopyBytes(length);
if (DFSClient.LOG.isDebugEnabled()) {
DFSClient.LOG.debug("readZeroCopy read " + maxLength + " bytes from " +
"offset " + curPos + " via the zero-copy read path. " +
"blockEnd = " + blockEnd);
}
return buffer;
}
@Override
public synchronized void releaseBuffer(ByteBuffer buffer) {
Object val = extendedReadBuffers.remove(buffer);
if (val == null) {
throw new IllegalArgumentException("tried to release a buffer " +
"that was not created by this stream, " + buffer);
}
if (val instanceof ClientMmap) {
((ClientMmap)val).unref();
} else if (val instanceof ByteBufferPool) {
((ByteBufferPool)val).putBuffer(buffer);
}
}
}
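Applications reach the zero-copy read and releaseBuffer methods above through FSDataInputStream (the same stream type that libhdfs invokes later in this change). The following is a minimal, hypothetical sketch of driving that path from client code; the file path, pool choice, and request size are illustrative only and not part of this patch:

import java.nio.ByteBuffer;
import java.util.EnumSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.ReadOption;
import org.apache.hadoop.io.ElasticByteBufferPool;

public class ZeroCopyReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    ElasticByteBufferPool pool = new ElasticByteBufferPool();
    FSDataInputStream in = fs.open(new Path("/tmp/zero-copy-demo")); // hypothetical path
    try {
      // Request up to 4 MB. The stream may return fewer bytes; when the mmap
      // path cannot be used it falls back to a copying read through the pool.
      ByteBuffer buf = in.read(pool, 4 * 1024 * 1024,
          EnumSet.of(ReadOption.SKIP_CHECKSUMS));
      if (buf != null) {
        try {
          // ... consume buf ...
        } finally {
          in.releaseBuffer(buf); // must be released on the stream that produced it
        }
      }
    } finally {
      in.close();
    }
  }
}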


@ -27,9 +27,12 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FSInputChecker;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.client.ClientMmap;
import org.apache.hadoop.hdfs.client.ClientMmapManager;
import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil;
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
@ -485,4 +488,10 @@ public boolean isLocal() {
public boolean isShortCircuit() {
return false;
}
@Override
public ClientMmap getClientMmap(LocatedBlock curBlock,
ClientMmapManager mmapManager) {
return null;
}
}


@ -29,9 +29,12 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.client.ClientMmap;
import org.apache.hadoop.hdfs.client.ClientMmapManager;
import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil;
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketReceiver;
@ -40,7 +43,6 @@
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ClientReadStatusProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ReadOpChecksumInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
import org.apache.hadoop.hdfs.security.token.block.DataEncryptionKey;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException;
@ -451,4 +453,10 @@ public boolean isLocal() {
public boolean isShortCircuit() {
return false;
}
@Override
public ClientMmap getClientMmap(LocatedBlock curBlock,
ClientMmapManager manager) {
return null;
}
}


@ -0,0 +1,166 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.client;
import java.io.FileInputStream;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel.MapMode;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.google.common.annotations.VisibleForTesting;
/**
* A memory-mapped region used by an HDFS client.
*
* This class includes a reference count and some other information used by
* ClientMmapManager to track and cache mmaps.
*/
@InterfaceAudience.Private
public class ClientMmap {
static final Log LOG = LogFactory.getLog(ClientMmap.class);
/**
* A reference to the manager of this mmap.
*
* This is only a weak reference to help minimize the damage done by
* code which leaks references accidentally.
*/
private final WeakReference<ClientMmapManager> manager;
/**
* The actual mapped memory region.
*/
private final MappedByteBuffer map;
/**
* A reference count tracking how many threads are using this object.
*/
private final AtomicInteger refCount = new AtomicInteger(1);
/**
* Block pertaining to this mmap
*/
private final ExtendedBlock block;
/**
* The DataNode where this mmap came from.
*/
private final DatanodeID datanodeID;
/**
* The monotonic time when this mmap was last evictable.
*/
private long lastEvictableTimeNs;
public static ClientMmap load(ClientMmapManager manager, FileInputStream in,
ExtendedBlock block, DatanodeID datanodeID)
throws IOException {
MappedByteBuffer map =
in.getChannel().map(MapMode.READ_ONLY, 0,
in.getChannel().size());
return new ClientMmap(manager, map, block, datanodeID);
}
private ClientMmap(ClientMmapManager manager, MappedByteBuffer map,
ExtendedBlock block, DatanodeID datanodeID)
throws IOException {
this.manager = new WeakReference<ClientMmapManager>(manager);
this.map = map;
this.block = block;
this.datanodeID = datanodeID;
this.lastEvictableTimeNs = 0;
}
/**
* Decrement the reference count on this object.
* Should be called with the ClientMmapManager lock held.
*/
public void unref() {
int count = refCount.decrementAndGet();
if (count < 0) {
throw new IllegalArgumentException("can't decrement the " +
"reference count on this ClientMmap lower than 0.");
} else if (count == 0) {
ClientMmapManager man = manager.get();
if (man == null) {
unmap();
} else {
man.makeEvictable(this);
}
}
}
/**
* Increment the reference count on this object.
*
* @return The new reference count.
*/
public int ref() {
return refCount.getAndIncrement();
}
@VisibleForTesting
public ExtendedBlock getBlock() {
return block;
}
DatanodeID getDatanodeID() {
return datanodeID;
}
public MappedByteBuffer getMappedByteBuffer() {
return map;
}
public void setLastEvictableTimeNs(long lastEvictableTimeNs) {
this.lastEvictableTimeNs = lastEvictableTimeNs;
}
public long getLastEvictableTimeNs() {
return this.lastEvictableTimeNs;
}
/**
* Unmap the memory region.
*
* There isn't any portable way to unmap a memory region in Java.
* So we use the sun.nio method here.
* Note that unmapping a memory region could cause crashes if code
* continues to reference the unmapped memory. However, if we don't
* manually unmap the memory, we are dependent on the finalizer to
* do it, and we have no idea when the finalizer will run.
*/
void unmap() {
assert(refCount.get() == 0);
if (map instanceof sun.nio.ch.DirectBuffer) {
final sun.misc.Cleaner cleaner =
((sun.nio.ch.DirectBuffer) map).cleaner();
cleaner.clean();
}
}
}


@ -0,0 +1,482 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.client;
import java.io.Closeable;
import org.apache.hadoop.classification.InterfaceAudience;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.Map.Entry;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT_DEFAULT;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.io.IOUtils;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ComparisonChain;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* Tracks mmap instances used on an HDFS client.
*
* mmaps can be used concurrently by multiple threads at once.
* mmaps cannot be closed while they are in use.
*
* The cache is important for performance, because the first time an mmap is
* created, the page table entries (PTEs) are not yet set up.
* Even when reading data that is entirely resident in memory, reading an
* mmap the second time is faster.
*/
@InterfaceAudience.Private
public class ClientMmapManager implements Closeable {
public static final Log LOG = LogFactory.getLog(ClientMmapManager.class);
private boolean closed = false;
private final int cacheSize;
private final long timeoutNs;
private final int runsPerTimeout;
private final Lock lock = new ReentrantLock();
/**
* Maps block, datanode_id to the client mmap object.
* If the ClientMmap is in the process of being loaded,
* {@link Waitable<ClientMmap>#await()} will block.
*
* Protected by the ClientMmapManager lock.
*/
private final TreeMap<Key, Waitable<ClientMmap>> mmaps =
new TreeMap<Key, Waitable<ClientMmap>>();
/**
* Maps the last use time to the client mmap object.
* We ensure that each last use time is unique by inserting a jitter of a
* nanosecond or two if necessary.
*
* Protected by the ClientMmapManager lock.
* ClientMmap objects that are in use are never evictable.
*/
private final TreeMap<Long, ClientMmap> evictable =
new TreeMap<Long, ClientMmap>();
private final ScheduledThreadPoolExecutor executor =
new ScheduledThreadPoolExecutor(1, new ThreadFactoryBuilder().
setDaemon(true).setNameFormat("ClientMmapManager").
build());
/**
* The CacheCleaner for this ClientMmapManager. We don't create this
* and schedule it until it becomes necessary.
*/
private CacheCleaner cacheCleaner;
/**
* Factory method to create a ClientMmapManager from a Hadoop
* configuration.
*/
public static ClientMmapManager fromConf(Configuration conf) {
return new ClientMmapManager(conf.getInt(DFS_CLIENT_MMAP_CACHE_SIZE,
DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT),
conf.getLong(DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS,
DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT),
conf.getInt(DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT,
DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT_DEFAULT));
}
public ClientMmapManager(int cacheSize, long timeoutMs, int runsPerTimeout) {
this.cacheSize = cacheSize;
this.timeoutNs = timeoutMs * 1000000;
this.runsPerTimeout = runsPerTimeout;
}
long getTimeoutMs() {
return this.timeoutNs / 1000000;
}
int getRunsPerTimeout() {
return this.runsPerTimeout;
}
public String verifyConfigurationMatches(Configuration conf) {
StringBuilder bld = new StringBuilder();
int cacheSize = conf.getInt(DFS_CLIENT_MMAP_CACHE_SIZE,
DFS_CLIENT_MMAP_CACHE_SIZE_DEFAULT);
if (this.cacheSize != cacheSize) {
bld.append("You specified a cache size of ").append(cacheSize).
append(", but the existing cache size is ").append(this.cacheSize).
append(". ");
}
long timeoutMs = conf.getLong(DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS,
DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS_DEFAULT);
if (getTimeoutMs() != timeoutMs) {
bld.append("You specified a cache timeout of ").append(timeoutMs).
append(" ms, but the existing cache timeout is ").
append(getTimeoutMs()).append("ms").append(". ");
}
int runsPerTimeout = conf.getInt(
DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT,
DFS_CLIENT_MMAP_CACHE_THREAD_RUNS_PER_TIMEOUT_DEFAULT);
if (getRunsPerTimeout() != runsPerTimeout) {
bld.append("You specified ").append(runsPerTimeout).
append(" runs per timeout, but the existing runs per timeout is ").
append(getTimeoutMs()).append(". ");
}
return bld.toString();
}
private static class Waitable<T> {
private T val;
private final Condition cond;
public Waitable(Condition cond) {
this.val = null;
this.cond = cond;
}
public T await() throws InterruptedException {
while (this.val == null) {
this.cond.await();
}
return this.val;
}
public void provide(T val) {
this.val = val;
this.cond.signalAll();
}
}
private static class Key implements Comparable<Key> {
private final ExtendedBlock block;
private final DatanodeID datanode;
Key(ExtendedBlock block, DatanodeID datanode) {
this.block = block;
this.datanode = datanode;
}
/**
* Compare two ClientMmap regions that we're storing.
*
* When we append to a block, we bump the genstamp. It is important to
* compare the genStamp here. That way, we will not return a shorter
* mmap than required.
*/
@Override
public int compareTo(Key o) {
return ComparisonChain.start().
compare(block.getBlockId(), o.block.getBlockId()).
compare(block.getGenerationStamp(), o.block.getGenerationStamp()).
compare(block.getBlockPoolId(), o.block.getBlockPoolId()).
compare(datanode, o.datanode).
result();
}
@Override
public boolean equals(Object rhs) {
if (rhs == null) {
return false;
}
try {
Key o = (Key)rhs;
return (compareTo(o) == 0);
} catch (ClassCastException e) {
return false;
}
}
@Override
public int hashCode() {
return block.hashCode() ^ datanode.hashCode();
}
}
/**
* Thread which handles expiring mmaps from the cache.
*/
private static class CacheCleaner implements Runnable, Closeable {
private WeakReference<ClientMmapManager> managerRef;
private ScheduledFuture<?> future;
CacheCleaner(ClientMmapManager manager) {
this.managerRef= new WeakReference<ClientMmapManager>(manager);
}
@Override
public void run() {
ClientMmapManager manager = managerRef.get();
if (manager == null) return;
long curTime = System.nanoTime();
try {
manager.lock.lock();
manager.evictStaleEntries(curTime);
} finally {
manager.lock.unlock();
}
}
void setFuture(ScheduledFuture<?> future) {
this.future = future;
}
@Override
public void close() throws IOException {
future.cancel(false);
}
}
/**
* Evict entries which are older than curTime - timeoutNs from the cache.
*
* NOTE: you must call this function with the lock held.
*/
private void evictStaleEntries(long curTime) {
if (closed) {
return;
}
Iterator<Entry<Long, ClientMmap>> iter =
evictable.entrySet().iterator();
while (iter.hasNext()) {
Entry<Long, ClientMmap> entry = iter.next();
if (entry.getKey() + timeoutNs >= curTime) {
return;
}
ClientMmap mmap = entry.getValue();
Key key = new Key(mmap.getBlock(), mmap.getDatanodeID());
mmaps.remove(key);
iter.remove();
mmap.unmap();
}
}
/**
* Evict one mmap object from the cache.
*
* NOTE: you must call this function with the lock held.
*
* @return True if an object was evicted; false if none
* could be evicted.
*/
private boolean evictOne() {
Entry<Long, ClientMmap> entry = evictable.pollFirstEntry();
if (entry == null) {
// We don't want to try creating another mmap region, because the
// cache is full.
return false;
}
ClientMmap evictedMmap = entry.getValue();
Key evictedKey = new Key(evictedMmap.getBlock(),
evictedMmap.getDatanodeID());
mmaps.remove(evictedKey);
evictedMmap.unmap();
return true;
}
/**
* Create a new mmap object.
*
* NOTE: you must call this function with the lock held.
*
* @param key The key which describes this mmap.
* @param in The input stream to use to create the mmap.
* @return The new mmap object, or null if there were
* insufficient resources.
* @throws IOException If there was an I/O error creating the mmap.
*/
private ClientMmap create(Key key, FileInputStream in) throws IOException {
if (mmaps.size() + 1 > cacheSize) {
if (!evictOne()) {
LOG.warn("mmap cache is full (with " + cacheSize + " elements) and " +
"nothing is evictable. Ignoring request for mmap with " +
"datanodeID=" + key.datanode + ", " + "block=" + key.block);
return null;
}
}
// Create the condition variable that other threads may wait on.
Waitable<ClientMmap> waitable =
new Waitable<ClientMmap>(lock.newCondition());
mmaps.put(key, waitable);
// Load the entry
boolean success = false;
ClientMmap mmap = null;
try {
try {
lock.unlock();
mmap = ClientMmap.load(this, in, key.block, key.datanode);
} finally {
lock.lock();
}
if (cacheCleaner == null) {
cacheCleaner = new CacheCleaner(this);
ScheduledFuture<?> future =
executor.scheduleAtFixedRate(cacheCleaner,
timeoutNs, timeoutNs / runsPerTimeout, TimeUnit.NANOSECONDS);
cacheCleaner.setFuture(future);
}
success = true;
} finally {
if (!success) {
LOG.warn("failed to create mmap for datanodeID=" + key.datanode +
", " + "block=" + key.block);
mmaps.remove(key);
}
waitable.provide(mmap);
}
if (LOG.isDebugEnabled()) {
LOG.info("created a new ClientMmap for block " + key.block +
" on datanode " + key.datanode);
}
return mmap;
}
/**
* Get or create an mmap region.
*
* @param node The DataNode that owns the block for this mmap region.
* @param block The block ID, block pool ID, and generation stamp of
* the block we want to read.
* @param in An open file for this block. This stream is only used
* if we have to create a new mmap; if we use an
* existing one, it is ignored.
*
* @return The client mmap region.
*/
public ClientMmap fetch(DatanodeID datanodeID, ExtendedBlock block,
FileInputStream in) throws IOException, InterruptedException {
LOG.debug("fetching mmap with datanodeID=" + datanodeID + ", " +
"block=" + block);
Key key = new Key(block, datanodeID);
ClientMmap mmap = null;
try {
lock.lock();
if (closed) {
throw new IOException("ClientMmapManager is closed.");
}
while (mmap == null) {
Waitable<ClientMmap> entry = mmaps.get(key);
if (entry == null) {
return create(key, in);
}
mmap = entry.await();
}
if (mmap.ref() == 1) {
// When going from nobody using the mmap (ref = 0) to somebody
// using the mmap (ref = 1), we must make the mmap un-evictable.
evictable.remove(mmap.getLastEvictableTimeNs());
}
}
finally {
lock.unlock();
}
if (LOG.isDebugEnabled()) {
LOG.debug("reusing existing mmap with datanodeID=" + datanodeID +
", " + "block=" + block);
}
return mmap;
}
/**
* Make an mmap evictable.
*
* When an mmap is evictable, it may be removed from the cache if necessary.
* mmaps can only be evictable if nobody is using them.
*
* @param mmap The mmap to make evictable.
*/
void makeEvictable(ClientMmap mmap) {
try {
lock.lock();
if (closed) {
// If this ClientMmapManager is closed, then don't bother with the
// cache; just close the mmap.
mmap.unmap();
return;
}
long now = System.nanoTime();
while (evictable.containsKey(now)) {
now++;
}
mmap.setLastEvictableTimeNs(now);
evictable.put(now, mmap);
} finally {
lock.unlock();
}
}
@Override
public void close() throws IOException {
try {
lock.lock();
closed = true;
IOUtils.cleanup(LOG, cacheCleaner);
// Unmap all the mmaps that nobody is using.
// The ones which are in use will be unmapped just as soon as people stop
// using them.
evictStaleEntries(Long.MAX_VALUE);
executor.shutdown();
} finally {
lock.unlock();
}
}
@VisibleForTesting
public interface ClientMmapVisitor {
void accept(ClientMmap mmap);
}
@VisibleForTesting
public synchronized void visitMmaps(ClientMmapVisitor visitor)
throws InterruptedException {
for (Waitable<ClientMmap> entry : mmaps.values()) {
visitor.accept(entry.await());
}
}
public void visitEvictable(ClientMmapVisitor visitor)
throws InterruptedException {
for (ClientMmap mmap : evictable.values()) {
visitor.accept(mmap);
}
}
}
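For reference, here is a brief sketch (not part of this patch) of the fetch/unref life cycle that a block reader is expected to follow when using this cache. The caller is assumed to already hold the DatanodeID, the ExtendedBlock, and an open stream for the block file:

import java.io.FileInputStream;
import java.nio.MappedByteBuffer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.client.ClientMmap;
import org.apache.hadoop.hdfs.client.ClientMmapManager;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;

class ClientMmapLifecycleSketch {
  static void readViaMmap(Configuration conf, DatanodeID datanode,
      ExtendedBlock block, FileInputStream blockFile) throws Exception {
    ClientMmapManager manager = ClientMmapManager.fromConf(conf);
    try {
      // fetch() reuses a cached mapping when one exists; otherwise it mmaps the
      // block file and caches the result, evicting an idle entry if the cache is full.
      ClientMmap mmap = manager.fetch(datanode, block, blockFile);
      if (mmap == null) {
        return; // cache was full and nothing was evictable
      }
      try {
        MappedByteBuffer data = mmap.getMappedByteBuffer();
        // ... read from 'data' ...
      } finally {
        mmap.unref(); // dropping the last reference makes the mapping evictable again
      }
    } finally {
      manager.close();
    }
  }
}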


@ -100,7 +100,7 @@ public void start() throws IOException {
if (certSSL) {
boolean needClientAuth = conf.getBoolean("dfs.https.need.client.auth", false);
InetSocketAddress secInfoSocAddr = NetUtils.createSocketAddr(infoHost + ":" + conf.get(
DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_KEY, infoHost + ":" + 0));
DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_KEY, "0"));
Configuration sslConf = new Configuration(false);
if (certSSL) {
sslConf.addResource(conf.get(DFSConfigKeys.DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_KEY,


@ -0,0 +1,68 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "expect.h"
#include "hdfs.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int expectFileStats(hdfsFile file,
uint64_t expectedTotalBytesRead,
uint64_t expectedTotalLocalBytesRead,
uint64_t expectedTotalShortCircuitBytesRead,
uint64_t expectedTotalZeroCopyBytesRead)
{
struct hdfsReadStatistics *stats = NULL;
EXPECT_ZERO(hdfsFileGetReadStatistics(file, &stats));
fprintf(stderr, "expectFileStats(expectedTotalBytesRead=%"PRId64", "
"expectedTotalLocalBytesRead=%"PRId64", "
"expectedTotalShortCircuitBytesRead=%"PRId64", "
"expectedTotalZeroCopyBytesRead=%"PRId64", "
"totalBytesRead=%"PRId64", "
"totalLocalBytesRead=%"PRId64", "
"totalShortCircuitBytesRead=%"PRId64", "
"totalZeroCopyBytesRead=%"PRId64")\n",
expectedTotalBytesRead,
expectedTotalLocalBytesRead,
expectedTotalShortCircuitBytesRead,
expectedTotalZeroCopyBytesRead,
stats->totalBytesRead,
stats->totalLocalBytesRead,
stats->totalShortCircuitBytesRead,
stats->totalZeroCopyBytesRead);
if (expectedTotalBytesRead != UINT64_MAX) {
EXPECT_INT64_EQ(expectedTotalBytesRead, stats->totalBytesRead);
}
if (expectedTotalLocalBytesRead != UINT64_MAX) {
EXPECT_INT64_EQ(expectedTotalLocalBytesRead,
stats->totalLocalBytesRead);
}
if (expectedTotalShortCircuitBytesRead != UINT64_MAX) {
EXPECT_INT64_EQ(expectedTotalShortCircuitBytesRead,
stats->totalShortCircuitBytesRead);
}
if (expectedTotalZeroCopyBytesRead != UINT64_MAX) {
EXPECT_INT64_EQ(expectedTotalZeroCopyBytesRead,
stats->totalZeroCopyBytesRead);
}
hdfsFileFreeReadStatistics(stats);
return 0;
}


@ -19,16 +19,19 @@
#ifndef LIBHDFS_NATIVE_TESTS_EXPECT_H
#define LIBHDFS_NATIVE_TESTS_EXPECT_H
#include <inttypes.h>
#include <stdio.h>
struct hdfsFile_internal;
#define EXPECT_ZERO(x) \
do { \
int __my_ret__ = x; \
if (__my_ret__) { \
int __my_errno__ = errno; \
fprintf(stderr, "TEST_ERROR: failed on line %d with return " \
fprintf(stderr, "TEST_ERROR: failed on %s:%d with return " \
"code %d (errno: %d): got nonzero from %s\n", \
__LINE__, __my_ret__, __my_errno__, #x); \
__FILE__, __LINE__, __my_ret__, __my_errno__, #x); \
return __my_ret__; \
} \
} while (0);
@ -38,9 +41,9 @@
void* __my_ret__ = x; \
int __my_errno__ = errno; \
if (__my_ret__ != NULL) { \
fprintf(stderr, "TEST_ERROR: failed on line %d (errno: %d): " \
fprintf(stderr, "TEST_ERROR: failed on %s:%d (errno: %d): " \
"got non-NULL value %p from %s\n", \
__LINE__, __my_errno__, __my_ret__, #x); \
__FILE__, __LINE__, __my_errno__, __my_ret__, #x); \
return -1; \
} \
} while (0);
@ -50,8 +53,8 @@
void* __my_ret__ = x; \
int __my_errno__ = errno; \
if (__my_ret__ == NULL) { \
fprintf(stderr, "TEST_ERROR: failed on line %d (errno: %d): " \
"got NULL from %s\n", __LINE__, __my_errno__, #x); \
fprintf(stderr, "TEST_ERROR: failed on %s:%d (errno: %d): " \
"got NULL from %s\n", __FILE__, __LINE__, __my_errno__, #x); \
return -1; \
} \
} while (0);
@ -61,15 +64,16 @@
int __my_ret__ = x; \
int __my_errno__ = errno; \
if (__my_ret__ != -1) { \
fprintf(stderr, "TEST_ERROR: failed on line %d with return " \
"code %d (errno: %d): expected -1 from %s\n", __LINE__, \
fprintf(stderr, "TEST_ERROR: failed on %s:%d with return " \
"code %d (errno: %d): expected -1 from %s\n", \
__FILE__, __LINE__, \
__my_ret__, __my_errno__, #x); \
return -1; \
} \
if (__my_errno__ != e) { \
fprintf(stderr, "TEST_ERROR: failed on line %d with return " \
fprintf(stderr, "TEST_ERROR: failed on %s:%d with return " \
"code %d (errno: %d): expected errno = %d from %s\n", \
__LINE__, __my_ret__, __my_errno__, e, #x); \
__FILE__, __LINE__, __my_ret__, __my_errno__, e, #x); \
return -1; \
} \
} while (0);
@ -79,9 +83,9 @@
int __my_ret__ = x; \
int __my_errno__ = errno; \
if (!__my_ret__) { \
fprintf(stderr, "TEST_ERROR: failed on line %d with return " \
"code %d (errno: %d): got zero from %s\n", __LINE__, \
__my_ret__, __my_errno__, #x); \
fprintf(stderr, "TEST_ERROR: failed on %s:%d with return " \
"code %d (errno: %d): got zero from %s\n", __FILE__, __LINE__, \
__my_ret__, __my_errno__, #x); \
return -1; \
} \
} while (0);
@ -91,9 +95,9 @@
int __my_ret__ = x; \
int __my_errno__ = errno; \
if (__my_ret__ < 0) { \
fprintf(stderr, "TEST_ERROR: failed on line %d with return " \
fprintf(stderr, "TEST_ERROR: failed on %s:%d with return " \
"code %d (errno: %d): got negative return from %s\n", \
__LINE__, __my_ret__, __my_errno__, #x); \
__FILE__, __LINE__, __my_ret__, __my_errno__, #x); \
return __my_ret__; \
} \
} while (0);
@ -103,9 +107,21 @@
int __my_ret__ = y; \
int __my_errno__ = errno; \
if (__my_ret__ != (x)) { \
fprintf(stderr, "TEST_ERROR: failed on line %d with return " \
fprintf(stderr, "TEST_ERROR: failed on %s:%d with return " \
"code %d (errno: %d): expected %d\n", \
__LINE__, __my_ret__, __my_errno__, (x)); \
__FILE__, __LINE__, __my_ret__, __my_errno__, (x)); \
return -1; \
} \
} while (0);
#define EXPECT_INT64_EQ(x, y) \
do { \
int64_t __my_ret__ = y; \
int __my_errno__ = errno; \
if (__my_ret__ != (x)) { \
fprintf(stderr, "TEST_ERROR: failed on %s:%d with return " \
"value %"PRId64" (errno: %d): expected %"PRId64"\n", \
__FILE__, __LINE__, __my_ret__, __my_errno__, (x)); \
return -1; \
} \
} while (0);
@ -117,4 +133,17 @@
ret = -errno; \
} while (ret == -EINTR);
/**
* Test that an HDFS file has the given statistics.
*
* Any parameter can be set to UINT64_MAX to avoid checking it.
*
* @return 0 on success; error code otherwise
*/
int expectFileStats(struct hdfsFile_internal *file,
uint64_t expectedTotalBytesRead,
uint64_t expectedTotalLocalBytesRead,
uint64_t expectedTotalShortCircuitBytesRead,
uint64_t expectedTotalZeroCopyBytesRead);
#endif


@ -39,6 +39,7 @@
#define JAVA_NET_ISA "java/net/InetSocketAddress"
#define JAVA_NET_URI "java/net/URI"
#define JAVA_STRING "java/lang/String"
#define READ_OPTION "org/apache/hadoop/fs/ReadOption"
#define JAVA_VOID "V"
@ -143,6 +144,15 @@ int hdfsFileGetReadStatistics(hdfsFile file,
goto done;
}
s->totalShortCircuitBytesRead = jVal.j;
jthr = invokeMethod(env, &jVal, INSTANCE, readStats,
"org/apache/hadoop/hdfs/DFSInputStream$ReadStatistics",
"getTotalZeroCopyBytesRead", "()J");
if (jthr) {
ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"hdfsFileGetReadStatistics: getTotalZeroCopyBytesRead failed");
goto done;
}
s->totalZeroCopyBytesRead = jVal.j;
*stats = s;
s = NULL;
ret = 0;
@ -183,6 +193,25 @@ void hdfsFileDisableDirectRead(hdfsFile file)
file->flags &= ~HDFS_FILE_SUPPORTS_DIRECT_READ;
}
int hdfsDisableDomainSocketSecurity(void)
{
jthrowable jthr;
JNIEnv* env = getJNIEnv();
if (env == NULL) {
errno = EINTERNAL;
return -1;
}
jthr = invokeMethod(env, NULL, STATIC, NULL,
"org/apache/hadoop/net/unix/DomainSocket",
"disableBindPathValidation", "()V");
if (jthr) {
errno = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"DomainSocket#disableBindPathValidation");
return -1;
}
return 0;
}
/**
* hdfsJniEnv: A wrapper struct to be used as 'value'
* while saving thread -> JNIEnv* mappings
@ -220,40 +249,6 @@ static jthrowable constructNewObjectOfPath(JNIEnv *env, const char *path,
return NULL;
}
/**
* Set a configuration value.
*
* @param env The JNI environment
* @param jConfiguration The configuration object to modify
* @param key The key to modify
* @param value The value to set the key to
*
* @return NULL on success; exception otherwise
*/
static jthrowable hadoopConfSetStr(JNIEnv *env, jobject jConfiguration,
const char *key, const char *value)
{
jthrowable jthr;
jstring jkey = NULL, jvalue = NULL;
jthr = newJavaStr(env, key, &jkey);
if (jthr)
goto done;
jthr = newJavaStr(env, value, &jvalue);
if (jthr)
goto done;
jthr = invokeMethod(env, NULL, INSTANCE, jConfiguration,
HADOOP_CONF, "set", JMETHOD2(JPARAM(JAVA_STRING),
JPARAM(JAVA_STRING), JAVA_VOID),
jkey, jvalue);
if (jthr)
goto done;
done:
destroyLocalReference(env, jkey);
destroyLocalReference(env, jvalue);
return jthr;
}
static jthrowable hadoopConfGetStr(JNIEnv *env, jobject jConfiguration,
const char *key, char **val)
{
@ -2108,6 +2103,395 @@ int hdfsUtime(hdfsFS fs, const char* path, tTime mtime, tTime atime)
return 0;
}
/**
* Zero-copy options.
*
* We cache the EnumSet of ReadOptions which has to be passed into every
* readZero call, to avoid reconstructing it each time. This cache is cleared
* whenever an element changes.
*/
struct hadoopRzOptions
{
JNIEnv *env;
int skipChecksums;
jobject byteBufferPool;
jobject cachedEnumSet;
};
struct hadoopRzOptions *hadoopRzOptionsAlloc(void)
{
struct hadoopRzOptions *opts;
JNIEnv *env;
env = getJNIEnv();
if (!env) {
// Check to make sure the JNI environment is set up properly.
errno = EINTERNAL;
return NULL;
}
opts = calloc(1, sizeof(struct hadoopRzOptions));
if (!opts) {
errno = ENOMEM;
return NULL;
}
return opts;
}
static void hadoopRzOptionsClearCached(JNIEnv *env,
struct hadoopRzOptions *opts)
{
if (!opts->cachedEnumSet) {
return;
}
(*env)->DeleteGlobalRef(env, opts->cachedEnumSet);
opts->cachedEnumSet = NULL;
}
int hadoopRzOptionsSetSkipChecksum(
struct hadoopRzOptions *opts, int skip)
{
JNIEnv *env;
env = getJNIEnv();
if (!env) {
errno = EINTERNAL;
return -1;
}
hadoopRzOptionsClearCached(env, opts);
opts->skipChecksums = !!skip;
return 0;
}
int hadoopRzOptionsSetByteBufferPool(
struct hadoopRzOptions *opts, const char *className)
{
JNIEnv *env;
jthrowable jthr;
jobject byteBufferPool = NULL;
env = getJNIEnv();
if (!env) {
errno = EINTERNAL;
return -1;
}
// Note: we don't have to call hadoopRzOptionsClearCached in this
// function, since the ByteBufferPool is passed separately from the
// EnumSet of ReadOptions.
jthr = constructNewObjectOfClass(env, &byteBufferPool, className, "()V");
if (jthr) {
printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"hadoopRzOptionsSetByteBufferPool(className=%s): ", className);
errno = EINVAL;
return -1;
}
if (opts->byteBufferPool) {
// Delete any previous ByteBufferPool we had.
(*env)->DeleteGlobalRef(env, opts->byteBufferPool);
}
opts->byteBufferPool = byteBufferPool;
return 0;
}
void hadoopRzOptionsFree(struct hadoopRzOptions *opts)
{
JNIEnv *env;
env = getJNIEnv();
if (!env) {
return;
}
hadoopRzOptionsClearCached(env, opts);
if (opts->byteBufferPool) {
(*env)->DeleteGlobalRef(env, opts->byteBufferPool);
opts->byteBufferPool = NULL;
}
free(opts);
}
struct hadoopRzBuffer
{
jobject byteBuffer;
uint8_t *ptr;
int32_t length;
int direct;
};
static jthrowable hadoopRzOptionsGetEnumSet(JNIEnv *env,
struct hadoopRzOptions *opts, jobject *enumSet)
{
jthrowable jthr = NULL;
jobject enumInst = NULL, enumSetObj = NULL;
jvalue jVal;
if (opts->cachedEnumSet) {
// If we cached the value, return it now.
*enumSet = opts->cachedEnumSet;
goto done;
}
if (opts->skipChecksums) {
jthr = fetchEnumInstance(env, READ_OPTION,
"SKIP_CHECKSUMS", &enumInst);
if (jthr) {
goto done;
}
jthr = invokeMethod(env, &jVal, STATIC, NULL,
"java/util/EnumSet", "of",
"(Ljava/lang/Enum;)Ljava/util/EnumSet;", enumInst);
if (jthr) {
goto done;
}
enumSetObj = jVal.l;
} else {
jclass clazz = (*env)->FindClass(env, READ_OPTION);
if (!clazz) {
jthr = newRuntimeError(env, "failed "
"to find class for %s", READ_OPTION);
goto done;
}
jthr = invokeMethod(env, &jVal, STATIC, NULL,
"java/util/EnumSet", "noneOf",
"(Ljava/lang/Class;)Ljava/util/EnumSet;", clazz);
enumSetObj = jVal.l;
}
// create global ref
opts->cachedEnumSet = (*env)->NewGlobalRef(env, enumSetObj);
if (!opts->cachedEnumSet) {
jthr = getPendingExceptionAndClear(env);
goto done;
}
*enumSet = opts->cachedEnumSet;
jthr = NULL;
done:
(*env)->DeleteLocalRef(env, enumInst);
(*env)->DeleteLocalRef(env, enumSetObj);
return jthr;
}
static int hadoopReadZeroExtractBuffer(JNIEnv *env,
const struct hadoopRzOptions *opts, struct hadoopRzBuffer *buffer)
{
int ret;
jthrowable jthr;
jvalue jVal;
uint8_t *directStart;
void *mallocBuf = NULL;
jint position;
jarray array = NULL;
jthr = invokeMethod(env, &jVal, INSTANCE, buffer->byteBuffer,
"java/nio/ByteBuffer", "remaining", "()I");
if (jthr) {
ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"hadoopReadZeroExtractBuffer: ByteBuffer#remaining failed: ");
goto done;
}
buffer->length = jVal.i;
jthr = invokeMethod(env, &jVal, INSTANCE, buffer->byteBuffer,
"java/nio/ByteBuffer", "position", "()I");
if (jthr) {
ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"hadoopReadZeroExtractBuffer: ByteBuffer#position failed: ");
goto done;
}
position = jVal.i;
directStart = (*env)->GetDirectBufferAddress(env, buffer->byteBuffer);
if (directStart) {
// Handle direct buffers.
buffer->ptr = directStart + position;
buffer->direct = 1;
ret = 0;
goto done;
}
// Handle indirect buffers.
// The JNI docs don't say that GetDirectBufferAddress throws any exceptions
// when it fails. However, they also don't clearly say that it doesn't. It
// seems safest to clear any pending exceptions here, to prevent problems on
// various JVMs.
(*env)->ExceptionClear(env);
if (!opts->byteBufferPool) {
fputs("hadoopReadZeroExtractBuffer: we read through the "
"zero-copy path, but failed to get the address of the buffer via "
"GetDirectBufferAddress. Please make sure your JVM supports "
"GetDirectBufferAddress.\n", stderr);
ret = ENOTSUP;
goto done;
}
// Get the backing array object of this buffer.
jthr = invokeMethod(env, &jVal, INSTANCE, buffer->byteBuffer,
"java/nio/ByteBuffer", "array", "()[B");
if (jthr) {
ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"hadoopReadZeroExtractBuffer: ByteBuffer#array failed: ");
goto done;
}
array = jVal.l;
if (!array) {
fputs("hadoopReadZeroExtractBuffer: ByteBuffer#array returned NULL.",
stderr);
ret = EIO;
goto done;
}
mallocBuf = malloc(buffer->length);
if (!mallocBuf) {
fprintf(stderr, "hadoopReadZeroExtractBuffer: failed to allocate %d bytes of memory\n",
buffer->length);
ret = ENOMEM;
goto done;
}
(*env)->GetByteArrayRegion(env, array, position, buffer->length, mallocBuf);
jthr = (*env)->ExceptionOccurred(env);
if (jthr) {
ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"hadoopReadZeroExtractBuffer: GetByteArrayRegion failed: ");
goto done;
}
buffer->ptr = mallocBuf;
buffer->direct = 0;
ret = 0;
done:
free(mallocBuf);
(*env)->DeleteLocalRef(env, array);
return ret;
}
static int translateZCRException(JNIEnv *env, jthrowable exc)
{
int ret;
char *className = NULL;
jthrowable jthr = classNameOfObject(exc, env, &className);
if (jthr) {
fputs("hadoopReadZero: failed to get class name of "
"exception from read().\n", stderr);
destroyLocalReference(env, exc);
destroyLocalReference(env, jthr);
ret = EIO;
goto done;
}
if (!strcmp(className, "java.lang.UnsupportedOperationException")) {
ret = EPROTONOSUPPORT;
goto done;
}
ret = printExceptionAndFree(env, exc, PRINT_EXC_ALL,
"hadoopZeroCopyRead: ZeroCopyCursor#read failed");
done:
free(className);
return ret;
}
struct hadoopRzBuffer* hadoopReadZero(hdfsFile file,
struct hadoopRzOptions *opts, int32_t maxLength)
{
JNIEnv *env;
jthrowable jthr = NULL;
jvalue jVal;
jobject enumSet = NULL, byteBuffer = NULL;
struct hadoopRzBuffer* buffer = NULL;
int ret;
env = getJNIEnv();
if (!env) {
errno = EINTERNAL;
return NULL;
}
if (file->type != INPUT) {
fputs("Cannot read from a non-InputStream object!\n", stderr);
ret = EINVAL;
goto done;
}
buffer = calloc(1, sizeof(struct hadoopRzBuffer));
if (!buffer) {
ret = ENOMEM;
goto done;
}
jthr = hadoopRzOptionsGetEnumSet(env, opts, &enumSet);
if (jthr) {
ret = printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"hadoopReadZero: hadoopRzOptionsGetEnumSet failed: ");
goto done;
}
jthr = invokeMethod(env, &jVal, INSTANCE, file->file, HADOOP_ISTRM, "read",
"(Lorg/apache/hadoop/io/ByteBufferPool;ILjava/util/EnumSet;)"
"Ljava/nio/ByteBuffer;", opts->byteBufferPool, maxLength, enumSet);
if (jthr) {
ret = translateZCRException(env, jthr);
goto done;
}
byteBuffer = jVal.l;
if (!byteBuffer) {
buffer->byteBuffer = NULL;
buffer->length = 0;
buffer->ptr = NULL;
} else {
buffer->byteBuffer = (*env)->NewGlobalRef(env, byteBuffer);
if (!buffer->byteBuffer) {
ret = printPendingExceptionAndFree(env, PRINT_EXC_ALL,
"hadoopReadZero: failed to create global ref to ByteBuffer");
goto done;
}
ret = hadoopReadZeroExtractBuffer(env, opts, buffer);
if (ret) {
goto done;
}
}
ret = 0;
done:
(*env)->DeleteLocalRef(env, byteBuffer);
if (ret) {
if (buffer) {
if (buffer->byteBuffer) {
(*env)->DeleteGlobalRef(env, buffer->byteBuffer);
}
free(buffer);
}
errno = ret;
return NULL;
} else {
errno = 0;
}
return buffer;
}
int32_t hadoopRzBufferLength(const struct hadoopRzBuffer *buffer)
{
return buffer->length;
}
const void *hadoopRzBufferGet(const struct hadoopRzBuffer *buffer)
{
return buffer->ptr;
}
void hadoopRzBufferFree(hdfsFile file, struct hadoopRzBuffer *buffer)
{
jvalue jVal;
jthrowable jthr;
JNIEnv* env;
env = getJNIEnv();
if (env == NULL) {
errno = EINTERNAL;
return;
}
if (buffer->byteBuffer) {
jthr = invokeMethod(env, &jVal, INSTANCE, file->file,
HADOOP_ISTRM, "releaseBuffer",
"(Ljava/nio/ByteBuffer;)V", buffer->byteBuffer);
if (jthr) {
printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"hadoopRzBufferFree: releaseBuffer failed: ");
// even on error, we have to delete the reference.
}
(*env)->DeleteGlobalRef(env, buffer->byteBuffer);
}
if (!buffer->direct) {
free(buffer->ptr);
}
memset(buffer, 0, sizeof(*buffer));
free(buffer);
}
char***
hdfsGetHosts(hdfsFS fs, const char* path, tOffset start, tOffset length)
{


@ -36,6 +36,8 @@
#define EINTERNAL 255
#endif
#define ELASTIC_BYTE_BUFFER_POOL_CLASS \
"org/apache/hadoop/io/ElasticByteBufferPool"
/** All APIs set errno to meaningful values */
@ -65,6 +67,10 @@ extern "C" {
struct hdfsFile_internal;
typedef struct hdfsFile_internal* hdfsFile;
struct hadoopRzOptions;
struct hadoopRzBuffer;
/**
* Determine if a file is open for read.
*
@ -85,6 +91,7 @@ extern "C" {
uint64_t totalBytesRead;
uint64_t totalLocalBytesRead;
uint64_t totalShortCircuitBytesRead;
uint64_t totalZeroCopyBytesRead;
};
/**
@ -680,7 +687,107 @@ extern "C" {
* @return 0 on success else -1
*/
int hdfsUtime(hdfsFS fs, const char* path, tTime mtime, tTime atime);
/**
* Allocate a zero-copy options structure.
*
* You must free all options structures allocated with this function using
* hadoopRzOptionsFree.
*
* @return A zero-copy options structure, or NULL if one could
* not be allocated. If NULL is returned, errno will
* contain the error number.
*/
struct hadoopRzOptions *hadoopRzOptionsAlloc(void);
/**
* Determine whether we should skip checksums in hadoopReadZero.
*
* @param opts The options structure.
* @param skip Nonzero to skip checksums sometimes; zero to always
* check them.
*
* @return 0 on success; -1 plus errno on failure.
*/
int hadoopRzOptionsSetSkipChecksum(
struct hadoopRzOptions *opts, int skip);
/**
* Set the ByteBufferPool to use with hadoopReadZero.
*
* @param opts The options structure.
* @param className If this is NULL, we will not use any
* ByteBufferPool. If this is non-NULL, it will be
* treated as the name of the pool class to use.
* For example, you can use
* ELASTIC_BYTE_BUFFER_POOL_CLASS.
*
* @return 0 if the ByteBufferPool class was found and
* instantiated;
* -1 plus errno otherwise.
*/
int hadoopRzOptionsSetByteBufferPool(
struct hadoopRzOptions *opts, const char *className);
/**
* Free a hadoopRzOptions structure.
*
* @param opts The options structure to free.
* Any associated ByteBufferPool will also be freed.
*/
void hadoopRzOptionsFree(struct hadoopRzOptions *opts);
/**
* Perform a byte buffer read.
* If possible, this will be a zero-copy (mmap) read.
*
* @param file The file to read from.
* @param opts An options structure created by hadoopRzOptionsAlloc.
* @param maxLength The maximum length to read. We may read fewer bytes
* than this length.
*
* @return On success, returns a new hadoopRzBuffer.
* This buffer will continue to be valid and readable
* until it is released by hadoopRzBufferFree. Failure to
* release a buffer will lead to a memory leak.
*
* NULL plus an errno code on an error.
* errno = EPROTONOSUPPORT indicates that we could not do a
* zero-copy read, and there was no ByteBufferPool
* supplied.
*/
struct hadoopRzBuffer* hadoopReadZero(hdfsFile file,
struct hadoopRzOptions *opts, int32_t maxLength);
/**
* Determine the length of the buffer returned from readZero.
*
* @param buffer a buffer returned from readZero.
* @return the length of the buffer.
*/
int32_t hadoopRzBufferLength(const struct hadoopRzBuffer *buffer);
/**
* Get a pointer to the raw buffer returned from readZero.
*
* To find out how many bytes this buffer contains, call
* hadoopRzBufferLength.
*
* @param buffer a buffer returned from readZero.
* @return a pointer to the start of the buffer. This will be
* NULL when end-of-file has been reached.
*/
const void *hadoopRzBufferGet(const struct hadoopRzBuffer *buffer);
/**
* Release a buffer obtained through readZero.
*
* @param file The hdfs stream that created this buffer. This must be
* the same stream you called hadoopReadZero on.
* @param buffer The buffer to release.
*/
void hadoopRzBufferFree(hdfsFile file, struct hadoopRzBuffer *buffer);
#ifdef __cplusplus
}
#endif


@ -48,6 +48,15 @@ extern "C" {
* @param file The HDFS file
*/
void hdfsFileDisableDirectRead(struct hdfsFile_internal *file);
/**
* Disable domain socket security checks.
*
* @return 0 if domain socket security was disabled;
* -1 if not.
*/
int hdfsDisableDomainSocketSecurity(void);
#ifdef __cplusplus
}
#endif


@ -608,3 +608,73 @@ JNIEnv* getJNIEnv(void)
return env;
}
int javaObjectIsOfClass(JNIEnv *env, jobject obj, const char *name)
{
jclass clazz;
int ret;
clazz = (*env)->FindClass(env, name);
if (!clazz) {
printPendingExceptionAndFree(env, PRINT_EXC_ALL,
"javaObjectIsOfClass(%s)", name);
return -1;
}
ret = (*env)->IsInstanceOf(env, obj, clazz);
(*env)->DeleteLocalRef(env, clazz);
return ret == JNI_TRUE ? 1 : 0;
}
jthrowable hadoopConfSetStr(JNIEnv *env, jobject jConfiguration,
const char *key, const char *value)
{
jthrowable jthr;
jstring jkey = NULL, jvalue = NULL;
jthr = newJavaStr(env, key, &jkey);
if (jthr)
goto done;
jthr = newJavaStr(env, value, &jvalue);
if (jthr)
goto done;
jthr = invokeMethod(env, NULL, INSTANCE, jConfiguration,
"org/apache/hadoop/conf/Configuration", "set",
"(Ljava/lang/String;Ljava/lang/String;)V",
jkey, jvalue);
if (jthr)
goto done;
done:
(*env)->DeleteLocalRef(env, jkey);
(*env)->DeleteLocalRef(env, jvalue);
return jthr;
}
jthrowable fetchEnumInstance(JNIEnv *env, const char *className,
const char *valueName, jobject *out)
{
jclass clazz;
jfieldID fieldId;
jobject jEnum;
char prettyClass[256];
clazz = (*env)->FindClass(env, className);
if (!clazz) {
return newRuntimeError(env, "fetchEnum(%s, %s): failed to find class.",
className, valueName);
}
if (snprintf(prettyClass, sizeof(prettyClass), "L%s;", className)
>= sizeof(prettyClass)) {
return newRuntimeError(env, "fetchEnum(%s, %s): class name too long.",
className, valueName);
}
fieldId = (*env)->GetStaticFieldID(env, clazz, valueName, prettyClass);
if (!fieldId) {
return getPendingExceptionAndClear(env);
}
jEnum = (*env)->GetStaticObjectField(env, clazz, fieldId);
if (!jEnum) {
return getPendingExceptionAndClear(env);
}
*out = jEnum;
return NULL;
}


@ -114,6 +114,47 @@ jthrowable classNameOfObject(jobject jobj, JNIEnv *env, char **name);
* */
JNIEnv* getJNIEnv(void);
/**
* Figure out if a Java object is an instance of a particular class.
*
* @param env The Java environment.
* @param obj The object to check.
* @param name The class name to check.
*
* @return -1 if we failed to find the referenced class name.
* 0 if the object is not of the given class.
* 1 if the object is of the given class.
*/
int javaObjectIsOfClass(JNIEnv *env, jobject obj, const char *name);
/**
* Set a value in a configuration object.
*
* @param env The JNI environment
* @param jConfiguration The configuration object to modify
* @param key The key to modify
* @param value The value to set the key to
*
* @return NULL on success; exception otherwise
*/
jthrowable hadoopConfSetStr(JNIEnv *env, jobject jConfiguration,
const char *key, const char *value);
/**
* Fetch an instance of an Enum.
*
* @param env The JNI environment.
* @param className The enum class name.
* @param valueName The name of the enum value
* @param out (out param) on success, a local reference to an
* instance of the enum object. (Since Java enums are
* singletons, this is also the only instance.)
*
* @return NULL on success; exception otherwise
*/
jthrowable fetchEnumInstance(JNIEnv *env, const char *className,
const char *valueName, jobject *out);
#endif /*LIBHDFS_JNI_HELPER_H*/
/**


@ -17,14 +17,19 @@
*/
#include "exception.h"
#include "hdfs.h"
#include "hdfs_test.h"
#include "jni_helper.h"
#include "native_mini_dfs.h"
#include <errno.h>
#include <jni.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#define MINIDFS_CLUSTER_BUILDER "org/apache/hadoop/hdfs/MiniDFSCluster$Builder"
#define MINIDFS_CLUSTER "org/apache/hadoop/hdfs/MiniDFSCluster"
@ -39,8 +44,44 @@ struct NativeMiniDfsCluster {
* The NativeMiniDfsCluster object
*/
jobject obj;
/**
* Path to the domain socket, or the empty string if there is none.
*/
char domainSocketPath[PATH_MAX];
};
static jthrowable nmdConfigureShortCircuit(JNIEnv *env,
struct NativeMiniDfsCluster *cl, jobject cobj)
{
jthrowable jthr;
char *tmpDir;
int ret = hdfsDisableDomainSocketSecurity();
if (ret) {
return newRuntimeError(env, "failed to disable hdfs domain "
"socket security: error %d", ret);
}
jthr = hadoopConfSetStr(env, cobj, "dfs.client.read.shortcircuit", "true");
if (jthr) {
return jthr;
}
tmpDir = getenv("TMPDIR");
if (!tmpDir) {
tmpDir = "/tmp";
}
snprintf(cl->domainSocketPath, PATH_MAX, "%s/native_mini_dfs.sock.%d.%d",
tmpDir, getpid(), rand());
jthr = hadoopConfSetStr(env, cobj, "dfs.domain.socket.path",
cl->domainSocketPath);
if (jthr) {
return jthr;
}
return NULL;
}
struct NativeMiniDfsCluster* nmdCreate(struct NativeMiniDfsConf *conf)
{
struct NativeMiniDfsCluster* cl = NULL;
@ -81,6 +122,28 @@ struct NativeMiniDfsCluster* nmdCreate(struct NativeMiniDfsConf *conf)
goto error;
}
}
if (jthr) {
printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"nmdCreate: Configuration::setBoolean");
goto error;
}
// Disable 'minimum block size' -- it's annoying in tests.
(*env)->DeleteLocalRef(env, jconfStr);
jconfStr = NULL;
jthr = newJavaStr(env, "dfs.namenode.fs-limits.min-block-size", &jconfStr);
if (jthr) {
printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"nmdCreate: new String");
goto error;
}
jthr = invokeMethod(env, NULL, INSTANCE, cobj, HADOOP_CONF,
"setLong", "(Ljava/lang/String;J)V", jconfStr, 0LL);
if (jthr) {
printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"nmdCreate: Configuration::setLong");
goto error;
}
// Create MiniDFSCluster object
jthr = constructNewObjectOfClass(env, &bld, MINIDFS_CLUSTER_BUILDER,
"(L"HADOOP_CONF";)V", cobj);
if (jthr) {
@ -88,6 +151,14 @@ struct NativeMiniDfsCluster* nmdCreate(struct NativeMiniDfsConf *conf)
"nmdCreate: NativeMiniDfsCluster#Builder#Builder");
goto error;
}
if (conf->configureShortCircuit) {
jthr = nmdConfigureShortCircuit(env, cl, cobj);
if (jthr) {
printExceptionAndFree(env, jthr, PRINT_EXC_ALL,
"nmdCreate: nmdConfigureShortCircuit error");
goto error;
}
}
jthr = invokeMethod(env, &val, INSTANCE, bld, MINIDFS_CLUSTER_BUILDER,
"format", "(Z)L" MINIDFS_CLUSTER_BUILDER ";", conf->doFormat);
if (jthr) {
@ -272,3 +343,29 @@ error_dlr_nn:
return ret;
}
int nmdConfigureHdfsBuilder(struct NativeMiniDfsCluster *cl,
struct hdfsBuilder *bld)
{
int port, ret;
hdfsBuilderSetNameNode(bld, "localhost");
port = nmdGetNameNodePort(cl);
if (port < 0) {
fprintf(stderr, "nmdGetNameNodePort failed with error %d\n", -port);
return EIO;
}
hdfsBuilderSetNameNodePort(bld, port);
if (cl->domainSocketPath[0]) {
ret = hdfsBuilderConfSetStr(bld, "dfs.client.read.shortcircuit", "true");
if (ret) {
return ret;
}
ret = hdfsBuilderConfSetStr(bld, "dfs.domain.socket.path",
cl->domainSocketPath);
if (ret) {
return ret;
}
}
return 0;
}


@ -21,6 +21,7 @@
#include <jni.h> /* for jboolean */
struct hdfsBuilder;
struct NativeMiniDfsCluster;
/**
@ -28,17 +29,24 @@ struct NativeMiniDfsCluster;
*/
struct NativeMiniDfsConf {
/**
* Nonzero if the cluster should be formatted prior to startup
* Nonzero if the cluster should be formatted prior to startup.
*/
jboolean doFormat;
/**
* Whether or not to enable webhdfs in MiniDfsCluster
*/
jboolean webhdfsEnabled;
/**
* The http port of the namenode in MiniDfsCluster
*/
jint namenodeHttpPort;
/**
* Nonzero if we should configure short circuit.
*/
jboolean configureShortCircuit;
};
/**
@ -84,7 +92,7 @@ void nmdFree(struct NativeMiniDfsCluster* cl);
*
* @return the port, or a negative error code
*/
int nmdGetNameNodePort(const struct NativeMiniDfsCluster *cl);
int nmdGetNameNodePort(const struct NativeMiniDfsCluster *cl);
/**
* Get the http address that's in use by the given (non-HA) nativeMiniDfs
@ -101,4 +109,14 @@ int nmdGetNameNodePort(const struct NativeMiniDfsCluster *cl);
int nmdGetNameNodeHttpAddress(const struct NativeMiniDfsCluster *cl,
int *port, const char **hostName);
/**
* Configure the HDFS builder appropriately to connect to this cluster.
*
* @param cl The cluster to connect to.
* @param bld The hdfs builder to configure.
*
* @return 0 on success; a non-zero error code on failure.
*/
int nmdConfigureHdfsBuilder(struct NativeMiniDfsCluster *cl,
struct hdfsBuilder *bld);
#endif


@ -0,0 +1,233 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "expect.h"
#include "hdfs.h"
#include "native_mini_dfs.h"
#include <errno.h>
#include <inttypes.h>
#include <semaphore.h>
#include <pthread.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#define TO_STR_HELPER(X) #X
#define TO_STR(X) TO_STR_HELPER(X)
#define TEST_FILE_NAME_LENGTH 128
#define TEST_ZEROCOPY_FULL_BLOCK_SIZE 4096
#define TEST_ZEROCOPY_LAST_BLOCK_SIZE 3215
#define TEST_ZEROCOPY_NUM_BLOCKS 6
#define SMALL_READ_LEN 16
#define ZC_BUF_LEN 32768
static uint8_t *getZeroCopyBlockData(int blockIdx)
{
uint8_t *buf = malloc(TEST_ZEROCOPY_FULL_BLOCK_SIZE);
int i;
if (!buf) {
fprintf(stderr, "malloc(%d) failed\n", TEST_ZEROCOPY_FULL_BLOCK_SIZE);
exit(1);
}
for (i = 0; i < TEST_ZEROCOPY_FULL_BLOCK_SIZE; i++) {
buf[i] = blockIdx + (i % 17);
}
return buf;
}
static int getZeroCopyBlockLen(int blockIdx)
{
if (blockIdx >= TEST_ZEROCOPY_NUM_BLOCKS) {
return 0;
} else if (blockIdx == (TEST_ZEROCOPY_NUM_BLOCKS - 1)) {
return TEST_ZEROCOPY_LAST_BLOCK_SIZE;
} else {
return TEST_ZEROCOPY_FULL_BLOCK_SIZE;
}
}
static void printBuf(const uint8_t *buf, size_t len) __attribute__((unused));
static void printBuf(const uint8_t *buf, size_t len)
{
size_t i;
for (i = 0; i < len; i++) {
fprintf(stderr, "%02x", buf[i]);
}
fprintf(stderr, "\n");
}
static int doTestZeroCopyReads(hdfsFS fs, const char *fileName)
{
hdfsFile file = NULL;
struct hadoopRzOptions *opts = NULL;
struct hadoopRzBuffer *buffer = NULL;
uint8_t *block;
file = hdfsOpenFile(fs, fileName, O_RDONLY, 0, 0, 0);
EXPECT_NONNULL(file);
opts = hadoopRzOptionsAlloc();
EXPECT_NONNULL(opts);
EXPECT_ZERO(hadoopRzOptionsSetSkipChecksum(opts, 1));
/* haven't read anything yet */
EXPECT_ZERO(expectFileStats(file, 0LL, 0LL, 0LL, 0LL));
block = getZeroCopyBlockData(0);
EXPECT_NONNULL(block);
/* first read is half of a block. */
buffer = hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2);
EXPECT_NONNULL(buffer);
EXPECT_INT_EQ(TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2,
hadoopRzBufferLength(buffer));
EXPECT_ZERO(memcmp(hadoopRzBufferGet(buffer), block,
TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2));
hadoopRzBufferFree(file, buffer);
/* read the next half of the block */
buffer = hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2);
EXPECT_NONNULL(buffer);
EXPECT_INT_EQ(TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2,
hadoopRzBufferLength(buffer));
EXPECT_ZERO(memcmp(hadoopRzBufferGet(buffer),
block + (TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2),
TEST_ZEROCOPY_FULL_BLOCK_SIZE / 2));
hadoopRzBufferFree(file, buffer);
free(block);
EXPECT_ZERO(expectFileStats(file, TEST_ZEROCOPY_FULL_BLOCK_SIZE,
TEST_ZEROCOPY_FULL_BLOCK_SIZE,
TEST_ZEROCOPY_FULL_BLOCK_SIZE,
TEST_ZEROCOPY_FULL_BLOCK_SIZE));
/* Now let's read just a few bytes. */
buffer = hadoopReadZero(file, opts, SMALL_READ_LEN);
EXPECT_NONNULL(buffer);
EXPECT_INT_EQ(SMALL_READ_LEN, hadoopRzBufferLength(buffer));
block = getZeroCopyBlockData(1);
EXPECT_NONNULL(block);
EXPECT_ZERO(memcmp(block, hadoopRzBufferGet(buffer), SMALL_READ_LEN));
hadoopRzBufferFree(file, buffer);
EXPECT_INT_EQ(TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN,
hdfsTell(fs, file));
EXPECT_ZERO(expectFileStats(file,
TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN,
TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN,
TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN,
TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN));
/* Clear 'skip checksums' and test that we can't do zero-copy reads any
* more. Since there is no ByteBufferPool set, we should fail with
* EPROTONOSUPPORT.
*/
EXPECT_ZERO(hadoopRzOptionsSetSkipChecksum(opts, 0));
EXPECT_NULL(hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE));
EXPECT_INT_EQ(EPROTONOSUPPORT, errno);
/* Now set a ByteBufferPool and try again. It should succeed this time. */
EXPECT_ZERO(hadoopRzOptionsSetByteBufferPool(opts,
ELASTIC_BYTE_BUFFER_POOL_CLASS));
buffer = hadoopReadZero(file, opts, TEST_ZEROCOPY_FULL_BLOCK_SIZE);
EXPECT_NONNULL(buffer);
EXPECT_INT_EQ(TEST_ZEROCOPY_FULL_BLOCK_SIZE, hadoopRzBufferLength(buffer));
EXPECT_ZERO(expectFileStats(file,
(2 * TEST_ZEROCOPY_FULL_BLOCK_SIZE) + SMALL_READ_LEN,
(2 * TEST_ZEROCOPY_FULL_BLOCK_SIZE) + SMALL_READ_LEN,
(2 * TEST_ZEROCOPY_FULL_BLOCK_SIZE) + SMALL_READ_LEN,
TEST_ZEROCOPY_FULL_BLOCK_SIZE + SMALL_READ_LEN));
EXPECT_ZERO(memcmp(block + SMALL_READ_LEN, hadoopRzBufferGet(buffer),
TEST_ZEROCOPY_FULL_BLOCK_SIZE - SMALL_READ_LEN));
free(block);
block = getZeroCopyBlockData(2);
EXPECT_NONNULL(block);
EXPECT_ZERO(memcmp(block, hadoopRzBufferGet(buffer) +
(TEST_ZEROCOPY_FULL_BLOCK_SIZE - SMALL_READ_LEN), SMALL_READ_LEN));
hadoopRzBufferFree(file, buffer);
free(block);
hadoopRzOptionsFree(opts);
EXPECT_ZERO(hdfsCloseFile(fs, file));
return 0;
}
static int createZeroCopyTestFile(hdfsFS fs, char *testFileName,
size_t testFileNameLen)
{
int blockIdx, blockLen;
hdfsFile file;
uint8_t *data;
snprintf(testFileName, testFileNameLen, "/zeroCopyTestFile.%d.%d",
getpid(), rand());
file = hdfsOpenFile(fs, testFileName, O_WRONLY, 0, 1,
TEST_ZEROCOPY_FULL_BLOCK_SIZE);
EXPECT_NONNULL(file);
for (blockIdx = 0; blockIdx < TEST_ZEROCOPY_NUM_BLOCKS; blockIdx++) {
blockLen = getZeroCopyBlockLen(blockIdx);
data = getZeroCopyBlockData(blockIdx);
EXPECT_NONNULL(data);
EXPECT_INT_EQ(blockLen, hdfsWrite(fs, file, data, blockLen));
}
EXPECT_ZERO(hdfsCloseFile(fs, file));
return 0;
}
/**
* Test that we can write a file with libhdfs and then read it back
*/
int main(void)
{
int port;
struct NativeMiniDfsConf conf = {
.doFormat = 1,
.configureShortCircuit = 1,
};
char testFileName[TEST_FILE_NAME_LENGTH];
hdfsFS fs;
struct NativeMiniDfsCluster* cl;
struct hdfsBuilder *bld;
cl = nmdCreate(&conf);
EXPECT_NONNULL(cl);
EXPECT_ZERO(nmdWaitClusterUp(cl));
port = nmdGetNameNodePort(cl);
if (port < 0) {
fprintf(stderr, "TEST_ERROR: test_zerocopy: "
"nmdGetNameNodePort returned error %d\n", port);
return EXIT_FAILURE;
}
bld = hdfsNewBuilder();
EXPECT_NONNULL(bld);
EXPECT_ZERO(nmdConfigureHdfsBuilder(cl, bld));
hdfsBuilderSetForceNewInstance(bld);
hdfsBuilderConfSetStr(bld, "dfs.block.size",
TO_STR(TEST_ZEROCOPY_FULL_BLOCK_SIZE));
/* ensure that we'll always get our mmaps */
hdfsBuilderConfSetStr(bld, "dfs.client.read.shortcircuit.skip.checksum",
"true");
fs = hdfsBuilderConnect(bld);
EXPECT_NONNULL(fs);
EXPECT_ZERO(createZeroCopyTestFile(fs, testFileName,
TEST_FILE_NAME_LENGTH));
EXPECT_ZERO(doTestZeroCopyReads(fs, testFileName));
EXPECT_ZERO(hdfsDisconnect(fs));
EXPECT_ZERO(nmdShutdown(cl));
nmdFree(cl);
fprintf(stderr, "TEST_SUCCESS\n");
return EXIT_SUCCESS;
}

View File

@ -1415,4 +1415,32 @@
linearly increases.
</description>
</property>
<property>
<name>dfs.client.mmap.cache.size</name>
<value>1024</value>
<description>
When zero-copy reads are used, the DFSClient keeps a cache of recently used
memory mapped regions. This parameter controls the maximum number of
entries that we will keep in that cache.
If this is set to 0, we will not allow mmap.
The larger this number is, the more file descriptors we will potentially
use for memory-mapped files. Memory-mapped files also consume virtual address
space. You may need to increase your ulimit virtual address space limits
before increasing the client mmap cache size.
</description>
</property>
<property>
<name>dfs.client.mmap.cache.timeout.ms</name>
<value>900000</value>
<description>
The minimum length of time that we will keep an mmap entry in the cache
between uses. If an entry is in the cache longer than this, and nobody
uses it, it will be removed by a background thread.
</description>
</property>
</configuration>
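As a minimal sketch of how a client might tune the two mmap cache settings above (assuming the DFSConfigKeys constants referenced elsewhere in this change; the class name and values are illustrative only, not part of this patch):

import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class MmapCacheTuningExample {  // hypothetical class name
  public static HdfsConfiguration tunedConf() {
    HdfsConfiguration conf = new HdfsConfiguration();
    // Allow up to 2048 cached mmap regions (example value; default is 1024).
    conf.setInt(DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE, 2048);
    // Evict unused mmap entries after 60 seconds (example value; default is 900000 ms).
    conf.setLong(DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS, 60000L);
    return conf;
  }
}

The returned configuration would then be passed to the FileSystem or DFSClient as usual; larger cache sizes trade file descriptors and virtual address space for fewer remapping operations.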

View File

@ -0,0 +1,530 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.concurrent.TimeoutException;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.Random;
import org.apache.commons.lang.SystemUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.client.ClientMmap;
import org.apache.hadoop.hdfs.client.ClientMmapManager;
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.io.ByteBufferPool;
import org.apache.hadoop.io.ElasticByteBufferPool;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.net.unix.DomainSocket;
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.BeforeClass;
import org.junit.Test;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
/**
* This class tests if EnhancedByteBufferAccess works correctly.
*/
public class TestEnhancedByteBufferAccess {
private static final Log LOG =
LogFactory.getLog(TestEnhancedByteBufferAccess.class.getName());
static TemporarySocketDirectory sockDir;
@BeforeClass
public static void init() {
sockDir = new TemporarySocketDirectory();
DomainSocket.disableBindPathValidation();
}
private static byte[] byteBufferToArray(ByteBuffer buf) {
byte resultArray[] = new byte[buf.remaining()];
buf.get(resultArray);
buf.flip();
return resultArray;
}
public static HdfsConfiguration initZeroCopyTest() {
Assume.assumeTrue(NativeIO.isAvailable());
Assume.assumeTrue(SystemUtils.IS_OS_UNIX);
HdfsConfiguration conf = new HdfsConfiguration();
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 4096);
conf.setInt(DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_SIZE, 3);
conf.setLong(DFSConfigKeys.DFS_CLIENT_MMAP_CACHE_TIMEOUT_MS, 100);
conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
new File(sockDir.getDir(),
"TestRequestMmapAccess._PORT.sock").getAbsolutePath());
conf.setBoolean(DFSConfigKeys.
DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, true);
return conf;
}
@Test
public void testZeroCopyReads() throws Exception {
HdfsConfiguration conf = initZeroCopyTest();
MiniDFSCluster cluster = null;
final Path TEST_PATH = new Path("/a");
FSDataInputStream fsIn = null;
final int TEST_FILE_LENGTH = 12345;
FileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
fs = cluster.getFileSystem();
DFSTestUtil.createFile(fs, TEST_PATH,
TEST_FILE_LENGTH, (short)1, 7567L);
try {
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
} catch (InterruptedException e) {
Assert.fail("unexpected InterruptedException during " +
"waitReplication: " + e);
} catch (TimeoutException e) {
Assert.fail("unexpected TimeoutException during " +
"waitReplication: " + e);
}
fsIn = fs.open(TEST_PATH);
byte original[] = new byte[TEST_FILE_LENGTH];
IOUtils.readFully(fsIn, original, 0, TEST_FILE_LENGTH);
fsIn.close();
fsIn = fs.open(TEST_PATH);
ByteBuffer result = fsIn.read(null, 4096,
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
Assert.assertEquals(4096, result.remaining());
HdfsDataInputStream dfsIn = (HdfsDataInputStream)fsIn;
Assert.assertEquals(4096,
dfsIn.getReadStatistics().getTotalBytesRead());
Assert.assertEquals(4096,
dfsIn.getReadStatistics().getTotalZeroCopyBytesRead());
Assert.assertArrayEquals(Arrays.copyOfRange(original, 0, 4096),
byteBufferToArray(result));
fsIn.releaseBuffer(result);
} finally {
if (fsIn != null) fsIn.close();
if (fs != null) fs.close();
if (cluster != null) cluster.shutdown();
}
}
@Test
public void testShortZeroCopyReads() throws Exception {
HdfsConfiguration conf = initZeroCopyTest();
MiniDFSCluster cluster = null;
final Path TEST_PATH = new Path("/a");
FSDataInputStream fsIn = null;
final int TEST_FILE_LENGTH = 12345;
FileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
fs = cluster.getFileSystem();
DFSTestUtil.createFile(fs, TEST_PATH, TEST_FILE_LENGTH, (short)1, 7567L);
try {
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
} catch (InterruptedException e) {
Assert.fail("unexpected InterruptedException during " +
"waitReplication: " + e);
} catch (TimeoutException e) {
Assert.fail("unexpected TimeoutException during " +
"waitReplication: " + e);
}
fsIn = fs.open(TEST_PATH);
byte original[] = new byte[TEST_FILE_LENGTH];
IOUtils.readFully(fsIn, original, 0, TEST_FILE_LENGTH);
fsIn.close();
fsIn = fs.open(TEST_PATH);
// Try to read 8192, but only get 4096 because of the block size.
HdfsDataInputStream dfsIn = (HdfsDataInputStream)fsIn;
ByteBuffer result =
dfsIn.read(null, 8192, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
Assert.assertEquals(4096, result.remaining());
Assert.assertEquals(4096,
dfsIn.getReadStatistics().getTotalBytesRead());
Assert.assertEquals(4096,
dfsIn.getReadStatistics().getTotalZeroCopyBytesRead());
Assert.assertArrayEquals(Arrays.copyOfRange(original, 0, 4096),
byteBufferToArray(result));
dfsIn.releaseBuffer(result);
// Try to read 4097, but only get 4096 because of the block size.
result =
dfsIn.read(null, 4097, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
Assert.assertEquals(4096, result.remaining());
Assert.assertArrayEquals(Arrays.copyOfRange(original, 4096, 8192),
byteBufferToArray(result));
dfsIn.releaseBuffer(result);
} finally {
if (fsIn != null) fsIn.close();
if (fs != null) fs.close();
if (cluster != null) cluster.shutdown();
}
}
@Test
public void testZeroCopyReadsNoFallback() throws Exception {
HdfsConfiguration conf = initZeroCopyTest();
MiniDFSCluster cluster = null;
final Path TEST_PATH = new Path("/a");
FSDataInputStream fsIn = null;
final int TEST_FILE_LENGTH = 12345;
FileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
fs = cluster.getFileSystem();
DFSTestUtil.createFile(fs, TEST_PATH,
TEST_FILE_LENGTH, (short)1, 7567L);
try {
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
} catch (InterruptedException e) {
Assert.fail("unexpected InterruptedException during " +
"waitReplication: " + e);
} catch (TimeoutException e) {
Assert.fail("unexpected TimeoutException during " +
"waitReplication: " + e);
}
fsIn = fs.open(TEST_PATH);
byte original[] = new byte[TEST_FILE_LENGTH];
IOUtils.readFully(fsIn, original, 0, TEST_FILE_LENGTH);
fsIn.close();
fsIn = fs.open(TEST_PATH);
HdfsDataInputStream dfsIn = (HdfsDataInputStream)fsIn;
ByteBuffer result;
try {
result = dfsIn.read(null, 4097, EnumSet.noneOf(ReadOption.class));
Assert.fail("expected UnsupportedOperationException");
} catch (UnsupportedOperationException e) {
// expected
}
result = dfsIn.read(null, 4096, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
Assert.assertEquals(4096, result.remaining());
Assert.assertEquals(4096,
dfsIn.getReadStatistics().getTotalBytesRead());
Assert.assertEquals(4096,
dfsIn.getReadStatistics().getTotalZeroCopyBytesRead());
Assert.assertArrayEquals(Arrays.copyOfRange(original, 0, 4096),
byteBufferToArray(result));
} finally {
if (fsIn != null) fsIn.close();
if (fs != null) fs.close();
if (cluster != null) cluster.shutdown();
}
}
private static class CountingVisitor
implements ClientMmapManager.ClientMmapVisitor {
int count = 0;
@Override
public void accept(ClientMmap mmap) {
count++;
}
public void reset() {
count = 0;
}
}
@Test
public void testZeroCopyMmapCache() throws Exception {
HdfsConfiguration conf = initZeroCopyTest();
MiniDFSCluster cluster = null;
final Path TEST_PATH = new Path("/a");
final int TEST_FILE_LENGTH = 16385;
final int RANDOM_SEED = 23453;
FSDataInputStream fsIn = null;
ByteBuffer results[] = { null, null, null, null, null };
DistributedFileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
fs = cluster.getFileSystem();
DFSTestUtil.createFile(fs, TEST_PATH,
TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
try {
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
} catch (InterruptedException e) {
Assert.fail("unexpected InterruptedException during " +
"waitReplication: " + e);
} catch (TimeoutException e) {
Assert.fail("unexpected TimeoutException during " +
"waitReplication: " + e);
}
fsIn = fs.open(TEST_PATH);
byte original[] = new byte[TEST_FILE_LENGTH];
IOUtils.readFully(fsIn, original, 0, TEST_FILE_LENGTH);
fsIn.close();
fsIn = fs.open(TEST_PATH);
final ClientMmapManager mmapManager = fs.getClient().getMmapManager();
final CountingVisitor countingVisitor = new CountingVisitor();
mmapManager.visitMmaps(countingVisitor);
Assert.assertEquals(0, countingVisitor.count);
mmapManager.visitEvictable(countingVisitor);
Assert.assertEquals(0, countingVisitor.count);
results[0] = fsIn.read(null, 4096,
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
fsIn.seek(0);
results[1] = fsIn.read(null, 4096,
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
mmapManager.visitMmaps(countingVisitor);
Assert.assertEquals(1, countingVisitor.count);
countingVisitor.reset();
mmapManager.visitEvictable(countingVisitor);
Assert.assertEquals(0, countingVisitor.count);
countingVisitor.reset();
// The mmaps should be of the first block of the file.
final ExtendedBlock firstBlock = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
mmapManager.visitMmaps(new ClientMmapManager.ClientMmapVisitor() {
@Override
public void accept(ClientMmap mmap) {
Assert.assertEquals(firstBlock, mmap.getBlock());
}
});
// Read more blocks.
results[2] = fsIn.read(null, 4096,
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
results[3] = fsIn.read(null, 4096,
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
try {
results[4] = fsIn.read(null, 4096,
EnumSet.of(ReadOption.SKIP_CHECKSUMS));
Assert.fail("expected UnsupportedOperationException");
} catch (UnsupportedOperationException e) {
// expected
}
// we should have 3 mmaps, 0 evictable
mmapManager.visitMmaps(countingVisitor);
Assert.assertEquals(3, countingVisitor.count);
countingVisitor.reset();
mmapManager.visitEvictable(countingVisitor);
Assert.assertEquals(0, countingVisitor.count);
// After we close the cursors, the mmaps should be evictable for
// a brief period of time. Then, they should be closed (we're
// using a very quick timeout)
for (ByteBuffer buffer : results) {
if (buffer != null) {
fsIn.releaseBuffer(buffer);
}
}
GenericTestUtils.waitFor(new Supplier<Boolean>() {
public Boolean get() {
countingVisitor.reset();
try {
mmapManager.visitEvictable(countingVisitor);
} catch (InterruptedException e) {
e.printStackTrace();
return false;
}
return (0 == countingVisitor.count);
}
}, 10, 10000);
countingVisitor.reset();
mmapManager.visitMmaps(countingVisitor);
Assert.assertEquals(0, countingVisitor.count);
} finally {
if (fsIn != null) fsIn.close();
if (fs != null) fs.close();
if (cluster != null) cluster.shutdown();
}
}
/**
* Test HDFS fallback reads. HDFS streams support the ByteBufferReadable
* interface.
*/
@Test
public void testHdfsFallbackReads() throws Exception {
HdfsConfiguration conf = initZeroCopyTest();
MiniDFSCluster cluster = null;
final Path TEST_PATH = new Path("/a");
final int TEST_FILE_LENGTH = 16385;
final int RANDOM_SEED = 23453;
FSDataInputStream fsIn = null;
DistributedFileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
fs = cluster.getFileSystem();
DFSTestUtil.createFile(fs, TEST_PATH,
TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
try {
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
} catch (InterruptedException e) {
Assert.fail("unexpected InterruptedException during " +
"waitReplication: " + e);
} catch (TimeoutException e) {
Assert.fail("unexpected TimeoutException during " +
"waitReplication: " + e);
}
fsIn = fs.open(TEST_PATH);
byte original[] = new byte[TEST_FILE_LENGTH];
IOUtils.readFully(fsIn, original, 0, TEST_FILE_LENGTH);
fsIn.close();
fsIn = fs.open(TEST_PATH);
testFallbackImpl(fsIn, original);
} finally {
if (fsIn != null) fsIn.close();
if (fs != null) fs.close();
if (cluster != null) cluster.shutdown();
}
}
private static class RestrictedAllocatingByteBufferPool
implements ByteBufferPool {
private final boolean direct;
RestrictedAllocatingByteBufferPool(boolean direct) {
this.direct = direct;
}
@Override
public ByteBuffer getBuffer(boolean direct, int length) {
Preconditions.checkArgument(this.direct == direct);
return direct ? ByteBuffer.allocateDirect(length) :
ByteBuffer.allocate(length);
}
@Override
public void putBuffer(ByteBuffer buffer) {
}
}
private static void testFallbackImpl(InputStream stream,
byte original[]) throws Exception {
RestrictedAllocatingByteBufferPool bufferPool =
new RestrictedAllocatingByteBufferPool(
stream instanceof ByteBufferReadable);
ByteBuffer result = ByteBufferUtil.fallbackRead(stream, bufferPool, 10);
Assert.assertEquals(10, result.remaining());
Assert.assertArrayEquals(Arrays.copyOfRange(original, 0, 10),
byteBufferToArray(result));
result = ByteBufferUtil.fallbackRead(stream, bufferPool, 5000);
Assert.assertEquals(5000, result.remaining());
Assert.assertArrayEquals(Arrays.copyOfRange(original, 10, 5010),
byteBufferToArray(result));
result = ByteBufferUtil.fallbackRead(stream, bufferPool, 9999999);
Assert.assertEquals(11375, result.remaining());
Assert.assertArrayEquals(Arrays.copyOfRange(original, 5010, 16385),
byteBufferToArray(result));
result = ByteBufferUtil.fallbackRead(stream, bufferPool, 10);
Assert.assertNull(result);
}
/**
* Test the {@link ByteBufferUtil#fallbackRead} function directly.
*/
@Test
public void testFallbackRead() throws Exception {
HdfsConfiguration conf = initZeroCopyTest();
MiniDFSCluster cluster = null;
final Path TEST_PATH = new Path("/a");
final int TEST_FILE_LENGTH = 16385;
final int RANDOM_SEED = 23453;
FSDataInputStream fsIn = null;
DistributedFileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
fs = cluster.getFileSystem();
DFSTestUtil.createFile(fs, TEST_PATH,
TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
try {
DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
} catch (InterruptedException e) {
Assert.fail("unexpected InterruptedException during " +
"waitReplication: " + e);
} catch (TimeoutException e) {
Assert.fail("unexpected TimeoutException during " +
"waitReplication: " + e);
}
fsIn = fs.open(TEST_PATH);
byte original[] = new byte[TEST_FILE_LENGTH];
IOUtils.readFully(fsIn, original, 0, TEST_FILE_LENGTH);
fsIn.close();
fsIn = fs.open(TEST_PATH);
testFallbackImpl(fsIn, original);
} finally {
if (fsIn != null) fsIn.close();
if (fs != null) fs.close();
if (cluster != null) cluster.shutdown();
}
}
/**
* Test fallback reads on a stream which does not support the
ByteBufferReadable interface.
*/
@Test
public void testIndirectFallbackReads() throws Exception {
final File TEST_DIR = new File(
System.getProperty("test.build.data","build/test/data"));
final String TEST_PATH = TEST_DIR + File.separator +
"indirectFallbackTestFile";
final int TEST_FILE_LENGTH = 16385;
final int RANDOM_SEED = 23453;
FileOutputStream fos = null;
FileInputStream fis = null;
try {
fos = new FileOutputStream(TEST_PATH);
Random random = new Random(RANDOM_SEED);
byte original[] = new byte[TEST_FILE_LENGTH];
random.nextBytes(original);
fos.write(original);
fos.close();
fos = null;
fis = new FileInputStream(TEST_PATH);
testFallbackImpl(fis, original);
} finally {
IOUtils.cleanup(LOG, fos, fis);
new File(TEST_PATH).delete();
}
}
}

View File

@ -25,7 +25,6 @@
import java.nio.ByteBuffer;
import java.util.concurrent.TimeoutException;
import org.apache.hadoop.hdfs.DFSInputStream.ReadStatistics;
import org.apache.hadoop.fs.ChecksumException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
@ -36,11 +35,26 @@
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.unix.DomainSocket;
import org.apache.hadoop.net.unix.TemporarySocketDirectory;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestBlockReaderLocal {
private static TemporarySocketDirectory sockDir;
@BeforeClass
public static void init() {
sockDir = new TemporarySocketDirectory();
DomainSocket.disableBindPathValidation();
}
@AfterClass
public static void shutdown() throws IOException {
sockDir.close();
}
public static void assertArrayRegionsEqual(byte []buf1, int off1, byte []buf2,
int off2, int len) {
for (int i = 0; i < len; i++) {
@ -100,10 +114,11 @@ public void runBlockReaderLocalTest(BlockReaderLocalTest test,
FSDataInputStream fsIn = null;
byte original[] = new byte[BlockReaderLocalTest.TEST_LENGTH];
FileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
FileSystem fs = cluster.getFileSystem();
fs = cluster.getFileSystem();
DFSTestUtil.createFile(fs, TEST_PATH,
BlockReaderLocalTest.TEST_LENGTH, (short)1, RANDOM_SEED);
try {
@ -138,6 +153,7 @@ public void runBlockReaderLocalTest(BlockReaderLocalTest test,
test.doTest(blockReaderLocal, original);
} finally {
if (fsIn != null) fsIn.close();
if (fs != null) fs.close();
if (cluster != null) cluster.shutdown();
if (dataIn != null) dataIn.close();
if (checkIn != null) checkIn.close();
@ -382,10 +398,11 @@ private void testStatistics(boolean isShortCircuit) throws Exception {
final long RANDOM_SEED = 4567L;
FSDataInputStream fsIn = null;
byte original[] = new byte[BlockReaderLocalTest.TEST_LENGTH];
FileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
cluster.waitActive();
FileSystem fs = cluster.getFileSystem();
fs = cluster.getFileSystem();
DFSTestUtil.createFile(fs, TEST_PATH,
BlockReaderLocalTest.TEST_LENGTH, (short)1, RANDOM_SEED);
try {
@ -417,6 +434,7 @@ private void testStatistics(boolean isShortCircuit) throws Exception {
} finally {
DFSInputStream.tcpReadsDisabledForTesting = false;
if (fsIn != null) fsIn.close();
if (fs != null) fs.close();
if (cluster != null) cluster.shutdown();
if (sockDir != null) sockDir.close();
}

View File

@ -0,0 +1,39 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.junit.Test;
public class TestNameNodeHttpServer {
@Test
public void testSslConfiguration() throws IOException {
Configuration conf = new Configuration();
conf.setBoolean(DFSConfigKeys.DFS_HTTPS_ENABLE_KEY, true);
System.setProperty("jetty.ssl.password", "foo");
System.setProperty("jetty.ssl.keypassword", "bar");
MiniDFSCluster dfsCluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(0).build();
dfsCluster.waitActive();
dfsCluster.shutdown();
}
}

View File

@ -155,6 +155,8 @@ Release 2.3.0 - UNRELEASED
MAPREDUCE-5411. Refresh size of loaded job cache on history server (Ashwin
Shankar via jlowe)
MAPREDUCE-5332. Support token-preserving restart of history server (jlowe)
IMPROVEMENTS
MAPREDUCE-434. LocalJobRunner limited to single reducer (Sandy Ryza and
@ -232,6 +234,21 @@ Release 2.1.2 - UNRELEASED
MAPREDUCE-5513. ConcurrentModificationException in JobControl (Robert
Parker via jlowe)
MAPREDUCE-5531. Fix compat with hadoop-1 in mapreduce.(TaskID,
TaskAttemptID) by re-introducing missing constructors. (Robert Kanter via
acmurthy)
MAPREDUCE-5545. org.apache.hadoop.mapred.TestTaskAttemptListenerImpl.testCommitWindow
times out (Robert Kanter via jlowe)
MAPREDUCE-5529. Fix compat with hadoop-1 in mapred.TotalOrderPartitioner
by re-introducing (get,set)PartitionFile which takes in JobConf. (Robert
Kanter via acmurthy)
MAPREDUCE-5538. Fixed MR AppMaster to send job-notification URL only after
the job is really done - a bug caused by MAPREDUCE-5505. (Zhijie Shen via
vinodkv)
Release 2.1.1-beta - 2013-09-23
INCOMPATIBLE CHANGES

View File

@ -531,19 +531,6 @@ public void shutDownJob() {
// this is the only job, so shut down the Appmaster
// note in a workflow scenario, this may lead to creation of a new
// job (FIXME?)
// Send job-end notification
if (getConfig().get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL) != null) {
try {
LOG.info("Job end notification started for jobID : "
+ job.getReport().getJobId());
JobEndNotifier notifier = new JobEndNotifier();
notifier.setConf(getConfig());
notifier.notify(job.getReport());
} catch (InterruptedException ie) {
LOG.warn("Job end notification interrupted for jobID : "
+ job.getReport().getJobId(), ie);
}
}
try {
//if isLastAMRetry comes as true, should never set it to false
@ -559,10 +546,28 @@ public void shutDownJob() {
LOG.info("Calling stop for all the services");
MRAppMaster.this.stop();
// Except ClientService, other services are already stopped, it is safe to
// let clients know the final states. ClientService should wait for some
// time so clients have enough time to know the final states.
safeToReportTerminationToUser.set(true);
if (isLastAMRetry) {
// Except ClientService, other services are already stopped, it is safe to
// let clients know the final states. ClientService should wait for some
// time so clients have enough time to know the final states.
safeToReportTerminationToUser.set(true);
// Send job-end notification when it is safe to report termination to
// users and it is the last AM retry
if (getConfig().get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL) != null) {
try {
LOG.info("Job end notification started for jobID : "
+ job.getReport().getJobId());
JobEndNotifier notifier = new JobEndNotifier();
notifier.setConf(getConfig());
notifier.notify(job.getReport());
} catch (InterruptedException ie) {
LOG.warn("Job end notification interrupted for jobID : "
+ job.getReport().getJobId(), ie);
}
}
}
try {
Thread.sleep(5000);
} catch (InterruptedException e) {

View File

@ -128,6 +128,8 @@
import org.apache.hadoop.yarn.state.StateMachineFactory;
import org.apache.hadoop.yarn.util.Clock;
import com.google.common.annotations.VisibleForTesting;
/** Implementation of Job interface. Maintains the state machines of Job.
* The read and write calls use ReadWriteLock for concurrency.
*/

View File

@ -228,7 +228,7 @@ private static TaskAttemptCompletionEvent createTce(int eventId,
return tce;
}
@Test (timeout=1000)
@Test (timeout=10000)
public void testCommitWindow() throws IOException {
SystemClock clock = new SystemClock();

View File

@ -18,19 +18,41 @@
package org.apache.hadoop.mapreduce.v2.app;
import static org.mockito.Mockito.doNothing;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.spy;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.net.Proxy;
import java.net.URI;
import java.net.URISyntaxException;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.http.HttpServer;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.v2.api.records.JobReport;
import org.apache.hadoop.mapreduce.v2.api.records.JobState;
import org.apache.hadoop.mapreduce.v2.app.job.JobStateInternal;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.JobEventType;
import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.Mockito;
/**
* Tests job end notification
*
*/
@SuppressWarnings("unchecked")
public class TestJobEndNotifier extends JobEndNotifier {
//Test maximum retries is capped by MR_JOB_END_NOTIFICATION_MAX_ATTEMPTS
@ -133,7 +155,7 @@ protected boolean notifyURLOnce() {
public void testNotifyRetries() throws InterruptedException {
Configuration conf = new Configuration();
conf.set(MRJobConfig.MR_JOB_END_NOTIFICATION_URL, "http://nonexistent");
JobReport jobReport = Mockito.mock(JobReport.class);
JobReport jobReport = mock(JobReport.class);
long startTime = System.currentTimeMillis();
this.notificationCount = 0;
@ -162,4 +184,100 @@ public void testNotifyRetries() throws InterruptedException {
}
@Test
public void testNotificationOnNormalShutdown() throws Exception {
HttpServer server = startHttpServer();
// Act like it is the second attempt. Default max attempts is 2
MRApp app = spy(new MRApp(2, 2, true, this.getClass().getName(), true, 2));
// Make use of the safeToReportTerminationToUser flag so that we can look at
// the final job-state as seen by real users.
app.safeToReportTerminationToUser.set(false);
doNothing().when(app).sysexit();
Configuration conf = new Configuration();
conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
JobImpl job = (JobImpl)app.submit(conf);
// Even though auto-complete is true, because app is not shut-down yet, user
// will only see RUNNING state.
app.waitForInternalState(job, JobStateInternal.SUCCEEDED);
app.waitForState(job, JobState.RUNNING);
// Now shutdown. User should see SUCCEEDED state.
app.shutDownJob();
app.waitForState(job, JobState.SUCCEEDED);
Assert.assertEquals(true, app.isLastAMRetry());
Assert.assertEquals(1, JobEndServlet.calledTimes);
Assert.assertEquals("jobid=" + job.getID() + "&status=SUCCEEDED",
JobEndServlet.requestUri.getQuery());
Assert.assertEquals(JobState.SUCCEEDED.toString(),
JobEndServlet.foundJobState);
server.stop();
}
@Test
public void testNotificationOnNonLastRetryShutdown() throws Exception {
HttpServer server = startHttpServer();
MRApp app = spy(new MRApp(2, 2, false, this.getClass().getName(), true));
doNothing().when(app).sysexit();
// Make use of the safeToReportTerminationToUser flag so that we can look at
// the final job-state as seen by real users.
app.safeToReportTerminationToUser.set(false);
Configuration conf = new Configuration();
conf.set(JobContext.MR_JOB_END_NOTIFICATION_URL,
JobEndServlet.baseUrl + "jobend?jobid=$jobId&status=$jobStatus");
JobImpl job = (JobImpl)app.submit(conf);
app.waitForState(job, JobState.RUNNING);
app.getContext().getEventHandler()
.handle(new JobEvent(app.getJobId(), JobEventType.JOB_AM_REBOOT));
app.waitForInternalState(job, JobStateInternal.REBOOT);
// Not the last AM attempt. So the user should see that the job is still running.
app.waitForState(job, JobState.RUNNING);
app.shutDownJob();
Assert.assertEquals(false, app.isLastAMRetry());
Assert.assertEquals(0, JobEndServlet.calledTimes);
Assert.assertEquals(null, JobEndServlet.requestUri);
Assert.assertEquals(null, JobEndServlet.foundJobState);
server.stop();
}
private static HttpServer startHttpServer() throws Exception {
new File(System.getProperty(
"build.webapps", "build/webapps") + "/test").mkdirs();
HttpServer server = new HttpServer.Builder().setName("test")
.setBindAddress("0.0.0.0").setPort(0).setFindPort(true).build();
server.addServlet("jobend", "/jobend", JobEndServlet.class);
server.start();
JobEndServlet.calledTimes = 0;
JobEndServlet.requestUri = null;
JobEndServlet.baseUrl = "http://localhost:" + server.getPort() + "/";
JobEndServlet.foundJobState = null;
return server;
}
@SuppressWarnings("serial")
public static class JobEndServlet extends HttpServlet {
public static volatile int calledTimes = 0;
public static URI requestUri;
public static String baseUrl;
public static String foundJobState;
@Override
public void doGet(HttpServletRequest request, HttpServletResponse response)
throws ServletException, IOException {
InputStreamReader in = new InputStreamReader(request.getInputStream());
PrintStream out = new PrintStream(response.getOutputStream());
calledTimes++;
try {
requestUri = new URI(null, null,
request.getRequestURI(), request.getQueryString(), null);
foundJobState = request.getParameter("status");
} catch (URISyntaxException e) {
}
in.close();
out.close();
}
}
}

View File

@ -164,6 +164,27 @@ public class JHAdminConfig {
public static final String MR_HISTORY_STORAGE =
MR_HISTORY_PREFIX + "store.class";
/**
* Enable the history server to store server state and recover server state
* upon startup.
*/
public static final String MR_HS_RECOVERY_ENABLE =
MR_HISTORY_PREFIX + "recovery.enable";
public static final boolean DEFAULT_MR_HS_RECOVERY_ENABLE = false;
/**
* The HistoryServerStateStoreService class to store and recover server state
*/
public static final String MR_HS_STATE_STORE =
MR_HISTORY_PREFIX + "recovery.store.class";
/**
* The URI where server state will be stored when
* HistoryServerFileSystemStateStoreService is configured as the state store
*/
public static final String MR_HS_FS_STATE_STORE_URI =
MR_HISTORY_PREFIX + "recovery.store.fs.uri";
/** Whether to use fixed ports with the minicluster. */
public static final String MR_HISTORY_MINICLUSTER_FIXED_PORTS = MR_HISTORY_PREFIX
+ "minicluster.fixed.ports";

View File

@ -21,7 +21,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Partitioner;
@ -41,4 +41,30 @@ public void configure(JobConf job) {
super.setConf(job);
}
/**
* Set the path to the SequenceFile storing the sorted partition keyset.
* It must be the case that for <tt>R</tt> reduces, there are <tt>R-1</tt>
* keys in the SequenceFile.
* @deprecated Use
* {@link #setPartitionFile(Configuration, Path)}
* instead
*/
@Deprecated
public static void setPartitionFile(JobConf job, Path p) {
org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner.
setPartitionFile(job, p);
}
/**
* Get the path to the SequenceFile storing the sorted partition keyset.
* @see #setPartitionFile(JobConf,Path)
* @deprecated Use
* {@link #getPartitionFile(Configuration)}
* instead
*/
@Deprecated
public static String getPartitionFile(JobConf job) {
return org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner.
getPartitionFile(job);
}
}

View File

@ -76,6 +76,20 @@ public TaskAttemptID(String jtIdentifier, int jobId, TaskType type,
int taskId, int id) {
this(new TaskID(jtIdentifier, jobId, type, taskId), id);
}
/**
* Constructs a TaskId object from given parts.
* @param jtIdentifier jobTracker identifier
* @param jobId job number
* @param isMap whether the tip is a map
* @param taskId taskId number
* @param id the task attempt number
*/
@Deprecated
public TaskAttemptID(String jtIdentifier, int jobId, boolean isMap,
int taskId, int id) {
this(new TaskID(jtIdentifier, jobId, isMap, taskId), id);
}
public TaskAttemptID() {
taskId = new TaskID();

View File

@ -91,6 +91,29 @@ public TaskID(JobID jobId, TaskType type, int id) {
public TaskID(String jtIdentifier, int jobId, TaskType type, int id) {
this(new JobID(jtIdentifier, jobId), type, id);
}
/**
* Constructs a TaskID object from given {@link JobID}.
* @param jobId JobID that this tip belongs to
* @param isMap whether the tip is a map
* @param id the tip number
*/
@Deprecated
public TaskID(JobID jobId, boolean isMap, int id) {
this(jobId, isMap ? TaskType.MAP : TaskType.REDUCE, id);
}
/**
* Constructs a TaskInProgressId object from given parts.
* @param jtIdentifier jobTracker identifier
* @param jobId job number
* @param isMap whether the tip is a map
* @param id the tip number
*/
@Deprecated
public TaskID(String jtIdentifier, int jobId, boolean isMap, int id) {
this(new JobID(jtIdentifier, jobId), isMap, id);
}
public TaskID() {
jobId = new JobID();

View File

@ -1181,4 +1181,28 @@
<description>ACL of who can be admin of the History server.</description>
</property>
<property>
<name>mapreduce.jobhistory.recovery.enable</name>
<value>false</value>
<description>Enable the history server to store server state and recover
server state upon startup. If enabled, then
mapreduce.jobhistory.recovery.store.class must be specified.</description>
</property>
<property>
<name>mapreduce.jobhistory.recovery.store.class</name>
<value>org.apache.hadoop.mapreduce.v2.hs.HistoryServerFileSystemStateStoreService</value>
<description>The HistoryServerStateStoreService class to store history server
state for recovery.</description>
</property>
<property>
<name>mapreduce.jobhistory.recovery.store.fs.uri</name>
<value>${hadoop.tmp.dir}/mapred/history/recoverystore</value>
<!--value>hdfs://localhost:9000/mapred/history/recoverystore</value-->
<description>The URI where history server state will be stored if
HistoryServerFileSystemStateStoreService is configured as the recovery
storage class.</description>
</property>
</configuration>
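A minimal sketch of enabling history server recovery programmatically, assuming the JHAdminConfig keys added in this change; the class name and helper are illustrative only, not part of this patch:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;

public class HistoryRecoveryConfigExample {  // hypothetical class name
  public static Configuration recoveryEnabledConf(String storeUri) {
    Configuration conf = new Configuration();
    // Turn on history server state recovery.
    conf.setBoolean(JHAdminConfig.MR_HS_RECOVERY_ENABLE, true);
    // Use the FileSystem-backed state store added by this change.
    conf.set(JHAdminConfig.MR_HS_STATE_STORE,
        "org.apache.hadoop.mapreduce.v2.hs.HistoryServerFileSystemStateStoreService");
    // Where the state store keeps its files, e.g. an HDFS URI.
    conf.set(JHAdminConfig.MR_HS_FS_STATE_STORE_URI, storeUri);
    return conf;
  }
}

This mirrors the XML defaults above: recovery is off unless mapreduce.jobhistory.recovery.enable is set, and the store class plus its URI must be supplied when it is enabled.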

View File

@ -0,0 +1,370 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.v2.hs;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapreduce.v2.api.MRDelegationTokenIdentifier;
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
import org.apache.hadoop.security.token.delegation.DelegationKey;
@Private
@Unstable
/**
* A history server state storage implementation that supports any persistent
* storage that adheres to the FileSystem interface.
*/
public class HistoryServerFileSystemStateStoreService
extends HistoryServerStateStoreService {
public static final Log LOG =
LogFactory.getLog(HistoryServerFileSystemStateStoreService.class);
private static final String ROOT_STATE_DIR_NAME = "HistoryServerState";
private static final String TOKEN_STATE_DIR_NAME = "tokens";
private static final String TOKEN_KEYS_DIR_NAME = "keys";
private static final String TOKEN_BUCKET_DIR_PREFIX = "tb_";
private static final String TOKEN_BUCKET_NAME_FORMAT =
TOKEN_BUCKET_DIR_PREFIX + "%03d";
private static final String TOKEN_MASTER_KEY_FILE_PREFIX = "key_";
private static final String TOKEN_FILE_PREFIX = "token_";
private static final String TMP_FILE_PREFIX = "tmp-";
private static final FsPermission DIR_PERMISSIONS =
new FsPermission((short)0700);
private static final FsPermission FILE_PERMISSIONS =
new FsPermission((short)0400);
private static final int NUM_TOKEN_BUCKETS = 1000;
private FileSystem fs;
private Path rootStatePath;
private Path tokenStatePath;
private Path tokenKeysStatePath;
@Override
protected void initStorage(Configuration conf)
throws IOException {
final String storeUri = conf.get(JHAdminConfig.MR_HS_FS_STATE_STORE_URI);
if (storeUri == null) {
throw new IOException("No store location URI configured in " +
JHAdminConfig.MR_HS_FS_STATE_STORE_URI);
}
LOG.info("Using " + storeUri + " for history server state storage");
rootStatePath = new Path(storeUri, ROOT_STATE_DIR_NAME);
}
@Override
protected void startStorage() throws IOException {
fs = rootStatePath.getFileSystem(getConfig());
createDir(rootStatePath);
tokenStatePath = new Path(rootStatePath, TOKEN_STATE_DIR_NAME);
createDir(tokenStatePath);
tokenKeysStatePath = new Path(tokenStatePath, TOKEN_KEYS_DIR_NAME);
createDir(tokenKeysStatePath);
for (int i=0; i < NUM_TOKEN_BUCKETS; ++i) {
createDir(getTokenBucketPath(i));
}
}
@Override
protected void closeStorage() throws IOException {
// don't close the filesystem as it's part of the filesystem cache
// and other clients may still be using it
}
@Override
public HistoryServerState loadState() throws IOException {
LOG.info("Loading history server state from " + rootStatePath);
HistoryServerState state = new HistoryServerState();
loadTokenState(state);
return state;
}
@Override
public void storeToken(MRDelegationTokenIdentifier tokenId,
Long renewDate) throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Storing token " + tokenId.getSequenceNumber());
}
Path tokenPath = getTokenPath(tokenId);
if (fs.exists(tokenPath)) {
throw new IOException(tokenPath + " already exists");
}
createFile(tokenPath, buildTokenData(tokenId, renewDate));
}
@Override
public void updateToken(MRDelegationTokenIdentifier tokenId,
Long renewDate) throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Updating token " + tokenId.getSequenceNumber());
}
createFile(getTokenPath(tokenId), buildTokenData(tokenId, renewDate));
}
@Override
public void removeToken(MRDelegationTokenIdentifier tokenId)
throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Removing token " + tokenId.getSequenceNumber());
}
deleteFile(getTokenPath(tokenId));
}
@Override
public void storeTokenMasterKey(DelegationKey key) throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Storing master key " + key.getKeyId());
}
Path keyPath = new Path(tokenKeysStatePath,
TOKEN_MASTER_KEY_FILE_PREFIX + key.getKeyId());
if (fs.exists(keyPath)) {
throw new IOException(keyPath + " already exists");
}
ByteArrayOutputStream memStream = new ByteArrayOutputStream();
DataOutputStream dataStream = new DataOutputStream(memStream);
try {
key.write(dataStream);
} finally {
IOUtils.cleanup(LOG, dataStream);
}
createFile(keyPath, memStream.toByteArray());
}
@Override
public void removeTokenMasterKey(DelegationKey key)
throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Removing master key " + key.getKeyId());
}
Path keyPath = new Path(tokenKeysStatePath,
TOKEN_MASTER_KEY_FILE_PREFIX + key.getKeyId());
deleteFile(keyPath);
}
private static int getBucketId(MRDelegationTokenIdentifier tokenId) {
return tokenId.getSequenceNumber() % NUM_TOKEN_BUCKETS;
}
private Path getTokenBucketPath(int bucketId) {
return new Path(tokenStatePath,
String.format(TOKEN_BUCKET_NAME_FORMAT, bucketId));
}
private Path getTokenPath(MRDelegationTokenIdentifier tokenId) {
Path bucketPath = getTokenBucketPath(getBucketId(tokenId));
return new Path(bucketPath,
TOKEN_FILE_PREFIX + tokenId.getSequenceNumber());
}
private void createDir(Path dir) throws IOException {
try {
FileStatus status = fs.getFileStatus(dir);
if (!status.isDirectory()) {
throw new FileAlreadyExistsException("Unexpected file in store: "
+ dir);
}
if (!status.getPermission().equals(DIR_PERMISSIONS)) {
fs.setPermission(dir, DIR_PERMISSIONS);
}
} catch (FileNotFoundException e) {
fs.mkdirs(dir, DIR_PERMISSIONS);
}
}
private void createFile(Path file, byte[] data) throws IOException {
final int WRITE_BUFFER_SIZE = 4096;
Path tmp = new Path(file.getParent(), TMP_FILE_PREFIX + file.getName());
FSDataOutputStream out = fs.create(tmp, FILE_PERMISSIONS, true,
WRITE_BUFFER_SIZE, fs.getDefaultReplication(tmp),
fs.getDefaultBlockSize(tmp), null);
try {
try {
out.write(data);
} finally {
IOUtils.cleanup(LOG, out);
}
if (!fs.rename(tmp, file)) {
throw new IOException("Could not rename " + tmp + " to " + file);
}
} catch (IOException e) {
fs.delete(tmp, false);
throw e;
}
}
private byte[] readFile(Path file, long numBytes) throws IOException {
byte[] data = new byte[(int)numBytes];
FSDataInputStream in = fs.open(file);
try {
in.readFully(data);
} finally {
IOUtils.cleanup(LOG, in);
}
return data;
}
private void deleteFile(Path file) throws IOException {
boolean deleted;
try {
deleted = fs.delete(file, false);
} catch (FileNotFoundException e) {
deleted = true;
}
if (!deleted) {
throw new IOException("Unable to delete " + file);
}
}
private byte[] buildTokenData(MRDelegationTokenIdentifier tokenId,
Long renewDate) throws IOException {
ByteArrayOutputStream memStream = new ByteArrayOutputStream();
DataOutputStream dataStream = new DataOutputStream(memStream);
try {
tokenId.write(dataStream);
dataStream.writeLong(renewDate);
} finally {
IOUtils.cleanup(LOG, dataStream);
}
return memStream.toByteArray();
}
private void loadTokenMasterKey(HistoryServerState state, Path keyFile,
long numKeyFileBytes) throws IOException {
DelegationKey key = new DelegationKey();
byte[] keyData = readFile(keyFile, numKeyFileBytes);
DataInputStream in =
new DataInputStream(new ByteArrayInputStream(keyData));
try {
key.readFields(in);
} finally {
IOUtils.cleanup(LOG, in);
}
state.tokenMasterKeyState.add(key);
}
private MRDelegationTokenIdentifier loadToken(HistoryServerState state,
Path tokenFile, long numTokenFileBytes) throws IOException {
MRDelegationTokenIdentifier tokenId = new MRDelegationTokenIdentifier();
long renewDate;
byte[] tokenData = readFile(tokenFile, numTokenFileBytes);
DataInputStream in =
new DataInputStream(new ByteArrayInputStream(tokenData));
try {
tokenId.readFields(in);
renewDate = in.readLong();
} finally {
IOUtils.cleanup(LOG, in);
}
state.tokenState.put(tokenId, renewDate);
return tokenId;
}
private int loadTokensFromBucket(HistoryServerState state, Path bucket)
throws IOException {
String numStr =
bucket.getName().substring(TOKEN_BUCKET_DIR_PREFIX.length());
final int bucketId = Integer.parseInt(numStr);
int numTokens = 0;
FileStatus[] tokenStats = fs.listStatus(bucket);
for (FileStatus stat : tokenStats) {
String name = stat.getPath().getName();
if (name.startsWith(TOKEN_FILE_PREFIX)) {
MRDelegationTokenIdentifier token =
loadToken(state, stat.getPath(), stat.getLen());
int tokenBucketId = getBucketId(token);
if (tokenBucketId != bucketId) {
throw new IOException("Token " + stat.getPath()
+ " should be in bucket " + tokenBucketId + ", found in bucket "
+ bucketId);
}
++numTokens;
} else {
LOG.warn("Skipping unexpected file in history server token bucket: "
+ stat.getPath());
}
}
return numTokens;
}
private int loadKeys(HistoryServerState state) throws IOException {
FileStatus[] stats = fs.listStatus(tokenKeysStatePath);
int numKeys = 0;
for (FileStatus stat : stats) {
String name = stat.getPath().getName();
if (name.startsWith(TOKEN_MASTER_KEY_FILE_PREFIX)) {
loadTokenMasterKey(state, stat.getPath(), stat.getLen());
++numKeys;
} else {
LOG.warn("Skipping unexpected file in history server token state: "
+ stat.getPath());
}
}
return numKeys;
}
private int loadTokens(HistoryServerState state) throws IOException {
FileStatus[] stats = fs.listStatus(tokenStatePath);
int numTokens = 0;
for (FileStatus stat : stats) {
String name = stat.getPath().getName();
if (name.startsWith(TOKEN_BUCKET_DIR_PREFIX)) {
numTokens += loadTokensFromBucket(state, stat.getPath());
} else if (name.equals(TOKEN_KEYS_DIR_NAME)) {
// key loading is done elsewhere
continue;
} else {
LOG.warn("Skipping unexpected file in history server token state: "
+ stat.getPath());
}
}
return numTokens;
}
private void loadTokenState(HistoryServerState state) throws IOException {
int numKeys = loadKeys(state);
int numTokens = loadTokens(state);
LOG.info("Loaded " + numKeys + " master keys and " + numTokens
+ " tokens from " + tokenStatePath);
}
}

View File

@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.v2.hs;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.v2.api.MRDelegationTokenIdentifier;
import org.apache.hadoop.security.token.delegation.DelegationKey;
@Private
@Unstable
public class HistoryServerNullStateStoreService
extends HistoryServerStateStoreService {
@Override
protected void initStorage(Configuration conf) throws IOException {
// Do nothing
}
@Override
protected void startStorage() throws IOException {
// Do nothing
}
@Override
protected void closeStorage() throws IOException {
// Do nothing
}
@Override
public HistoryServerState loadState() throws IOException {
throw new UnsupportedOperationException(
"Cannot load state from null store");
}
@Override
public void storeToken(MRDelegationTokenIdentifier tokenId, Long renewDate)
throws IOException {
// Do nothing
}
@Override
public void updateToken(MRDelegationTokenIdentifier tokenId, Long renewDate)
throws IOException {
// Do nothing
}
@Override
public void removeToken(MRDelegationTokenIdentifier tokenId)
throws IOException {
// Do nothing
}
@Override
public void storeTokenMasterKey(DelegationKey key) throws IOException {
// Do nothing
}
@Override
public void removeTokenMasterKey(DelegationKey key) throws IOException {
// Do nothing
}
}

View File

@ -0,0 +1,184 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.v2.hs;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.v2.api.MRDelegationTokenIdentifier;
import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.apache.hadoop.service.AbstractService;
@Private
@Unstable
/**
* Base class for history server state storage.
* Storage implementations need to implement blocking store and load methods
* to actually store and load the state.
*/
public abstract class HistoryServerStateStoreService extends AbstractService {
public static class HistoryServerState {
Map<MRDelegationTokenIdentifier, Long> tokenState =
new HashMap<MRDelegationTokenIdentifier, Long>();
Set<DelegationKey> tokenMasterKeyState = new HashSet<DelegationKey>();
public Map<MRDelegationTokenIdentifier, Long> getTokenState() {
return tokenState;
}
public Set<DelegationKey> getTokenMasterKeyState() {
return tokenMasterKeyState;
}
}
public HistoryServerStateStoreService() {
super(HistoryServerStateStoreService.class.getName());
}
/**
* Initialize the state storage
*
* @param conf the configuration
* @throws IOException
*/
@Override
public void serviceInit(Configuration conf) throws IOException {
initStorage(conf);
}
/**
* Start the state storage for use
*
* @throws IOException
*/
@Override
public void serviceStart() throws IOException {
startStorage();
}
/**
* Shutdown the state storage.
*
* @throws IOException
*/
@Override
public void serviceStop() throws IOException {
closeStorage();
}
/**
* Implementation-specific initialization.
*
* @param conf the configuration
* @throws IOException
*/
protected abstract void initStorage(Configuration conf) throws IOException;
/**
* Implementation-specific startup.
*
* @throws IOException
*/
protected abstract void startStorage() throws IOException;
/**
* Implementation-specific shutdown.
*
* @throws IOException
*/
protected abstract void closeStorage() throws IOException;
/**
* Load the history server state from the state storage.
*
* @throws IOException
*/
public abstract HistoryServerState loadState() throws IOException;
/**
* Blocking method to store a delegation token along with the current token
* sequence number to the state storage.
*
* Implementations must not return from this method until the token has been
* committed to the state store.
*
* @param tokenId the token to store
* @param renewDate the token renewal deadline
* @throws IOException
*/
public abstract void storeToken(MRDelegationTokenIdentifier tokenId,
Long renewDate) throws IOException;
/**
* Blocking method to update the expiration of a delegation token
* in the state storage.
*
* Implementations must not return from this method until the expiration
* date of the token has been updated in the state store.
*
* @param tokenId the token to update
* @param renewDate the new token renewal deadline
* @throws IOException
*/
public abstract void updateToken(MRDelegationTokenIdentifier tokenId,
Long renewDate) throws IOException;
/**
* Blocking method to remove a delegation token from the state storage.
*
* Implementations must not return from this method until the token has been
* removed from the state store.
*
* @param tokenId the token to remove
* @throws IOException
*/
public abstract void removeToken(MRDelegationTokenIdentifier tokenId)
throws IOException;
/**
* Blocking method to store a delegation token master key.
*
* Implementations must not return from this method until the key has been
* committed to the state store.
*
* @param key the master key to store
* @throws IOException
*/
public abstract void storeTokenMasterKey(
DelegationKey key) throws IOException;
/**
* Blocking method to remove a delegation token master key.
*
* Implementations must not return from this method until the key has been
* removed from the state store.
*
* @param key the master key to remove
* @throws IOException
*/
public abstract void removeTokenMasterKey(DelegationKey key)
throws IOException;
}
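A minimal sketch of an alternative implementation, assuming only the abstract API above: an in-memory store (hypothetical class name, not part of this patch) that keeps tokens and master keys in a HistoryServerState object, which could be handy for unit tests that need a recoverable store without touching a FileSystem.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.v2.api.MRDelegationTokenIdentifier;
import org.apache.hadoop.mapreduce.v2.hs.HistoryServerStateStoreService;
import org.apache.hadoop.security.token.delegation.DelegationKey;

/** Hypothetical in-memory state store; keeps all state in a HistoryServerState. */
public class HistoryServerMemStateStoreService
    extends HistoryServerStateStoreService {

  private final HistoryServerState state = new HistoryServerState();

  @Override
  protected void initStorage(Configuration conf) throws IOException {
    // nothing to initialize for an in-memory store
  }

  @Override
  protected void startStorage() throws IOException {
    // nothing to start
  }

  @Override
  protected void closeStorage() throws IOException {
    // nothing to close
  }

  @Override
  public HistoryServerState loadState() throws IOException {
    return state;
  }

  @Override
  public void storeToken(MRDelegationTokenIdentifier tokenId, Long renewDate)
      throws IOException {
    state.getTokenState().put(tokenId, renewDate);
  }

  @Override
  public void updateToken(MRDelegationTokenIdentifier tokenId, Long renewDate)
      throws IOException {
    state.getTokenState().put(tokenId, renewDate);
  }

  @Override
  public void removeToken(MRDelegationTokenIdentifier tokenId)
      throws IOException {
    state.getTokenState().remove(tokenId);
  }

  @Override
  public void storeTokenMasterKey(DelegationKey key) throws IOException {
    state.getTokenMasterKeyState().add(key);
  }

  @Override
  public void removeTokenMasterKey(DelegationKey key) throws IOException {
    state.getTokenMasterKeyState().remove(key);
  }
}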

View File

@ -0,0 +1,48 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.v2.hs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
import org.apache.hadoop.util.ReflectionUtils;
public class HistoryServerStateStoreServiceFactory {
/**
* Constructs an instance of the configured storage class
*
* @param conf the configuration
* @return the state storage instance
*/
public static HistoryServerStateStoreService getStore(Configuration conf) {
Class<? extends HistoryServerStateStoreService> storeClass =
HistoryServerNullStateStoreService.class;
boolean recoveryEnabled = conf.getBoolean(
JHAdminConfig.MR_HS_RECOVERY_ENABLE,
JHAdminConfig.DEFAULT_MR_HS_RECOVERY_ENABLE);
if (recoveryEnabled) {
storeClass = conf.getClass(JHAdminConfig.MR_HS_STATE_STORE, null,
HistoryServerStateStoreService.class);
if (storeClass == null) {
throw new RuntimeException("Unable to locate storage class, check "
+ JHAdminConfig.MR_HS_STATE_STORE);
}
}
return ReflectionUtils.newInstance(storeClass, conf);
}
}
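As a hedged illustration of the selection logic above: the two configuration keys are the JHAdminConfig constants used in this patch, the filesystem store class mirrors the setup in the new TestHistoryServerFileSystemStateStoreService, and the URI is a placeholder.
package org.apache.hadoop.mapreduce.v2.hs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
public class StateStoreConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Recovery disabled (the default): getStore() falls back to
    // HistoryServerNullStateStoreService.
    HistoryServerStateStoreService store =
        HistoryServerStateStoreServiceFactory.getStore(conf);
    // Recovery enabled: a concrete store class must be configured,
    // otherwise getStore() throws a RuntimeException.
    conf.setBoolean(JHAdminConfig.MR_HS_RECOVERY_ENABLE, true);
    conf.setClass(JHAdminConfig.MR_HS_STATE_STORE,
        HistoryServerFileSystemStateStoreService.class,
        HistoryServerStateStoreService.class);
    conf.set(JHAdminConfig.MR_HS_FS_STATE_STORE_URI,
        "file:///tmp/jhs-state");   // placeholder URI
    store = HistoryServerStateStoreServiceFactory.getStore(conf);
    System.out.println("selected store: " + store.getClass().getSimpleName());
  }
}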

View File

@ -18,10 +18,17 @@
package org.apache.hadoop.mapreduce.v2.hs;
import java.io.IOException;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.mapreduce.v2.api.MRDelegationTokenIdentifier;
import org.apache.hadoop.mapreduce.v2.hs.HistoryServerStateStoreService.HistoryServerState;
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager;
import org.apache.hadoop.security.token.delegation.DelegationKey;
/**
* A MapReduce specific delegation token secret manager.
@ -33,6 +40,11 @@
public class JHSDelegationTokenSecretManager
extends AbstractDelegationTokenSecretManager<MRDelegationTokenIdentifier> {
private static final Log LOG = LogFactory.getLog(
JHSDelegationTokenSecretManager.class);
private HistoryServerStateStoreService store;
/**
* Create a secret manager
* @param delegationKeyUpdateInterval the number of seconds for rolling new
@ -42,17 +54,94 @@ public class JHSDelegationTokenSecretManager
* @param delegationTokenRenewInterval how often the tokens must be renewed
* @param delegationTokenRemoverScanInterval how often the tokens are scanned
* for expired tokens
* @param store history server state store for persisting state
*/
public JHSDelegationTokenSecretManager(long delegationKeyUpdateInterval,
long delegationTokenMaxLifetime,
long delegationTokenRenewInterval,
long delegationTokenRemoverScanInterval) {
long delegationTokenRemoverScanInterval,
HistoryServerStateStoreService store) {
super(delegationKeyUpdateInterval, delegationTokenMaxLifetime,
delegationTokenRenewInterval, delegationTokenRemoverScanInterval);
this.store = store;
}
@Override
public MRDelegationTokenIdentifier createIdentifier() {
return new MRDelegationTokenIdentifier();
}
@Override
protected void storeNewMasterKey(DelegationKey key) throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Storing master key " + key.getKeyId());
}
try {
store.storeTokenMasterKey(key);
} catch (IOException e) {
LOG.error("Unable to store master key " + key.getKeyId(), e);
}
}
@Override
protected void removeStoredMasterKey(DelegationKey key) {
if (LOG.isDebugEnabled()) {
LOG.debug("Removing master key " + key.getKeyId());
}
try {
store.removeTokenMasterKey(key);
} catch (IOException e) {
LOG.error("Unable to remove master key " + key.getKeyId(), e);
}
}
@Override
protected void storeNewToken(MRDelegationTokenIdentifier tokenId,
long renewDate) {
if (LOG.isDebugEnabled()) {
LOG.debug("Storing token " + tokenId.getSequenceNumber());
}
try {
store.storeToken(tokenId, renewDate);
} catch (IOException e) {
LOG.error("Unable to store token " + tokenId.getSequenceNumber(), e);
}
}
@Override
protected void removeStoredToken(MRDelegationTokenIdentifier tokenId)
throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Storing token " + tokenId.getSequenceNumber());
}
try {
store.removeToken(tokenId);
} catch (IOException e) {
LOG.error("Unable to remove token " + tokenId.getSequenceNumber(), e);
}
}
@Override
protected void updateStoredToken(MRDelegationTokenIdentifier tokenId,
long renewDate) {
if (LOG.isDebugEnabled()) {
LOG.debug("Updating token " + tokenId.getSequenceNumber());
}
try {
store.updateToken(tokenId, renewDate);
} catch (IOException e) {
LOG.error("Unable to update token " + tokenId.getSequenceNumber(), e);
}
}
public void recover(HistoryServerState state) throws IOException {
LOG.info("Recovering " + getClass().getSimpleName());
for (DelegationKey key : state.tokenMasterKeyState) {
addKey(key);
}
for (Entry<MRDelegationTokenIdentifier, Long> entry :
state.tokenState.entrySet()) {
addPersistedDelegationToken(entry.getKey(), entry.getValue());
}
}
}
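The intended startup order (recover persisted state before starting the key-rolling and expiry threads) is easiest to see in isolation. A hedged sketch, modeled on the test added later in this change and on the HistoryServerSecretManagerService wiring in the JobHistoryServer changes below; the interval values are arbitrary examples and the in-memory store is the test helper from this patch:
package org.apache.hadoop.mapreduce.v2.hs;
import org.apache.hadoop.conf.Configuration;
public class SecretManagerRecoverySketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    HistoryServerStateStoreService store = new HistoryServerMemStateStoreService();
    store.init(conf);
    store.start();
    JHSDelegationTokenSecretManager mgr = new JHSDelegationTokenSecretManager(
        86400000L, 604800000L, 86400000L, 3600000L, store);
    mgr.recover(store.loadState());  // replay persisted master keys and tokens
    mgr.startThreads();              // start rolling/expiry only after recovery
    // ... issue and renew tokens while the history server is running ...
    mgr.stopThreads();
    store.close();
  }
}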

View File

@ -28,11 +28,13 @@
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.v2.app.webapp.WebAppUtil;
import org.apache.hadoop.mapreduce.v2.hs.HistoryServerStateStoreService.HistoryServerState;
import org.apache.hadoop.mapreduce.v2.hs.server.HSAdminServer;
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.source.JvmMetrics;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.service.CompositeService;
import org.apache.hadoop.util.ExitUtil;
import org.apache.hadoop.util.ShutdownHookManager;
@ -64,6 +66,46 @@ public class JobHistoryServer extends CompositeService {
private JHSDelegationTokenSecretManager jhsDTSecretManager;
private AggregatedLogDeletionService aggLogDelService;
private HSAdminServer hsAdminServer;
private HistoryServerStateStoreService stateStore;
// utility service to start and stop the secret manager as part of the service
// framework and to recover secret manager state on startup
private class HistoryServerSecretManagerService
extends AbstractService {
public HistoryServerSecretManagerService() {
super(HistoryServerSecretManagerService.class.getName());
}
@Override
protected void serviceStart() throws Exception {
boolean recoveryEnabled = getConfig().getBoolean(
JHAdminConfig.MR_HS_RECOVERY_ENABLE,
JHAdminConfig.DEFAULT_MR_HS_RECOVERY_ENABLE);
if (recoveryEnabled) {
assert stateStore.isInState(STATE.STARTED);
HistoryServerState state = stateStore.loadState();
jhsDTSecretManager.recover(state);
}
try {
jhsDTSecretManager.startThreads();
} catch(IOException io) {
LOG.error("Error while starting the Secret Manager threads", io);
throw io;
}
super.serviceStart();
}
@Override
protected void serviceStop() throws Exception {
if (jhsDTSecretManager != null) {
jhsDTSecretManager.stopThreads();
}
super.serviceStop();
}
}
public JobHistoryServer() {
super(JobHistoryServer.class.getName());
@ -86,11 +128,14 @@ protected void serviceInit(Configuration conf) throws Exception {
}
jobHistoryService = new JobHistory();
historyContext = (HistoryContext)jobHistoryService;
this.jhsDTSecretManager = createJHSSecretManager(conf);
stateStore = createStateStore(conf);
this.jhsDTSecretManager = createJHSSecretManager(conf, stateStore);
clientService = new HistoryClientService(historyContext,
this.jhsDTSecretManager);
aggLogDelService = new AggregatedLogDeletionService();
hsAdminServer = new HSAdminServer(aggLogDelService, jobHistoryService);
addService(stateStore);
addService(new HistoryServerSecretManagerService());
addService(jobHistoryService);
addService(clientService);
addService(aggLogDelService);
@ -99,7 +144,7 @@ protected void serviceInit(Configuration conf) throws Exception {
}
protected JHSDelegationTokenSecretManager createJHSSecretManager(
Configuration conf) {
Configuration conf, HistoryServerStateStoreService store) {
long secretKeyInterval =
conf.getLong(MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_KEY,
MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT);
@ -111,9 +156,14 @@ protected JHSDelegationTokenSecretManager createJHSSecretManager(
MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT);
return new JHSDelegationTokenSecretManager(secretKeyInterval,
tokenMaxLifetime, tokenRenewInterval, 3600000);
tokenMaxLifetime, tokenRenewInterval, 3600000, store);
}
protected HistoryServerStateStoreService createStateStore(
Configuration conf) {
return HistoryServerStateStoreServiceFactory.getStore(conf);
}
protected void doSecureLogin(Configuration conf) throws IOException {
SecurityUtil.login(conf, JHAdminConfig.MR_HISTORY_KEYTAB,
JHAdminConfig.MR_HISTORY_PRINCIPAL);
@ -123,20 +173,11 @@ protected void doSecureLogin(Configuration conf) throws IOException {
protected void serviceStart() throws Exception {
DefaultMetricsSystem.initialize("JobHistoryServer");
JvmMetrics.initSingleton("JobHistoryServer", null);
try {
jhsDTSecretManager.startThreads();
} catch(IOException io) {
LOG.error("Error while starting the Secret Manager threads", io);
throw io;
}
super.serviceStart();
}
@Override
protected void serviceStop() throws Exception {
if (jhsDTSecretManager != null) {
jhsDTSecretManager.stopThreads();
}
DefaultMetricsSystem.shutdown();
super.serviceStop();
}

View File

@ -0,0 +1,92 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.v2.hs;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.v2.api.MRDelegationTokenIdentifier;
import org.apache.hadoop.security.token.delegation.DelegationKey;
/**
* A state store backed by memory for unit tests
*/
class HistoryServerMemStateStoreService
extends HistoryServerStateStoreService {
HistoryServerState state;
@Override
protected void initStorage(Configuration conf) throws IOException {
}
@Override
protected void startStorage() throws IOException {
state = new HistoryServerState();
}
@Override
protected void closeStorage() throws IOException {
state = null;
}
@Override
public HistoryServerState loadState() throws IOException {
HistoryServerState result = new HistoryServerState();
result.tokenState.putAll(state.tokenState);
result.tokenMasterKeyState.addAll(state.tokenMasterKeyState);
return result;
}
@Override
public void storeToken(MRDelegationTokenIdentifier tokenId, Long renewDate)
throws IOException {
if (state.tokenState.containsKey(tokenId)) {
throw new IOException("token " + tokenId + " was stored twice");
}
state.tokenState.put(tokenId, renewDate);
}
@Override
public void updateToken(MRDelegationTokenIdentifier tokenId, Long renewDate)
throws IOException {
if (!state.tokenState.containsKey(tokenId)) {
throw new IOException("token " + tokenId + " not in store");
}
state.tokenState.put(tokenId, renewDate);
}
@Override
public void removeToken(MRDelegationTokenIdentifier tokenId)
throws IOException {
state.tokenState.remove(tokenId);
}
@Override
public void storeTokenMasterKey(DelegationKey key) throws IOException {
if (state.tokenMasterKeyState.contains(key)) {
throw new IOException("token master key " + key + " was stored twice");
}
state.tokenMasterKeyState.add(key);
}
@Override
public void removeTokenMasterKey(DelegationKey key) throws IOException {
state.tokenMasterKeyState.remove(key);
}
}

View File

@ -0,0 +1,164 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.v2.hs;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.v2.api.MRDelegationTokenIdentifier;
import org.apache.hadoop.mapreduce.v2.hs.HistoryServerStateStoreService.HistoryServerState;
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class TestHistoryServerFileSystemStateStoreService {
private static final File testDir = new File(
System.getProperty("test.build.data",
System.getProperty("java.io.tmpdir")),
"TestHistoryServerFileSystemStateStoreService");
private Configuration conf;
@Before
public void setup() {
FileUtil.fullyDelete(testDir);
testDir.mkdirs();
conf = new Configuration();
conf.setBoolean(JHAdminConfig.MR_HS_RECOVERY_ENABLE, true);
conf.setClass(JHAdminConfig.MR_HS_STATE_STORE,
HistoryServerFileSystemStateStoreService.class,
HistoryServerStateStoreService.class);
conf.set(JHAdminConfig.MR_HS_FS_STATE_STORE_URI,
testDir.getAbsoluteFile().toURI().toString());
}
@After
public void cleanup() {
FileUtil.fullyDelete(testDir);
}
private HistoryServerStateStoreService createAndStartStore()
throws IOException {
HistoryServerStateStoreService store =
HistoryServerStateStoreServiceFactory.getStore(conf);
assertTrue("Factory did not create a filesystem store",
store instanceof HistoryServerFileSystemStateStoreService);
store.init(conf);
store.start();
return store;
}
@Test
public void testTokenStore() throws IOException {
HistoryServerStateStoreService store = createAndStartStore();
HistoryServerState state = store.loadState();
assertTrue("token state not empty", state.tokenState.isEmpty());
assertTrue("key state not empty", state.tokenMasterKeyState.isEmpty());
final DelegationKey key1 = new DelegationKey(1, 2, "keyData1".getBytes());
final MRDelegationTokenIdentifier token1 =
new MRDelegationTokenIdentifier(new Text("tokenOwner1"),
new Text("tokenRenewer1"), new Text("tokenUser1"));
token1.setSequenceNumber(1);
final Long tokenDate1 = 1L;
final MRDelegationTokenIdentifier token2 =
new MRDelegationTokenIdentifier(new Text("tokenOwner2"),
new Text("tokenRenewer2"), new Text("tokenUser2"));
token2.setSequenceNumber(12345678);
final Long tokenDate2 = 87654321L;
store.storeTokenMasterKey(key1);
try {
store.storeTokenMasterKey(key1);
fail("redundant store of key undetected");
} catch (IOException e) {
// expected
}
store.storeToken(token1, tokenDate1);
store.storeToken(token2, tokenDate2);
try {
store.storeToken(token1, tokenDate1);
fail("redundant store of token undetected");
} catch (IOException e) {
// expected
}
store.close();
store = createAndStartStore();
state = store.loadState();
assertEquals("incorrect loaded token count", 2, state.tokenState.size());
assertTrue("missing token 1", state.tokenState.containsKey(token1));
assertEquals("incorrect token 1 date", tokenDate1,
state.tokenState.get(token1));
assertTrue("missing token 2", state.tokenState.containsKey(token2));
assertEquals("incorrect token 2 date", tokenDate2,
state.tokenState.get(token2));
assertEquals("incorrect master key count", 1,
state.tokenMasterKeyState.size());
assertTrue("missing master key 1",
state.tokenMasterKeyState.contains(key1));
final DelegationKey key2 = new DelegationKey(3, 4, "keyData2".getBytes());
final DelegationKey key3 = new DelegationKey(5, 6, "keyData3".getBytes());
final MRDelegationTokenIdentifier token3 =
new MRDelegationTokenIdentifier(new Text("tokenOwner3"),
new Text("tokenRenewer3"), new Text("tokenUser3"));
token3.setSequenceNumber(12345679);
final Long tokenDate3 = 87654321L;
store.removeToken(token1);
store.storeTokenMasterKey(key2);
final Long newTokenDate2 = 975318642L;
store.updateToken(token2, newTokenDate2);
store.removeTokenMasterKey(key1);
store.storeTokenMasterKey(key3);
store.storeToken(token3, tokenDate3);
store.close();
store = createAndStartStore();
state = store.loadState();
assertEquals("incorrect loaded token count", 2, state.tokenState.size());
assertFalse("token 1 not removed", state.tokenState.containsKey(token1));
assertTrue("missing token 2", state.tokenState.containsKey(token2));
assertEquals("incorrect token 2 date", newTokenDate2,
state.tokenState.get(token2));
assertTrue("missing token 3", state.tokenState.containsKey(token3));
assertEquals("incorrect token 3 date", tokenDate3,
state.tokenState.get(token3));
assertEquals("incorrect master key count", 2,
state.tokenMasterKeyState.size());
assertFalse("master key 1 not removed",
state.tokenMasterKeyState.contains(key1));
assertTrue("missing master key 2",
state.tokenMasterKeyState.contains(key2));
assertTrue("missing master key 3",
state.tokenMasterKeyState.contains(key3));
}
}

View File

@ -0,0 +1,122 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.v2.hs;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.v2.api.MRDelegationTokenIdentifier;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.junit.Test;
public class TestJHSDelegationTokenSecretManager {
@Test
public void testRecovery() throws IOException {
Configuration conf = new Configuration();
HistoryServerStateStoreService store =
new HistoryServerMemStateStoreService();
store.init(conf);
store.start();
JHSDelegationTokenSecretManagerForTest mgr =
new JHSDelegationTokenSecretManagerForTest(store);
mgr.startThreads();
MRDelegationTokenIdentifier tokenId1 = new MRDelegationTokenIdentifier(
new Text("tokenOwner"), new Text("tokenRenewer"),
new Text("tokenUser"));
Token<MRDelegationTokenIdentifier> token1 =
new Token<MRDelegationTokenIdentifier>(tokenId1, mgr);
MRDelegationTokenIdentifier tokenId2 = new MRDelegationTokenIdentifier(
new Text("tokenOwner"), new Text("tokenRenewer"),
new Text("tokenUser"));
Token<MRDelegationTokenIdentifier> token2 =
new Token<MRDelegationTokenIdentifier>(tokenId2, mgr);
DelegationKey[] keys = mgr.getAllKeys();
long tokenRenewDate1 = mgr.getAllTokens().get(tokenId1).getRenewDate();
long tokenRenewDate2 = mgr.getAllTokens().get(tokenId2).getRenewDate();
mgr.stopThreads();
mgr = new JHSDelegationTokenSecretManagerForTest(store);
mgr.recover(store.loadState());
List<DelegationKey> recoveredKeys = Arrays.asList(mgr.getAllKeys());
for (DelegationKey key : keys) {
assertTrue("key missing after recovery", recoveredKeys.contains(key));
}
assertTrue("token1 missing", mgr.getAllTokens().containsKey(tokenId1));
assertEquals("token1 renew date", tokenRenewDate1,
mgr.getAllTokens().get(tokenId1).getRenewDate());
assertTrue("token2 missing", mgr.getAllTokens().containsKey(tokenId2));
assertEquals("token2 renew date", tokenRenewDate2,
mgr.getAllTokens().get(tokenId2).getRenewDate());
mgr.startThreads();
mgr.verifyToken(tokenId1, token1.getPassword());
mgr.verifyToken(tokenId2, token2.getPassword());
MRDelegationTokenIdentifier tokenId3 = new MRDelegationTokenIdentifier(
new Text("tokenOwner"), new Text("tokenRenewer"),
new Text("tokenUser"));
Token<MRDelegationTokenIdentifier> token3 =
new Token<MRDelegationTokenIdentifier>(tokenId3, mgr);
assertEquals("sequence number restore", tokenId2.getSequenceNumber() + 1,
tokenId3.getSequenceNumber());
mgr.cancelToken(token1, "tokenOwner");
long tokenRenewDate3 = mgr.getAllTokens().get(tokenId3).getRenewDate();
mgr.stopThreads();
mgr = new JHSDelegationTokenSecretManagerForTest(store);
mgr.recover(store.loadState());
assertFalse("token1 should be missing",
mgr.getAllTokens().containsKey(tokenId1));
assertTrue("token2 missing", mgr.getAllTokens().containsKey(tokenId2));
assertEquals("token2 renew date", tokenRenewDate2,
mgr.getAllTokens().get(tokenId2).getRenewDate());
assertTrue("token3 missing", mgr.getAllTokens().containsKey(tokenId3));
assertEquals("token3 renew date", tokenRenewDate3,
mgr.getAllTokens().get(tokenId3).getRenewDate());
mgr.startThreads();
mgr.verifyToken(tokenId2, token2.getPassword());
mgr.verifyToken(tokenId3, token3.getPassword());
mgr.stopThreads();
}
private static class JHSDelegationTokenSecretManagerForTest
extends JHSDelegationTokenSecretManager {
public JHSDelegationTokenSecretManagerForTest(
HistoryServerStateStoreService store) {
super(10000, 10000, 10000, 10000, store);
}
public Map<MRDelegationTokenIdentifier, DelegationTokenInformation> getAllTokens() {
return new HashMap<MRDelegationTokenIdentifier, DelegationTokenInformation>(currentTokens);
}
}
}

View File

@ -72,7 +72,7 @@ public void testStartStopServer() throws Exception {
Configuration config = new Configuration();
historyServer.init(config);
assertEquals(STATE.INITED, historyServer.getServiceState());
assertEquals(4, historyServer.getServices().size());
assertEquals(6, historyServer.getServices().size());
HistoryClientService historyService = historyServer.getClientService();
assertNotNull(historyServer.getClientService());
assertEquals(STATE.INITED, historyService.getServiceState());

View File

@ -39,6 +39,7 @@
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetDelegationTokenRequest;
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.GetJobReportRequest;
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.RenewDelegationTokenRequest;
import org.apache.hadoop.mapreduce.v2.hs.HistoryServerStateStoreService;
import org.apache.hadoop.mapreduce.v2.hs.JHSDelegationTokenSecretManager;
import org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer;
import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
@ -87,10 +88,11 @@ protected void doSecureLogin(Configuration conf) throws IOException {
// no keytab based login
};
@Override
protected JHSDelegationTokenSecretManager createJHSSecretManager(
Configuration conf) {
Configuration conf, HistoryServerStateStoreService store) {
return new JHSDelegationTokenSecretManager(initialInterval,
maxLifetime, renewInterval, 3600000);
maxLifetime, renewInterval, 3600000, store);
}
};
// final JobHistoryServer jobHistoryServer = jhServer;

View File

@ -730,6 +730,16 @@
<artifactId>hsqldb</artifactId>
<version>2.0.0</version>
</dependency>
<dependency>
<groupId>com.codahale.metrics</groupId>
<artifactId>metrics-core</artifactId>
<version>3.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-sls</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</dependencyManagement>

View File

@ -95,6 +95,7 @@
<item name="Fair Scheduler" href="hadoop-yarn/hadoop-yarn-site/FairScheduler.html"/>
<item name="Web Application Proxy" href="hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html"/>
<item name="YARN Commands" href="hadoop-yarn/hadoop-yarn-site/YarnCommands.html"/>
<item name="Scheduler Load Simulator" href="hadoop-sls/SchedulerLoadSimulator.html"/>
</menu>
<menu name="YARN REST APIs" inherit="top">

View File

@ -0,0 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
~ Licensed under the Apache License, Version 2.0 (the "License");
~ you may not use this file except in compliance with the License.
~ You may obtain a copy of the License at
~
~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing, software
~ distributed under the License is distributed on an "AS IS" BASIS,
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~ See the License for the specific language governing permissions and
~ limitations under the License. See accompanying LICENSE file.
-->
<FindBugsFilter>
<!--
OpenStack Swift FS module: closes streams in a different method
from where they are opened.
-->
<Match>
<Class name="org.apache.hadoop.fs.swift.snative.SwiftNativeOutputStream"/>
<Method name="uploadFileAttempt"/>
<Bug pattern="OBL_UNSATISFIED_OBLIGATION"/>
<Bug code="OBL"/>
</Match>
<Match>
<Class name="org.apache.hadoop.fs.swift.snative.SwiftNativeOutputStream"/>
<Method name="uploadFilePartAttempt"/>
<Bug pattern="OBL_UNSATISFIED_OBLIGATION"/>
<Bug code="OBL"/>
</Match>
</FindBugsFilter>

View File

@ -66,7 +66,17 @@
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<configuration>
<findbugsXmlOutput>true</findbugsXmlOutput>
<xmlOutput>true</xmlOutput>
<excludeFilterFile>${basedir}/dev-support/findbugs-exclude.xml
</excludeFilterFile>
<effort>Max</effort>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
@ -131,14 +141,6 @@
<scope>compile</scope>
</dependency>
<!-- Used for mocking dependencies -->
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.8.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>

View File

@ -90,11 +90,12 @@ public static <T> T toObject(String value, Class<T> klazz) throws
* @param <T> type
* @return deserialized T object
*/
@SuppressWarnings("unchecked")
public static <T> T toObject(String value,
final TypeReference<T> typeReference)
throws IOException {
try {
return jsonMapper.readValue(value, typeReference);
return (T)jsonMapper.readValue(value, typeReference);
} catch (JsonGenerationException e) {
throw new SwiftJsonMarshallingException("Error generating response", e);
} catch (JsonMappingException e) {
@ -108,11 +109,12 @@ public static <T> T toObject(String value,
* @param <T> type
* @return deserialized T object
*/
@SuppressWarnings("unchecked")
public static <T> T toObject(String value,
final CollectionType collectionType)
throws IOException {
try {
return jsonMapper.readValue(value, collectionType);
return (T)jsonMapper.readValue(value, collectionType);
} catch (JsonGenerationException e) {
throw new SwiftJsonMarshallingException(e.toString()
+ " source: " + value,

View File

@ -0,0 +1,12 @@
Yarn Scheduler Load Simulator (SLS)
SLS is a stress and performance harness for the YARN ResourceManager scheduler.
It exercises the scheduler implementation by simulating the cluster size and the
application load without requiring a real cluster or real applications.
SLS runs a regular RM without RPC endpoints and uses NodeManager and
ApplicationMaster simulators to send and receive events, simulating cluster
and application load behavior.
The size of the cluster and the application load are scripted in a configuration
file.
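The helper scripts later in this change (rumen2sls.sh, slsrun.sh) drive the simulator from the command line. Purely as a hedged sketch, the same entry point can be reached from Java: the class and flag names below are taken from slsrun.sh, the classpath setup the script performs is omitted, and the trace/output paths are placeholders, not real files.
import org.apache.hadoop.yarn.sls.SLSRunner;
public class SlsLaunchSketch {
  public static void main(String[] args) throws Exception {
    // Roughly what slsrun.sh ends up invoking after setting up the classpath.
    SLSRunner.main(new String[] {
        "-inputrumen", "/tmp/job-trace.json",  // Rumen trace (placeholder)
        "-output", "/tmp/sls-output",          // simulation output dir (placeholder)
        "-printsimulation"                     // print cluster/application info
    });
  }
}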

View File

@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<FindBugsFilter>
<!-- Ignore comparedTo, equals warnings -->
<Match>
<Class name="org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator" />
<Bug pattern="EQ_COMPARETO_USE_OBJECT_EQUALS" />
</Match>
</FindBugsFilter>

View File

@ -0,0 +1,184 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-project</artifactId>
<version>3.0.0-SNAPSHOT</version>
<relativePath>../../hadoop-project</relativePath>
</parent>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-sls</artifactId>
<version>3.0.0-SNAPSHOT</version>
<description>Apache Hadoop Scheduler Load Simulator</description>
<name>Apache Hadoop Scheduler Load Simulator</name>
<packaging>jar</packaging>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-minicluster</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-rumen</artifactId>
</dependency>
<dependency>
<groupId>com.codahale.metrics</groupId>
<artifactId>metrics-core</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty</artifactId>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>org.mortbay.jetty</groupId>
<artifactId>servlet-api</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.mortbay.jetty</groupId>
<artifactId>jetty-util</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<configuration>
<attach>true</attach>
</configuration>
<executions>
<execution>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<configuration>
<findbugsXmlOutput>true</findbugsXmlOutput>
<xmlOutput>true</xmlOutput>
<excludeFilterFile>${basedir}/dev-support/findbugs-exclude.xml</excludeFilterFile>
<effort>Max</effort>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
<configuration>
<excludes>
<exclude>src/main/data/2jobs2min-rumen-jh.json</exclude>
<exclude>src/main/html/js/thirdparty/jquery.js</exclude>
<exclude>src/main/html/js/thirdparty/d3-LICENSE</exclude>
<exclude>src/main/html/js/thirdparty/d3.v3.js</exclude>
<exclude>src/main/html/simulate.html.template</exclude>
<exclude>src/main/html/simulate.info.html.template</exclude>
<exclude>src/main/html/track.html.template</exclude>
<exclude>src/test/resources/simulate.html.template</exclude>
<exclude>src/test/resources/simulate.info.html.template</exclude>
<exclude>src/test/resources/track.html.template</exclude>
</excludes>
</configuration>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>docs</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-site-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>site</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
<profile>
<id>dist</id>
<activation>
<activeByDefault>false</activeByDefault>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-assemblies</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<executions>
<execution>
<id>dist</id>
<phase>prepare-package</phase>
<goals>
<goal>single</goal>
</goals>
<configuration>
<appendAssemblyId>false</appendAssemblyId>
<attach>false</attach>
<finalName>${project.artifactId}-${project.version}</finalName>
<descriptorRefs>
<descriptorRef>hadoop-sls</descriptorRef>
</descriptorRefs>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>

View File

@ -0,0 +1,55 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly>
<id>sls</id>
<formats>
<format>dir</format>
<format>tar.gz</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
<directory>${basedir}/src/main/bin</directory>
<outputDirectory>bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/data</directory>
<outputDirectory>sample-data</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/html</directory>
<outputDirectory>html</outputDirectory>
</fileSet>
<fileSet>
<directory>${basedir}/src/main/sample-conf</directory>
<outputDirectory>sample-conf</outputDirectory>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
<outputDirectory>/lib</outputDirectory>
<unpack>false</unpack>
<scope>compile</scope>
<useProjectArtifact>true</useProjectArtifact>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,106 @@
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#
###############################################################################
printUsage() {
echo "Usage: rumen2sls.sh <OPTIONS>"
echo " --rumen-file=<RUMEN_FILE>"
echo " --output-dir=<SLS_OUTPUT_DIR>"
echo " [--output-prefix=<PREFIX>] (default is sls)"
echo
}
###############################################################################
parseArgs() {
for i in $*
do
case $i in
--rumen-file=*)
rumenfile=${i#*=}
;;
--output-dir=*)
outputdir=${i#*=}
;;
--output-prefix=*)
outputprefix=${i#*=}
;;
*)
echo "Invalid option"
echo
printUsage
exit 1
;;
esac
done
if [[ "${rumenfile}" == "" || "${outputdir}" == "" ]] ; then
echo "Both --rumen-file ${rumenfile} and --output-dir \
${outputfdir} must be specified"
echo
printUsage
exit 1
fi
}
###############################################################################
calculateBasedir() {
# resolve links - $0 may be a softlink
PRG="${1}"
while [ -h "${PRG}" ]; do
ls=`ls -ld "${PRG}"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "${PRG}"`/"$link"
fi
done
BASEDIR=`dirname ${PRG}`
BASEDIR=`cd ${BASEDIR}/..;pwd`
}
###############################################################################
calculateClasspath() {
HADOOP_BASE=`which hadoop`
HADOOP_BASE=`dirname $HADOOP_BASE`
DEFAULT_LIBEXEC_DIR=${HADOOP_BASE}/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${TOOL_PATH}"
}
###############################################################################
runSLSGenerator() {
if [[ "${outputprefix}" == "" ]] ; then
outputprefix="sls"
fi
slsJobs=${outputdir}/${outputprefix}-jobs.json
slsNodes=${outputdir}/${outputprefix}-nodes.json
args="-input ${rumenfile} -outputJobs ${slsJobs}";
args="${args} -outputNodes ${slsNodes}";
hadoop org.apache.hadoop.yarn.sls.RumenToSLSConverter ${args}
}
###############################################################################
calculateBasedir $0
calculateClasspath
parseArgs "$@"
runSLSGenerator
echo
echo "SLS simulation files available at: ${outputdir}"
echo
exit 0

View File

@ -0,0 +1,112 @@
#!/bin/bash
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. See accompanying LICENSE file.
#
###############################################################################
printUsage() {
echo "Usage: slsrun.sh <OPTIONS>"
echo " --input-rumen|--input-sls=<FILE1,FILE2,...>"
echo " --output-dir=<SLS_SIMULATION_OUTPUT_DIRECTORY>"
echo " [--nodes=<SLS_NODES_FILE>]"
echo " [--track-jobs=<JOBID1,JOBID2,...>]"
echo " [--print-simulation]"
echo
}
###############################################################################
parseArgs() {
for i in $*
do
case $i in
--input-rumen=*)
inputrumen=${i#*=}
;;
--input-sls=*)
inputsls=${i#*=}
;;
--output-dir=*)
outputdir=${i#*=}
;;
--nodes=*)
nodes=${i#*=}
;;
--track-jobs=*)
trackjobs=${i#*=}
;;
--print-simulation)
printsimulation="true"
;;
*)
echo "Invalid option"
echo
printUsage
exit 1
;;
esac
done
if [[ "${inputrumen}" == "" && "${inputsls}" == "" ]] ; then
echo "Either --input-rumen or --input-sls must be specified"
echo
printUsage
exit 1
fi
if [[ "${outputdir}" == "" ]] ; then
echo "The output directory --output-dir must be specified"
echo
printUsage
exit 1
fi
}
###############################################################################
calculateClasspath() {
HADOOP_BASE=`which hadoop`
HADOOP_BASE=`dirname $HADOOP_BASE`
DEFAULT_LIBEXEC_DIR=${HADOOP_BASE}/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh
export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${TOOL_PATH}:html"
}
###############################################################################
runSimulation() {
if [[ "${inputsls}" == "" ]] ; then
args="-inputrumen ${inputrumen}"
else
args="-inputsls ${inputsls}"
fi
args="${args} -output ${outputdir}"
if [[ "${nodes}" != "" ]] ; then
args="${args} -nodes ${nodes}"
fi
if [[ "${trackjobs}" != "" ]] ; then
args="${args} -trackjobs ${trackjobs}"
fi
if [[ "${printsimulation}" == "true" ]] ; then
args="${args} -printsimulation"
fi
hadoop org.apache.hadoop.yarn.sls.SLSRunner ${args}
}
###############################################################################
calculateClasspath
parseArgs "$@"
runSimulation
exit 0

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,26 @@
Copyright (c) 2013, Michael Bostock
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* The name Michael Bostock may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,334 @@
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<style type="text/css">
body {
font: 20px sans-serif;
}
.axis path,
.axis line {
fill: none;
stroke: #000;
shape-rendering: crispEdges;
}
.axis text {
font-family: sans-serif;
font-size: 20px;
}
.line {
fill: none;
stroke: steelblue;
stroke-width: 3px;
}
.legend {
padding: 1px;
font: 18px sans-serif;
background: yellow;
box-shadow: 2px 2px 1px #888;
}
.title {
font: 24px sans-serif;
}
.divborder {
border-width: 1px;
border-style: solid;
border-color: black;
margin-top:10px
}
</style>
<script src="js/thirdparty/d3.v3.js"></script>
<script src="js/thirdparty/jquery.js"></script>
<script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
<div class="row">
<div class="offset5" style="margin-top:20px; margin-bottom:20px">
Select the generated metrics log file (realtimetrack.json): <input type='file' id='jsonfile' /> <input type='button' value='Generate !' onClick='draw()' /><br>
</div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area1"></div>
<div class="divborder span8" id="area2"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area3"></div>
<div class="divborder span8" id="area4"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area5"></div>
<div class="divborder span8" id="area6"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area7"></div>
<div class="span7" id="area8"></div>
</div>
<p>&nbsp;</p>
<script>
// select file and draw
function draw() {
var filepath = document.getElementById('jsonfile').value;
if (filepath) {
for (var i = 1; i < 9; i ++) {
$('#area' + i).empty();
}
filepath = filepath.replace("C:\\fakepath\\", "");
drawCharts(filepath);
} else {
alert('Please choose a file first.');
}
}
function drawCharts(filepath) {
$.getJSON(filepath, function(data) {
var numQueues = 0;
var queueNames = new Array();
for (var j in data[0]) {
if (j.substring(0, 'queue'.length) === 'queue') {
queueNames[numQueues] = j;
numQueues ++;
}
}
numQueues /= 2;
// create graph
$.getJSON(filepath, function(data) {
var basetime = data[0].time;
data.forEach(function(d) {
d.time = (d.time - basetime) / 1000;
});
var legends = ["running.applications", "running.containers"];
drawEachChart("#area1", data, legends, "Cluster running applications & containers", "Number", 0, 0);
legends = ["jvm.free.memory", "jvm.max.memory", "jvm.total.memory"];
drawEachChart("#area2", data, legends, "JVM memory", "Memory (GB)", 0, 0);
legends = ["cluster.allocated.memory", "cluster.available.memory"];
drawEachChart("#area3", data, legends, "Cluster allocated & available memory", "Memory (GB)", 0, 0);
legends = ["cluster.allocated.vcores", "cluster.available.vcores"];
drawEachChart("#area4", data, legends, "Cluster allocated & available vcores", "Number", 0, 0);
for (var i = 0; i < numQueues; i ++) {
legends[i] = queueNames[i * 2];
}
drawEachChart("#area5", data, legends, "Queue allocated memory", "Memory (GB)", 1, 100);
for (var i = 0; i < numQueues; i ++) {
legends[i] = queueNames[i * 2 + 1];
}
drawEachChart("#area6", data, legends, "Queue allocated vcores", "VCores", 1, 90);
legends = [
"scheduler.allocate.timecost",
"scheduler.handle-NODE_ADDED.timecost", "scheduler.handle-NODE_REMOVED.timecost",
"scheduler.handle-NODE_UPDATE.timecost", "scheduler.handle-APP_ADDED.timecost",
"scheduler.handle-APP_REMOVED.timecost", "scheduler.handle-CONTAINER_EXPIRED.timecost"
];
drawEachChart("#area7", data, legends, "Scheduler allocate & handle operations timecost", "Timecost (ms)", 0, 210);
});
});
}
// draw different chart
function drawEachChart(chartArea, data, legends, title, yLabelTitle, isArea, pl) {
// drawchart
var margin = {top: 50, right: 250, bottom: 50, left: 70};
var width = 800 - margin.left - margin.right;
var height = 420 - margin.top - margin.bottom;
var x = d3.scale.linear().range([0, width]);
var y = d3.scale.linear().range([height, 0]);
var xAxis = d3.svg.axis().scale(x).orient("bottom");
var yAxis = d3.svg.axis().scale(y).orient("left");
var color = d3.scale.category10();
if (isArea == 1){
var area = d3.svg.area()
.x(function(d) { return x(d.time); })
.y0(function(d) { return y(d.y0); })
.y1(function(d) { return y(d.y0 + d.y); });
var stack = d3.layout.stack()
.values(function(d) { return d.values; });
// create chart
var svg = d3.select(chartArea).append("svg")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
color.domain(d3.keys(data[0])
.filter(function(key) {return $.inArray(key, legends) !== -1; }));
var points = stack(color.domain().map(function(name) {
return {
name: name,
values: data.map(function(d) {
return {time: d.time, y: d[name]};
})
};
}));
// x & y
x.domain(d3.extent(data, function(d) { return d.time; }));
y.domain([
d3.min(points, function(c) {
return 0.9 * d3.min(c.values, function(v) { return v.y; }); }),
d3.max(points, function(c) {
return 1.1 * d3.max(c.values, function(v) { return v.y + v.y0; }); })
]);
svg.append("g").attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
.call(xAxis)
.append("text")
.attr("transform", "translate(" + (width / 2) + ", 45)")
.style("text-anchor", "middle")
.text("Time (s)");
svg.append("g")
.attr("class", "y axis")
.call(yAxis)
.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 0 - margin.left)
.attr("x",0 - (height / 2))
.attr("dy", "1em")
.style("text-anchor", "middle")
.text(yLabelTitle);
var point = svg.selectAll(".point")
.data(points)
.enter().append("g");
point.append("path")
.attr("class", "area")
.attr("d", function(d) { return area(d.values); })
.style("fill", function(d) { return color(d.name); });
} else {
// lines
var line = d3.svg.line()
.interpolate("basis")
.x(function(d) { return x(d.time); })
.y(function(d) { return y(d.value); });
// create chart
var svg = d3.select(chartArea).append("svg")
.attr("id", title)
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
color.domain(d3.keys(data[0])
.filter(function(key) {return $.inArray(key, legends) !== -1; }));
var values = color.domain().map(function(name) {
return {
name: name,
values: data.map(function(d) {
return {time: d.time, value: +d[name]};
})
};
});
// x & y
x.domain(d3.extent(data, function(d) { return d.time; }));
y.domain([
d3.min(values, function(c) { return 0.9 * d3.min(c.values, function(v) { return v.value; }); }),
d3.max(values, function(c) { return 1.1 * d3.max(c.values, function(v) { return v.value; }); })
]);
svg.append("g").attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
.call(xAxis)
.append("text")
.attr("transform", "translate(" + (width / 2) + ", 45)")
.style("text-anchor", "middle")
.text("Time (s)");
svg.append("g")
.attr("class", "y axis")
.call(yAxis)
.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 0 - margin.left)
.attr("x",0 - (height / 2))
.attr("dy", "1em")
.style("text-anchor", "middle")
.text(yLabelTitle);
var value = svg.selectAll(".city")
.data(values)
.enter().append("g")
.attr("class", "city");
value.append("path")
.attr("class", "line")
.attr("d", function(d) { return line(d.values); })
.style("stroke", function(d) { return color(d.name); });
}
// title
svg.append("text")
.attr("x", (width / 2))
.attr("y", 10 - (margin.top / 2))
.attr("text-anchor", "middle")
.text(title);
// legend
var legend = svg.append("g")
.attr("class", "legend")
.attr("x", width - 50)
.attr("y", 25)
.attr("height", 120)
.attr("width", 140);
legend.selectAll('g').data(legends)
.enter()
.append('g')
.each(function(d, i) {
var g = d3.select(this);
g.append("rect")
.attr("x", width - 5 - pl)
.attr("y", i*20 + 0)
.attr("width", 10)
.attr("height", 10)
.style("fill", color(d));
g.append("text")
.attr("x", width + 15 - pl)
.attr("y", i * 20 + 8)
.attr("height",30)
.attr("width",250)
.style("fill", color(d))
.text(d);
});
}
</script>
</body>
</html>

View File

@ -0,0 +1,278 @@
<html>
<head>
<meta charset="utf-8">
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<style type="text/css">
body '{' font: 20px sans-serif; '}'
.axis path,
.axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges; '}'
.axis text '{' font-family: sans-serif; font-size: 20px; '}'
.line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
.legend '{'
padding: 5px;
font: 18px sans-serif;
background: yellow;
box-shadow: 2px 2px 1px #888;
'}'
.title '{' font: 24px sans-serif; '}'
.divborder '{'
border-width: 1px;
border-style: solid;
border-color: black;
margin-top:10px
'}'
</style>
<script src="js/thirdparty/d3.v3.js"></script>
<script src="js/thirdparty/jquery.js"></script>
<script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
<div class="row">
<div class="span10 offset2"><br>
<input type="button" style="float: right;" value="Stop"
onClick="stop()" />
</div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area1"></div>
<div class="divborder span8" id="area2"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area3"></div>
<div class="divborder span8" id="area4"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area5"></div>
<div class="divborder span8" id="area6"></div>
</div>
<div class="row">
<div class="divborder span8" style="margin-left:50px" id="area7"></div>
<div class="span8" id="area8"></div>
</div><br/><br/>
<script>
var basetime = 0;
var running = 1;
var data = [];
var width, height;
var legends = [];
var titles = [];
var yLabels = [];
var isAreas = [];
var svgs = [];
var xs = [];
var ys = [];
var xAxiss = [];
var yAxiss = [];
var lineAreas = [];
var stacks = [];
// legends
legends[0] = [''running.applications'', ''running.containers''];
legends[1] = [''jvm.free.memory'', ''jvm.max.memory'', ''jvm.total.memory''];
legends[2] = [''cluster.allocated.memory'', ''cluster.available.memory''];
legends[3] = [''cluster.allocated.vcores'', ''cluster.available.vcores''];
legends[4] = [];
legends[5] = [];
{0}
legends[6] = [''scheduler.allocate.timecost'',
''scheduler.handle-NODE_ADDED.timecost'',
''scheduler.handle-NODE_REMOVED.timecost'',
''scheduler.handle-NODE_UPDATE.timecost'',
''scheduler.handle-APP_ADDED.timecost'',
''scheduler.handle-APP_REMOVED.timecost'',
''scheduler.handle-CONTAINER_EXPIRED.timecost''];
// title
titles[0] = ''Cluster running applications & containers'';
titles[1] = ''JVM memory'';
titles[2] = ''Cluster allocated & available memory'';
titles[3] = ''Cluster allocated & available vcores'';
titles[4] = ''Queue allocated memory'';
titles[5] = ''Queue allocated vcores'';
titles[6] = ''Scheduler allocate & handle operation timecost'';
// ylabels
yLabels[0] = ''Number'';
yLabels[1] = ''Memory (GB)'';
yLabels[2] = ''Memory (GB)'';
yLabels[3] = ''Number'';
yLabels[4] = ''Memory (GB)'';
yLabels[5] = ''Number'';
yLabels[6] = ''Timecost (ms)'';
// is area?
isAreas = [0, 0, 0, 0, 1, 1, 0];
// draw all charts
for (var i = 0; i < 7; i ++) '{'
drawEachChart(i);
'}'
// draw each chart
function drawEachChart(index) '{'
var margin = '{'top: 50, right: 250, bottom: 50, left: 70'}';
width = 750 - margin.left - margin.right;
height = 420 - margin.top - margin.bottom;
xs[index] = d3.scale.linear().range([0, width]);
ys[index] = d3.scale.linear().range([height, 0]);
xAxiss[index] = d3.svg.axis().scale(xs[index]).orient(''bottom'');
yAxiss[index] = d3.svg.axis().scale(ys[index]).orient(''left'');
if (isAreas[index] == 1)'{'
lineAreas[index] = d3.svg.area()
.x(function(d) '{' return xs[index](d.time); '}')
.y0(function(d) '{' return ys[index](d.y0); '}')
.y1(function(d) '{' return ys[index](d.y0 + d.y); '}');
stacks[index] = d3.layout.stack()
.values(function(d) '{' return d.values; '}');
'}' else '{'
lineAreas[index] = d3.svg.line()
.interpolate(''basis'')
.x(function(d) '{' return xs[index](d.time); '}')
.y(function(d) '{' return ys[index](d.value); '}');
'}'
svgs[index] = d3.select(''#area'' + (index + 1)).append(''svg'')
.attr(''width'', width + margin.left + margin.right)
.attr(''height'', height + margin.top + margin.bottom)
.append(''g'')
.attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');
// x, y and title
svgs[index].append(''text'')
.attr(''transform'', ''translate('' + (width / 2) + '' ,'' +
(height + margin.bottom - 10 ) + '')'')
.style(''text-anchor'', ''middle'')
.text(''Time ({1})'');
svgs[index].append(''text'')
.attr(''transform'', ''rotate(-90)'')
.attr(''y'', 0 - margin.left)
.attr(''x'',0 - (height / 2))
.attr(''dy'', ''1em'')
.style(''text-anchor'', ''middle'')
.text(yLabels[index]);
svgs[index].append(''text'')
.attr(''x'', (width / 2))
.attr(''y'', 10 - (margin.top / 2))
.attr(''text-anchor'', ''middle'')
.text(titles[index]);
'}'
// request data
function requestData() '{'
$.ajax('{'url: ''simulateMetrics'',
success: function(point) '{'
// update data
if (basetime == 0) basetime = point.time;
point.time = (point.time - basetime) / {2};
data.push(point);
// clear old
for (var i = 0; i < 7; i ++) '{'
svgs[i].selectAll(''g.tick'').remove();
svgs[i].selectAll(''g'').remove();
var color = d3.scale.category10();
color.domain(d3.keys(data[0]).filter(function(key) '{'
return $.inArray(key, legends[i]) !== -1;
'}'));
var values;
if (isAreas[i] == 1) '{'
values = stacks[i](color.domain().map(function(name) '{'
return '{'
name: name,
values: data.map(function(d) '{'
return '{'time: d.time, y: d[name]'}';
'}')
'}'
'}'));
xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
ys[i].domain([
d3.min(values, function(c) '{' return 0; '}'),
d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
function(v) '{' return v.y + v.y0; '}'); '}')
]);
'}' else '{'
values = color.domain().map(function(name) '{'
return '{'
name: name,
values: data.map(function(d) '{'
return '{'time: d.time, value: d[name]'}';
'}')
'}'
'}');
xs[i].domain(d3.extent(data, function(d) '{' return d.time;'}'));
ys[i].domain([
d3.min(values, function(c) '{' return 0; '}'),
d3.max(values, function(c) '{' return 1.1 * d3.max(c.values,
function(v) '{' return v.value; '}'); '}')
]);
'}'
svgs[i].append(''g'').attr(''class'', ''x axis'')
.attr(''transform'', ''translate(0,'' + height + '')'').call(xAxiss[i]);
svgs[i].append(''g'').attr(''class'', ''y axis'').call(yAxiss[i]);
var value = svgs[i].selectAll(''.path'')
.data(values).enter().append(''g'').attr(''class'', ''line'');
if(isAreas[i] == 1) '{'
value.append(''path'').attr(''class'', ''area'')
.attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
.style(''fill'', function(d) '{'return color(d.name); '}');
'}' else '{'
value.append(''path'').attr(''class'', ''line'')
.attr(''d'', function(d) '{'return lineAreas[i](d.values); '}')
.style(''stroke'', function(d) '{'return color(d.name); '}');
'}'
// legend
var legend = svgs[i].append(''g'')
.attr(''class'', ''legend'')
.attr(''x'', width + 5)
.attr(''y'', 25)
.attr(''height'', 120)
.attr(''width'', 140);
legend.selectAll(''g'').data(legends[i])
.enter()
.append(''g'')
.each(function(d, i) '{'
var g = d3.select(this);
g.append(''rect'')
.attr(''x'', width + 5)
.attr(''y'', i*20)
.attr(''width'', 10)
.attr(''height'', 10)
.style(''fill'', color(d));
g.append(''text'')
.attr(''x'', width + 25)
.attr(''y'', i * 20 + 8)
.attr(''height'',30)
.attr(''width'',250)
.style(''fill'', color(d))
.text(d);
'}');
'}'
if(running == 1)
setTimeout(requestData, {3});
'}',
cache: false
'}');
'}'
// stop
function stop() '{'
running = 0;
'}'
requestData();
</script>
</body>
</html>

View File

@ -0,0 +1,50 @@
<html>
<head>
<meta charset="utf-8">
<style type="text/css">
.td1 '{'
border-width: 1px;
padding: 8px;
border-style: solid;
border-color: #666666;
background-color: #dedede;
width: 50%;
'}'
table.gridtable '{'
font-family: verdana,arial,sans-serif;
font-size:11px;
color:#333333;
border-width: 1px;
border-color: #666666;
border-collapse: collapse;
margin-top: 80px;
'}'
.td2 '{'
border-width: 1px;
padding: 8px;
border-style: solid;
border-color: #666666;
background-color: #ffffff;
width: 50%;
'}'
</style>
</head>
<body>
<table class="gridtable" align="center" width="400px">
<tr>
<td colspan="2" class="td2" align="center">
<b>SLS Simulate Information</b>
</td>
</tr>
{0}
<tr>
<td align="center" height="80px">
<a href="simulate">Simulation Charts</a>
</td>
<td align="center">
<a href="track">Tracked Jobs & Queues</a>
</td>
</tr>
</table>
</body>
</html>

View File

@ -0,0 +1,193 @@
<html>
<head>
<meta charset="utf-8">
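<!-- NOTE: this page appears to be rendered through java.text.MessageFormat.
     The numbered tokens below are placeholders filled at render time, and the
     doubled single quotes and quoted braces inside the script are
     MessageFormat escapes, not JavaScript syntax. -->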
<link rel="stylesheet" href="css/bootstrap.min.css" media="screen">
<link rel="stylesheet" href="css/bootstrap-responsive.min.css">
<style type="text/css">
body '{' font: 20px sans-serif;'}'
.axis path,
.axis line '{' fill: none; stroke: #000; shape-rendering: crispEdges;'}'
.axis text '{' font-family: sans-serif; font-size: 20px; '}'
.line '{' fill: none; stroke: steelblue; stroke-width: 3px; '}'
.legend '{' padding: 5px; font: 18px sans-serif; background: yellow;
box-shadow: 2px 2px 1px #888;'}'
.title '{' font: 24px sans-serif; '}'
.divborder '{' border-width: 1px; border-style: solid; border-color: black;
margin-top:10px '}'
</style>
<script src="js/thirdparty/d3.v3.js"></script>
<script src="js/thirdparty/jquery.js"></script>
<script src="js/thirdparty/bootstrap.min.js"></script>
</head>
<body>
<div class="row">
<div class="offset4 span8"><br/><br/><br/>
Select Tracked Job/Queue:
<select id="trackedSelect" onchange="redrawChart()">
<option>----Queue----</option>
{0}
<option>----Job----</option>
{1}
</select>
<input type="button" style="float: right;" value="Stop"
onClick="stop()" />
</div>
</div>
<div class="row">
<div class="divborder span9 offset4" id="area1"></div>
</div>
<script>
// global variables
var basetime = 0;
var running = 1;
var para = '''';
var data = [];
var path, line, svg;
var x, y;
var width, height;
var xAxis, yAxis;
var legends = [''usage.memory'', ''demand.memory'', ''maxshare.memory'',
''minshare.memory'', ''fairshare.memory''];
// stop function
function stop() '{'
running = 0;
'}'
// select changed event
function redrawChart() '{'
var value = $(''#trackedSelect'').val();
if (value.substring(0, ''Job ''.length) === ''Job ''
|| value.substring(0, ''Queue ''.length) === ''Queue '') '{'
para = value;
running = 0;
basetime = 0;
data = [];
$(''#area1'').empty();
drawChart(''Tracking '' + value);
running = 1;
requestData();
}
}
// draw chart
function drawChart(title) '{'
// location
var margin = '{'top: 50, right: 150, bottom: 50, left: 80'}';
width = 800 - margin.left - margin.right;
height = 420 - margin.top - margin.bottom;
x = d3.scale.linear().range([0, width]);
y = d3.scale.linear().range([height, 0]);
xAxis = d3.svg.axis().scale(x).orient(''bottom'');
yAxis = d3.svg.axis().scale(y).orient(''left'');
// lines
line = d3.svg.line().interpolate(''basis'')
.x(function(d) '{' return x(d.time); })
.y(function(d) '{' return y(d.value); });
// create chart
svg = d3.select(''#area1'').append(''svg'')
.attr(''width'', width + margin.left + margin.right)
.attr(''height'', height + margin.top + margin.bottom)
.append(''g'')
.attr(''transform'', ''translate('' + margin.left + '','' + margin.top + '')'');
// axis labels
svg.append(''text'')
.attr(''transform'', ''translate('' + (width / 2) + '','' + (height + margin.bottom - 5 ) + '')'')
.style(''text-anchor'', ''middle'')
.text(''Time ({2})'');
svg.append(''text'')
.attr(''transform'', ''rotate(-90)'')
.attr(''y'', 0 - margin.left)
.attr(''x'',0 - (height / 2))
.attr(''dy'', ''1em'')
.style(''text-anchor'', ''middle'')
.text(''Memory (GB)'');
// title
svg.append(''text'')
.attr(''x'', (width / 2))
.attr(''y'', 10 - (margin.top / 2))
.attr(''text-anchor'', ''middle'')
.text(title);
'}'
// request data
function requestData() '{'
$.ajax('{'url: ''trackMetrics?t='' + para,
success: function(point) '{'
// clear old
svg.selectAll(''g.tick'').remove();
svg.selectAll(''g'').remove();
if(basetime == 0) basetime = point.time;
point.time = (point.time - basetime)/{3};
data.push(point);
var color = d3.scale.category10();
color.domain(d3.keys(data[0]).filter(function(key) '{'
return $.inArray(key, legends) !== -1;
'}'));
var values = color.domain().map(function(name) '{'
return '{'
name: name,
values: data.map(function(d) '{'
return '{' time: d.time, value: d[name]'}';
'}')
'}';
'}');
// set x/y range
x.domain(d3.extent(data, function(d) '{' return d.time; '}'));
y.domain([
d3.min(values, function(c) '{' return 0 '}'),
d3.max(values, function(c) '{' return 1.1 * d3.max(c.values, function(v) '{' return v.value; '}'); '}')
]);
svg.append(''g'').attr(''class'', ''x axis'')
.attr(''transform'', ''translate(0,'' + height + '')'').call(xAxis);
svg.append(''g'').attr(''class'', ''y axis'').call(yAxis);
var value = svg.selectAll(''.path'')
.data(values).enter().append(''g'').attr(''class'', ''line'');
value.append(''path'').attr(''class'', ''line'')
.attr(''d'', function(d) '{'return line(d.values); '}')
.style(''stroke'', function(d) '{'return color(d.name); '}');
// legend
var legend = svg.append(''g'')
.attr(''class'', ''legend'')
.attr(''x'', width + 5)
.attr(''y'', 25)
.attr(''height'', 120)
.attr(''width'', 180);
legend.selectAll(''g'').data(legends)
.enter()
.append(''g'')
.each(function(d, i) '{'
var g = d3.select(this);
g.append(''rect'')
.attr(''x'', width + 5)
.attr(''y'', i * 20)
.attr(''width'', 10)
.attr(''height'', 10)
.style(''fill'', color(d));
g.append(''text'')
.attr(''x'', width + 25)
.attr(''y'', i * 20 + 8)
.attr(''height'',30)
.attr(''width'',250)
.style(''fill'', color(d))
.text(d);
'}');
if(running == 1)
setTimeout(requestData, {4});
'}',
cache: false
'}');
'}'
</script>
</body>
</html>

View File

@ -0,0 +1,234 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.ObjectWriter;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
public class RumenToSLSConverter {
private static final String EOL = System.getProperty("line.separator");
private static long baseline = 0;
private static Map<String, Set<String>> rackNodeMap =
new TreeMap<String, Set<String>>();
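  // Illustrative invocation (run directly by class name here; file names are
  // examples only):
  //   java -cp <hadoop-classpath> org.apache.hadoop.yarn.sls.RumenToSLSConverter \
  //     -input rumen-trace.json -outputJobs sls-jobs.json -outputNodes sls-nodes.json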
public static void main(String args[]) throws Exception {
Options options = new Options();
options.addOption("input", true, "input rumen json file");
options.addOption("outputJobs", true, "output jobs file");
options.addOption("outputNodes", true, "output nodes file");
CommandLineParser parser = new GnuParser();
CommandLine cmd = parser.parse(options, args);
if (! cmd.hasOption("input") ||
! cmd.hasOption("outputJobs") ||
! cmd.hasOption("outputNodes")) {
System.err.println();
System.err.println("ERROR: Missing input or output file");
System.err.println();
      System.err.println("RumenToSLSConverter creates an SLS script " +
          "from a Hadoop Rumen output");
System.err.println();
System.err.println("Options: -input FILE -outputJobs FILE " +
"-outputNodes FILE");
System.err.println();
System.exit(1);
}
String inputFile = cmd.getOptionValue("input");
String outputJsonFile = cmd.getOptionValue("outputJobs");
String outputNodeFile = cmd.getOptionValue("outputNodes");
    // check that the input exists and the outputs do not
if (! new File(inputFile).exists()) {
System.err.println();
System.err.println("ERROR: input does not exist");
System.exit(1);
}
if (new File(outputJsonFile).exists()) {
System.err.println();
      System.err.println("ERROR: output job file already exists");
System.exit(1);
}
if (new File(outputNodeFile).exists()) {
System.err.println();
      System.err.println("ERROR: output node file already exists");
System.exit(1);
}
File jsonFile = new File(outputJsonFile);
if (! jsonFile.getParentFile().exists()
&& ! jsonFile.getParentFile().mkdirs()) {
System.err.println("ERROR: Cannot create output directory in path: "
+ jsonFile.getParentFile().getAbsoluteFile());
System.exit(1);
}
File nodeFile = new File(outputNodeFile);
if (! nodeFile.getParentFile().exists()
&& ! nodeFile.getParentFile().mkdirs()) {
System.err.println("ERROR: Cannot create output directory in path: "
          + nodeFile.getParentFile().getAbsoluteFile());
System.exit(1);
}
generateSLSLoadFile(inputFile, outputJsonFile);
generateSLSNodeFile(outputNodeFile);
}
private static void generateSLSLoadFile(String inputFile, String outputFile)
throws IOException {
Reader input = new FileReader(inputFile);
try {
Writer output = new FileWriter(outputFile);
try {
ObjectMapper mapper = new ObjectMapper();
ObjectWriter writer = mapper.defaultPrettyPrintingWriter();
Iterator<Map> i = mapper.readValues(
new JsonFactory().createJsonParser(input), Map.class);
while (i.hasNext()) {
Map m = i.next();
output.write(writer.writeValueAsString(createSLSJob(m)) + EOL);
}
} finally {
output.close();
}
} finally {
input.close();
}
}
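  // Writes one JSON object per rack, shaped as (illustrative):
  //   { "rack": "<rack name>", "nodes": [ { "node": "<host name>" }, ... ] }
  // using the rack/node pairs collected in rackNodeMap while converting tasks.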
@SuppressWarnings("unchecked")
private static void generateSLSNodeFile(String outputFile)
throws IOException {
Writer output = new FileWriter(outputFile);
try {
ObjectMapper mapper = new ObjectMapper();
ObjectWriter writer = mapper.defaultPrettyPrintingWriter();
for (Map.Entry<String, Set<String>> entry : rackNodeMap.entrySet()) {
Map rack = new LinkedHashMap();
rack.put("rack", entry.getKey());
List nodes = new ArrayList();
for (String name : entry.getValue()) {
Map node = new LinkedHashMap();
node.put("node", name);
nodes.add(node);
}
rack.put("nodes", nodes);
output.write(writer.writeValueAsString(rack) + EOL);
}
} finally {
output.close();
}
}
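  // Emits one SLS job object per Rumen job, roughly shaped as (illustrative):
  //   { "am.type": "mapreduce", "job.start.ms": ..., "job.end.ms": ...,
  //     "job.queue.name": ..., "job.id": ..., "job.user": ...,
  //     "job.tasks": [ { "container.host": ..., "container.start.ms": ...,
  //                      "container.end.ms": ..., "container.priority": ...,
  //                      "container.type": "map" | "reduce" }, ... ] }
  // Timestamps are shifted so the earliest job submission becomes time 0.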
@SuppressWarnings("unchecked")
private static Map createSLSJob(Map rumenJob) {
Map json = new LinkedHashMap();
long jobStart = (Long) rumenJob.get("submitTime");
long jobFinish = (Long) rumenJob.get("finishTime");
String jobId = rumenJob.get("jobID").toString();
String queue = rumenJob.get("queue").toString();
String user = rumenJob.get("user").toString();
if (baseline == 0) {
baseline = jobStart;
}
jobStart -= baseline;
jobFinish -= baseline;
long offset = 0;
if (jobStart < 0) {
System.out.println("Warning: reset job " + jobId + " start time to 0.");
offset = -jobStart;
jobFinish = jobFinish - jobStart;
jobStart = 0;
}
json.put("am.type", "mapreduce");
json.put("job.start.ms", jobStart);
json.put("job.end.ms", jobFinish);
json.put("job.queue.name", queue);
json.put("job.id", jobId);
json.put("job.user", user);
List maps = createSLSTasks("map",
(List) rumenJob.get("mapTasks"), offset);
List reduces = createSLSTasks("reduce",
(List) rumenJob.get("reduceTasks"), offset);
List tasks = new ArrayList();
tasks.addAll(maps);
tasks.addAll(reduces);
json.put("job.tasks", tasks);
return json;
}
@SuppressWarnings("unchecked")
private static List createSLSTasks(String taskType,
List rumenTasks, long offset) {
int priority = taskType.equals("reduce") ? 10 : 20;
List array = new ArrayList();
for (Object e : rumenTasks) {
Map rumenTask = (Map) e;
for (Object ee : (List) rumenTask.get("attempts")) {
Map rumenAttempt = (Map) ee;
long taskStart = (Long) rumenAttempt.get("startTime");
long taskFinish = (Long) rumenAttempt.get("finishTime");
String hostname = (String) rumenAttempt.get("hostName");
taskStart = taskStart - baseline + offset;
taskFinish = taskFinish - baseline + offset;
Map task = new LinkedHashMap();
task.put("container.host", hostname);
task.put("container.start.ms", taskStart);
task.put("container.end.ms", taskFinish);
task.put("container.priority", priority);
task.put("container.type", taskType);
array.add(task);
String rackHost[] = SLSUtils.getRackHostName(hostname);
if (rackNodeMap.containsKey(rackHost[0])) {
rackNodeMap.get(rackHost[0]).add(rackHost[1]);
} else {
Set<String> hosts = new TreeSet<String>();
hosts.add(rackHost[1]);
rackNodeMap.put(rackHost[0], hosts);
}
}
}
return array;
}
}

View File

@ -0,0 +1,526 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.text.MessageFormat;
import java.util.Map;
import java.util.HashMap;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Random;
import java.util.Arrays;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.tools.rumen.JobTraceReader;
import org.apache.hadoop.tools.rumen.LoggedJob;
import org.apache.hadoop.tools.rumen.LoggedTask;
import org.apache.hadoop.tools.rumen.LoggedTaskAttempt;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.sls.appmaster.AMSimulator;
import org.apache.hadoop.yarn.sls.conf.SLSConfiguration;
import org.apache.hadoop.yarn.sls.nodemanager.NMSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.Options;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;
import org.apache.log4j.Logger;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.map.ObjectMapper;
public class SLSRunner {
// RM, Runner
private ResourceManager rm;
private static TaskRunner runner = new TaskRunner();
private String[] inputTraces;
private Configuration conf;
private Map<String, Integer> queueAppNumMap;
// NM simulator
private HashMap<NodeId, NMSimulator> nmMap;
private int nmMemoryMB, nmVCores;
private String nodeFile;
// AM simulator
private int AM_ID;
private Map<String, AMSimulator> amMap;
private Set<String> trackedApps;
private Map<String, Class> amClassMap;
private static int remainingApps = 0;
// metrics
private String metricsOutputDir;
private boolean printSimulation;
// other simulation information
private int numNMs, numRacks, numAMs, numTasks;
private long maxRuntime;
public final static Map<String, Object> simulateInfoMap =
new HashMap<String, Object>();
// logger
public final static Logger LOG = Logger.getLogger(SLSRunner.class);
// input traces, input-rumen or input-sls
private boolean isSLS;
public SLSRunner(boolean isSLS, String inputTraces[], String nodeFile,
String outputDir, Set<String> trackedApps,
boolean printsimulation)
throws IOException, ClassNotFoundException {
this.isSLS = isSLS;
this.inputTraces = inputTraces.clone();
this.nodeFile = nodeFile;
this.trackedApps = trackedApps;
this.printSimulation = printsimulation;
metricsOutputDir = outputDir;
nmMap = new HashMap<NodeId, NMSimulator>();
queueAppNumMap = new HashMap<String, Integer>();
amMap = new HashMap<String, AMSimulator>();
amClassMap = new HashMap<String, Class>();
// runner configuration
conf = new Configuration(false);
conf.addResource("sls-runner.xml");
// runner
int poolSize = conf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,
SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT);
SLSRunner.runner.setQueueSize(poolSize);
// <AMType, Class> map
for (Map.Entry e : conf) {
String key = e.getKey().toString();
if (key.startsWith(SLSConfiguration.AM_TYPE)) {
String amType = key.substring(SLSConfiguration.AM_TYPE.length());
amClassMap.put(amType, Class.forName(conf.get(key)));
}
}
}
public void start() throws Exception {
// start resource manager
startRM();
// start node managers
startNM();
// start application masters
startAM();
// set queue & tracked apps information
((ResourceSchedulerWrapper) rm.getResourceScheduler())
.setQueueSet(this.queueAppNumMap.keySet());
((ResourceSchedulerWrapper) rm.getResourceScheduler())
.setTrackedAppSet(this.trackedApps);
// print out simulation info
printSimulationInfo();
    // block until all nodes are RUNNING
waitForNodesRunning();
    // start the runner once everything is ready to go
runner.start();
}
private void startRM() throws IOException, ClassNotFoundException {
Configuration rmConf = new YarnConfiguration();
String schedulerClass = rmConf.get(YarnConfiguration.RM_SCHEDULER);
rmConf.set(SLSConfiguration.RM_SCHEDULER, schedulerClass);
rmConf.set(YarnConfiguration.RM_SCHEDULER,
ResourceSchedulerWrapper.class.getName());
rmConf.set(SLSConfiguration.METRICS_OUTPUT_DIR, metricsOutputDir);
rm = new ResourceManager();
rm.init(rmConf);
rm.start();
}
private void startNM() throws YarnException, IOException {
// nm configuration
nmMemoryMB = conf.getInt(SLSConfiguration.NM_MEMORY_MB,
SLSConfiguration.NM_MEMORY_MB_DEFAULT);
nmVCores = conf.getInt(SLSConfiguration.NM_VCORES,
SLSConfiguration.NM_VCORES_DEFAULT);
int heartbeatInterval = conf.getInt(
SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS,
SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS_DEFAULT);
// nm information (fetch from topology file, or from sls/rumen json file)
Set<String> nodeSet = new HashSet<String>();
if (nodeFile.isEmpty()) {
if (isSLS) {
for (String inputTrace : inputTraces) {
nodeSet.addAll(SLSUtils.parseNodesFromSLSTrace(inputTrace));
}
} else {
for (String inputTrace : inputTraces) {
nodeSet.addAll(SLSUtils.parseNodesFromRumenTrace(inputTrace));
}
}
} else {
nodeSet.addAll(SLSUtils.parseNodesFromNodeFile(nodeFile));
}
// create NM simulators
Random random = new Random();
Set<String> rackSet = new HashSet<String>();
for (String hostName : nodeSet) {
// we randomize the heartbeat start time from zero to 1 interval
NMSimulator nm = new NMSimulator();
nm.init(hostName, nmMemoryMB, nmVCores,
random.nextInt(heartbeatInterval), heartbeatInterval, rm);
nmMap.put(nm.getNode().getNodeID(), nm);
runner.schedule(nm);
rackSet.add(nm.getNode().getRackName());
}
numRacks = rackSet.size();
numNMs = nmMap.size();
}
private void waitForNodesRunning() throws InterruptedException {
long startTimeMS = System.currentTimeMillis();
while (true) {
int numRunningNodes = 0;
for (RMNode node : rm.getRMContext().getRMNodes().values()) {
if (node.getState() == NodeState.RUNNING) {
numRunningNodes ++;
}
}
if (numRunningNodes == numNMs) {
break;
}
LOG.info(MessageFormat.format("SLSRunner is waiting for all " +
"nodes RUNNING. {0} of {1} NMs initialized.",
numRunningNodes, numNMs));
Thread.sleep(1000);
}
LOG.info(MessageFormat.format("SLSRunner takes {0} ms to launch all nodes.",
(System.currentTimeMillis() - startTimeMS)));
}
@SuppressWarnings("unchecked")
private void startAM() throws YarnException, IOException {
// application/container configuration
int heartbeatInterval = conf.getInt(
SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS,
SLSConfiguration.AM_HEARTBEAT_INTERVAL_MS_DEFAULT);
int containerMemoryMB = conf.getInt(SLSConfiguration.CONTAINER_MEMORY_MB,
SLSConfiguration.CONTAINER_MEMORY_MB_DEFAULT);
int containerVCores = conf.getInt(SLSConfiguration.CONTAINER_VCORES,
SLSConfiguration.CONTAINER_VCORES_DEFAULT);
Resource containerResource =
BuilderUtils.newResource(containerMemoryMB, containerVCores);
// application workload
if (isSLS) {
startAMFromSLSTraces(containerResource, heartbeatInterval);
} else {
startAMFromRumenTraces(containerResource, heartbeatInterval);
}
numAMs = amMap.size();
remainingApps = numAMs;
}
/**
* parse workload information from sls trace files
*/
@SuppressWarnings("unchecked")
private void startAMFromSLSTraces(Resource containerResource,
int heartbeatInterval) throws IOException {
// parse from sls traces
JsonFactory jsonF = new JsonFactory();
ObjectMapper mapper = new ObjectMapper();
for (String inputTrace : inputTraces) {
Reader input = new FileReader(inputTrace);
try {
Iterator<Map> i = mapper.readValues(jsonF.createJsonParser(input),
Map.class);
while (i.hasNext()) {
Map jsonJob = i.next();
// load job information
long jobStartTime = Long.parseLong(
jsonJob.get("job.start.ms").toString());
long jobFinishTime = Long.parseLong(
jsonJob.get("job.end.ms").toString());
String user = (String) jsonJob.get("job.user");
if (user == null) user = "default";
String queue = jsonJob.get("job.queue.name").toString();
String oldAppId = jsonJob.get("job.id").toString();
boolean isTracked = trackedApps.contains(oldAppId);
int queueSize = queueAppNumMap.containsKey(queue) ?
queueAppNumMap.get(queue) : 0;
queueSize ++;
queueAppNumMap.put(queue, queueSize);
// tasks
List tasks = (List) jsonJob.get("job.tasks");
if (tasks == null || tasks.size() == 0) {
continue;
}
List<ContainerSimulator> containerList =
new ArrayList<ContainerSimulator>();
for (Object o : tasks) {
Map jsonTask = (Map) o;
String hostname = jsonTask.get("container.host").toString();
long taskStart = Long.parseLong(
jsonTask.get("container.start.ms").toString());
long taskFinish = Long.parseLong(
jsonTask.get("container.end.ms").toString());
long lifeTime = taskFinish - taskStart;
int priority = Integer.parseInt(
jsonTask.get("container.priority").toString());
String type = jsonTask.get("container.type").toString();
containerList.add(new ContainerSimulator(containerResource,
lifeTime, hostname, priority, type));
}
// create a new AM
String amType = jsonJob.get("am.type").toString();
AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(
amClassMap.get(amType), new Configuration());
if (amSim != null) {
amSim.init(AM_ID++, heartbeatInterval, containerList, rm,
this, jobStartTime, jobFinishTime, user, queue,
isTracked, oldAppId);
runner.schedule(amSim);
maxRuntime = Math.max(maxRuntime, jobFinishTime);
numTasks += containerList.size();
amMap.put(oldAppId, amSim);
}
}
} finally {
input.close();
}
}
}
/**
* parse workload information from rumen trace files
*/
@SuppressWarnings("unchecked")
private void startAMFromRumenTraces(Resource containerResource,
int heartbeatInterval)
throws IOException {
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "file:///");
long baselineTimeMS = 0;
for (String inputTrace : inputTraces) {
File fin = new File(inputTrace);
JobTraceReader reader = new JobTraceReader(
new Path(fin.getAbsolutePath()), conf);
try {
LoggedJob job = null;
while ((job = reader.getNext()) != null) {
          // only MapReduce is currently supported
String jobType = "mapreduce";
String user = job.getUser() == null ?
"default" : job.getUser().getValue();
String jobQueue = job.getQueue().getValue();
String oldJobId = job.getJobID().toString();
long jobStartTimeMS = job.getSubmitTime();
long jobFinishTimeMS = job.getFinishTime();
if (baselineTimeMS == 0) {
baselineTimeMS = jobStartTimeMS;
}
jobStartTimeMS -= baselineTimeMS;
jobFinishTimeMS -= baselineTimeMS;
if (jobStartTimeMS < 0) {
LOG.warn("Warning: reset job " + oldJobId + " start time to 0.");
jobFinishTimeMS = jobFinishTimeMS - jobStartTimeMS;
jobStartTimeMS = 0;
}
boolean isTracked = trackedApps.contains(oldJobId);
int queueSize = queueAppNumMap.containsKey(jobQueue) ?
queueAppNumMap.get(jobQueue) : 0;
queueSize ++;
queueAppNumMap.put(jobQueue, queueSize);
List<ContainerSimulator> containerList =
new ArrayList<ContainerSimulator>();
// map tasks
for(LoggedTask mapTask : job.getMapTasks()) {
LoggedTaskAttempt taskAttempt = mapTask.getAttempts()
.get(mapTask.getAttempts().size() - 1);
String hostname = taskAttempt.getHostName().getValue();
long containerLifeTime = taskAttempt.getFinishTime()
- taskAttempt.getStartTime();
containerList.add(new ContainerSimulator(containerResource,
containerLifeTime, hostname, 10, "map"));
}
// reduce tasks
for(LoggedTask reduceTask : job.getReduceTasks()) {
LoggedTaskAttempt taskAttempt = reduceTask.getAttempts()
.get(reduceTask.getAttempts().size() - 1);
String hostname = taskAttempt.getHostName().getValue();
long containerLifeTime = taskAttempt.getFinishTime()
- taskAttempt.getStartTime();
containerList.add(new ContainerSimulator(containerResource,
containerLifeTime, hostname, 20, "reduce"));
}
// create a new AM
AMSimulator amSim = (AMSimulator) ReflectionUtils.newInstance(
amClassMap.get(jobType), conf);
if (amSim != null) {
amSim.init(AM_ID ++, heartbeatInterval, containerList,
rm, this, jobStartTimeMS, jobFinishTimeMS, user, jobQueue,
isTracked, oldJobId);
runner.schedule(amSim);
maxRuntime = Math.max(maxRuntime, jobFinishTimeMS);
numTasks += containerList.size();
amMap.put(oldJobId, amSim);
}
}
} finally {
reader.close();
}
}
}
private void printSimulationInfo() {
if (printSimulation) {
// node
LOG.info("------------------------------------");
      LOG.info(MessageFormat.format("# nodes = {0}, # racks = {1}, capacity " +
          "of each node: {2} MB memory and {3} vcores.",
numNMs, numRacks, nmMemoryMB, nmVCores));
LOG.info("------------------------------------");
// job
LOG.info(MessageFormat.format("# applications = {0}, # total " +
"tasks = {1}, average # tasks per application = {2}",
numAMs, numTasks, (int)(Math.ceil((numTasks + 0.0) / numAMs))));
LOG.info("JobId\tQueue\tAMType\tDuration\t#Tasks");
for (Map.Entry<String, AMSimulator> entry : amMap.entrySet()) {
AMSimulator am = entry.getValue();
LOG.info(entry.getKey() + "\t" + am.getQueue() + "\t" + am.getAMType()
+ "\t" + am.getDuration() + "\t" + am.getNumTasks());
}
LOG.info("------------------------------------");
// queue
      LOG.info(MessageFormat.format("number of queues = {0}, average " +
          "number of apps per queue = {1}", queueAppNumMap.size(),
(int)(Math.ceil((numAMs + 0.0) / queueAppNumMap.size()))));
LOG.info("------------------------------------");
// runtime
LOG.info(MessageFormat.format("estimated simulation time is {0}" +
" seconds", (long)(Math.ceil(maxRuntime / 1000.0))));
LOG.info("------------------------------------");
}
    // package this information into the simulateInfoMap used by other places
simulateInfoMap.put("Number of racks", numRacks);
simulateInfoMap.put("Number of nodes", numNMs);
simulateInfoMap.put("Node memory (MB)", nmMemoryMB);
simulateInfoMap.put("Node VCores", nmVCores);
simulateInfoMap.put("Number of applications", numAMs);
simulateInfoMap.put("Number of tasks", numTasks);
    simulateInfoMap.put("Average tasks per application",
(int)(Math.ceil((numTasks + 0.0) / numAMs)));
simulateInfoMap.put("Number of queues", queueAppNumMap.size());
simulateInfoMap.put("Average applications per queue",
(int)(Math.ceil((numAMs + 0.0) / queueAppNumMap.size())));
simulateInfoMap.put("Estimated simulate time (s)",
(long)(Math.ceil(maxRuntime / 1000.0)));
}
public HashMap<NodeId, NMSimulator> getNmMap() {
return nmMap;
}
public static TaskRunner getRunner() {
return runner;
}
public static void decreaseRemainingApps() {
remainingApps --;
if (remainingApps == 0) {
      LOG.info("All applications have finished; SLSRunner is shutting down.");
System.exit(0);
}
}
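  // Illustrative invocation (option names come from the parser below; file
  // names and job ids are examples only):
  //   java -cp <hadoop-classpath> org.apache.hadoop.yarn.sls.SLSRunner \
  //     -inputsls sls-jobs.json -nodes sls-nodes.json -output sls-output \
  //     -trackjobs job_1,job_2 -printsimulation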
public static void main(String args[]) throws Exception {
Options options = new Options();
options.addOption("inputrumen", true, "input rumen files");
options.addOption("inputsls", true, "input sls files");
options.addOption("nodes", true, "input topology");
options.addOption("output", true, "output directory");
options.addOption("trackjobs", true,
"jobs to be tracked during simulating");
options.addOption("printsimulation", false,
"print out simulation information");
CommandLineParser parser = new GnuParser();
CommandLine cmd = parser.parse(options, args);
String inputRumen = cmd.getOptionValue("inputrumen");
String inputSLS = cmd.getOptionValue("inputsls");
String output = cmd.getOptionValue("output");
if ((inputRumen == null && inputSLS == null) || output == null) {
System.err.println();
System.err.println("ERROR: Missing input or output file");
System.err.println();
System.err.println("Options: -inputrumen|-inputsls FILE,FILE... " +
"-output FILE [-nodes FILE] [-trackjobs JobId,JobId...] " +
"[-printsimulation]");
System.err.println();
System.exit(1);
}
File outputFile = new File(output);
if (! outputFile.exists()
&& ! outputFile.mkdirs()) {
System.err.println("ERROR: Cannot create output directory "
+ outputFile.getAbsolutePath());
System.exit(1);
}
Set<String> trackedJobSet = new HashSet<String>();
if (cmd.hasOption("trackjobs")) {
String trackjobs = cmd.getOptionValue("trackjobs");
String jobIds[] = trackjobs.split(",");
trackedJobSet.addAll(Arrays.asList(jobIds));
}
String nodeFile = cmd.hasOption("nodes") ? cmd.getOptionValue("nodes") : "";
boolean isSLS = inputSLS != null;
String inputFiles[] = isSLS ? inputSLS.split(",") : inputRumen.split(",");
SLSRunner sls = new SLSRunner(isSLS, inputFiles, nodeFile, output,
trackedJobSet, cmd.hasOption("printsimulation"));
sls.start();
}
}

View File

@ -0,0 +1,385 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.appmaster;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords
.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
import org.apache.hadoop.yarn.api.protocolrecords
.RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords
.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.util.Records;
import org.apache.log4j.Logger;
import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;
public abstract class AMSimulator extends TaskRunner.Task {
// resource manager
protected ResourceManager rm;
// main
protected SLSRunner se;
// application
protected ApplicationId appId;
protected ApplicationAttemptId appAttemptId;
protected String oldAppId; // jobId from the jobhistory file
// record factory
protected final static RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(null);
// response queue
protected final BlockingQueue<AllocateResponse> responseQueue;
protected int RESPONSE_ID = 1;
// user name
protected String user;
// queue name
protected String queue;
// am type
protected String amtype;
// job start/end time
protected long traceStartTimeMS;
protected long traceFinishTimeMS;
protected long simulateStartTimeMS;
protected long simulateFinishTimeMS;
// whether tracked in Metrics
protected boolean isTracked;
// progress
protected int totalContainers;
protected int finishedContainers;
protected final Logger LOG = Logger.getLogger(AMSimulator.class);
public AMSimulator() {
this.responseQueue = new LinkedBlockingQueue<AllocateResponse>();
}
public void init(int id, int heartbeatInterval,
List<ContainerSimulator> containerList, ResourceManager rm, SLSRunner se,
long traceStartTime, long traceFinishTime, String user, String queue,
boolean isTracked, String oldAppId) {
super.init(traceStartTime, traceStartTime + 1000000L * heartbeatInterval,
heartbeatInterval);
    this.user = user;
    this.rm = rm;
    this.se = se;
this.queue = queue;
this.oldAppId = oldAppId;
this.isTracked = isTracked;
this.traceStartTimeMS = traceStartTime;
this.traceFinishTimeMS = traceFinishTime;
}
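  // Lifecycle, driven by the TaskRunner configured in init() above:
  // firstStep() submits and registers the application, middleStep() is
  // invoked repeatedly on the heartbeat interval to process responses and
  // send new container requests, and lastStep() unregisters the AM and
  // records the simulated runtime.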
/**
* register with RM
*/
@Override
public void firstStep()
throws YarnException, IOException, InterruptedException {
simulateStartTimeMS = System.currentTimeMillis() -
SLSRunner.getRunner().getStartTimeMS();
// submit application, waiting until ACCEPTED
submitApp();
// register application master
registerAM();
// track app metrics
trackApp();
}
@Override
public void middleStep()
throws InterruptedException, YarnException, IOException {
// process responses in the queue
processResponseQueue();
// send out request
sendContainerRequest();
// check whether finish
checkStop();
}
@Override
public void lastStep() {
LOG.info(MessageFormat.format("Application {0} is shutting down.", appId));
// unregister tracking
if (isTracked) {
untrackApp();
}
// unregister application master
final FinishApplicationMasterRequest finishAMRequest = recordFactory
.newRecordInstance(FinishApplicationMasterRequest.class);
finishAMRequest.setFinalApplicationStatus(FinalApplicationStatus.SUCCEEDED);
try {
UserGroupInformation ugi =
UserGroupInformation.createRemoteUser(appAttemptId.toString());
Token<AMRMTokenIdentifier> token =
rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
.getRMAppAttempt(appAttemptId).getAMRMToken();
ugi.addTokenIdentifier(token.decodeIdentifier());
ugi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws Exception {
rm.getApplicationMasterService()
.finishApplicationMaster(finishAMRequest);
return null;
}
});
} catch (IOException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();
}
simulateFinishTimeMS = System.currentTimeMillis() -
SLSRunner.getRunner().getStartTimeMS();
// record job running information
((ResourceSchedulerWrapper)rm.getResourceScheduler())
.addAMRuntime(appId,
traceStartTimeMS, traceFinishTimeMS,
simulateStartTimeMS, simulateFinishTimeMS);
}
protected ResourceRequest createResourceRequest(
Resource resource, String host, int priority, int numContainers) {
ResourceRequest request = recordFactory
.newRecordInstance(ResourceRequest.class);
request.setCapability(resource);
request.setResourceName(host);
request.setNumContainers(numContainers);
Priority prio = recordFactory.newRecordInstance(Priority.class);
prio.setPriority(priority);
request.setPriority(prio);
return request;
}
protected AllocateRequest createAllocateRequest(List<ResourceRequest> ask,
List<ContainerId> toRelease) {
AllocateRequest allocateRequest =
recordFactory.newRecordInstance(AllocateRequest.class);
allocateRequest.setResponseId(RESPONSE_ID ++);
allocateRequest.setAskList(ask);
allocateRequest.setReleaseList(toRelease);
return allocateRequest;
}
protected AllocateRequest createAllocateRequest(List<ResourceRequest> ask) {
return createAllocateRequest(ask, new ArrayList<ContainerId>());
}
protected abstract void processResponseQueue()
throws InterruptedException, YarnException, IOException;
protected abstract void sendContainerRequest()
throws YarnException, IOException, InterruptedException;
protected abstract void checkStop();
private void submitApp()
throws YarnException, InterruptedException, IOException {
// ask for new application
GetNewApplicationRequest newAppRequest =
Records.newRecord(GetNewApplicationRequest.class);
GetNewApplicationResponse newAppResponse =
rm.getClientRMService().getNewApplication(newAppRequest);
appId = newAppResponse.getApplicationId();
// submit the application
final SubmitApplicationRequest subAppRequest =
Records.newRecord(SubmitApplicationRequest.class);
ApplicationSubmissionContext appSubContext =
Records.newRecord(ApplicationSubmissionContext.class);
appSubContext.setApplicationId(appId);
appSubContext.setMaxAppAttempts(1);
appSubContext.setQueue(queue);
appSubContext.setPriority(Priority.newInstance(0));
ContainerLaunchContext conLauContext =
Records.newRecord(ContainerLaunchContext.class);
conLauContext.setApplicationACLs(
new HashMap<ApplicationAccessType, String>());
conLauContext.setCommands(new ArrayList<String>());
conLauContext.setEnvironment(new HashMap<String, String>());
conLauContext.setLocalResources(new HashMap<String, LocalResource>());
conLauContext.setServiceData(new HashMap<String, ByteBuffer>());
appSubContext.setAMContainerSpec(conLauContext);
appSubContext.setUnmanagedAM(true);
subAppRequest.setApplicationSubmissionContext(appSubContext);
UserGroupInformation ugi = UserGroupInformation.createRemoteUser(user);
ugi.doAs(new PrivilegedExceptionAction<Object>() {
@Override
public Object run() throws YarnException {
rm.getClientRMService().submitApplication(subAppRequest);
return null;
}
});
LOG.info(MessageFormat.format("Submit a new application {0}", appId));
// waiting until application ACCEPTED
RMApp app = rm.getRMContext().getRMApps().get(appId);
while(app.getState() != RMAppState.ACCEPTED) {
Thread.sleep(50);
}
appAttemptId = rm.getRMContext().getRMApps().get(appId)
.getCurrentAppAttempt().getAppAttemptId();
}
private void registerAM()
throws YarnException, IOException, InterruptedException {
// register application master
final RegisterApplicationMasterRequest amRegisterRequest =
Records.newRecord(RegisterApplicationMasterRequest.class);
amRegisterRequest.setHost("localhost");
amRegisterRequest.setRpcPort(1000);
amRegisterRequest.setTrackingUrl("localhost:1000");
UserGroupInformation ugi =
UserGroupInformation.createRemoteUser(appAttemptId.toString());
Token<AMRMTokenIdentifier> token =
rm.getRMContext().getRMApps().get(appAttemptId.getApplicationId())
.getRMAppAttempt(appAttemptId).getAMRMToken();
ugi.addTokenIdentifier(token.decodeIdentifier());
ugi.doAs(
new PrivilegedExceptionAction<RegisterApplicationMasterResponse>() {
@Override
public RegisterApplicationMasterResponse run() throws Exception {
return rm.getApplicationMasterService()
.registerApplicationMaster(amRegisterRequest);
}
});
LOG.info(MessageFormat.format(
"Register the application master for application {0}", appId));
}
private void trackApp() {
if (isTracked) {
((ResourceSchedulerWrapper) rm.getResourceScheduler())
.addTrackedApp(appAttemptId, oldAppId);
}
}
public void untrackApp() {
if (isTracked) {
((ResourceSchedulerWrapper) rm.getResourceScheduler())
.removeTrackedApp(appAttemptId, oldAppId);
}
}
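  // Aggregates per-container asks into node-local, rack-local and ANY
  // ResourceRequests. Worked example (assuming equal priority and resource):
  // three containers on two hosts of one rack yield two node-local requests,
  // one rack-local request with numContainers = 3, and one ANY request with
  // numContainers = 3.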
protected List<ResourceRequest> packageRequests(
List<ContainerSimulator> csList, int priority) {
// create requests
Map<String, ResourceRequest> rackLocalRequestMap = new HashMap<String, ResourceRequest>();
Map<String, ResourceRequest> nodeLocalRequestMap = new HashMap<String, ResourceRequest>();
ResourceRequest anyRequest = null;
for (ContainerSimulator cs : csList) {
String rackHostNames[] = SLSUtils.getRackHostName(cs.getHostname());
// check rack local
String rackname = rackHostNames[0];
if (rackLocalRequestMap.containsKey(rackname)) {
rackLocalRequestMap.get(rackname).setNumContainers(
rackLocalRequestMap.get(rackname).getNumContainers() + 1);
} else {
ResourceRequest request = createResourceRequest(
cs.getResource(), rackname, priority, 1);
rackLocalRequestMap.put(rackname, request);
}
// check node local
String hostname = rackHostNames[1];
if (nodeLocalRequestMap.containsKey(hostname)) {
nodeLocalRequestMap.get(hostname).setNumContainers(
nodeLocalRequestMap.get(hostname).getNumContainers() + 1);
} else {
ResourceRequest request = createResourceRequest(
cs.getResource(), hostname, priority, 1);
nodeLocalRequestMap.put(hostname, request);
}
// any
if (anyRequest == null) {
anyRequest = createResourceRequest(
cs.getResource(), ResourceRequest.ANY, priority, 1);
} else {
anyRequest.setNumContainers(anyRequest.getNumContainers() + 1);
}
}
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
ask.addAll(nodeLocalRequestMap.values());
ask.addAll(rackLocalRequestMap.values());
if (anyRequest != null) {
ask.add(anyRequest);
}
return ask;
}
public String getQueue() {
return queue;
}
public String getAMType() {
return amtype;
}
public long getDuration() {
return simulateFinishTimeMS - simulateStartTimeMS;
}
public int getNumTasks() {
return totalContainers;
}
}

View File

@ -0,0 +1,405 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.appmaster;
import java.io.IOException;
import java.security.PrivilegedExceptionAction;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.SLSRunner;
import org.apache.log4j.Logger;
public class MRAMSimulator extends AMSimulator {
/*
Vocabulary Used:
pending -> requests which are NOT yet sent to RM
scheduled -> requests which are sent to RM but not yet assigned
assigned -> requests which are assigned to a container
completed -> request corresponding to which container has completed
  Maps are scheduled as soon as their requests are received. Reduces are
  scheduled when all maps have finished (slow-start is not currently supported).
*/
private static final int PRIORITY_REDUCE = 10;
private static final int PRIORITY_MAP = 20;
// pending maps
private LinkedList<ContainerSimulator> pendingMaps =
new LinkedList<ContainerSimulator>();
// pending failed maps
private LinkedList<ContainerSimulator> pendingFailedMaps =
new LinkedList<ContainerSimulator>();
// scheduled maps
private LinkedList<ContainerSimulator> scheduledMaps =
new LinkedList<ContainerSimulator>();
// assigned maps
private Map<ContainerId, ContainerSimulator> assignedMaps =
new HashMap<ContainerId, ContainerSimulator>();
// reduces which are not yet scheduled
private LinkedList<ContainerSimulator> pendingReduces =
new LinkedList<ContainerSimulator>();
// pending failed reduces
private LinkedList<ContainerSimulator> pendingFailedReduces =
new LinkedList<ContainerSimulator>();
// scheduled reduces
private LinkedList<ContainerSimulator> scheduledReduces =
new LinkedList<ContainerSimulator>();
// assigned reduces
private Map<ContainerId, ContainerSimulator> assignedReduces =
new HashMap<ContainerId, ContainerSimulator>();
// all maps & reduces
private LinkedList<ContainerSimulator> allMaps =
new LinkedList<ContainerSimulator>();
private LinkedList<ContainerSimulator> allReduces =
new LinkedList<ContainerSimulator>();
// counters
private int mapFinished = 0;
private int mapTotal = 0;
private int reduceFinished = 0;
private int reduceTotal = 0;
// waiting for AM container
private boolean isAMContainerRunning = false;
private Container amContainer;
// finished
private boolean isFinished = false;
// resource for AM container
private final static int MR_AM_CONTAINER_RESOURCE_MEMORY_MB = 1024;
private final static int MR_AM_CONTAINER_RESOURCE_VCORES = 1;
public final Logger LOG = Logger.getLogger(MRAMSimulator.class);
public void init(int id, int heartbeatInterval,
List<ContainerSimulator> containerList, ResourceManager rm, SLSRunner se,
long traceStartTime, long traceFinishTime, String user, String queue,
boolean isTracked, String oldAppId) {
super.init(id, heartbeatInterval, containerList, rm, se,
traceStartTime, traceFinishTime, user, queue,
isTracked, oldAppId);
amtype = "mapreduce";
// get map/reduce tasks
for (ContainerSimulator cs : containerList) {
if (cs.getType().equals("map")) {
cs.setPriority(PRIORITY_MAP);
pendingMaps.add(cs);
} else if (cs.getType().equals("reduce")) {
cs.setPriority(PRIORITY_REDUCE);
pendingReduces.add(cs);
}
}
allMaps.addAll(pendingMaps);
allReduces.addAll(pendingReduces);
mapTotal = pendingMaps.size();
reduceTotal = pendingReduces.size();
totalContainers = mapTotal + reduceTotal;
}
@Override
public void firstStep()
throws YarnException, IOException, InterruptedException {
super.firstStep();
requestAMContainer();
}
/**
* send out request for AM container
*/
protected void requestAMContainer()
throws YarnException, IOException, InterruptedException {
List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
ResourceRequest amRequest = createResourceRequest(
BuilderUtils.newResource(MR_AM_CONTAINER_RESOURCE_MEMORY_MB,
MR_AM_CONTAINER_RESOURCE_VCORES),
ResourceRequest.ANY, 1, 1);
ask.add(amRequest);
LOG.debug(MessageFormat.format("Application {0} sends out allocate " +
"request for its AM", appId));
final AllocateRequest request = this.createAllocateRequest(ask);
UserGroupInformation ugi =
UserGroupInformation.createRemoteUser(appAttemptId.toString());
Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps()
.get(appAttemptId.getApplicationId())
.getRMAppAttempt(appAttemptId).getAMRMToken();
ugi.addTokenIdentifier(token.decodeIdentifier());
AllocateResponse response = ugi.doAs(
new PrivilegedExceptionAction<AllocateResponse>() {
@Override
public AllocateResponse run() throws Exception {
return rm.getApplicationMasterService().allocate(request);
}
});
// waiting until the AM container is allocated
while (true) {
if (response != null && ! response.getAllocatedContainers().isEmpty()) {
// get AM container
Container container = response.getAllocatedContainers().get(0);
se.getNmMap().get(container.getNodeId())
.addNewContainer(container, -1L);
// start AM container
amContainer = container;
LOG.debug(MessageFormat.format("Application {0} starts its " +
"AM container ({1}).", appId, amContainer.getId()));
isAMContainerRunning = true;
break;
}
// this sleep time is different from HeartBeat
Thread.sleep(1000);
// send out empty request
sendContainerRequest();
response = responseQueue.take();
}
}
@Override
@SuppressWarnings("unchecked")
protected void processResponseQueue()
throws InterruptedException, YarnException, IOException {
while (! responseQueue.isEmpty()) {
AllocateResponse response = responseQueue.take();
// check completed containers
if (! response.getCompletedContainersStatuses().isEmpty()) {
for (ContainerStatus cs : response.getCompletedContainersStatuses()) {
ContainerId containerId = cs.getContainerId();
if (cs.getExitStatus() == ContainerExitStatus.SUCCESS) {
if (assignedMaps.containsKey(containerId)) {
              LOG.debug(MessageFormat.format("Application {0} has one " +
                      "mapper finished ({1}).", appId, containerId));
assignedMaps.remove(containerId);
mapFinished ++;
finishedContainers ++;
} else if (assignedReduces.containsKey(containerId)) {
              LOG.debug(MessageFormat.format("Application {0} has one " +
                      "reducer finished ({1}).", appId, containerId));
assignedReduces.remove(containerId);
reduceFinished ++;
finishedContainers ++;
} else {
// am container released event
isFinished = true;
              LOG.info(MessageFormat.format("Application {0} is " +
                  "finishing.", appId));
}
} else {
// container to be killed
if (assignedMaps.containsKey(containerId)) {
LOG.debug(MessageFormat.format("Application {0} has one " +
"mapper killed ({1}).", appId, containerId));
pendingFailedMaps.add(assignedMaps.remove(containerId));
} else if (assignedReduces.containsKey(containerId)) {
LOG.debug(MessageFormat.format("Application {0} has one " +
"reducer killed ({1}).", appId, containerId));
pendingFailedReduces.add(assignedReduces.remove(containerId));
} else {
LOG.info(MessageFormat.format("Application {0}'s AM is " +
"going to be killed. Restarting...", appId));
restart();
}
}
}
}
// check finished
if (isAMContainerRunning &&
(mapFinished == mapTotal) &&
(reduceFinished == reduceTotal)) {
// to release the AM container
se.getNmMap().get(amContainer.getNodeId())
.cleanupContainer(amContainer.getId());
isAMContainerRunning = false;
LOG.debug(MessageFormat.format("Application {0} sends out event " +
"to clean up its AM container.", appId));
isFinished = true;
}
// check allocated containers
for (Container container : response.getAllocatedContainers()) {
if (! scheduledMaps.isEmpty()) {
ContainerSimulator cs = scheduledMaps.remove();
        LOG.debug(MessageFormat.format("Application {0} starts to " +
            "launch a mapper ({1}).", appId, container.getId()));
assignedMaps.put(container.getId(), cs);
se.getNmMap().get(container.getNodeId())
.addNewContainer(container, cs.getLifeTime());
} else if (! this.scheduledReduces.isEmpty()) {
ContainerSimulator cs = scheduledReduces.remove();
        LOG.debug(MessageFormat.format("Application {0} starts to " +
            "launch a reducer ({1}).", appId, container.getId()));
assignedReduces.put(container.getId(), cs);
se.getNmMap().get(container.getNodeId())
.addNewContainer(container, cs.getLifeTime());
}
}
}
}
/**
   * Restart the application because its AM container was killed.
*/
private void restart()
throws YarnException, IOException, InterruptedException {
// clear
finishedContainers = 0;
isFinished = false;
mapFinished = 0;
reduceFinished = 0;
pendingFailedMaps.clear();
pendingMaps.clear();
pendingReduces.clear();
pendingFailedReduces.clear();
pendingMaps.addAll(allMaps);
    pendingReduces.addAll(allReduces);
isAMContainerRunning = false;
amContainer = null;
    // re-send the AM container request
requestAMContainer();
}
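  // Maps are requested first; reduces are requested only once every map has
  // finished. Failed maps/reduces are re-requested only after all previously
  // scheduled ones of that type have been assigned (scheduled queue empty).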
@Override
protected void sendContainerRequest()
throws YarnException, IOException, InterruptedException {
if (isFinished) {
return;
}
// send out request
List<ResourceRequest> ask = null;
if (isAMContainerRunning) {
if (mapFinished != mapTotal) {
// map phase
if (! pendingMaps.isEmpty()) {
ask = packageRequests(pendingMaps, PRIORITY_MAP);
LOG.debug(MessageFormat.format("Application {0} sends out " +
"request for {1} mappers.", appId, pendingMaps.size()));
scheduledMaps.addAll(pendingMaps);
pendingMaps.clear();
} else if (! pendingFailedMaps.isEmpty() && scheduledMaps.isEmpty()) {
ask = packageRequests(pendingFailedMaps, PRIORITY_MAP);
LOG.debug(MessageFormat.format("Application {0} sends out " +
"requests for {1} failed mappers.", appId,
pendingFailedMaps.size()));
scheduledMaps.addAll(pendingFailedMaps);
pendingFailedMaps.clear();
}
} else if (reduceFinished != reduceTotal) {
// reduce phase
if (! pendingReduces.isEmpty()) {
ask = packageRequests(pendingReduces, PRIORITY_REDUCE);
LOG.debug(MessageFormat.format("Application {0} sends out " +
"requests for {1} reducers.", appId, pendingReduces.size()));
scheduledReduces.addAll(pendingReduces);
pendingReduces.clear();
} else if (! pendingFailedReduces.isEmpty()
&& scheduledReduces.isEmpty()) {
ask = packageRequests(pendingFailedReduces, PRIORITY_REDUCE);
LOG.debug(MessageFormat.format("Application {0} sends out " +
"request for {1} failed reducers.", appId,
pendingFailedReduces.size()));
scheduledReduces.addAll(pendingFailedReduces);
pendingFailedReduces.clear();
}
}
}
if (ask == null) {
ask = new ArrayList<ResourceRequest>();
}
final AllocateRequest request = createAllocateRequest(ask);
if (totalContainers == 0) {
request.setProgress(1.0f);
} else {
request.setProgress((float) finishedContainers / totalContainers);
}
UserGroupInformation ugi =
UserGroupInformation.createRemoteUser(appAttemptId.toString());
Token<AMRMTokenIdentifier> token = rm.getRMContext().getRMApps()
.get(appAttemptId.getApplicationId())
.getRMAppAttempt(appAttemptId).getAMRMToken();
ugi.addTokenIdentifier(token.decodeIdentifier());
AllocateResponse response = ugi.doAs(
new PrivilegedExceptionAction<AllocateResponse>() {
@Override
public AllocateResponse run() throws Exception {
return rm.getApplicationMasterService().allocate(request);
}
});
if (response != null) {
responseQueue.put(response);
}
}
@Override
protected void checkStop() {
if (isFinished) {
super.setEndTime(System.currentTimeMillis());
}
}
@Override
public void lastStep() {
super.lastStep();
// clear data structures
allMaps.clear();
allReduces.clear();
assignedMaps.clear();
assignedReduces.clear();
pendingFailedMaps.clear();
pendingFailedReduces.clear();
pendingMaps.clear();
pendingReduces.clear();
scheduledMaps.clear();
scheduledReduces.clear();
responseQueue.clear();
}
}
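
The request-packaging step used above (packageRequests, whose implementation is not shown in this diff) ultimately builds YARN ResourceRequest records for the pending tasks. The following is a hypothetical, minimal sketch of what such a packaged "ask" could look like, assuming the standard ResourceRequest.newInstance factory and an assumed 1024 MB / 1 vcore container size; it is an illustration, not the SLS implementation.

// Hypothetical sketch only: packageRequests itself is not part of this diff.
// It shows how a count of pending containers could be turned into an "ask"
// of ResourceRequests at a given priority.
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;

public class AskSketch {
  public static List<ResourceRequest> packageAsk(int numPending, int priority) {
    List<ResourceRequest> ask = new ArrayList<ResourceRequest>();
    // one ANY-locality request covering all pending containers of this type
    ask.add(ResourceRequest.newInstance(
        Priority.newInstance(priority),   // e.g. map or reduce priority
        ResourceRequest.ANY,              // no locality constraint
        Resource.newInstance(1024, 1),    // assumed container size (MB, vcores)
        numPending));                     // number of containers requested
    return ask;
  }
}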

View File

@ -0,0 +1,68 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.conf;
public class SLSConfiguration {
// sls
public static final String PREFIX = "yarn.sls.";
// runner
public static final String RUNNER_PREFIX = PREFIX + "runner.";
public static final String RUNNER_POOL_SIZE = RUNNER_PREFIX + "pool.size";
public static final int RUNNER_POOL_SIZE_DEFAULT = 10;
// scheduler
public static final String SCHEDULER_PREFIX = PREFIX + "scheduler.";
public static final String RM_SCHEDULER = SCHEDULER_PREFIX + "class";
// metrics
public static final String METRICS_PREFIX = PREFIX + "metrics.";
public static final String METRICS_SWITCH = METRICS_PREFIX + "switch";
public static final String METRICS_WEB_ADDRESS_PORT = METRICS_PREFIX
+ "web.address.port";
public static final String METRICS_OUTPUT_DIR = METRICS_PREFIX + "output";
public static final int METRICS_WEB_ADDRESS_PORT_DEFAULT = 10001;
public static final String METRICS_TIMER_WINDOW_SIZE = METRICS_PREFIX
+ "timer.window.size";
public static final int METRICS_TIMER_WINDOW_SIZE_DEFAULT = 100;
public static final String METRICS_RECORD_INTERVAL_MS = METRICS_PREFIX
+ "record.interval.ms";
public static final int METRICS_RECORD_INTERVAL_MS_DEFAULT = 1000;
// nm
public static final String NM_PREFIX = PREFIX + "nm.";
public static final String NM_MEMORY_MB = NM_PREFIX + "memory.mb";
public static final int NM_MEMORY_MB_DEFAULT = 10240;
public static final String NM_VCORES = NM_PREFIX + "vcores";
public static final int NM_VCORES_DEFAULT = 10;
public static final String NM_HEARTBEAT_INTERVAL_MS = NM_PREFIX
+ "heartbeat.interval.ms";
public static final int NM_HEARTBEAT_INTERVAL_MS_DEFAULT = 1000;
// am
public static final String AM_PREFIX = PREFIX + "am.";
public static final String AM_HEARTBEAT_INTERVAL_MS = AM_PREFIX
+ "heartbeat.interval.ms";
public static final int AM_HEARTBEAT_INTERVAL_MS_DEFAULT = 1000;
public static final String AM_TYPE = AM_PREFIX + "type.";
// container
public static final String CONTAINER_PREFIX = PREFIX + "container.";
public static final String CONTAINER_MEMORY_MB = CONTAINER_PREFIX
+ "memory.mb";
public static final int CONTAINER_MEMORY_MB_DEFAULT = 1024;
public static final String CONTAINER_VCORES = CONTAINER_PREFIX + "vcores";
public static final int CONTAINER_VCORES_DEFAULT = 1;
}
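
These keys are plain Hadoop Configuration properties, so a caller would typically read them with the usual Configuration getters and fall back to the *_DEFAULT constants. A minimal sketch follows; the "sls-runner.xml" file name is an assumption standing in for whatever XML resource carries the SLS settings.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.sls.conf.SLSConfiguration;

public class SLSConfExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    // assumed resource name carrying the yarn.sls.* properties
    conf.addResource("sls-runner.xml");

    int poolSize = conf.getInt(SLSConfiguration.RUNNER_POOL_SIZE,
        SLSConfiguration.RUNNER_POOL_SIZE_DEFAULT);
    int nmMemoryMB = conf.getInt(SLSConfiguration.NM_MEMORY_MB,
        SLSConfiguration.NM_MEMORY_MB_DEFAULT);
    int heartbeatMs = conf.getInt(SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS,
        SLSConfiguration.NM_HEARTBEAT_INTERVAL_MS_DEFAULT);

    System.out.println("runner pool size = " + poolSize
        + ", NM memory (MB) = " + nmMemoryMB
        + ", NM heartbeat (ms) = " + heartbeatMs);
  }
}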

View File

@ -0,0 +1,261 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.nodemanager;
import java.io.IOException;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.DelayQueue;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords
.RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords
.RegisterNodeManagerResponse;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.NodeAction;
import org.apache.hadoop.yarn.server.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.server.api.records.NodeStatus;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.util.Records;
import org.apache.log4j.Logger;
import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;
import org.apache.hadoop.yarn.sls.scheduler.TaskRunner;
import org.apache.hadoop.yarn.sls.utils.SLSUtils;
public class NMSimulator extends TaskRunner.Task {
// node resource
private RMNode node;
// master key
private MasterKey masterKey;
// containers with various STATE
private List<ContainerId> completedContainerList;
private List<ContainerId> releasedContainerList;
private DelayQueue<ContainerSimulator> containerQueue;
private Map<ContainerId, ContainerSimulator> runningContainers;
private List<ContainerId> amContainerList;
// resource manager
private ResourceManager rm;
// heartbeat response id
private int responseId = 1;
private final static Logger LOG = Logger.getLogger(NMSimulator.class);
public void init(String nodeIdStr, int memory, int cores,
int dispatchTime, int heartBeatInterval, ResourceManager rm)
throws IOException, YarnException {
super.init(dispatchTime, dispatchTime + 1000000L * heartBeatInterval,
heartBeatInterval);
// create resource
String rackHostName[] = SLSUtils.getRackHostName(nodeIdStr);
this.node = NodeInfo.newNodeInfo(rackHostName[0], rackHostName[1],
BuilderUtils.newResource(memory, cores));
this.rm = rm;
// init data structures
completedContainerList =
Collections.synchronizedList(new ArrayList<ContainerId>());
releasedContainerList =
Collections.synchronizedList(new ArrayList<ContainerId>());
containerQueue = new DelayQueue<ContainerSimulator>();
amContainerList =
Collections.synchronizedList(new ArrayList<ContainerId>());
runningContainers =
new ConcurrentHashMap<ContainerId, ContainerSimulator>();
// register NM with RM
RegisterNodeManagerRequest req =
Records.newRecord(RegisterNodeManagerRequest.class);
req.setNodeId(node.getNodeID());
req.setResource(node.getTotalCapability());
req.setHttpPort(80);
RegisterNodeManagerResponse response = rm.getResourceTrackerService()
.registerNodeManager(req);
masterKey = response.getNMTokenMasterKey();
}
@Override
public void firstStep() throws YarnException, IOException {
// do nothing
}
@Override
public void middleStep() {
// check the lifetime of each running container
ContainerSimulator cs = null;
synchronized(completedContainerList) {
while ((cs = containerQueue.poll()) != null) {
runningContainers.remove(cs.getId());
completedContainerList.add(cs.getId());
LOG.debug(MessageFormat.format("Container {0} has completed",
cs.getId()));
}
}
// send heartbeat
NodeHeartbeatRequest beatRequest =
Records.newRecord(NodeHeartbeatRequest.class);
beatRequest.setLastKnownNMTokenMasterKey(masterKey);
NodeStatus ns = Records.newRecord(NodeStatus.class);
ns.setContainersStatuses(generateContainerStatusList());
ns.setNodeId(node.getNodeID());
ns.setKeepAliveApplications(new ArrayList<ApplicationId>());
ns.setResponseId(responseId++);
ns.setNodeHealthStatus(NodeHealthStatus.newInstance(true, "", 0));
beatRequest.setNodeStatus(ns);
try {
NodeHeartbeatResponse beatResponse =
rm.getResourceTrackerService().nodeHeartbeat(beatRequest);
if (! beatResponse.getContainersToCleanup().isEmpty()) {
// remove from queue
synchronized(releasedContainerList) {
for (ContainerId containerId : beatResponse.getContainersToCleanup()){
if (amContainerList.contains(containerId)) {
// AM container (not killed, only released)
synchronized(amContainerList) {
amContainerList.remove(containerId);
}
LOG.debug(MessageFormat.format("NodeManager {0} releases " +
"an AM ({1}).", node.getNodeID(), containerId));
} else {
cs = runningContainers.remove(containerId);
containerQueue.remove(cs);
releasedContainerList.add(containerId);
LOG.debug(MessageFormat.format("NodeManager {0} releases a " +
"container ({1}).", node.getNodeID(), containerId));
}
}
}
}
if (beatResponse.getNodeAction() == NodeAction.SHUTDOWN) {
lastStep();
}
} catch (YarnException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void lastStep() {
// do nothing
}
/**
* Collect the status of all containers located on the current node.
*/
private ArrayList<ContainerStatus> generateContainerStatusList() {
ArrayList<ContainerStatus> csList = new ArrayList<ContainerStatus>();
// add running containers
for (ContainerSimulator container : runningContainers.values()) {
csList.add(newContainerStatus(container.getId(),
ContainerState.RUNNING, ContainerExitStatus.SUCCESS));
}
synchronized(amContainerList) {
for (ContainerId cId : amContainerList) {
csList.add(newContainerStatus(cId,
ContainerState.RUNNING, ContainerExitStatus.SUCCESS));
}
}
// add completed containers
synchronized(completedContainerList) {
for (ContainerId cId : completedContainerList) {
LOG.debug(MessageFormat.format("NodeManager {0} completed" +
" container ({1}).", node.getNodeID(), cId));
csList.add(newContainerStatus(
cId, ContainerState.COMPLETE, ContainerExitStatus.SUCCESS));
}
completedContainerList.clear();
}
// released containers
synchronized(releasedContainerList) {
for (ContainerId cId : releasedContainerList) {
LOG.debug(MessageFormat.format("NodeManager {0} released container" +
" ({1}).", node.getNodeID(), cId));
csList.add(newContainerStatus(
cId, ContainerState.COMPLETE, ContainerExitStatus.ABORTED));
}
releasedContainerList.clear();
}
return csList;
}
private ContainerStatus newContainerStatus(ContainerId cId,
ContainerState state,
int exitState) {
ContainerStatus cs = Records.newRecord(ContainerStatus.class);
cs.setContainerId(cId);
cs.setState(state);
cs.setExitStatus(exitState);
return cs;
}
public RMNode getNode() {
return node;
}
/**
* Launch a new container with the given lifetime.
*/
public void addNewContainer(Container container, long lifeTimeMS) {
LOG.debug(MessageFormat.format("NodeManager {0} launches a new " +
"container ({1}).", node.getNodeID(), container.getId()));
if (lifeTimeMS != -1) {
// normal container
ContainerSimulator cs = new ContainerSimulator(container.getId(),
container.getResource(), lifeTimeMS + System.currentTimeMillis(),
lifeTimeMS);
containerQueue.add(cs);
runningContainers.put(cs.getId(), cs);
} else {
// AM container: a lifetime of -1 marks an AM container
synchronized(amContainerList) {
amContainerList.add(container.getId());
}
}
}
/**
* clean up an AM container and add to completed list
* @param containerId id of the container to be cleaned
*/
public void cleanupContainer(ContainerId containerId) {
synchronized(amContainerList) {
amContainerList.remove(containerId);
}
synchronized(completedContainerList) {
completedContainerList.add(containerId);
}
}
}
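
Driving an NMSimulator outside of the SLS runner takes little more than an initialized ResourceManager handle. A hedged sketch, assuming rm is an already-started ResourceManager and that node identifiers follow the "/rack/host" form that SLSUtils.getRackHostName() splits:

// Sketch under assumptions: "rm" is a started ResourceManager and the
// "/rack1/node1" identifier format matches what SLSUtils.getRackHostName expects.
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.sls.nodemanager.NMSimulator;

public class NMSimulatorSketch {
  public static NMSimulator startFakeNode(ResourceManager rm) throws Exception {
    NMSimulator nm = new NMSimulator();
    // 10240 MB / 10 vcores mirror the SLSConfiguration NM defaults;
    // dispatch immediately and heartbeat every second
    nm.init("/rack1/node1", 10240, 10, 0, 1000, rm);
    return nm;
  }
}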

View File

@ -0,0 +1,167 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.nodemanager;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode
.UpdatedContainerInfo;
public class NodeInfo {
private static int nodeIdCounter = 0;
public static NodeId newNodeID(String host, int port) {
return NodeId.newInstance(host, port);
}
private static class FakeRMNodeImpl implements RMNode {
private NodeId nodeId;
private String hostName;
private String nodeAddr;
private String httpAddress;
private int cmdPort;
private Resource perNode;
private String rackName;
private String healthReport;
private NodeState state;
private List<ContainerId> toCleanUpContainers;
private List<ApplicationId> toCleanUpApplications;
public FakeRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
Resource perNode, String rackName, String healthReport,
int cmdPort, String hostName, NodeState state) {
this.nodeId = nodeId;
this.nodeAddr = nodeAddr;
this.httpAddress = httpAddress;
this.perNode = perNode;
this.rackName = rackName;
this.healthReport = healthReport;
this.cmdPort = cmdPort;
this.hostName = hostName;
this.state = state;
toCleanUpApplications = new ArrayList<ApplicationId>();
toCleanUpContainers = new ArrayList<ContainerId>();
}
public NodeId getNodeID() {
return nodeId;
}
public String getHostName() {
return hostName;
}
public int getCommandPort() {
return cmdPort;
}
public int getHttpPort() {
return 0;
}
public String getNodeAddress() {
return nodeAddr;
}
public String getHttpAddress() {
return httpAddress;
}
public String getHealthReport() {
return healthReport;
}
public long getLastHealthReportTime() {
return 0;
}
public Resource getTotalCapability() {
return perNode;
}
public String getRackName() {
return rackName;
}
public Node getNode() {
throw new UnsupportedOperationException("Not supported yet.");
}
public NodeState getState() {
return state;
}
public List<ContainerId> getContainersToCleanUp() {
return toCleanUpContainers;
}
public List<ApplicationId> getAppsToCleanup() {
return toCleanUpApplications;
}
public void updateNodeHeartbeatResponseForCleanup(
NodeHeartbeatResponse response) {
}
public NodeHeartbeatResponse getLastNodeHeartBeatResponse() {
return null;
}
public List<UpdatedContainerInfo> pullContainerUpdates() {
ArrayList<UpdatedContainerInfo> list = new ArrayList<UpdatedContainerInfo>();
ArrayList<ContainerStatus> list2 = new ArrayList<ContainerStatus>();
for(ContainerId cId : this.toCleanUpContainers) {
list2.add(ContainerStatus.newInstance(cId, ContainerState.RUNNING, "",
ContainerExitStatus.SUCCESS));
}
list.add(new UpdatedContainerInfo(new ArrayList<ContainerStatus>(),
list2));
return list;
}
}
public static RMNode newNodeInfo(String rackName, String hostName,
final Resource resource, int port) {
final NodeId nodeId = newNodeID(hostName, port);
final String nodeAddr = hostName + ":" + port;
final String httpAddress = hostName;
return new FakeRMNodeImpl(nodeId, nodeAddr, httpAddress,
resource, rackName, "Me good",
port, hostName, null);
}
public static RMNode newNodeInfo(String rackName, String hostName,
final Resource resource) {
return newNodeInfo(rackName, hostName, resource, nodeIdCounter++);
}
}
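
NodeInfo.newNodeInfo is how NMSimulator fabricates its RMNode. A short sketch showing the same call in isolation, with BuilderUtils.newResource used exactly as in NMSimulator.init() above; the rack and host names are placeholders:

import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.sls.nodemanager.NodeInfo;

public class NodeInfoSketch {
  public static RMNode fakeNode() {
    // a 10240 MB / 10 vcore fake node on rack1; the port comes from
    // the static counter inside NodeInfo
    return NodeInfo.newNodeInfo("rack1", "node1",
        BuilderUtils.newResource(10240, 10));
  }
}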

View File

@ -0,0 +1,31 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
public class CapacitySchedulerMetrics extends SchedulerMetrics {
public CapacitySchedulerMetrics() {
super();
}
@Override
public void trackQueue(String queueName) {
trackedQueues.add(queueName);
}
}

View File

@ -0,0 +1,113 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import java.util.concurrent.Delayed;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;
public class ContainerSimulator implements Delayed {
// id
private ContainerId id;
// resource allocated
private Resource resource;
// end time
private long endTime;
// life time (ms)
private long lifeTime;
// host name
private String hostname;
// priority
private int priority;
// type
private String type;
/**
* Invoked when the AM schedules containers to be allocated.
*/
public ContainerSimulator(Resource resource, long lifeTime,
String hostname, int priority, String type) {
this.resource = resource;
this.lifeTime = lifeTime;
this.hostname = hostname;
this.priority = priority;
this.type = type;
}
/**
* Invoked when the NM schedules containers to run.
*/
public ContainerSimulator(ContainerId id, Resource resource, long endTime,
long lifeTime) {
this.id = id;
this.resource = resource;
this.endTime = endTime;
this.lifeTime = lifeTime;
}
public Resource getResource() {
return resource;
}
public ContainerId getId() {
return id;
}
@Override
public int compareTo(Delayed o) {
if (!(o instanceof ContainerSimulator)) {
throw new IllegalArgumentException(
"Parameter must be a ContainerSimulator instance");
}
ContainerSimulator other = (ContainerSimulator) o;
return (int) Math.signum(endTime - other.endTime);
}
@Override
public long getDelay(TimeUnit unit) {
return unit.convert(endTime - System.currentTimeMillis(),
TimeUnit.MILLISECONDS);
}
public long getLifeTime() {
return lifeTime;
}
public String getHostname() {
return hostname;
}
public long getEndTime() {
return endTime;
}
public int getPriority() {
return priority;
}
public String getType() {
return type;
}
public void setPriority(int p) {
priority = p;
}
}
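
Because ContainerSimulator implements Delayed keyed on endTime, NMSimulator can park running containers in a DelayQueue and simply poll for the ones whose lifetime has expired, as its middleStep() does above. A self-contained sketch of that pattern, using a hypothetical ContainerId purely for illustration:

import java.util.concurrent.DelayQueue;

import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator;

public class ContainerQueueSketch {
  public static void main(String[] args) throws InterruptedException {
    DelayQueue<ContainerSimulator> queue = new DelayQueue<ContainerSimulator>();

    // hypothetical container id, only for the sake of the example
    ContainerId cid = ContainerId.newInstance(
        ApplicationAttemptId.newInstance(ApplicationId.newInstance(0L, 1), 1), 1);
    long lifeTimeMS = 100;
    queue.add(new ContainerSimulator(cid, Resource.newInstance(1024, 1),
        System.currentTimeMillis() + lifeTimeMS, lifeTimeMS));

    // poll() returns null until the container's endTime has passed
    Thread.sleep(lifeTimeMS + 10);
    ContainerSimulator done = queue.poll();
    System.out.println(done == null ? "nothing expired" : "expired: " + done.getId());
  }
}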

View File

@ -0,0 +1,266 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair
.AppSchedulable;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair
.FairScheduler;
import com.codahale.metrics.Gauge;
import org.apache.hadoop.yarn.sls.SLSRunner;
public class FairSchedulerMetrics extends SchedulerMetrics {
private int totalMemoryMB = Integer.MAX_VALUE;
private int totalVCores = Integer.MAX_VALUE;
private boolean maxReset = false;
public FairSchedulerMetrics() {
super();
appTrackedMetrics.add("demand.memory");
appTrackedMetrics.add("demand.vcores");
appTrackedMetrics.add("usage.memory");
appTrackedMetrics.add("usage.vcores");
appTrackedMetrics.add("minshare.memory");
appTrackedMetrics.add("minshare.vcores");
appTrackedMetrics.add("maxshare.memory");
appTrackedMetrics.add("maxshare.vcores");
appTrackedMetrics.add("fairshare.memory");
appTrackedMetrics.add("fairshare.vcores");
queueTrackedMetrics.add("demand.memory");
queueTrackedMetrics.add("demand.vcores");
queueTrackedMetrics.add("usage.memory");
queueTrackedMetrics.add("usage.vcores");
queueTrackedMetrics.add("minshare.memory");
queueTrackedMetrics.add("minshare.vcores");
queueTrackedMetrics.add("maxshare.memory");
queueTrackedMetrics.add("maxshare.vcores");
queueTrackedMetrics.add("fairshare.memory");
queueTrackedMetrics.add("fairshare.vcores");
}
@Override
public void trackApp(ApplicationAttemptId appAttemptId, String oldAppId) {
super.trackApp(appAttemptId, oldAppId);
FairScheduler fair = (FairScheduler) scheduler;
final AppSchedulable app = fair.getSchedulerApp(appAttemptId)
.getAppSchedulable();
metrics.register("variable.app." + oldAppId + ".demand.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getDemand().getMemory();
}
}
);
metrics.register("variable.app." + oldAppId + ".demand.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getDemand().getVirtualCores();
}
}
);
metrics.register("variable.app." + oldAppId + ".usage.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getResourceUsage().getMemory();
}
}
);
metrics.register("variable.app." + oldAppId + ".usage.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getResourceUsage().getVirtualCores();
}
}
);
metrics.register("variable.app." + oldAppId + ".minshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getMinShare().getMemory();
}
}
);
metrics.register("variable.app." + oldAppId + ".minshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getMinShare().getVirtualCores();
}
}
);
metrics.register("variable.app." + oldAppId + ".maxshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return Math.min(app.getMaxShare().getMemory(), totalMemoryMB);
}
}
);
metrics.register("variable.app." + oldAppId + ".maxshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return Math.min(app.getMaxShare().getVirtualCores(), totalVCores);
}
}
);
metrics.register("variable.app." + oldAppId + ".fairshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getFairShare().getMemory();
}
}
);
metrics.register("variable.app." + oldAppId + ".fairshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return app.getFairShare().getVirtualCores();
}
}
);
}
@Override
public void trackQueue(String queueName) {
trackedQueues.add(queueName);
FairScheduler fair = (FairScheduler) scheduler;
final FSQueue queue = fair.getQueueManager().getQueue(queueName);
metrics.register("variable.queue." + queueName + ".demand.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getDemand().getMemory();
}
}
);
metrics.register("variable.queue." + queueName + ".demand.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getDemand().getVirtualCores();
}
}
);
metrics.register("variable.queue." + queueName + ".usage.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getResourceUsage().getMemory();
}
}
);
metrics.register("variable.queue." + queueName + ".usage.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getResourceUsage().getVirtualCores();
}
}
);
metrics.register("variable.queue." + queueName + ".minshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getMinShare().getMemory();
}
}
);
metrics.register("variable.queue." + queueName + ".minshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getMinShare().getVirtualCores();
}
}
);
metrics.register("variable.queue." + queueName + ".maxshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
if (! maxReset &&
SLSRunner.simulateInfoMap.containsKey("Number of nodes") &&
SLSRunner.simulateInfoMap.containsKey("Node memory (MB)") &&
SLSRunner.simulateInfoMap.containsKey("Node VCores")) {
int numNMs = Integer.parseInt(
SLSRunner.simulateInfoMap.get("Number of nodes").toString());
int numMemoryMB = Integer.parseInt(
SLSRunner.simulateInfoMap.get("Node memory (MB)").toString());
int numVCores = Integer.parseInt(
SLSRunner.simulateInfoMap.get("Node VCores").toString());
totalMemoryMB = numNMs * numMemoryMB;
totalVCores = numNMs * numVCores;
maxReset = true;
}
return Math.min(queue.getMaxShare().getMemory(), totalMemoryMB);
}
}
);
metrics.register("variable.queue." + queueName + ".maxshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return Math.min(queue.getMaxShare().getVirtualCores(), totalVCores);
}
}
);
metrics.register("variable.queue." + queueName + ".fairshare.memory",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getFairShare().getMemory();
}
}
);
metrics.register("variable.queue." + queueName + ".fairshare.vcores",
new Gauge<Integer>() {
@Override
public Integer getValue() {
return queue.getFairShare().getVirtualCores();
}
}
);
}
@Override
public void untrackQueue(String queueName) {
trackedQueues.remove(queueName);
metrics.remove("variable.queue." + queueName + ".demand.memory");
metrics.remove("variable.queue." + queueName + ".demand.vcores");
metrics.remove("variable.queue." + queueName + ".usage.memory");
metrics.remove("variable.queue." + queueName + ".usage.vcores");
metrics.remove("variable.queue." + queueName + ".minshare.memory");
metrics.remove("variable.queue." + queueName + ".minshare.vcores");
metrics.remove("variable.queue." + queueName + ".maxshare.memory");
metrics.remove("variable.queue." + queueName + ".maxshare.vcores");
metrics.remove("variable.queue." + queueName + ".fairshare.memory");
metrics.remove("variable.queue." + queueName + ".fairshare.vcores");
}
}
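
The registrations above all follow the same Codahale Metrics pattern: a named Gauge whose getValue() samples the scheduler lazily whenever a reporter reads it. A tiny standalone sketch of that pattern with an explicit MetricRegistry; the "metrics" field used in the class above is assumed to be provided by the SchedulerMetrics base class, which this diff does not show.

import com.codahale.metrics.Gauge;
import com.codahale.metrics.MetricRegistry;

public class GaugeSketch {
  public static void main(String[] args) {
    MetricRegistry metrics = new MetricRegistry();
    final long startMs = System.currentTimeMillis();

    // the gauge is evaluated every time a reporter reads it
    metrics.register("variable.queue.root.uptime.ms", new Gauge<Long>() {
      @Override
      public Long getValue() {
        return System.currentTimeMillis() - startMs;
      }
    });

    Gauge<?> g = metrics.getGauges().get("variable.queue.root.uptime.ms");
    System.out.println("uptime ms = " + g.getValue());
  }
}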

View File

@ -0,0 +1,58 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.sls.scheduler;
import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo
.FifoScheduler;
import com.codahale.metrics.Gauge;
public class FifoSchedulerMetrics extends SchedulerMetrics {
public FifoSchedulerMetrics() {
super();
}
@Override
public void trackQueue(String queueName) {
trackedQueues.add(queueName);
FifoScheduler fifo = (FifoScheduler) scheduler;
// for FifoScheduler, only DEFAULT_QUEUE
// here the three parameters don't affect the results
final QueueInfo queue = fifo.getQueueInfo(queueName, false, false);
// track currentCapacity, maximumCapacity (always 1.0f)
metrics.register("variable.queue." + queueName + ".currentcapacity",
new Gauge<Float>() {
@Override
public Float getValue() {
return queue.getCurrentCapacity();
}
}
);
metrics.register("variable.queue." + queueName + ".",
new Gauge<Float>() {
@Override
public Float getValue() {
return queue.getCurrentCapacity();
}
}
);
}
}

Some files were not shown because too many files have changed in this diff.