MAPREDUCE-1981. Improve getSplits performance by using listLocatedStatus. Contributed by Hairong Kuang and Jason Lowe

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1507385 13f79535-47bb-0310-9956-ffa450edef68
2013-07-26 18:16:19 +00:00 · 2013-07-26 18:16:19 +00:00 · ec18984252
parent 55dfadec80
commit ec18984252
6 changed files with 271 additions and 90 deletions
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@ -156,6 +156,9 @@ Release 2.3.0 - UNRELEASED

  OPTIMIZATIONS

+    MAPREDUCE-1981. Improve getSplits performance by using listLocatedStatus
+    (Hairong Kuang and Jason Lowe via jlowe)
+
  BUG FIXES

    MAPREDUCE-5316. job -list-attempt-ids command does not handle illegal
@ -1232,6 +1235,9 @@ Release 0.23.10 - UNRELEASED

  OPTIMIZATIONS

+    MAPREDUCE-1981. Improve getSplits performance by using listLocatedStatus
+    (Hairong Kuang and Jason Lowe via jlowe)
+
  BUG FIXES

    MAPREDUCE-3193. FileInputFormat doesn't read files recursively in the
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java
@ -36,8 +36,10 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.mapreduce.security.TokenCache;
 import org.apache.hadoop.net.NetworkTopology;
 import org.apache.hadoop.net.Node;
@ -169,11 +171,15 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
  protected void addInputPathRecursively(List<FileStatus> result,
      FileSystem fs, Path path, PathFilter inputFilter) 
      throws IOException {
-    for(FileStatus stat: fs.listStatus(path, inputFilter)) {
-      if (stat.isDirectory()) {
-        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
-      } else {
-        result.add(stat);
+    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
+    while (iter.hasNext()) {
+      LocatedFileStatus stat = iter.next();
+      if (inputFilter.accept(stat.getPath())) {
+        if (stat.isDirectory()) {
+          addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
+        } else {
+          result.add(stat);
+        }
      }
    }
  }
@ -221,12 +227,17 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
      } else {
        for (FileStatus globStat: matches) {
          if (globStat.isDirectory()) {
-            for(FileStatus stat: fs.listStatus(globStat.getPath(),
-                inputFilter)) {
-              if (recursive && stat.isDirectory()) {
-                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
-              } else {
-                result.add(stat);
+            RemoteIterator<LocatedFileStatus> iter =
+                fs.listLocatedStatus(globStat.getPath());
+            while (iter.hasNext()) {
+              LocatedFileStatus stat = iter.next();
+              if (inputFilter.accept(stat.getPath())) {
+                if (recursive && stat.isDirectory()) {
+                  addInputPathRecursively(result, fs, stat.getPath(),
+                      inputFilter);
+                } else {
+                  result.add(stat);
+                }
              }
            }
          } else {
@ -254,7 +265,6 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {

  /** Splits files returned by {@link #listStatus(JobConf)} when
   * they're too big.*/ 
-  @SuppressWarnings("deprecation")
  public InputSplit[] getSplits(JobConf job, int numSplits)
    throws IOException {
    FileStatus[] files = listStatus(job);
@ -278,31 +288,38 @@ public abstract class FileInputFormat<K, V> implements InputFormat<K, V> {
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file: files) {
      Path path = file.getPath();
-      FileSystem fs = path.getFileSystem(job);
      long length = file.getLen();
-      BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
-      if ((length != 0) && isSplitable(fs, path)) { 
-        long blockSize = file.getBlockSize();
-        long splitSize = computeSplitSize(goalSize, minSize, blockSize);
-
-        long bytesRemaining = length;
-        while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
-          String[] splitHosts = getSplitHosts(blkLocations, 
-              length-bytesRemaining, splitSize, clusterMap);
-          splits.add(makeSplit(path, length-bytesRemaining, splitSize, 
-                               splitHosts));
-          bytesRemaining -= splitSize;
+      if (length != 0) {
+        FileSystem fs = path.getFileSystem(job);
+        BlockLocation[] blkLocations;
+        if (file instanceof LocatedFileStatus) {
+          blkLocations = ((LocatedFileStatus) file).getBlockLocations();
+        } else {
+          blkLocations = fs.getFileBlockLocations(file, 0, length);
        }
+        if (isSplitable(fs, path)) {
+          long blockSize = file.getBlockSize();
+          long splitSize = computeSplitSize(goalSize, minSize, blockSize);

-        if (bytesRemaining != 0) {
-          String[] splitHosts = getSplitHosts(blkLocations, length
-              - bytesRemaining, bytesRemaining, clusterMap);
-          splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
-              splitHosts));
+          long bytesRemaining = length;
+          while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
+            String[] splitHosts = getSplitHosts(blkLocations,
+                length-bytesRemaining, splitSize, clusterMap);
+            splits.add(makeSplit(path, length-bytesRemaining, splitSize,
+                splitHosts));
+            bytesRemaining -= splitSize;
+          }
+
+          if (bytesRemaining != 0) {
+            String[] splitHosts = getSplitHosts(blkLocations, length
+                - bytesRemaining, bytesRemaining, clusterMap);
+            splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
+                splitHosts));
+          }
+        } else {
+          String[] splitHosts = getSplitHosts(blkLocations,0,length,clusterMap);
+          splits.add(makeSplit(path, 0, length, splitHosts));
        }
-      } else if (length != 0) {
-        String[] splitHosts = getSplitHosts(blkLocations,0,length,clusterMap);
-        splits.add(makeSplit(path, 0, length, splitHosts));
      } else { 
        //Create empty hosts array for zero length files
        splits.add(makeSplit(path, 0, length, new String[0]));
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
@ -21,7 +21,6 @@ package org.apache.hadoop.mapreduce.lib.input;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
-import java.util.LinkedList;
 import java.util.HashSet;
 import java.util.List;
 import java.util.HashMap;
@ -33,7 +32,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.FileStatus;
@ -204,46 +203,33 @@ public abstract class CombineFileInputFormat<K, V>
    }

    // all the files in input set
-    Path[] paths = FileUtil.stat2Paths(
-                     listStatus(job).toArray(new FileStatus[0]));
+    List<FileStatus> stats = listStatus(job);
    List<InputSplit> splits = new ArrayList<InputSplit>();
-    if (paths.length == 0) {
+    if (stats.size() == 0) {
      return splits;    
    }

-    // Convert them to Paths first. This is a costly operation and 
-    // we should do it first, otherwise we will incur doing it multiple
-    // times, one time each for each pool in the next loop.
-    List<Path> newpaths = new LinkedList<Path>();
-    for (int i = 0; i < paths.length; i++) {
-      FileSystem fs = paths[i].getFileSystem(conf);
-      Path p = fs.makeQualified(paths[i]);
-      newpaths.add(p);
-    }
-
    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
-      ArrayList<Path> myPaths = new ArrayList<Path>();
+      ArrayList<FileStatus> myPaths = new ArrayList<FileStatus>();
      
      // pick one input path. If it matches all the filters in a pool,
      // add it to the output set
-      for (Iterator<Path> iter = newpaths.iterator(); iter.hasNext();) {
-        Path p = iter.next();
-        if (onepool.accept(p)) {
+      for (Iterator<FileStatus> iter = stats.iterator(); iter.hasNext();) {
+        FileStatus p = iter.next();
+        if (onepool.accept(p.getPath())) {
          myPaths.add(p); // add it to my output set
          iter.remove();
        }
      }
      // create splits for all files in this pool.
-      getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), 
-                    maxSize, minSizeNode, minSizeRack, splits);
+      getMoreSplits(job, myPaths, maxSize, minSizeNode, minSizeRack, splits);
    }

    // create splits for all files that are not in any pool.
-    getMoreSplits(job, newpaths.toArray(new Path[newpaths.size()]), 
-                  maxSize, minSizeNode, minSizeRack, splits);
+    getMoreSplits(job, stats, maxSize, minSizeNode, minSizeRack, splits);

    // free up rackToNodes map
    rackToNodes.clear();
@ -253,7 +239,7 @@ public abstract class CombineFileInputFormat<K, V>
  /**
   * Return all the splits in the specified set of paths
   */
-  private void getMoreSplits(JobContext job, Path[] paths, 
+  private void getMoreSplits(JobContext job, List<FileStatus> stats,
                             long maxSize, long minSizeNode, long minSizeRack,
                             List<InputSplit> splits)
    throws IOException {
@ -274,15 +260,16 @@ public abstract class CombineFileInputFormat<K, V>
    HashMap<String, List<OneBlockInfo>> nodeToBlocks = 
                              new HashMap<String, List<OneBlockInfo>>();
    
-    files = new OneFileInfo[paths.length];
-    if (paths.length == 0) {
+    files = new OneFileInfo[stats.size()];
+    if (stats.size() == 0) {
      return; 
    }

    // populate all the blocks for all files
    long totLength = 0;
-    for (int i = 0; i < paths.length; i++) {
-      files[i] = new OneFileInfo(paths[i], conf, isSplitable(job, paths[i]),
+    int i = 0;
+    for (FileStatus stat : stats) {
+      files[i] = new OneFileInfo(stat, conf, isSplitable(job, stat.getPath()),
                                 rackToBlocks, blockToNodes, nodeToBlocks,
                                 rackToNodes, maxSize);
      totLength += files[i].getLength();
@ -523,7 +510,7 @@ public abstract class CombineFileInputFormat<K, V>
    private long fileSize;               // size of the file
    private OneBlockInfo[] blocks;       // all blocks in this file

-    OneFileInfo(Path path, Configuration conf,
+    OneFileInfo(FileStatus stat, Configuration conf,
                boolean isSplitable,
                HashMap<String, List<OneBlockInfo>> rackToBlocks,
                HashMap<OneBlockInfo, String[]> blockToNodes,
@ -534,10 +521,13 @@ public abstract class CombineFileInputFormat<K, V>
      this.fileSize = 0;

      // get block locations from file system
-      FileSystem fs = path.getFileSystem(conf);
-      FileStatus stat = fs.getFileStatus(path);
-      BlockLocation[] locations = fs.getFileBlockLocations(stat, 0, 
-                                                           stat.getLen());
+      BlockLocation[] locations;
+      if (stat instanceof LocatedFileStatus) {
+        locations = ((LocatedFileStatus) stat).getBlockLocations();
+      } else {
+        FileSystem fs = stat.getPath().getFileSystem(conf);
+        locations = fs.getFileBlockLocations(stat, 0, stat.getLen());
+      }
      // create a list of all block and their locations
      if (locations == null) {
        blocks = new OneBlockInfo[0];
@ -552,8 +542,8 @@ public abstract class CombineFileInputFormat<K, V>
          // full file length
          blocks = new OneBlockInfo[1];
          fileSize = stat.getLen();
-          blocks[0] = new OneBlockInfo(path, 0, fileSize, locations[0]
-              .getHosts(), locations[0].getTopologyPaths());
+          blocks[0] = new OneBlockInfo(stat.getPath(), 0, fileSize,
+              locations[0].getHosts(), locations[0].getTopologyPaths());
        } else {
          ArrayList<OneBlockInfo> blocksList = new ArrayList<OneBlockInfo>(
              locations.length);
@ -579,9 +569,9 @@ public abstract class CombineFileInputFormat<K, V>
                  myLength = Math.min(maxSize, left);
                }
              }
-              OneBlockInfo oneblock = new OneBlockInfo(path, myOffset,
-                  myLength, locations[i].getHosts(), locations[i]
-                      .getTopologyPaths());
+              OneBlockInfo oneblock = new OneBlockInfo(stat.getPath(),
+                  myOffset, myLength, locations[i].getHosts(),
+                  locations[i].getTopologyPaths());
              left -= myLength;
              myOffset += myLength;

@ -693,6 +683,9 @@ public abstract class CombineFileInputFormat<K, V>

  protected BlockLocation[] getFileBlockLocations(
    FileSystem fs, FileStatus stat) throws IOException {
+    if (stat instanceof LocatedFileStatus) {
+      return ((LocatedFileStatus) stat).getBlockLocations();
+    }
    return fs.getFileBlockLocations(stat, 0, stat.getLen());
  }

--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java
@ -29,9 +29,11 @@ import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.mapreduce.InputFormat;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.Job;
@ -259,12 +261,17 @@ public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
      } else {
        for (FileStatus globStat: matches) {
          if (globStat.isDirectory()) {
-            for(FileStatus stat: fs.listStatus(globStat.getPath(),
-                inputFilter)) {
-              if (recursive && stat.isDirectory()) {
-                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
-              } else {
-                result.add(stat);
+            RemoteIterator<LocatedFileStatus> iter =
+                fs.listLocatedStatus(globStat.getPath());
+            while (iter.hasNext()) {
+              LocatedFileStatus stat = iter.next();
+              if (inputFilter.accept(stat.getPath())) {
+                if (recursive && stat.isDirectory()) {
+                  addInputPathRecursively(result, fs, stat.getPath(),
+                      inputFilter);
+                } else {
+                  result.add(stat);
+                }
              }
            }
          } else {
@ -296,11 +303,15 @@ public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
  protected void addInputPathRecursively(List<FileStatus> result,
      FileSystem fs, Path path, PathFilter inputFilter) 
      throws IOException {
-    for(FileStatus stat: fs.listStatus(path, inputFilter)) {
-      if (stat.isDirectory()) {
-        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
-      } else {
-        result.add(stat);
+    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
+    while (iter.hasNext()) {
+      LocatedFileStatus stat = iter.next();
+      if (inputFilter.accept(stat.getPath())) {
+        if (stat.isDirectory()) {
+          addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
+        } else {
+          result.add(stat);
+        }
      }
    }
  }
@ -331,8 +342,13 @@ public abstract class FileInputFormat<K, V> extends InputFormat<K, V> {
      Path path = file.getPath();
      long length = file.getLen();
      if (length != 0) {
-        FileSystem fs = path.getFileSystem(job.getConfiguration());
-        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
+        BlockLocation[] blkLocations;
+        if (file instanceof LocatedFileStatus) {
+          blkLocations = ((LocatedFileStatus) file).getBlockLocations();
+        } else {
+          FileSystem fs = path.getFileSystem(job.getConfiguration());
+          blkLocations = fs.getFileBlockLocations(file, 0, length);
+        }
        if (isSplitable(job, path)) {
          long blockSize = file.getBlockSize();
          long splitSize = computeSplitSize(blockSize, minSize, maxSize);
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestFileInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestFileInputFormat.java
@ -0,0 +1,113 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.mapred;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestFileInputFormat {
+
+  @Test
+  public void testListLocatedStatus() throws Exception {
+    Configuration conf = getConfiguration();
+    conf.setBoolean("fs.test.impl.disable.cache", false);
+    conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
+        "test:///a1/a2");
+    MockFileSystem mockFs =
+        (MockFileSystem) new Path("test:///").getFileSystem(conf);
+    Assert.assertEquals("listLocatedStatus already called",
+        0, mockFs.numListLocatedStatusCalls);
+    JobConf job = new JobConf(conf);
+    TextInputFormat fileInputFormat = new TextInputFormat();
+    fileInputFormat.configure(job);
+    InputSplit[] splits = fileInputFormat.getSplits(job, 1);
+    Assert.assertEquals("Input splits are not correct", 2, splits.length);
+    Assert.assertEquals("listLocatedStatuss calls",
+        1, mockFs.numListLocatedStatusCalls);
+  }
+
+  private Configuration getConfiguration() {
+    Configuration conf = new Configuration();
+    conf.set("fs.test.impl.disable.cache", "true");
+    conf.setClass("fs.test.impl", MockFileSystem.class, FileSystem.class);
+    conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR,
+        "test:///a1");
+    return conf;
+  }
+
+  static class MockFileSystem extends RawLocalFileSystem {
+    int numListLocatedStatusCalls = 0;
+
+    @Override
+    public FileStatus[] listStatus(Path f) throws FileNotFoundException,
+        IOException {
+      if (f.toString().equals("test:/a1")) {
+        return new FileStatus[] {
+            new FileStatus(0, true, 1, 150, 150, new Path("test:/a1/a2")),
+            new FileStatus(10, false, 1, 150, 150, new Path("test:/a1/file1")) };
+      } else if (f.toString().equals("test:/a1/a2")) {
+        return new FileStatus[] {
+            new FileStatus(10, false, 1, 150, 150,
+                new Path("test:/a1/a2/file2")),
+            new FileStatus(10, false, 1, 151, 150,
+                new Path("test:/a1/a2/file3")) };
+      }
+      return new FileStatus[0];
+    }
+
+    @Override
+    public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
+        throws IOException {
+      return new FileStatus[] { new FileStatus(10, true, 1, 150, 150,
+          pathPattern) };
+    }
+
+    @Override
+    public FileStatus[] listStatus(Path f, PathFilter filter)
+        throws FileNotFoundException, IOException {
+      return this.listStatus(f);
+    }
+
+    @Override
+    public BlockLocation[] getFileBlockLocations(Path p, long start, long len)
+        throws IOException {
+      return new BlockLocation[] {
+          new BlockLocation(new String[] { "localhost:50010" },
+              new String[] { "localhost" }, 0, len) };
+    }
+
+    @Override
+    protected RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f,
+        PathFilter filter) throws FileNotFoundException, IOException {
+      ++numListLocatedStatusCalls;
+      return super.listLocatedStatus(f, filter);
+    }
+  }
+}
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestFileInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestFileInputFormat.java
@ -24,11 +24,14 @@ import java.util.List;
 import junit.framework.Assert;

 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.BlockLocation;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.mapreduce.InputSplit;
 import org.apache.hadoop.mapreduce.Job;
 import org.junit.Test;
@ -77,6 +80,23 @@ public class TestFileInputFormat {
        .toString());
  }

+  @Test
+  public void testListLocatedStatus() throws Exception {
+    Configuration conf = getConfiguration();
+    conf.setBoolean("fs.test.impl.disable.cache", false);
+    conf.set(FileInputFormat.INPUT_DIR, "test:///a1/a2");
+    MockFileSystem mockFs =
+        (MockFileSystem) new Path("test:///").getFileSystem(conf);
+    Assert.assertEquals("listLocatedStatus already called",
+        0, mockFs.numListLocatedStatusCalls);
+    Job job = Job.getInstance(conf);
+    FileInputFormat<?, ?> fileInputFormat = new TextInputFormat();
+    List<InputSplit> splits = fileInputFormat.getSplits(job);
+    Assert.assertEquals("Input splits are not correct", 2, splits.size());
+    Assert.assertEquals("listLocatedStatuss calls",
+        1, mockFs.numListLocatedStatusCalls);
+  }
+
  private Configuration getConfiguration() {
    Configuration conf = new Configuration();
    conf.set("fs.test.impl.disable.cache", "true");
@ -86,13 +106,14 @@ public class TestFileInputFormat {
  }

  static class MockFileSystem extends RawLocalFileSystem {
+    int numListLocatedStatusCalls = 0;

    @Override
    public FileStatus[] listStatus(Path f) throws FileNotFoundException,
        IOException {
      if (f.toString().equals("test:/a1")) {
        return new FileStatus[] {
-            new FileStatus(10, true, 1, 150, 150, new Path("test:/a1/a2")),
+            new FileStatus(0, true, 1, 150, 150, new Path("test:/a1/a2")),
            new FileStatus(10, false, 1, 150, 150, new Path("test:/a1/file1")) };
      } else if (f.toString().equals("test:/a1/a2")) {
        return new FileStatus[] {
@ -116,5 +137,20 @@ public class TestFileInputFormat {
        throws FileNotFoundException, IOException {
      return this.listStatus(f);
    }
+
+    @Override
+    public BlockLocation[] getFileBlockLocations(Path p, long start, long len)
+        throws IOException {
+      return new BlockLocation[] {
+          new BlockLocation(new String[] { "localhost:50010" },
+              new String[] { "localhost" }, 0, len) };
+    }
+
+    @Override
+    protected RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f,
+        PathFilter filter) throws FileNotFoundException, IOException {
+      ++numListLocatedStatusCalls;
+      return super.listLocatedStatus(f, filter);
+    }
  }
 }