MAPREDUCE-4892. Modify CombineFileInputFormat to not skew input slits' allocation on small clusters. Contributed by Bikas Saha.

svn merge --ignore-ancestry -c 1450912 ../../trunk/ git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1450915 13f79535-47bb-0310-9956-ffa450edef68
2013-02-27 18:53:03 +00:00 · 2013-02-27 18:53:03 +00:00 · c3981d3f2c
commit c3981d3f2c
parent 829a798201
3 changed files with 223 additions and 111 deletions
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@ -11,6 +11,9 @@ Release 2.0.4-beta - UNRELEASED
    MAPREDUCE-5033. mapred shell script should respect usage flags
    (--help -help -h). (Andrew Wang via atm)
    MAPREDUCE-4892. Modify CombineFileInputFormat to not skew input slits'
    allocation on small clusters. (Bikas Saha via vinodkv)
  OPTIMIZATIONS
  BUG FIXES
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java
@ -49,6 +49,8 @@
 import org.apache.hadoop.net.NodeBase;
 import org.apache.hadoop.net.NetworkTopology;
 import com.google.common.annotations.VisibleForTesting;
 /**
 * An abstract {@link InputFormat} that returns {@link CombineFileSplit}'s in 
 * {@link InputFormat#getSplits(JobContext)} method. 
@ -163,7 +165,6 @@ public CombineFileInputFormat() {
  @Override
  public List<InputSplit> getSplits(JobContext job) 
    throws IOException {
    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
@ -286,17 +287,38 @@ private void getMoreSplits(JobContext job, Path[] paths,
                                 rackToNodes, maxSize);
      totLength += files[i].getLength();
    }
    createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength, 
                 maxSize, minSizeNode, minSizeRack, splits);
  }
  @VisibleForTesting
  void createSplits(HashMap<String, List<OneBlockInfo>> nodeToBlocks,
                     HashMap<OneBlockInfo, String[]> blockToNodes,
                     HashMap<String, List<OneBlockInfo>> rackToBlocks,
                     long totLength,
                     long maxSize,
                     long minSizeNode,
                     long minSizeRack,
                     List<InputSplit> splits                     
                    ) {
    ArrayList<OneBlockInfo> validBlocks = new ArrayList<OneBlockInfo>();
    Set<String> nodes = new HashSet<String>();
    long curSplitSize = 0;
-    // process all nodes and create splits that are local
+    int numNodes = nodeToBlocks.size();
-    // to a node. 
+    long totalLength = totLength;
    for (Iterator<Map.Entry<String, 
         List<OneBlockInfo>>> iter = nodeToBlocks.entrySet().iterator(); 
         iter.hasNext();) {
    while(true) {
      // it is allowed for maxSize to be 0. Disable smoothing load for such cases
      int avgSplitsPerNode = maxSize > 0 && numNodes > 0 ?
                                        ((int) (totalLength/maxSize))/numNodes
                                        : Integer.MAX_VALUE;
      int maxSplitsByNodeOnly = (avgSplitsPerNode > 0) ? avgSplitsPerNode : 1;
      numNodes = 0;
      // process all nodes and create splits that are local to a node.
      for (Iterator<Map.Entry<String, List<OneBlockInfo>>> iter = nodeToBlocks
          .entrySet().iterator(); iter.hasNext();) {
        Map.Entry<String, List<OneBlockInfo>> one = iter.next();
        nodes.add(one.getKey());
        List<OneBlockInfo> blocksInNode = one.getValue();
@ -304,6 +326,7 @@ private void getMoreSplits(JobContext job, Path[] paths,
        // for each block, copy it into validBlocks. Delete it from
        // blockToNodes so that the same block does not appear in
        // two different splits.
        int splitsInNode = 0;
        for (OneBlockInfo oneblock : blocksInNode) {
          if (blockToNodes.containsKey(oneblock)) {
            validBlocks.add(oneblock);
@ -315,8 +338,19 @@ private void getMoreSplits(JobContext job, Path[] paths,
            if (maxSize != 0 && curSplitSize >= maxSize) {
              // create an input split and add it to the splits array
              addCreatedSplit(splits, nodes, validBlocks);
              totalLength -= curSplitSize;
              curSplitSize = 0;
              validBlocks.clear();
              splitsInNode++;
              if (splitsInNode == maxSplitsByNodeOnly) {
                // stop grouping on a node so as not to create
                // disproportionately more splits on a node because it happens
                // to have many blocks
                // consider only these nodes in next round of grouping because
                // they have leftover blocks that may need to be grouped
                numNodes++;
                break;
              }
            }
          }
        }
@ -325,9 +359,15 @@ private void getMoreSplits(JobContext job, Path[] paths,
        // Otherwise add them back to the unprocessed pool. It is likely
        // that they will be combined with other blocks from the
        // same rack later on.
-      if (minSizeNode != 0 && curSplitSize >= minSizeNode) {
+        if (minSizeNode != 0 && curSplitSize >= minSizeNode
            && splitsInNode == 0) {
          // haven't created any split on this machine. so its ok to add a
          // smaller
          // one for parallelism. Otherwise group it in the rack for balanced
          // size
          // create an input split and add it to the splits array
          addCreatedSplit(splits, nodes, validBlocks);
          totalLength -= curSplitSize;
        } else {
          for (OneBlockInfo oneblock : validBlocks) {
            blockToNodes.put(oneblock, oneblock.hosts);
@ -338,6 +378,11 @@ private void getMoreSplits(JobContext job, Path[] paths,
        curSplitSize = 0;
      }
      if(!(numNodes>0 && totalLength>0)) {
        break;
      }
    }
    // if blocks in a rack are below the specified minimum size, then keep them
    // in 'overflow'. After the processing of all racks is complete, these 
    // overflow blocks will be combined into splits.
@ -458,7 +503,6 @@ private void addCreatedSplit(List<InputSplit> splitList,
      offset[i] = validBlocks.get(i).offset;
      length[i] = validBlocks.get(i).length;
    }
     // add this split to the list that is returned
    CombineFileSplit thissplit = new CombineFileSplit(fl, offset, 
                                   length, locations.toArray(new String[0]));
@ -474,7 +518,8 @@ public abstract RecordReader<K, V> createRecordReader(InputSplit split,
  /**
   * information about one file from the File System
   */
-  private static class OneFileInfo {
+  @VisibleForTesting
  static class OneFileInfo {
    private long fileSize;               // size of the file
    private OneBlockInfo[] blocks;       // all blocks in this file
@ -546,6 +591,17 @@ private static class OneFileInfo {
          blocks = blocksList.toArray(new OneBlockInfo[blocksList.size()]);
        }
        populateBlockInfo(blocks, rackToBlocks, blockToNodes, 
                          nodeToBlocks, rackToNodes);
      }
    }
    @VisibleForTesting
    static void populateBlockInfo(OneBlockInfo[] blocks,
                          HashMap<String, List<OneBlockInfo>> rackToBlocks,
                          HashMap<OneBlockInfo, String[]> blockToNodes,
                          HashMap<String, List<OneBlockInfo>> nodeToBlocks,
                          HashMap<String, Set<String>> rackToNodes) {
      for (OneBlockInfo oneblock : blocks) {
        // add this block to the block --> node locations map
        blockToNodes.put(oneblock, oneblock.hosts);
@ -586,7 +642,6 @@ private static class OneFileInfo {
        }
      }
    }
    }
    long getLength() {
      return fileSize;
@ -600,7 +655,8 @@ OneBlockInfo[] getBlocks() {
  /**
   * information about one block from the File System
   */
-  private static class OneBlockInfo {
+  @VisibleForTesting
  static class OneBlockInfo {
    Path onepath;                // name of this file
    long offset;                 // offset in file
    long length;                 // length of this block
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestCombineFileInputFormat.java
@ -20,11 +20,14 @@
 import java.io.IOException;
 import java.io.OutputStream;
 import java.net.URI;
 import java.util.HashMap;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Set;
 import java.util.zip.GZIPOutputStream;
 import java.util.concurrent.TimeoutException;
 import junit.framework.Assert;
 import junit.framework.TestCase;
 import org.apache.hadoop.fs.*;
@ -42,9 +45,13 @@
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.mapreduce.TaskType;
 import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneBlockInfo;
 import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat.OneFileInfo;
 import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
 import org.junit.Test;
 import com.google.common.collect.HashMultiset;
 public class TestCombineFileInputFormat extends TestCase {
  private static final String rack1[] = new String[] {
@ -476,23 +483,23 @@ public void testSplitPlacement() throws Exception {
      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
      assertEquals("host3.rack3.com", fileSplit.getLocations()[0]);
      fileSplit = (CombineFileSplit) splits.get(1);
-      assertEquals(file3.getName(), fileSplit.getPath(0).getName());
+      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
-      assertEquals(2 * BLOCKSIZE, fileSplit.getOffset(0));
+      assertEquals(0, fileSplit.getOffset(0));
      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file4.getName(), fileSplit.getPath(1).getName());
+      assertEquals(file2.getName(), fileSplit.getPath(1).getName());
-      assertEquals(0, fileSplit.getOffset(1));
+      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals("host3.rack3.com", fileSplit.getLocations()[0]);
+      assertEquals("host2.rack2.com", fileSplit.getLocations()[0]);
      fileSplit = (CombineFileSplit) splits.get(2);
      assertEquals(2, fileSplit.getNumPaths());
      assertEquals(1, fileSplit.getLocations().length);
-      assertEquals(file4.getName(), fileSplit.getPath(0).getName());
+      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
-      assertEquals(BLOCKSIZE, fileSplit.getOffset(0));
+      assertEquals(0, fileSplit.getOffset(0));
      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
-      assertEquals(file4.getName(), fileSplit.getPath(1).getName());
+      assertEquals(file3.getName(), fileSplit.getPath(1).getName());
      assertEquals(2 * BLOCKSIZE, fileSplit.getOffset(1));
      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
-      assertEquals("host3.rack3.com", fileSplit.getLocations()[0]);
+      assertEquals("host1.rack1.com", fileSplit.getLocations()[0]);
      // maximum split size is 3 blocks 
      inFormat = new DummyInputFormat();
@ -504,7 +511,7 @@ public void testSplitPlacement() throws Exception {
      for (InputSplit split : splits) {
        System.out.println("File split(Test5): " + split);
      }
-      assertEquals(4, splits.size());
+      assertEquals(3, splits.size());
      fileSplit = (CombineFileSplit) splits.get(0);
      assertEquals(3, fileSplit.getNumPaths());
      assertEquals(1, fileSplit.getLocations().length);
@ -519,32 +526,28 @@ public void testSplitPlacement() throws Exception {
      assertEquals(BLOCKSIZE, fileSplit.getLength(2));
      assertEquals("host3.rack3.com", fileSplit.getLocations()[0]);
      fileSplit = (CombineFileSplit) splits.get(1);
      assertEquals(file4.getName(), fileSplit.getPath(0).getName());
      assertEquals(0, fileSplit.getOffset(0));
      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
      assertEquals(file4.getName(), fileSplit.getPath(1).getName());
      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
      assertEquals(file4.getName(), fileSplit.getPath(2).getName());
      assertEquals( 2 * BLOCKSIZE, fileSplit.getOffset(2));
      assertEquals(BLOCKSIZE, fileSplit.getLength(2));
      assertEquals("host3.rack3.com", fileSplit.getLocations()[0]);
      fileSplit = (CombineFileSplit) splits.get(2);
      assertEquals(2, fileSplit.getNumPaths());
      assertEquals(1, fileSplit.getLocations().length);
      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
      assertEquals(0, fileSplit.getOffset(0));
      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
      assertEquals(file2.getName(), fileSplit.getPath(1).getName());
      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
      assertEquals(file4.getName(), fileSplit.getPath(2).getName());
      assertEquals(0, fileSplit.getOffset(2));
      assertEquals(BLOCKSIZE, fileSplit.getLength(2));
      assertEquals("host2.rack2.com", fileSplit.getLocations()[0]);
-      fileSplit = (CombineFileSplit) splits.get(3);
+      fileSplit = (CombineFileSplit) splits.get(2);
-      assertEquals(1, fileSplit.getNumPaths());
+      assertEquals(3, fileSplit.getNumPaths());
      assertEquals(1, fileSplit.getLocations().length);
      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
      assertEquals(0, fileSplit.getOffset(0));
      assertEquals(BLOCKSIZE, fileSplit.getLength(0));
      assertEquals(file4.getName(), fileSplit.getPath(1).getName());
      assertEquals(BLOCKSIZE, fileSplit.getOffset(1));
      assertEquals(BLOCKSIZE, fileSplit.getLength(1));
      assertEquals(file4.getName(), fileSplit.getPath(2).getName());
      assertEquals(2*BLOCKSIZE, fileSplit.getOffset(2));
      assertEquals(BLOCKSIZE, fileSplit.getLength(2));
      assertEquals("host1.rack1.com", fileSplit.getLocations()[0]);
      // maximum split size is 4 blocks 
@ -713,6 +716,56 @@ private static void writeDataAndSetReplication(FileSystem fileSys, Path name,
    DFSTestUtil.waitReplication(fileSys, name, replication);
  }
  public void testNodeInputSplit() throws IOException, InterruptedException {
    // Regression test for MAPREDUCE-4892. There are 2 nodes with all blocks on 
    // both nodes. The grouping ensures that both nodes get splits instead of 
    // just the first node
    DummyInputFormat inFormat = new DummyInputFormat();
    int numBlocks = 12;
    long totLength = 0;
    long blockSize = 100;
    long maxSize = 200;
    long minSizeNode = 50;
    long minSizeRack = 50;
    String[] locations = { "h1", "h2" };
    String[] racks = new String[0];
    Path path = new Path("hdfs://file");
    OneBlockInfo[] blocks = new OneBlockInfo[numBlocks];
    for(int i=0; i<numBlocks; ++i) {
      blocks[i] = new OneBlockInfo(path, i*blockSize, blockSize, locations, racks);
      totLength += blockSize;
    }
    List<InputSplit> splits = new ArrayList<InputSplit>();
    HashMap<String, Set<String>> rackToNodes = 
                              new HashMap<String, Set<String>>();
    HashMap<String, List<OneBlockInfo>> rackToBlocks = 
                              new HashMap<String, List<OneBlockInfo>>();
    HashMap<OneBlockInfo, String[]> blockToNodes = 
                              new HashMap<OneBlockInfo, String[]>();
    HashMap<String, List<OneBlockInfo>> nodeToBlocks = 
                              new HashMap<String, List<OneBlockInfo>>();
    OneFileInfo.populateBlockInfo(blocks, rackToBlocks, blockToNodes, 
                             nodeToBlocks, rackToNodes);
    inFormat.createSplits(nodeToBlocks, blockToNodes, rackToBlocks, totLength,  
                          maxSize, minSizeNode, minSizeRack, splits);
    int expectedSplitCount = (int)(totLength/maxSize);
    Assert.assertEquals(expectedSplitCount, splits.size());
    HashMultiset<String> nodeSplits = HashMultiset.create();
    for(int i=0; i<expectedSplitCount; ++i) {
      InputSplit inSplit = splits.get(i);
      Assert.assertEquals(maxSize, inSplit.getLength());
      Assert.assertEquals(1, inSplit.getLocations().length);
      nodeSplits.add(inSplit.getLocations()[0]);
    }
    Assert.assertEquals(3, nodeSplits.count(locations[0]));
    Assert.assertEquals(3, nodeSplits.count(locations[1]));
  }
  public void testSplitPlacementForCompressedFiles() throws Exception {
    MiniDFSCluster dfs = null;
    FileSystem fileSys = null;
@ -889,24 +942,24 @@ public void testSplitPlacementForCompressedFiles() throws Exception {
      assertEquals(f3.getLen(), fileSplit.getLength(0));
      assertEquals(hosts3[0], fileSplit.getLocations()[0]); // should be on r3
      fileSplit = (CombineFileSplit) splits.get(1);
      assertEquals(file4.getName(), fileSplit.getPath(0).getName());
      assertEquals(0, fileSplit.getOffset(0));
      assertEquals(f4.getLen(), fileSplit.getLength(0));
      assertEquals(hosts3[0], fileSplit.getLocations()[0]); // should be on r3
      fileSplit = (CombineFileSplit) splits.get(2);
      assertEquals(1, fileSplit.getNumPaths());
      assertEquals(1, fileSplit.getLocations().length);
      assertEquals(file2.getName(), fileSplit.getPath(0).getName());
      assertEquals(0, fileSplit.getOffset(0));
      assertEquals(f2.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r2
+      assertEquals(hosts2[0], fileSplit.getLocations()[0]); // should be on r3
-      fileSplit = (CombineFileSplit) splits.get(3);
+      fileSplit = (CombineFileSplit) splits.get(2);
      assertEquals(1, fileSplit.getNumPaths());
      assertEquals(1, fileSplit.getLocations().length);
      assertEquals(file1.getName(), fileSplit.getPath(0).getName());
      assertEquals(0, fileSplit.getOffset(0));
      assertEquals(f1.getLen(), fileSplit.getLength(0));
-      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r1
+      assertEquals(hosts1[0], fileSplit.getLocations()[0]); // should be on r2
      fileSplit = (CombineFileSplit) splits.get(3);
      assertEquals(1, fileSplit.getNumPaths());
      assertEquals(1, fileSplit.getLocations().length);
      assertEquals(file4.getName(), fileSplit.getPath(0).getName());
      assertEquals(0, fileSplit.getOffset(0));
      assertEquals(f4.getLen(), fileSplit.getLength(0));
      assertEquals(hosts3[0], fileSplit.getLocations()[0]); // should be on r1
      // maximum split size is twice file1's length
      inFormat = new DummyInputFormat();