MAPREDUCE-5230. Bring back NLineInputFormat.createFileSplit for binary compatibility with mapred in 1.x. Contributed by Mayank Bansal.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1485906 13f79535-47bb-0310-9956-ffa450edef68
2013-05-24 00:42:22 +00:00 · 2013-05-24 00:42:22 +00:00 · a791527dd1
parent 61885df2fa
commit a791527dd1
2 changed files with 21 additions and 0 deletions
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@ -257,6 +257,9 @@ Release 2.0.5-beta - UNRELEASED
    MAPREDUCE-5246. Specify application-type at the time of job submission after
    YARN-563. (Mayank Bansal via vinodkv)
    MAPREDUCE-5230. Bring back NLineInputFormat.createFileSplit for binary
    compatibility with mapred in 1.x (Mayank Bansal via vinodkv)
  OPTIMIZATIONS
    MAPREDUCE-4974. Optimising the LineRecordReader initialize() method 
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/NLineInputFormat.java
@ -24,6 +24,7 @@ import java.util.ArrayList;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.FileInputFormat;
@ -90,4 +91,21 @@ public class NLineInputFormat extends FileInputFormat<LongWritable, Text>
  /**
   * Loads the lines-per-split setting from the job configuration into
   * {@code N}.
   *
   * @param conf  job configuration queried for
   *              "mapreduce.input.lineinputformat.linespermap"; 1 is
   *              passed to the lookup as the default value
   */
  public void configure(JobConf conf) {
    final int linesPerMap =
        conf.getInt("mapreduce.input.lineinputformat.linespermap", 1);
    N = linesPerMap;
  }
  /**
   * Builds the {@link FileSplit} covering one group of lines.
   *
   * NLineInputFormat uses LineRecordReader, which always reads
   * (and consumes) at least one character out of its upper split
   * boundary. To make sure that each mapper gets N lines, the upper
   * limit of every split is moved back by one character here:
   * <ul>
   *   <li>a split starting at offset 0 keeps its start and is
   *       shortened by one byte;</li>
   *   <li>any other split keeps its length and starts one byte
   *       earlier.</li>
   * </ul>
   * The returned split is created with an empty host list.
   *
   * @param fileName  Path of file
   * @param begin  the position of the first byte in the file to process
   * @param length  number of bytes in InputSplit
   * @return  FileSplit with the adjusted boundaries
   */
  protected static FileSplit createFileSplit(Path fileName, long begin, long length) {
    final String[] noHosts = new String[] {};

    if (begin == 0) {
      // The start cannot move before the beginning of the file, so trim the length.
      return new FileSplit(fileName, begin, length - 1, noHosts);
    }

    // Shift the whole window back by one byte; the length is unchanged.
    return new FileSplit(fileName, begin - 1, length, noHosts);
  }
 }