MAPREDUCE-5230. Bring back NLineInputFormat.createFileSplit for binary compatibility with mapred in 1.x. Contributed by Mayank Bansal.

svn merge --ignore-ancestry -c 1485906 ../../trunk/


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1485908 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-05-24 00:42:53 +00:00
parent a6c4b42353
commit 004a9d5567
2 changed files with 21 additions and 0 deletions

View File

@ -93,6 +93,9 @@ Release 2.0.5-beta - UNRELEASED
MAPREDUCE-5246. Specify application-type at the time of job submission after
YARN-563. (Mayank Bansal via vinodkv)
MAPREDUCE-5230. Bring back NLineInputFormat.createFileSplit for binary
compatibility with mapred in 1.x (Mayank Bansal via vinodkv)
OPTIMIZATIONS
MAPREDUCE-4974. Optimising the LineRecordReader initialize() method

View File

@ -24,6 +24,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
@ -90,4 +91,21 @@ public InputSplit[] getSplits(JobConf job, int numSplits)
/**
 * Loads the lines-per-map setting from the job configuration.
 * The value of "mapreduce.input.lineinputformat.linespermap" is stored
 * in N; 1 is passed to getInt as the fallback value.
 * @param conf job configuration to read the setting from
 */
public void configure(JobConf conf) {
  final int linesPerMap =
      conf.getInt("mapreduce.input.lineinputformat.linespermap", 1);
  N = linesPerMap;
}
/**
 * Creates the FileSplit for one group of lines, with its upper limit
 * pulled back by one character.
 * NLineInputFormat uses LineRecordReader, which always reads (and
 * consumes) at least one character past the upper boundary of its
 * split. Moving the upper limit of every split back by one character
 * makes sure that each mapper gets N lines.
 * @param fileName Path of file
 * @param begin the position of the first byte in the file to process
 * @param length number of bytes in InputSplit
 * @return FileSplit built from (begin, length - 1) when begin is 0,
 *         otherwise from (begin - 1, length)
 */
protected static FileSplit createFileSplit(Path fileName, long begin, long length) {
  final String[] empty = new String[] {};
  if (begin == 0) {
    // Nothing precedes the first split, so keep its start and shorten it.
    return new FileSplit(fileName, begin, length - 1, empty);
  }
  // Later splits keep their length but start one character earlier.
  return new FileSplit(fileName, begin - 1, length, empty);
}
}