svn merge -c 1468232 FIXES: MAPREDUCE-4974. Optimising the LineRecordReader initialize() method (Gelesh via bobby)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1468235 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 5a98de170c
commit a7fba8fcd7
@@ -45,6 +45,9 @@ Release 2.0.5-beta - UNRELEASED

   OPTIMIZATIONS

+    MAPREDUCE-4974. Optimising the LineRecordReader initialize() method
+    (Gelesh via bobby)
+
   BUG FIXES

     MAPREDUCE-4671. AM does not tell the RM about container requests which are
@@ -52,7 +52,6 @@ public class LineRecordReader extends RecordReader&lt;LongWritable, Text&gt; {
   public static final String MAX_LINE_LENGTH =
     "mapreduce.input.linerecordreader.line.maxlength";

-  private CompressionCodecFactory compressionCodecs = null;
   private long start;
   private long pos;
   private long end;
@@ -60,9 +59,9 @@ public class LineRecordReader extends RecordReader&lt;LongWritable, Text&gt; {
   private FSDataInputStream fileIn;
   private Seekable filePosition;
   private int maxLineLength;
-  private LongWritable key = null;
-  private Text value = null;
-  private CompressionCodec codec;
+  private LongWritable key;
+  private Text value;
+  private boolean isCompressedInput;
   private Decompressor decompressor;
   private byte[] recordDelimiterBytes;
@@ -81,13 +80,14 @@ public class LineRecordReader extends RecordReader&lt;LongWritable, Text&gt; {
     start = split.getStart();
     end = start + split.getLength();
     final Path file = split.getPath();
-    compressionCodecs = new CompressionCodecFactory(job);
-    codec = compressionCodecs.getCodec(file);

     // open the file and seek to the start of the split
     final FileSystem fs = file.getFileSystem(job);
     fileIn = fs.open(file);
-    if (isCompressedInput()) {
+
+    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
+    if (null!=codec) {
+      isCompressedInput = true;
       decompressor = CodecPool.getDecompressor(codec);
       if (codec instanceof SplittableCompressionCodec) {
         final SplitCompressionInputStream cIn =
@@ -132,19 +132,16 @@ public class LineRecordReader extends RecordReader&lt;LongWritable, Text&gt; {
     this.pos = start;
   }

-  private boolean isCompressedInput() {
-    return (codec != null);
-  }
-
   private int maxBytesToConsume(long pos) {
-    return isCompressedInput()
+    return isCompressedInput
       ? Integer.MAX_VALUE
       : (int) Math.min(Integer.MAX_VALUE, end - pos);
   }

   private long getFilePosition() throws IOException {
     long retVal;
-    if (isCompressedInput() && null != filePosition) {
+    if (isCompressedInput && null != filePosition) {
       retVal = filePosition.getPos();
     } else {
       retVal = pos;
@@ -166,9 +163,6 @@ public class LineRecordReader extends RecordReader&lt;LongWritable, Text&gt; {
     while (getFilePosition() &lt;= end) {
       newSize = in.readLine(value, maxLineLength,
           Math.max(maxBytesToConsume(pos), maxLineLength));
       if (newSize == 0) {
         break;
       }
       pos += newSize;
       if (newSize &lt; maxLineLength) {
         break;
|
Loading…
Reference in New Issue