MAPREDUCE-3252. Fix map tasks to not rewrite data an extra time when map output fits in spill buffer. Contributed by Todd Lipcon.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1188424 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a503755990
commit
43553df4e3
|
@ -1750,6 +1750,9 @@ Release 0.23.0 - Unreleased
|
||||||
MAPREDUCE-3249. Ensure shuffle-port is correctly used duringMR AM recovery.
|
MAPREDUCE-3249. Ensure shuffle-port is correctly used duringMR AM recovery.
|
||||||
(vinodkv via acmurthy)
|
(vinodkv via acmurthy)
|
||||||
|
|
||||||
|
MAPREDUCE-3252. Fix map tasks to not rewrite data an extra time when
|
||||||
|
map output fits in spill buffer. (todd)
|
||||||
|
|
||||||
Release 0.22.0 - Unreleased
|
Release 0.22.0 - Unreleased
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -21,6 +21,7 @@ package org.apache.hadoop.mapred;
|
||||||
import java.io.DataInput;
|
import java.io.DataInput;
|
||||||
import java.io.DataOutput;
|
import java.io.DataOutput;
|
||||||
import java.io.DataOutputStream;
|
import java.io.DataOutputStream;
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
|
@ -36,8 +37,10 @@ import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.FileSystem.Statistics;
|
import org.apache.hadoop.fs.FileSystem.Statistics;
|
||||||
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
import org.apache.hadoop.fs.LocalFileSystem;
|
import org.apache.hadoop.fs.LocalFileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.RawLocalFileSystem;
|
||||||
import org.apache.hadoop.io.DataInputBuffer;
|
import org.apache.hadoop.io.DataInputBuffer;
|
||||||
import org.apache.hadoop.io.RawComparator;
|
import org.apache.hadoop.io.RawComparator;
|
||||||
import org.apache.hadoop.io.SequenceFile;
|
import org.apache.hadoop.io.SequenceFile;
|
||||||
|
@ -1727,10 +1730,10 @@ class MapTask extends Task {
|
||||||
finalOutFileSize += rfs.getFileStatus(filename[i]).getLen();
|
finalOutFileSize += rfs.getFileStatus(filename[i]).getLen();
|
||||||
}
|
}
|
||||||
if (numSpills == 1) { //the spill is the final output
|
if (numSpills == 1) { //the spill is the final output
|
||||||
rfs.rename(filename[0],
|
sameVolRename(filename[0],
|
||||||
mapOutputFile.getOutputFileForWriteInVolume(filename[0]));
|
mapOutputFile.getOutputFileForWriteInVolume(filename[0]));
|
||||||
if (indexCacheList.size() == 0) {
|
if (indexCacheList.size() == 0) {
|
||||||
rfs.rename(mapOutputFile.getSpillIndexFile(0),
|
sameVolRename(mapOutputFile.getSpillIndexFile(0),
|
||||||
mapOutputFile.getOutputIndexFileForWriteInVolume(filename[0]));
|
mapOutputFile.getOutputIndexFileForWriteInVolume(filename[0]));
|
||||||
} else {
|
} else {
|
||||||
indexCacheList.get(0).writeToFile(
|
indexCacheList.get(0).writeToFile(
|
||||||
|
@ -1847,7 +1850,29 @@ class MapTask extends Task {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Rename srcPath to dstPath on the same volume. This is the same
|
||||||
|
* as RawLocalFileSystem's rename method, except that it will not
|
||||||
|
* fall back to a copy, and it will create the target directory
|
||||||
|
* if it doesn't exist.
|
||||||
|
*/
|
||||||
|
private void sameVolRename(Path srcPath,
|
||||||
|
Path dstPath) throws IOException {
|
||||||
|
RawLocalFileSystem rfs = (RawLocalFileSystem)this.rfs;
|
||||||
|
File src = rfs.pathToFile(srcPath);
|
||||||
|
File dst = rfs.pathToFile(dstPath);
|
||||||
|
if (!dst.getParentFile().exists()) {
|
||||||
|
if (!dst.getParentFile().mkdirs()) {
|
||||||
|
throw new IOException("Unable to rename " + src + " to "
|
||||||
|
+ dst + ": couldn't create parent directory");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!src.renameTo(dst)) {
|
||||||
|
throw new IOException("Unable to rename " + src + " to " + dst);
|
||||||
|
}
|
||||||
|
}
|
||||||
} // MapOutputBuffer
|
} // MapOutputBuffer
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue