Merged 1245118 from trunk for HDFS-2655.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1245122 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
91ebbeefb4
commit
ecacbc94a3
|
@ -19,6 +19,9 @@ Release 0.23.2 - UNRELEASED
|
||||||
HDFS-2931. Switch DataNode's BlockVolumeChoosingPolicy to private-audience.
|
HDFS-2931. Switch DataNode's BlockVolumeChoosingPolicy to private-audience.
|
||||||
(harsh via szetszwo)
|
(harsh via szetszwo)
|
||||||
|
|
||||||
|
HDFS-2655. BlockReaderLocal#skip performs unnecessary IO. (Brandon Li
|
||||||
|
via jitendra)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
|
@ -369,26 +369,68 @@ class BlockReaderLocal implements BlockReader {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("skip " + n);
|
LOG.debug("skip " + n);
|
||||||
}
|
}
|
||||||
|
if (n <= 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
if (!verifyChecksum) {
|
if (!verifyChecksum) {
|
||||||
return dataIn.skip(n);
|
return dataIn.skip(n);
|
||||||
}
|
}
|
||||||
// Skip by reading the data so we stay in sync with checksums.
|
|
||||||
// This could be implemented more efficiently in the future to
|
// caller made sure newPosition is not beyond EOF.
|
||||||
// skip to the beginning of the appropriate checksum chunk
|
int remaining = dataBuff.remaining();
|
||||||
// and then only read to the middle of that chunk.
|
int position = dataBuff.position();
|
||||||
|
int newPosition = position + (int)n;
|
||||||
|
|
||||||
|
// if the new offset is already read into dataBuff, just reposition
|
||||||
|
if (n <= remaining) {
|
||||||
|
assert offsetFromChunkBoundary == 0;
|
||||||
|
dataBuff.position(newPosition);
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
// for small gap, read through to keep the data/checksum in sync
|
||||||
|
if (n - remaining <= bytesPerChecksum) {
|
||||||
|
dataBuff.position(position + remaining);
|
||||||
|
if (skipBuf == null) {
|
||||||
|
skipBuf = new byte[bytesPerChecksum];
|
||||||
|
}
|
||||||
|
int ret = read(skipBuf, 0, (int)(n - remaining));
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
// optimize for big gap: discard the current buffer, skip to
|
||||||
|
// the beginning of the appropriate checksum chunk and then
|
||||||
|
// read to the middle of that chunk to be in sync with checksums.
|
||||||
|
this.offsetFromChunkBoundary = newPosition % bytesPerChecksum;
|
||||||
|
long toskip = n - remaining - this.offsetFromChunkBoundary;
|
||||||
|
|
||||||
|
dataBuff.clear();
|
||||||
|
checksumBuff.clear();
|
||||||
|
|
||||||
|
long dataSkipped = dataIn.skip(toskip);
|
||||||
|
if (dataSkipped != toskip) {
|
||||||
|
throw new IOException("skip error in data input stream");
|
||||||
|
}
|
||||||
|
long checkSumOffset = (dataSkipped / bytesPerChecksum) * checksumSize;
|
||||||
|
if (checkSumOffset > 0) {
|
||||||
|
long skipped = checksumIn.skip(checkSumOffset);
|
||||||
|
if (skipped != checkSumOffset) {
|
||||||
|
throw new IOException("skip error in checksum input stream");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// read into the middle of the chunk
|
||||||
if (skipBuf == null) {
|
if (skipBuf == null) {
|
||||||
skipBuf = new byte[bytesPerChecksum];
|
skipBuf = new byte[bytesPerChecksum];
|
||||||
}
|
}
|
||||||
long nSkipped = 0;
|
assert skipBuf.length == bytesPerChecksum;
|
||||||
while ( nSkipped < n ) {
|
assert this.offsetFromChunkBoundary < bytesPerChecksum;
|
||||||
int toSkip = (int)Math.min(n-nSkipped, skipBuf.length);
|
int ret = read(skipBuf, 0, this.offsetFromChunkBoundary);
|
||||||
int ret = read(skipBuf, 0, toSkip);
|
if (ret == -1) { // EOS
|
||||||
if ( ret <= 0 ) {
|
return toskip;
|
||||||
return nSkipped;
|
} else {
|
||||||
}
|
return (toskip + ret);
|
||||||
nSkipped += ret;
|
|
||||||
}
|
}
|
||||||
return nSkipped;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -238,7 +238,53 @@ public class TestShortCircuitLocalRead {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSkipWithVerifyChecksum() throws IOException {
|
||||||
|
int size = blockSize;
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
|
||||||
|
conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
|
||||||
|
conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
|
||||||
|
UserGroupInformation.getCurrentUser().getShortUserName());
|
||||||
|
if (simulatedStorage) {
|
||||||
|
conf.setBoolean(SimulatedFSDataset.CONFIG_PROPERTY_SIMULATED, true);
|
||||||
|
}
|
||||||
|
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
|
||||||
|
.format(true).build();
|
||||||
|
FileSystem fs = cluster.getFileSystem();
|
||||||
|
try {
|
||||||
|
// check that / exists
|
||||||
|
Path path = new Path("/");
|
||||||
|
assertTrue("/ should be a directory", fs.getFileStatus(path)
|
||||||
|
.isDirectory() == true);
|
||||||
|
|
||||||
|
byte[] fileData = AppendTestUtil.randomBytes(seed, size*3);
|
||||||
|
// create a new file in home directory. Do not close it.
|
||||||
|
Path file1 = new Path("filelocal.dat");
|
||||||
|
FSDataOutputStream stm = createFile(fs, file1, 1);
|
||||||
|
|
||||||
|
// write to file
|
||||||
|
stm.write(fileData);
|
||||||
|
stm.close();
|
||||||
|
|
||||||
|
// now test the skip function
|
||||||
|
FSDataInputStream instm = fs.open(file1);
|
||||||
|
byte[] actual = new byte[fileData.length];
|
||||||
|
// read something from the block first, otherwise BlockReaderLocal.skip()
|
||||||
|
// will not be invoked
|
||||||
|
int nread = instm.read(actual, 0, 3);
|
||||||
|
long skipped = 2*size+3;
|
||||||
|
instm.seek(skipped);
|
||||||
|
nread = instm.read(actual, (int)(skipped + nread), 3);
|
||||||
|
instm.close();
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
fs.close();
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test to run benchmarks between shortcircuit read vs regular read with
|
* Test to run benchmarks between shortcircuit read vs regular read with
|
||||||
* specified number of threads simultaneously reading.
|
* specified number of threads simultaneously reading.
|
||||||
|
|
Loading…
Reference in New Issue