HDFS-8722. Optimize datanode writes for small writes and flushes. Contributed by Kihwal Lee

This commit is contained in:
Kihwal Lee 2015-07-14 14:04:06 -05:00
parent b7fb6ec451
commit 59388a8015
2 changed files with 24 additions and 12 deletions

View File

@ -1053,6 +1053,8 @@ Release 2.7.2 - UNRELEASED
OPTIMIZATIONS
HDFS-8722. Optimize datanode writes for small writes and flushes (kihwal)
BUG FIXES
Release 2.7.1 - 2015-07-06

View File

@ -598,14 +598,19 @@ class BlockReceiver implements Closeable {
// bytes should be skipped when writing the data and checksum
// buffers out to disk.
long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;
long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk;
boolean alignedOnDisk = partialChunkSizeOnDisk == 0;
boolean alignedInPacket = firstByteInBlock % bytesPerChecksum == 0;
// Since data is always appended, not overwritten, partial CRC
// recalculation is necessary if the on-disk data is not chunk-
// aligned, regardless of whether the beginning of the data in
// the packet is chunk-aligned.
boolean doPartialCrc = !alignedOnDisk && !shouldNotWriteChecksum;
// If the end of the on-disk data is not chunk-aligned, the last
// checksum needs to be overwritten.
boolean overwriteLastCrc = !alignedOnDisk && !shouldNotWriteChecksum;
// If the starting offset of the packat data is at the last chunk
// boundary of the data on disk, the partial checksum recalculation
// can be skipped and the checksum supplied by the client can be used
// instead. This reduces disk reads and cpu load.
boolean doCrcRecalc = overwriteLastCrc &&
(lastChunkBoundary != firstByteInBlock);
// If this is a partial chunk, then verify that this is the only
// chunk in the packet. If the starting offset is not chunk
@ -621,9 +626,10 @@ class BlockReceiver implements Closeable {
// If the last portion of the block file is not a full chunk,
// then read in pre-existing partial data chunk and recalculate
// the checksum so that the checksum calculation can continue
// from the right state.
// from the right state. If the client provided the checksum for
// the whole chunk, this is not necessary.
Checksum partialCrc = null;
if (doPartialCrc) {
if (doCrcRecalc) {
if (LOG.isDebugEnabled()) {
LOG.debug("receivePacket for " + block
+ ": previous write did not end at the chunk boundary."
@ -659,8 +665,15 @@ class BlockReceiver implements Closeable {
int skip = 0;
byte[] crcBytes = null;
// First, overwrite the partial crc at the end, if necessary.
if (doPartialCrc) { // not chunk-aligned on disk
// First, prepare to overwrite the partial crc at the end.
if (overwriteLastCrc) { // not chunk-aligned on disk
// prepare to overwrite last checksum
adjustCrcFilePosition();
}
// The CRC was recalculated for the last partial chunk. Update the
// CRC by reading the rest of the chunk, then write it out.
if (doCrcRecalc) {
// Calculate new crc for this chunk.
int bytesToReadForRecalc =
(int)(bytesPerChecksum - partialChunkSizeOnDisk);
@ -673,8 +686,6 @@ class BlockReceiver implements Closeable {
byte[] buf = FSOutputSummer.convertToByteStream(partialCrc,
checksumSize);
crcBytes = copyLastChunkChecksum(buf, checksumSize, buf.length);
// prepare to overwrite last checksum
adjustCrcFilePosition();
checksumOut.write(buf);
if(LOG.isDebugEnabled()) {
LOG.debug("Writing out partial crc for data len " + len +
@ -687,7 +698,6 @@ class BlockReceiver implements Closeable {
// boundary. The checksum after the boundary was already counted
// above. Only count the number of checksums skipped up to the
// boundary here.
long lastChunkBoundary = onDiskLen - (onDiskLen%bytesPerChecksum);
long skippedDataBytes = lastChunkBoundary - firstByteInBlock;
if (skippedDataBytes > 0) {