HDFS-8722. Optimize datanode writes for small writes and flushes. Contributed by Kihwal Lee

(cherry picked from commit 59388a8015)
This commit is contained in:
Kihwal Lee 2015-07-14 14:07:38 -05:00
parent 471037883f
commit b0a2dc9c84
2 changed files with 24 additions and 12 deletions

View File

@ -717,6 +717,8 @@ Release 2.7.2 - UNRELEASED
OPTIMIZATIONS OPTIMIZATIONS
HDFS-8722. Optimize datanode writes for small writes and flushes (kihwal)
BUG FIXES BUG FIXES
Release 2.7.1 - 2015-07-06 Release 2.7.1 - 2015-07-06

View File

@ -598,14 +598,19 @@ class BlockReceiver implements Closeable {
// bytes should be skipped when writing the data and checksum // bytes should be skipped when writing the data and checksum
// buffers out to disk. // buffers out to disk.
long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum; long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;
long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk;
boolean alignedOnDisk = partialChunkSizeOnDisk == 0; boolean alignedOnDisk = partialChunkSizeOnDisk == 0;
boolean alignedInPacket = firstByteInBlock % bytesPerChecksum == 0; boolean alignedInPacket = firstByteInBlock % bytesPerChecksum == 0;
// Since data is always appended, not overwritten, partial CRC // If the end of the on-disk data is not chunk-aligned, the last
// recalculation is necessary if the on-disk data is not chunk- // checksum needs to be overwritten.
// aligned, regardless of whether the beginning of the data in boolean overwriteLastCrc = !alignedOnDisk && !shouldNotWriteChecksum;
// the packet is chunk-aligned. // If the starting offset of the packet data is at the last chunk
boolean doPartialCrc = !alignedOnDisk && !shouldNotWriteChecksum; // boundary of the data on disk, the partial checksum recalculation
// can be skipped and the checksum supplied by the client can be used
// instead. This reduces disk reads and cpu load.
boolean doCrcRecalc = overwriteLastCrc &&
(lastChunkBoundary != firstByteInBlock);
// If this is a partial chunk, then verify that this is the only // If this is a partial chunk, then verify that this is the only
// chunk in the packet. If the starting offset is not chunk // chunk in the packet. If the starting offset is not chunk
@ -621,9 +626,10 @@ class BlockReceiver implements Closeable {
// If the last portion of the block file is not a full chunk, // If the last portion of the block file is not a full chunk,
// then read in pre-existing partial data chunk and recalculate // then read in pre-existing partial data chunk and recalculate
// the checksum so that the checksum calculation can continue // the checksum so that the checksum calculation can continue
// from the right state. // from the right state. If the client provided the checksum for
// the whole chunk, this is not necessary.
Checksum partialCrc = null; Checksum partialCrc = null;
if (doPartialCrc) { if (doCrcRecalc) {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("receivePacket for " + block LOG.debug("receivePacket for " + block
+ ": previous write did not end at the chunk boundary." + ": previous write did not end at the chunk boundary."
@ -659,8 +665,15 @@ class BlockReceiver implements Closeable {
int skip = 0; int skip = 0;
byte[] crcBytes = null; byte[] crcBytes = null;
// First, overwrite the partial crc at the end, if necessary. // First, prepare to overwrite the partial crc at the end.
if (doPartialCrc) { // not chunk-aligned on disk if (overwriteLastCrc) { // not chunk-aligned on disk
// prepare to overwrite last checksum
adjustCrcFilePosition();
}
// The CRC was recalculated for the last partial chunk. Update the
// CRC by reading the rest of the chunk, then write it out.
if (doCrcRecalc) {
// Calculate new crc for this chunk. // Calculate new crc for this chunk.
int bytesToReadForRecalc = int bytesToReadForRecalc =
(int)(bytesPerChecksum - partialChunkSizeOnDisk); (int)(bytesPerChecksum - partialChunkSizeOnDisk);
@ -673,8 +686,6 @@ class BlockReceiver implements Closeable {
byte[] buf = FSOutputSummer.convertToByteStream(partialCrc, byte[] buf = FSOutputSummer.convertToByteStream(partialCrc,
checksumSize); checksumSize);
crcBytes = copyLastChunkChecksum(buf, checksumSize, buf.length); crcBytes = copyLastChunkChecksum(buf, checksumSize, buf.length);
// prepare to overwrite last checksum
adjustCrcFilePosition();
checksumOut.write(buf); checksumOut.write(buf);
if(LOG.isDebugEnabled()) { if(LOG.isDebugEnabled()) {
LOG.debug("Writing out partial crc for data len " + len + LOG.debug("Writing out partial crc for data len " + len +
@ -687,7 +698,6 @@ class BlockReceiver implements Closeable {
// boundary. The checksum after the boundary was already counted // boundary. The checksum after the boundary was already counted
// above. Only count the number of checksums skipped up to the // above. Only count the number of checksums skipped up to the
// boundary here. // boundary here.
long lastChunkBoundary = onDiskLen - (onDiskLen%bytesPerChecksum);
long skippedDataBytes = lastChunkBoundary - firstByteInBlock; long skippedDataBytes = lastChunkBoundary - firstByteInBlock;
if (skippedDataBytes > 0) { if (skippedDataBytes > 0) {