HDFS-8722. Optimize datanode writes for small writes and flushes. Contributed by Kihwal Lee
This commit is contained in:
parent
b7fb6ec451
commit
59388a8015
|
@ -1053,6 +1053,8 @@ Release 2.7.2 - UNRELEASED
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
|
HDFS-8722. Optimize datanode writes for small writes and flushes (kihwal)
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
||||||
Release 2.7.1 - 2015-07-06
|
Release 2.7.1 - 2015-07-06
|
||||||
|
|
|
@ -598,14 +598,19 @@ class BlockReceiver implements Closeable {
|
||||||
// bytes should be skipped when writing the data and checksum
|
// bytes should be skipped when writing the data and checksum
|
||||||
// buffers out to disk.
|
// buffers out to disk.
|
||||||
long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;
|
long partialChunkSizeOnDisk = onDiskLen % bytesPerChecksum;
|
||||||
|
long lastChunkBoundary = onDiskLen - partialChunkSizeOnDisk;
|
||||||
boolean alignedOnDisk = partialChunkSizeOnDisk == 0;
|
boolean alignedOnDisk = partialChunkSizeOnDisk == 0;
|
||||||
boolean alignedInPacket = firstByteInBlock % bytesPerChecksum == 0;
|
boolean alignedInPacket = firstByteInBlock % bytesPerChecksum == 0;
|
||||||
|
|
||||||
// Since data is always appended, not overwritten, partial CRC
|
// If the end of the on-disk data is not chunk-aligned, the last
|
||||||
// recalculation is necessary if the on-disk data is not chunk-
|
// checksum needs to be overwritten.
|
||||||
// aligned, regardless of whether the beginning of the data in
|
boolean overwriteLastCrc = !alignedOnDisk && !shouldNotWriteChecksum;
|
||||||
// the packet is chunk-aligned.
|
// If the starting offset of the packat data is at the last chunk
|
||||||
boolean doPartialCrc = !alignedOnDisk && !shouldNotWriteChecksum;
|
// boundary of the data on disk, the partial checksum recalculation
|
||||||
|
// can be skipped and the checksum supplied by the client can be used
|
||||||
|
// instead. This reduces disk reads and cpu load.
|
||||||
|
boolean doCrcRecalc = overwriteLastCrc &&
|
||||||
|
(lastChunkBoundary != firstByteInBlock);
|
||||||
|
|
||||||
// If this is a partial chunk, then verify that this is the only
|
// If this is a partial chunk, then verify that this is the only
|
||||||
// chunk in the packet. If the starting offset is not chunk
|
// chunk in the packet. If the starting offset is not chunk
|
||||||
|
@ -621,9 +626,10 @@ class BlockReceiver implements Closeable {
|
||||||
// If the last portion of the block file is not a full chunk,
|
// If the last portion of the block file is not a full chunk,
|
||||||
// then read in pre-existing partial data chunk and recalculate
|
// then read in pre-existing partial data chunk and recalculate
|
||||||
// the checksum so that the checksum calculation can continue
|
// the checksum so that the checksum calculation can continue
|
||||||
// from the right state.
|
// from the right state. If the client provided the checksum for
|
||||||
|
// the whole chunk, this is not necessary.
|
||||||
Checksum partialCrc = null;
|
Checksum partialCrc = null;
|
||||||
if (doPartialCrc) {
|
if (doCrcRecalc) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("receivePacket for " + block
|
LOG.debug("receivePacket for " + block
|
||||||
+ ": previous write did not end at the chunk boundary."
|
+ ": previous write did not end at the chunk boundary."
|
||||||
|
@ -659,8 +665,15 @@ class BlockReceiver implements Closeable {
|
||||||
int skip = 0;
|
int skip = 0;
|
||||||
byte[] crcBytes = null;
|
byte[] crcBytes = null;
|
||||||
|
|
||||||
// First, overwrite the partial crc at the end, if necessary.
|
// First, prepare to overwrite the partial crc at the end.
|
||||||
if (doPartialCrc) { // not chunk-aligned on disk
|
if (overwriteLastCrc) { // not chunk-aligned on disk
|
||||||
|
// prepare to overwrite last checksum
|
||||||
|
adjustCrcFilePosition();
|
||||||
|
}
|
||||||
|
|
||||||
|
// The CRC was recalculated for the last partial chunk. Update the
|
||||||
|
// CRC by reading the rest of the chunk, then write it out.
|
||||||
|
if (doCrcRecalc) {
|
||||||
// Calculate new crc for this chunk.
|
// Calculate new crc for this chunk.
|
||||||
int bytesToReadForRecalc =
|
int bytesToReadForRecalc =
|
||||||
(int)(bytesPerChecksum - partialChunkSizeOnDisk);
|
(int)(bytesPerChecksum - partialChunkSizeOnDisk);
|
||||||
|
@ -673,8 +686,6 @@ class BlockReceiver implements Closeable {
|
||||||
byte[] buf = FSOutputSummer.convertToByteStream(partialCrc,
|
byte[] buf = FSOutputSummer.convertToByteStream(partialCrc,
|
||||||
checksumSize);
|
checksumSize);
|
||||||
crcBytes = copyLastChunkChecksum(buf, checksumSize, buf.length);
|
crcBytes = copyLastChunkChecksum(buf, checksumSize, buf.length);
|
||||||
// prepare to overwrite last checksum
|
|
||||||
adjustCrcFilePosition();
|
|
||||||
checksumOut.write(buf);
|
checksumOut.write(buf);
|
||||||
if(LOG.isDebugEnabled()) {
|
if(LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Writing out partial crc for data len " + len +
|
LOG.debug("Writing out partial crc for data len " + len +
|
||||||
|
@ -687,7 +698,6 @@ class BlockReceiver implements Closeable {
|
||||||
// boundary. The checksum after the boundary was already counted
|
// boundary. The checksum after the boundary was already counted
|
||||||
// above. Only count the number of checksums skipped up to the
|
// above. Only count the number of checksums skipped up to the
|
||||||
// boundary here.
|
// boundary here.
|
||||||
long lastChunkBoundary = onDiskLen - (onDiskLen%bytesPerChecksum);
|
|
||||||
long skippedDataBytes = lastChunkBoundary - firstByteInBlock;
|
long skippedDataBytes = lastChunkBoundary - firstByteInBlock;
|
||||||
|
|
||||||
if (skippedDataBytes > 0) {
|
if (skippedDataBytes > 0) {
|
||||||
|
|
Loading…
Reference in New Issue