From c4cbb419a35043c3397476d32b8197cbacd21863 Mon Sep 17 00:00:00 2001
From: Allan Yang
Date: Fri, 28 Apr 2017 08:48:00 +0800
Subject: [PATCH] HBASE-17757 Unify blocksize after encoding to decrease memory fragment

Signed-off-by: anoopsamjohn
---
 .../hadoop/hbase/io/hfile/HFileBlock.java       | 20 +++++++++++++++++++
 .../hbase/io/hfile/HFileWriterImpl.java         | 20 +++++++++++++++----
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 066a9fa6d5c..445dc86862f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -871,6 +871,10 @@ public class HFileBlock implements Cacheable {
     // includes the header size also.
     private int unencodedDataSizeWritten;
 
+    // Size of actual data being written, considering the block encoding. This
+    // includes the header size also.
+    private int encodedDataSizeWritten;
+
     /**
      * Bytes to be written to the file system, including the header. Compressed
      * if compression is turned on. It also includes the checksum data that
@@ -958,6 +962,7 @@ public class HFileBlock implements Cacheable {
         this.dataBlockEncoder.startBlockEncoding(dataBlockEncodingCtx, userDataStream);
       }
       this.unencodedDataSizeWritten = 0;
+      this.encodedDataSizeWritten = 0;
       return userDataStream;
     }
 
@@ -968,8 +973,10 @@
      */
     void write(Cell cell) throws IOException{
       expectState(State.WRITING);
+      int posBeforeEncode = this.userDataStream.size();
       this.unencodedDataSizeWritten +=
           this.dataBlockEncoder.encode(cell, dataBlockEncodingCtx, this.userDataStream);
+      this.encodedDataSizeWritten += this.userDataStream.size() - posBeforeEncode;
     }
 
     /**
@@ -1197,6 +1204,19 @@ public class HFileBlock implements Cacheable {
       return state == State.WRITING;
     }
 
+    /**
+     * Returns the number of encoded bytes written into the current block so
+     * far, or zero if not writing the block at the moment. Note that this
+     * will return zero in the "block ready" state as well.
+     *
+     * @return the number of encoded bytes written
+     */
+    public int encodedBlockSizeWritten() {
+      if (state != State.WRITING)
+        return 0;
+      return this.encodedDataSizeWritten;
+    }
+
     /**
      * Returns the number of bytes written into the current block so far, or
      * zero if not writing the block at the moment. Note that this will return
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index 6a20b99c0e0..b258ce2f612 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -63,6 +63,12 @@ public class HFileWriterImpl implements HFile.Writer {
 
   private static final long UNSET = -1;
 
+  /** If this feature is enabled, pre-calculate the encoded data size before the real encoding happens. */
+  public static final String UNIFIED_ENCODED_BLOCKSIZE_RATIO = "hbase.writer.unified.encoded.blocksize.ratio";
+
+  /** Block size limit after encoding, used to unify the encoded block cache entry size. */
+  private final int encodedBlockSizeLimit;
+
   /** The Cell previously appended. Becomes the last cell in the file.*/
   protected Cell lastCell = null;
 
@@ -174,6 +180,8 @@ public class HFileWriterImpl implements HFile.Writer {
 
     closeOutputStream = path != null;
     this.cacheConf = cacheConf;
+    float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 1f);
+    this.encodedBlockSizeLimit = (int)(hFileContext.getBlocksize() * encodeBlockSizeRatio);
     finishInit(conf);
     if (LOG.isTraceEnabled()) {
       LOG.trace("Writer" + (path != null ? " for " + path : "") +
@@ -306,10 +314,14 @@
    * @throws IOException
    */
   protected void checkBlockBoundary() throws IOException {
-    if (blockWriter.blockSizeWritten() < hFileContext.getBlocksize()) return;
-    finishBlock();
-    writeInlineBlocks(false);
-    newBlock();
+    // For encoders like prefixTree, the encoded size is not available, so we have to
+    // compare both the encoded and the unencoded size against the block size limit.
+    if (blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit
+        || blockWriter.blockSizeWritten() >= hFileContext.getBlocksize()) {
+      finishBlock();
+      writeInlineBlocks(false);
+      newBlock();
+    }
   }
 
   /** Clean up the data block that is currently being written.*/
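
Note (not part of the patch): a minimal, self-contained sketch of how the new
knob could be tuned. The 0.5f ratio, the 64 KB block size, and the class name
below are illustrative assumptions, not values this patch ships. The default
ratio of 1f keeps today's flush behavior, since encoded data is normally no
larger than its unencoded form.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class UnifiedEncodedBlocksizeExample {
      public static void main(String[] args) {
        // Hypothetical tuning: with a 64 KB block size, a 0.5 ratio makes
        // HFileWriterImpl close a data block once either
        // encodedBlockSizeWritten() >= 32768 (the new encoded limit) or
        // blockSizeWritten() >= 65536 (the existing unencoded limit).
        Configuration conf = HBaseConfiguration.create();
        conf.setFloat("hbase.writer.unified.encoded.blocksize.ratio", 0.5f);

        int blocksize = 64 * 1024; // illustrative HFile block size
        float ratio = conf.getFloat("hbase.writer.unified.encoded.blocksize.ratio", 1f);
        // Mirrors the encodedBlockSizeLimit computation in the patched constructor.
        System.out.println("encodedBlockSizeLimit = " + (int)(blocksize * ratio));
      }
    }

The same key can also be set cluster-wide in hbase-site.xml. Sizing encoded
blocks uniformly this way should let BucketCache-style allocators reuse
fixed-size buckets instead of fragmenting memory across many distinct encoded
block sizes, which is the motivation for this change.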