HBASE-17757 Unify blocksize after encoding to decrease memory fragment
Signed-off-by: anoopsamjohn <anoopsamjohn@gmail.com>
commit c4cbb419a3
parent 4bc0eb31c3
@@ -871,6 +871,10 @@ public class HFileBlock implements Cacheable {
     // includes the header size also.
     private int unencodedDataSizeWritten;
 
+    // Size of actual data being written. considering the block encoding. This
+    // includes the header size also.
+    private int encodedDataSizeWritten;
+
     /**
      * Bytes to be written to the file system, including the header. Compressed
      * if compression is turned on. It also includes the checksum data that
@@ -958,6 +962,7 @@ public class HFileBlock implements Cacheable {
         this.dataBlockEncoder.startBlockEncoding(dataBlockEncodingCtx, userDataStream);
       }
       this.unencodedDataSizeWritten = 0;
+      this.encodedDataSizeWritten = 0;
       return userDataStream;
     }
 
@@ -968,8 +973,10 @@ public class HFileBlock implements Cacheable {
      */
     void write(Cell cell) throws IOException{
       expectState(State.WRITING);
+      int posBeforeEncode = this.userDataStream.size();
       this.unencodedDataSizeWritten +=
           this.dataBlockEncoder.encode(cell, dataBlockEncodingCtx, this.userDataStream);
+      this.encodedDataSizeWritten += this.userDataStream.size() - posBeforeEncode;
     }
 
     /**
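The hunk above measures encoded output with a before/after size delta on the underlying stream. Below is a minimal, self-contained sketch of that technique in plain Java, not HBase code: the length-prefixed "encoder" and the class name EncodedSizeTracker are illustrative stand-ins for DataBlockEncoder and HFileBlock.Writer.

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

// Hypothetical stand-in for the block writer: tracks both the raw key/value
// bytes handed to it and the bytes the "encoder" actually emitted.
public class EncodedSizeTracker {
  private final ByteArrayOutputStream buf = new ByteArrayOutputStream();
  private final DataOutputStream out = new DataOutputStream(buf);
  private int unencodedDataSizeWritten = 0;
  private int encodedDataSizeWritten = 0;

  // The "encoding" here is just length-prefixed copying; the real
  // DataBlockEncoder would do prefix/diff encoding instead.
  void write(byte[] key, byte[] value) throws IOException {
    int posBeforeEncode = buf.size();                        // stream size before encoding
    out.writeShort(key.length);
    out.write(key);
    out.writeInt(value.length);
    out.write(value);
    unencodedDataSizeWritten += key.length + value.length;   // raw size, as before the patch
    encodedDataSizeWritten += buf.size() - posBeforeEncode;  // delta = encoded bytes
  }

  int blockSizeWritten() { return unencodedDataSizeWritten; }
  int encodedBlockSizeWritten() { return encodedDataSizeWritten; }

  public static void main(String[] args) throws IOException {
    EncodedSizeTracker t = new EncodedSizeTracker();
    t.write("row1".getBytes(StandardCharsets.UTF_8), "v1".getBytes(StandardCharsets.UTF_8));
    System.out.println(t.blockSizeWritten() + " raw bytes / " + t.encodedBlockSizeWritten() + " encoded bytes");
  }
}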
@@ -1197,6 +1204,19 @@ public class HFileBlock implements Cacheable {
       return state == State.WRITING;
     }
 
+    /**
+     * Returns the number of bytes written into the current block so far, or
+     * zero if not writing the block at the moment. Note that this will return
+     * zero in the "block ready" state as well.
+     *
+     * @return the number of bytes written
+     */
+    public int encodedBlockSizeWritten() {
+      if (state != State.WRITING)
+        return 0;
+      return this.encodedDataSizeWritten;
+    }
+
     /**
      * Returns the number of bytes written into the current block so far, or
      * zero if not writing the block at the moment. Note that this will return
@@ -63,6 +63,12 @@ public class HFileWriterImpl implements HFile.Writer {
 
   private static final long UNSET = -1;
 
+  /** if this feature is enabled, preCalculate encoded data size before real encoding happens*/
+  public static final String UNIFIED_ENCODED_BLOCKSIZE_RATIO = "hbase.writer.unified.encoded.blocksize.ratio";
+
+  /** Block size limit after encoding, used to unify encoded block Cache entry size*/
+  private final int encodedBlockSizeLimit;
+
   /** The Cell previously appended. Becomes the last cell in the file.*/
   protected Cell lastCell = null;
 
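A hedged usage sketch for the new key: it can be set like any other HBase configuration value. The 0.5f below is only an example value; the patch defaults the ratio to 1f, which keeps the previous block-cutting behaviour.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class UnifiedBlocksizeRatioExample {
  public static void main(String[] args) {
    Configuration conf = HBaseConfiguration.create();
    // 0.5f is illustrative only: cut encoded blocks at half the configured block size.
    conf.setFloat("hbase.writer.unified.encoded.blocksize.ratio", 0.5f);
    System.out.println(conf.getFloat("hbase.writer.unified.encoded.blocksize.ratio", 1f));
  }
}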
@@ -174,6 +180,8 @@ public class HFileWriterImpl implements HFile.Writer {
 
     closeOutputStream = path != null;
     this.cacheConf = cacheConf;
+    float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 1f);
+    this.encodedBlockSizeLimit = (int)(hFileContext.getBlocksize() * encodeBlockSizeRatio);
     finishInit(conf);
     if (LOG.isTraceEnabled()) {
       LOG.trace("Writer" + (path != null ? " for " + path : "") +
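The two constructor lines added above reduce to simple arithmetic: encoded blocks are cut at blocksize * ratio bytes. A tiny worked example, assuming a 64 KiB block size and a ratio of 0.5 (both illustrative values):

public class EncodedBlockSizeLimitDemo {
  public static void main(String[] args) {
    int blocksize = 64 * 1024;                 // assumed hFileContext.getBlocksize()
    float encodeBlockSizeRatio = 0.5f;         // assumed configured ratio; patch default is 1f
    int encodedBlockSizeLimit = (int) (blocksize * encodeBlockSizeRatio);
    System.out.println("encodedBlockSizeLimit = " + encodedBlockSizeLimit); // prints 32768
  }
}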
@@ -306,10 +314,14 @@ public class HFileWriterImpl implements HFile.Writer {
    * @throws IOException
    */
   protected void checkBlockBoundary() throws IOException {
-    if (blockWriter.blockSizeWritten() < hFileContext.getBlocksize()) return;
-    finishBlock();
-    writeInlineBlocks(false);
-    newBlock();
+    //for encoder like prefixTree, encoded size is not available, so we have to compare both encoded size
+    //and unencoded size to blocksize limit.
+    if (blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit
+        || blockWriter.blockSizeWritten() >= hFileContext.getBlocksize()) {
+      finishBlock();
+      writeInlineBlocks(false);
+      newBlock();
+    }
   }
 
   /** Clean up the data block that is currently being written.*/
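A standalone sketch of the new boundary rule, outside of HFileWriterImpl: roll the block when either the encoded size reaches the encoded limit or the unencoded size reaches the configured block size (the second clause covers encoders such as prefix-tree, whose encoded size is not available). shouldRoll() is a hypothetical helper standing in for the finishBlock()/writeInlineBlocks(false)/newBlock() sequence; the sizes in main() are made-up example values.

public class BlockBoundaryCheckSketch {
  static boolean shouldRoll(int encodedBytes, int unencodedBytes,
      int encodedBlockSizeLimit, int blocksize) {
    // Cut the block as soon as either limit is reached.
    return encodedBytes >= encodedBlockSizeLimit || unencodedBytes >= blocksize;
  }

  public static void main(String[] args) {
    int blocksize = 64 * 1024;                    // assumed 64 KiB block size
    int encodedLimit = (int) (blocksize * 0.5f);  // assumed ratio of 0.5
    System.out.println(shouldRoll(40_000, 150_000, encodedLimit, blocksize)); // true: encoded limit hit
    System.out.println(shouldRoll(10_000, 20_000, encodedLimit, blocksize));  // false: keep writing
  }
}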