diff --git a/CHANGES.txt b/CHANGES.txt index f49a4f5fe46..6d7d97f1caa 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -523,6 +523,9 @@ Release 0.21.0 - Unreleased HBASE-2986 multi writable can npe causing client hang HBASE-2979 Fix failing TestMultParrallel in hudson build HBASE-2899 hfile.min.blocksize.size ignored/documentation wrong + HBASE-3006 Reading compressed HFile blocks causes way too many DFS RPC + calls severely impacting performance + (Kannan Muthukkaruppan via Stack) IMPROVEMENTS HBASE-1760 Cleanup TODOs in HTable diff --git a/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java index f9c8dd6d520..8f3e08ca8c0 100644 --- a/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java +++ b/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java @@ -19,6 +19,7 @@ */ package org.apache.hadoop.hbase.io.hfile; +import java.io.BufferedInputStream; import java.io.Closeable; import java.io.DataInputStream; import java.io.DataOutputStream; @@ -1051,10 +1052,15 @@ public class HFile { // decompressor reading into next block -- IIRC, it just grabs a // bunch of data w/o regard to whether decompressor is coming to end of a // decompression. + + // We use a buffer of DEFAULT_BLOCKSIZE size. This might be extreme. + // Could maybe do with less. Study and figure it: TODO InputStream is = this.compressAlgo.createDecompressionStream( - new BoundedRangeFileInputStream(this.istream, offset, compressedSize, - pread), - decompressor, 0); + new BufferedInputStream( + new BoundedRangeFileInputStream(this.istream, offset, compressedSize, + pread), + Math.min(DEFAULT_BLOCKSIZE, compressedSize)), + decompressor, 0); buf = ByteBuffer.allocate(decompressedSize); IOUtils.readFully(is, buf.array(), 0, buf.capacity()); is.close();