From 51455471542c372563f24b0099a5a340dfedf96a Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Wed, 15 Sep 2010 23:57:15 +0000 Subject: [PATCH] HBASE-2899 hfile.min.blocksize.size ignored/documentation wrong git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@997544 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../hadoop/hbase/HColumnDescriptor.java | 19 ++++++++++++++----- .../hbase/mapreduce/HFileOutputFormat.java | 3 ++- src/main/resources/hbase-default.xml | 13 +++++++++---- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 100925959a5..f49a4f5fe46 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -522,6 +522,7 @@ Release 0.21.0 - Unreleased (Alex Newman via Todd Lipcon) HBASE-2986 multi writable can npe causing client hang HBASE-2979 Fix failing TestMultParrallel in hudson build + HBASE-2899 hfile.min.blocksize.size ignored/documentation wrong IMPROVEMENTS HBASE-1760 Cleanup TODOs in HTable diff --git a/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java b/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java index f3e87cc369e..365e4b99be8 100644 --- a/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java +++ b/src/main/java/org/apache/hadoop/hbase/HColumnDescriptor.java @@ -74,7 +74,14 @@ public class HColumnDescriptor implements WritableComparable public static final String COMPRESSION = "COMPRESSION"; public static final String COMPRESSION_COMPACT = "COMPRESSION_COMPACT"; public static final String BLOCKCACHE = "BLOCKCACHE"; + + /** + * Size of storefile/hfile 'blocks'. Default is {@link #DEFAULT_BLOCKSIZE}. + * Use smaller block sizes for faster random-access at expense of larger + * indices (more memory consumption). 
+ */ public static final String BLOCKSIZE = "BLOCKSIZE"; + public static final String LENGTH = "LENGTH"; public static final String TTL = "TTL"; public static final String BLOOMFILTER = "BLOOMFILTER"; @@ -109,8 +116,7 @@ public class HColumnDescriptor implements WritableComparable public static final boolean DEFAULT_BLOCKCACHE = true; /** - * Default size of blocks in files store to the filesytem. Use smaller for - * faster random-access at expense of larger indices (more memory consumption). + * Default size of blocks in files stored to the filesystem (hfiles). */ public static final int DEFAULT_BLOCKSIZE = HFile.DEFAULT_BLOCKSIZE; @@ -224,7 +230,9 @@ public class HColumnDescriptor implements WritableComparable * @param inMemory If true, column data should be kept in an HRegionServer's * cache * @param blockCacheEnabled If true, MapFile blocks should be cached - * @param blocksize + * @param blocksize Block size to use when writing out storefiles. Use + * smaller blocksizes for faster random-access at expense of larger indices + * (more memory consumption). Default is usually 64k. * @param timeToLive Time-to-live of cell contents, in seconds * (use HConstants.FOREVER for unlimited TTL) * @param bloomFilter Bloom filter type for this column @@ -385,7 +393,7 @@ public class HColumnDescriptor implements WritableComparable } /** - * @return Blocksize. + * @return The storefile/hfile blocksize for this column family. */ public synchronized int getBlocksize() { if (this.blocksize == null) { @@ -397,7 +405,8 @@ public class HColumnDescriptor implements WritableComparable } /** - * @param s + * @param s Blocksize to use when writing out storefiles/hfiles on this + * column family. 
*/ public void setBlocksize(int s) { setValue(BLOCKSIZE, Integer.toString(s)); diff --git a/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java b/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java index dd148ba28e6..48bbf105f7e 100644 --- a/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java +++ b/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java @@ -75,7 +75,8 @@ public class HFileOutputFormat extends FileOutputFormat - hfile.min.blocksize.size + hbase.mapreduce.hfileoutputformat.blocksize 65536 - Minimum store file block size. The smaller you make this, the - bigger your index and the less you fetch on a random-access. Set size down - if you have small cells and want faster random-access of individual cells. + The mapreduce HFileOutputFormat writes storefiles/hfiles. + This is the minimum hfile blocksize to emit. Usually in hbase, writing + hfiles, the blocksize is gotten from the table schema (HColumnDescriptor) + but in the mapreduce outputformat context, we don't have access to the + schema so get blocksize from Configuration. The smaller you make + the blocksize, the bigger your index and the less you fetch on a + random-access. Set the blocksize down if you have small cells and want + faster random-access of individual cells.