HBASE-14906 Improvements on FlushLargeStoresPolicy (Yu Li)

This commit is contained in:
stack 2015-12-10 16:49:23 -08:00
parent bebcc09fb3
commit c15e0af84a
4 changed files with 51 additions and 24 deletions

View File

@ -612,16 +612,17 @@ possible configurations would overwhelm and obscure the important.
every hbase.server.thread.wakefrequency.</description>
</property>
<property>
<name>hbase.hregion.percolumnfamilyflush.size.lower.bound</name>
<name>hbase.hregion.percolumnfamilyflush.size.lower.bound.min</name>
<value>16777216</value>
<description>
If FlushLargeStoresPolicy is used, then every time that we hit the
total memstore limit, we find out all the column families whose memstores
exceed this value, and only flush them, while retaining the others whose
memstores are lower than this limit. If none of the families have their
memstore size more than this, all the memstores will be flushed
(just as usual). This value should be less than half of the total memstore
threshold (hbase.hregion.memstore.flush.size).
If FlushLargeStoresPolicy is used and there are multiple column families,
then every time that we hit the total memstore limit, we find out all the
column families whose memstores exceed a "lower bound" and only flush them
while retaining the others in memory. The "lower bound" will be
"hbase.hregion.memstore.flush.size / column_family_number" by default
unless the value of this property is larger than that. If none of the families
has a memstore size greater than the lower bound, all the memstores will be
flushed (just as usual).
</description>
</property>
<property>

View File

@ -38,35 +38,50 @@ public class FlushLargeStoresPolicy extends FlushPolicy {
public static final String HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND =
"hbase.hregion.percolumnfamilyflush.size.lower.bound";
private static final long DEFAULT_HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND = 1024 * 1024 * 16L;
public static final String HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN =
"hbase.hregion.percolumnfamilyflush.size.lower.bound.min";
private long flushSizeLowerBound;
private static final long DEFAULT_HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN =
1024 * 1024 * 16L;
private long flushSizeLowerBound = -1;
@Override
protected void configureForRegion(HRegion region) {
super.configureForRegion(region);
long flushSizeLowerBound;
int familyNumber = region.getTableDesc().getFamilies().size();
if (familyNumber <= 1) {
// No need to parse and set flush size lower bound if only one family
// Family number might also be zero in some of our unit test case
return;
}
// For multiple families, lower bound is the "average flush size" by default
// unless setting in configuration is larger.
long flushSizeLowerBound = region.getMemstoreFlushSize() / familyNumber;
long minimumLowerBound =
getConf().getLong(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN,
DEFAULT_HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN);
if (minimumLowerBound > flushSizeLowerBound) {
flushSizeLowerBound = minimumLowerBound;
}
// use the setting in table description if any
String flushedSizeLowerBoundString =
region.getTableDesc().getValue(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND);
if (flushedSizeLowerBoundString == null) {
flushSizeLowerBound =
getConf().getLong(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND,
DEFAULT_HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND);
if (LOG.isDebugEnabled()) {
LOG.debug(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND
+ " is not specified, use global config(" + flushSizeLowerBound + ") instead");
LOG.debug("No " + HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND
+ " set in description of table " + region.getTableDesc().getTableName()
+ ", use config (" + flushSizeLowerBound + ") instead");
}
} else {
try {
flushSizeLowerBound = Long.parseLong(flushedSizeLowerBoundString);
} catch (NumberFormatException nfe) {
flushSizeLowerBound =
getConf().getLong(HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND,
DEFAULT_HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND);
// fall back to the config-derived value when the table setting is malformed
LOG.warn("Number format exception when parsing "
+ HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND + " for table "
+ region.getTableDesc().getTableName() + ":" + flushedSizeLowerBoundString + ". " + nfe
+ ", use global config(" + flushSizeLowerBound + ") instead");
+ ", use config (" + flushSizeLowerBound + ") instead");
}
}
@ -87,6 +102,11 @@ public class FlushLargeStoresPolicy extends FlushPolicy {
@Override
public Collection<Store> selectStoresToFlush() {
// no need to select stores if only one family
if (region.getTableDesc().getFamilies().size() == 1) {
return region.stores.values();
}
// start selection
Collection<Store> stores = region.stores.values();
Set<Store> specificStoresToFlush = new HashSet<Store>();
for (Store store : stores) {

View File

@ -8183,4 +8183,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
return this.getRegionInfo().isMetaRegion() ? CellComparator.META_COMPARATOR
: CellComparator.COMPARATOR;
}
/**
 * Exposes this region's memstore flush size.
 *
 * @return the current value of the {@code memstoreFlushSize} field
 */
public long getMemstoreFlushSize() {
  return memstoreFlushSize;
}
}

View File

@ -128,7 +128,8 @@ public class TestPerColumnFamilyFlush {
Configuration conf = HBaseConfiguration.create();
conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 200 * 1024);
conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY, FlushLargeStoresPolicy.class.getName());
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND, 100 * 1024);
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN,
100 * 1024);
// Initialize the region
Region region = initHRegion("testSelectiveFlushWhenEnabled", conf);
// Add 1200 entries for CF1, 100 for CF2 and 50 for CF3
@ -336,7 +337,7 @@ public class TestPerColumnFamilyFlush {
conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 20000);
// Carefully chosen limits so that the memstore just flushes when we're done
conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY, FlushLargeStoresPolicy.class.getName());
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND, 10000);
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN, 10000);
final int numRegionServers = 4;
try {
TEST_UTIL.startMiniCluster(numRegionServers);
@ -451,7 +452,7 @@ public class TestPerColumnFamilyFlush {
conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, 128 * 1024 * 1024);
conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY, FlushLargeStoresPolicy.class.getName());
long cfFlushSizeLowerBound = 2048;
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND,
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN,
cfFlushSizeLowerBound);
// One hour, prevent periodic rolling
@ -568,7 +569,6 @@ public class TestPerColumnFamilyFlush {
Configuration conf = TEST_UTIL.getConfiguration();
conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, memstoreFlushSize);
conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY, FlushAllStoresPolicy.class.getName());
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND, 400 * 1024);
conf.setInt(HStore.BLOCKING_STOREFILES_KEY, 10000);
conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
ConstantSizeRegionSplitPolicy.class.getName());
@ -608,6 +608,8 @@ public class TestPerColumnFamilyFlush {
LOG.info("==============Test with selective flush enabled===============");
conf.set(FlushPolicyFactory.HBASE_FLUSH_POLICY_KEY, FlushLargeStoresPolicy.class.getName());
// default value of per-cf flush lower bound is too big, set to a small enough value
conf.setLong(FlushLargeStoresPolicy.HREGION_COLUMNFAMILY_FLUSH_SIZE_LOWER_BOUND_MIN, 0);
try {
TEST_UTIL.startMiniCluster(1);
TEST_UTIL.getHBaseAdmin().createNamespace(