HBASE-4365 Add a decent heuristic for region size

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1293099 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2012-02-24 06:41:37 +00:00
parent ce71b8a1bc
commit e531e2595c
8 changed files with 214 additions and 19 deletions

View File

@ -227,6 +227,9 @@ public final class HConstants {
public static final String HREGION_MAX_FILESIZE = public static final String HREGION_MAX_FILESIZE =
"hbase.hregion.max.filesize"; "hbase.hregion.max.filesize";
/**
 * Default maximum file size: 10 GB (10737418240 bytes).
 * The leading operand is a long literal so the whole product is computed in
 * 64-bit arithmetic; with all-int operands the multiplication overflows
 * before it is widened to long.
 */
public static final long DEFAULT_MAX_FILE_SIZE = 10L * 1024 * 1024 * 1024;
/** /**
* The max number of threads used for opening and closing stores or store * The max number of threads used for opening and closing stores or store
* files in parallel * files in parallel
@ -240,8 +243,6 @@ public final class HConstants {
*/ */
public static final int DEFAULT_HSTORE_OPEN_AND_CLOSE_THREADS_MAX = 1; public static final int DEFAULT_HSTORE_OPEN_AND_CLOSE_THREADS_MAX = 1;
/** Default maximum file size */
public static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024;
/** Conf key for the memstore size at which we flush the memstore */ /** Conf key for the memstore size at which we flush the memstore */
public static final String HREGION_MEMSTORE_FLUSH_SIZE = public static final String HREGION_MEMSTORE_FLUSH_SIZE =

View File

@ -158,7 +158,7 @@ public class HTableDescriptor implements WritableComparable<HTableDescriptor> {
* Constant that denotes the maximum default size of the memstore after which * Constant that denotes the maximum default size of the memstore after which
* the contents are flushed to the store files * the contents are flushed to the store files
*/ */
public static final long DEFAULT_MEMSTORE_FLUSH_SIZE = 1024*1024*64L; public static final long DEFAULT_MEMSTORE_FLUSH_SIZE = 1024*1024*128L;
private volatile Boolean meta = null; private volatile Boolean meta = null;
private volatile Boolean root = null; private volatile Boolean root = null;

View File

@ -780,6 +780,14 @@ public class HRegion implements HeapSize { // , Writable{
return this.regionInfo; return this.regionInfo;
} }
/**
 * Accessor for the {@link RegionServerServices} held by this HRegion.
 *
 * @return the services instance this region was given; can be null.
 */
RegionServerServices getRegionServerServices() {
  return rsServices;
}
/** @return requestsCount for this region */ /** @return requestsCount for this region */
public long getRequestsCount() { public long getRequestsCount() {
return this.readRequestsCount.get() + this.writeRequestsCount.get(); return this.readRequestsCount.get() + this.writeRequestsCount.get();

View File

@ -0,0 +1,112 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.util.Bytes;
/**
 * Split policy whose split threshold grows with the number of regions of the
 * same table that are online on this region server.
 *
 * <p>The threshold is the count of such regions, squared, times the region
 * flush size, OR the maximum region split size, whichever is smaller. For
 * example, if the flush size is 128M, a lone region splits once a store
 * exceeds 1 * 1 * 128M = 128M. That makes two regions, which split when
 * their size exceeds 2 * 2 * 128M = 512M. If one of these splits, there are
 * three regions and the split size is 3 * 3 * 128M = 1152M, and so on until
 * we reach the configured maximum filesize; from there on out that is used.
 */
public class IncreasingToUpperBoundRegionSplitPolicy
    extends ConstantSizeRegionSplitPolicy {
  static final Log LOG =
    LogFactory.getLog(IncreasingToUpperBoundRegionSplitPolicy.class);

  /** Memstore flush size used as the base unit of the growing threshold. */
  private long flushSize;

  /**
   * Reads the flush size from the region's table descriptor, or from the
   * configuration when the region has no table descriptor.
   *
   * @param region the region this policy instance is attached to
   */
  @Override
  protected void configureForRegion(HRegion region) {
    super.configureForRegion(region);
    HTableDescriptor htd = region.getTableDesc();
    if (htd != null) {
      this.flushSize = htd.getMemStoreFlushSize();
    } else {
      this.flushSize = getConf().getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE,
        HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE);
    }
  }

  /**
   * @return true if a split is forced, or if every store can split and at
   * least one store is larger than {@link #getSizeToCheck(int)}; false if
   * any store is unable to split, regardless of iteration order.
   */
  @Override
  protected boolean shouldSplit() {
    if (region.shouldForceSplit()) {
      return true;
    }
    // Get count of regions that have the same common table as this.region
    int tableRegionsCount = getCountOfCommonTableRegions();
    // Get size to check
    long sizeToCheck = getSizeToCheck(tableRegionsCount);
    boolean foundABigStore = false;
    for (Store store : region.getStores().values()) {
      // If any of the stores is unable to split (eg they contain reference
      // files) then don't split. Every store must be consulted, so we do not
      // stop at the first big store.
      if (!store.canSplit()) {
        return false;
      }
      if (foundABigStore) {
        // Already know we are big enough; only the canSplit veto matters now.
        continue;
      }
      // Mark if any store is big enough
      long size = store.getSize();
      if (size > sizeToCheck) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("ShouldSplit because " + store.getColumnFamilyName() +
            " size=" + size + ", sizeToCheck=" + sizeToCheck +
            ", regionsWithCommonTable=" + tableRegionsCount);
        }
        foundABigStore = true;
      }
    }
    return foundABigStore;
  }

  /**
   * @param tableRegionsCount count of regions on this server that belong to
   * the same table as this region
   * @return Region max size or <code>count of regions squared * flushsize</code>,
   * whichever is smaller; guards against there being zero regions on this
   * server and against arithmetic overflow for very large counts.
   */
  long getSizeToCheck(final int tableRegionsCount) {
    final long maxFileSize = getDesiredMaxFileSize();
    if (tableRegionsCount == 0) {
      return maxFileSize;
    }
    // Square in long arithmetic: an int square overflows above 46340.
    final long squared = (long) tableRegionsCount * tableRegionsCount;
    // If the product would exceed the max (and so possibly overflow a long),
    // the answer is the max anyway; short-circuit before multiplying.
    if (this.flushSize > 0 && squared > maxFileSize / this.flushSize) {
      return maxFileSize;
    }
    return Math.min(maxFileSize, this.flushSize * squared);
  }

  /**
   * @return Count of regions on this server that share the table this.region
   * belongs to; 0 if the count cannot be determined.
   */
  private int getCountOfCommonTableRegions() {
    RegionServerServices rss = this.region.getRegionServerServices();
    // Can be null in tests
    if (rss == null) {
      return 0;
    }
    // configureForRegion tolerates a missing table descriptor; do the same.
    HTableDescriptor htd = this.region.getTableDesc();
    if (htd == null) {
      return 0;
    }
    byte [] tablename = htd.getName();
    int tableRegionsCount = 0;
    try {
      List<HRegion> hri = rss.getOnlineRegions(tablename);
      tableRegionsCount = hri == null? 0: hri.size();
    } catch (IOException e) {
      // Best effort: fall back to a count of zero, which makes
      // getSizeToCheck return the configured maximum.
      LOG.debug("Failed getOnlineRegions " + Bytes.toString(tablename), e);
    }
    return tableRegionsCount;
  }
}

View File

@ -29,7 +29,7 @@ import org.apache.commons.logging.LogFactory;
* This ensures that a region is not split "inside" a prefix of a row key. * This ensures that a region is not split "inside" a prefix of a row key.
* I.e. rows can be co-located in a region by their prefix. * I.e. rows can be co-located in a region by their prefix.
*/ */
public class KeyPrefixRegionSplitPolicy extends ConstantSizeRegionSplitPolicy { public class KeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
private static final Log LOG = LogFactory private static final Log LOG = LogFactory
.getLog(KeyPrefixRegionSplitPolicy.class); .getLog(KeyPrefixRegionSplitPolicy.class);
public static String PREFIX_LENGTH_KEY = "prefix_split_key_policy.prefix_length"; public static String PREFIX_LENGTH_KEY = "prefix_split_key_policy.prefix_length";

View File

@ -33,8 +33,8 @@ import com.google.common.base.Preconditions;
* {@see ConstantSizeRegionSplitPolicy} * {@see ConstantSizeRegionSplitPolicy}
*/ */
public abstract class RegionSplitPolicy extends Configured { public abstract class RegionSplitPolicy extends Configured {
private static final Class<ConstantSizeRegionSplitPolicy> private static final Class<? extends RegionSplitPolicy>
DEFAULT_SPLIT_POLICY_CLASS = ConstantSizeRegionSplitPolicy.class; DEFAULT_SPLIT_POLICY_CLASS = IncreasingToUpperBoundRegionSplitPolicy.class;
/** /**
* The region configured for this split policy. * The region configured for this split policy.

View File

@ -389,11 +389,11 @@
</property> </property>
<property> <property>
<name>hbase.hregion.max.filesize</name> <name>hbase.hregion.max.filesize</name>
<value>1073741824</value> <value>10737418240</value>
<description> <description>
Maximum HStoreFile size. If any one of a column families' HStoreFiles has Maximum HStoreFile size. If any one of a column families' HStoreFiles has
grown to exceed this value, the hosting HRegion is split in two. grown to exceed this value, the hosting HRegion is split in two.
Default: 1G. Default: 10G.
</description> </description>
</property> </property>
<property> <property>

View File

@ -17,14 +17,22 @@
*/ */
package org.apache.hadoop.hbase.regionserver; package org.apache.hadoop.hbase.regionserver;
import static org.junit.Assert.*; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.ArrayList;
import java.util.List;
import java.util.TreeMap; import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -38,14 +46,13 @@ public class TestRegionSplitPolicy {
private HTableDescriptor htd; private HTableDescriptor htd;
private HRegion mockRegion; private HRegion mockRegion;
private TreeMap<byte[], Store> stores; private TreeMap<byte[], Store> stores;
private static final byte [] TABLENAME = new byte [] {'t'};
@Before @Before
public void setupMocks() { public void setupMocks() {
conf = HBaseConfiguration.create(); conf = HBaseConfiguration.create();
HRegionInfo hri = new HRegionInfo(TABLENAME);
HRegionInfo hri = new HRegionInfo(Bytes.toBytes("testtable")); htd = new HTableDescriptor(TABLENAME);
htd = new HTableDescriptor();
mockRegion = Mockito.mock(HRegion.class); mockRegion = Mockito.mock(HRegion.class);
Mockito.doReturn(htd).when(mockRegion).getTableDesc(); Mockito.doReturn(htd).when(mockRegion).getTableDesc();
Mockito.doReturn(hri).when(mockRegion).getRegionInfo(); Mockito.doReturn(hri).when(mockRegion).getRegionInfo();
@ -54,6 +61,65 @@ public class TestRegionSplitPolicy {
Mockito.doReturn(stores).when(mockRegion).getStores(); Mockito.doReturn(stores).when(mockRegion).getStores();
} }
/**
 * Walks IncreasingToUpperBoundRegionSplitPolicy through a growing list of
 * mocked 'online regions' and checks that the split threshold follows
 * (count of regions squared) * flushSize, capped at the table's max file size.
 */
@Test
public void testIncreasingToUpperBoundRegionSplitPolicy() throws IOException {
// Configure IncreasingToUpperBoundRegionSplitPolicy as our split policy
conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
IncreasingToUpperBoundRegionSplitPolicy.class.getName());
// Now make it so the mock region has a RegionServerServices that will
// return 'online regions'. The list starts empty and is grown below; the
// mock hands back this same list instance on every call.
RegionServerServices rss = Mockito.mock(RegionServerServices.class);
final List<HRegion> regions = new ArrayList<HRegion>();
Mockito.when(rss.getOnlineRegions(TABLENAME)).thenReturn(regions);
Mockito.when(mockRegion.getRegionServerServices()).thenReturn(rss);
// Set max size for this 'table'.
long maxSplitSize = 1024L;
htd.setMaxFileSize(maxSplitSize);
// Set flush size to 1/4 of the max (256). The policy's split size is the
// square of the number of regions times the flush size, so two regions
// (2 * 2 * 256 = 1024) already reach the max.
long flushSize = maxSplitSize/4;
conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSize);
htd.setMemStoreFlushSize(flushSize);
// If the RegionServerServices has no regions in it -- 'online regions' == 0 --
// then IncreasingToUpperBoundRegionSplitPolicy should act like a
// ConstantSizeRegionSplitPolicy
IncreasingToUpperBoundRegionSplitPolicy policy =
(IncreasingToUpperBoundRegionSplitPolicy)RegionSplitPolicy.create(mockRegion, conf);
doConstantSizePolicyTests(policy);
// Add a store in excess of split size. Because there are "no regions"
// on this server -- rss.getOnlineRegions returns an empty list -- we should
// split like a ConstantSizeRegionSplitPolicy would
Store mockStore = Mockito.mock(Store.class);
Mockito.doReturn(2000L).when(mockStore).getSize();
Mockito.doReturn(true).when(mockStore).canSplit();
stores.put(new byte[]{1}, mockStore);
// It should split
assertTrue(policy.shouldSplit());
// Now test that we increase our split size as the number of online regions
// for a table grows. With one region, split size should be flushSize.
regions.add(mockRegion);
Mockito.doReturn(flushSize/2).when(mockStore).getSize();
// Should not split since store is 1/2 flush size.
assertFalse(policy.shouldSplit());
// Set size of store to be > flush size and we should split
Mockito.doReturn(flushSize + 1).when(mockStore).getSize();
assertTrue(policy.shouldSplit());
// Add another region to the 'online regions' on this server and we should
// no longer be splittable since the split size has gone up.
regions.add(mockRegion);
assertFalse(policy.shouldSplit());
// Set the store size to just over flushSize * 2 * 2 (two regions, squared);
// verify it'll split
Mockito.doReturn((flushSize * 2 * 2) + 1).when(mockStore).getSize();
assertTrue(policy.shouldSplit());
// Finally assert that even with loads of regions, we'll split at max size
assertEquals(maxSplitSize, policy.getSizeToCheck(1000));
// Assert same is true if count of regions is zero.
assertEquals(maxSplitSize, policy.getSizeToCheck(0));
}
@Test @Test
public void testCreateDefault() throws IOException { public void testCreateDefault() throws IOException {
conf.setLong(HConstants.HREGION_MAX_FILESIZE, 1234L); conf.setLong(HConstants.HREGION_MAX_FILESIZE, 1234L);
@ -110,10 +176,16 @@ public class TestRegionSplitPolicy {
@Test @Test
public void testConstantSizePolicy() throws IOException { public void testConstantSizePolicy() throws IOException {
htd.setMaxFileSize(1024L); htd.setMaxFileSize(1024L);
ConstantSizeRegionSplitPolicy policy = ConstantSizeRegionSplitPolicy policy =
(ConstantSizeRegionSplitPolicy)RegionSplitPolicy.create(mockRegion, conf); (ConstantSizeRegionSplitPolicy)RegionSplitPolicy.create(mockRegion, conf);
doConstantSizePolicyTests(policy);
}
/**
* Run through tests for a ConstantSizeRegionSplitPolicy
* @param policy
*/
private void doConstantSizePolicyTests(final ConstantSizeRegionSplitPolicy policy) {
// For no stores, should not split // For no stores, should not split
assertFalse(policy.shouldSplit()); assertFalse(policy.shouldSplit());
@ -141,6 +213,9 @@ public class TestRegionSplitPolicy {
// Turn off forceSplit, should not split // Turn off forceSplit, should not split
Mockito.doReturn(false).when(mockRegion).shouldForceSplit(); Mockito.doReturn(false).when(mockRegion).shouldForceSplit();
assertFalse(policy.shouldSplit()); assertFalse(policy.shouldSplit());
// Clear families we added above
stores.clear();
} }
@Test @Test
@ -179,4 +254,3 @@ public class TestRegionSplitPolicy {
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu = public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule(); new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
} }