HBASE-4365 Add a decent heuristic for region size
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1293099 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ce71b8a1bc
commit
e531e2595c
|
@ -227,6 +227,9 @@ public final class HConstants {
|
|||
public static final String HREGION_MAX_FILESIZE =
|
||||
"hbase.hregion.max.filesize";
|
||||
|
||||
/** Default maximum file size */
|
||||
public static final long DEFAULT_MAX_FILE_SIZE =
    10 * 1024 * 1024 * 1024L; // 10 GB; the L suffix forces long math, an all-int product overflows
|
||||
|
||||
/**
|
||||
* The max number of threads used for opening and closing stores or store
|
||||
* files in parallel
|
||||
|
@ -240,8 +243,6 @@ public final class HConstants {
|
|||
*/
|
||||
public static final int DEFAULT_HSTORE_OPEN_AND_CLOSE_THREADS_MAX = 1;
|
||||
|
||||
/** Default maximum file size */
|
||||
public static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024;
|
||||
|
||||
/** Conf key for the memstore size at which we flush the memstore */
|
||||
public static final String HREGION_MEMSTORE_FLUSH_SIZE =
|
||||
|
|
|
@ -158,7 +158,7 @@ public class HTableDescriptor implements WritableComparable<HTableDescriptor> {
|
|||
* Constant that denotes the maximum default size of the memstore after which
|
||||
* the contents are flushed to the store files
|
||||
*/
|
||||
public static final long DEFAULT_MEMSTORE_FLUSH_SIZE = 1024*1024*64L;
|
||||
public static final long DEFAULT_MEMSTORE_FLUSH_SIZE = 1024*1024*128L;
|
||||
|
||||
private volatile Boolean meta = null;
|
||||
private volatile Boolean root = null;
|
||||
|
|
|
@ -780,6 +780,14 @@ public class HRegion implements HeapSize { // , Writable{
|
|||
return this.regionInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Instance of {@link RegionServerServices} used by this HRegion.
|
||||
* Can be null.
|
||||
*/
|
||||
RegionServerServices getRegionServerServices() {
|
||||
return this.rsServices;
|
||||
}
|
||||
|
||||
/** @return requestsCount for this region */
|
||||
public long getRequestsCount() {
|
||||
return this.readRequestsCount.get() + this.writeRequestsCount.get();
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.regionserver;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
||||
/**
|
||||
* Split size is the number of regions that are on this server that all are
|
||||
* of the same table, squared, times the region flush size OR the maximum
|
||||
* region split size, whichever is smaller. For example, if the flush size
|
||||
* is 128M, then on first flush we will split which will make two regions
|
||||
* that will split when their size is 2 * 2 * 128M = 512M. If one of these
|
||||
* regions splits, then there are three regions and now the split size is
|
||||
* 3 * 3 * 128M = 1152M, and so on until we reach the configured
|
||||
* maximum filesize and then from there on out, we'll use that.
|
||||
*/
|
||||
public class IncreasingToUpperBoundRegionSplitPolicy
|
||||
extends ConstantSizeRegionSplitPolicy {
|
||||
static final Log LOG =
|
||||
LogFactory.getLog(IncreasingToUpperBoundRegionSplitPolicy.class);
|
||||
private long flushSize;
|
||||
|
||||
@Override
|
||||
protected void configureForRegion(HRegion region) {
|
||||
super.configureForRegion(region);
|
||||
this.flushSize = region.getTableDesc() != null?
|
||||
region.getTableDesc().getMemStoreFlushSize():
|
||||
getConf().getLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE,
|
||||
HTableDescriptor.DEFAULT_MEMSTORE_FLUSH_SIZE);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean shouldSplit() {
|
||||
if (region.shouldForceSplit()) return true;
|
||||
boolean foundABigStore = false;
|
||||
// Get count of regions that have the same common table as this.region
|
||||
int tableRegionsCount = getCountOfCommonTableRegions();
|
||||
// Get size to check
|
||||
long sizeToCheck = getSizeToCheck(tableRegionsCount);
|
||||
|
||||
for (Store store : region.getStores().values()) {
|
||||
// If any of the stores is unable to split (eg they contain reference files)
|
||||
// then don't split
|
||||
if ((!store.canSplit())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Mark if any store is big enough
|
||||
long size = store.getSize();
|
||||
if (size > sizeToCheck) {
|
||||
LOG.debug("ShouldSplit because " + store.getColumnFamilyName() +
|
||||
" size=" + size + ", sizeToCheck=" + sizeToCheck +
|
||||
", regionsWithCommonTable=" + tableRegionsCount);
|
||||
foundABigStore = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return foundABigStore;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Region max size or <code>count of regions squared * flushsize, which ever is
|
||||
* smaller; guard against there being zero regions on this server.
|
||||
*/
|
||||
long getSizeToCheck(final int tableRegionsCount) {
|
||||
return tableRegionsCount == 0? getDesiredMaxFileSize():
|
||||
Math.min(getDesiredMaxFileSize(),
|
||||
this.flushSize * (tableRegionsCount * tableRegionsCount));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Count of regions on this server that share the table this.region
|
||||
* belongs to
|
||||
*/
|
||||
private int getCountOfCommonTableRegions() {
|
||||
RegionServerServices rss = this.region.getRegionServerServices();
|
||||
// Can be null in tests
|
||||
if (rss == null) return 0;
|
||||
byte [] tablename = this.region.getTableDesc().getName();
|
||||
int tableRegionsCount = 0;
|
||||
try {
|
||||
List<HRegion> hri = rss.getOnlineRegions(tablename);
|
||||
tableRegionsCount = hri == null || hri.isEmpty()? 0: hri.size();
|
||||
} catch (IOException e) {
|
||||
LOG.debug("Failed getOnlineRegions " + Bytes.toString(tablename), e);
|
||||
}
|
||||
return tableRegionsCount;
|
||||
}
|
||||
}
|
|
@ -29,7 +29,7 @@ import org.apache.commons.logging.LogFactory;
|
|||
* This ensures that a region is not split "inside" a prefix of a row key.
|
||||
* I.e. rows can be co-located in a region by their prefix.
|
||||
*/
|
||||
public class KeyPrefixRegionSplitPolicy extends ConstantSizeRegionSplitPolicy {
|
||||
public class KeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
|
||||
private static final Log LOG = LogFactory
|
||||
.getLog(KeyPrefixRegionSplitPolicy.class);
|
||||
public static String PREFIX_LENGTH_KEY = "prefix_split_key_policy.prefix_length";
|
||||
|
@ -75,4 +75,4 @@ public class KeyPrefixRegionSplitPolicy extends ConstantSizeRegionSplitPolicy {
|
|||
return splitPoint;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -33,8 +33,8 @@ import com.google.common.base.Preconditions;
|
|||
* {@link ConstantSizeRegionSplitPolicy}
|
||||
*/
|
||||
public abstract class RegionSplitPolicy extends Configured {
|
||||
private static final Class<ConstantSizeRegionSplitPolicy>
|
||||
DEFAULT_SPLIT_POLICY_CLASS = ConstantSizeRegionSplitPolicy.class;
|
||||
private static final Class<? extends RegionSplitPolicy>
|
||||
DEFAULT_SPLIT_POLICY_CLASS = IncreasingToUpperBoundRegionSplitPolicy.class;
|
||||
|
||||
/**
|
||||
* The region configured for this split policy.
|
||||
|
|
|
@ -389,11 +389,11 @@
|
|||
</property>
|
||||
<property>
|
||||
<name>hbase.hregion.max.filesize</name>
|
||||
<value>1073741824</value>
|
||||
<value>10737418240</value>
|
||||
<description>
|
||||
Maximum HStoreFile size. If any one of a column families' HStoreFiles has
|
||||
grown to exceed this value, the hosting HRegion is split in two.
|
||||
Default: 1G.
|
||||
Default: 10G.
|
||||
</description>
|
||||
</property>
|
||||
<property>
|
||||
|
|
|
@ -17,14 +17,22 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.regionserver;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.*;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.SmallTests;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
@ -38,14 +46,13 @@ public class TestRegionSplitPolicy {
|
|||
private HTableDescriptor htd;
|
||||
private HRegion mockRegion;
|
||||
private TreeMap<byte[], Store> stores;
|
||||
private static final byte [] TABLENAME = new byte [] {'t'};
|
||||
|
||||
@Before
|
||||
public void setupMocks() {
|
||||
conf = HBaseConfiguration.create();
|
||||
|
||||
HRegionInfo hri = new HRegionInfo(Bytes.toBytes("testtable"));
|
||||
|
||||
htd = new HTableDescriptor();
|
||||
HRegionInfo hri = new HRegionInfo(TABLENAME);
|
||||
htd = new HTableDescriptor(TABLENAME);
|
||||
mockRegion = Mockito.mock(HRegion.class);
|
||||
Mockito.doReturn(htd).when(mockRegion).getTableDesc();
|
||||
Mockito.doReturn(hri).when(mockRegion).getRegionInfo();
|
||||
|
@ -54,6 +61,65 @@ public class TestRegionSplitPolicy {
|
|||
Mockito.doReturn(stores).when(mockRegion).getStores();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIncreasingToUpperBoundRegionSplitPolicy() throws IOException {
|
||||
// Configure IncreasingToUpperBoundRegionSplitPolicy as our split policy
|
||||
conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
|
||||
IncreasingToUpperBoundRegionSplitPolicy.class.getName());
|
||||
// Now make it so the mock region has a RegionServerService that will
|
||||
// return 'online regions'.
|
||||
RegionServerServices rss = Mockito.mock(RegionServerServices.class);
|
||||
final List<HRegion> regions = new ArrayList<HRegion>();
|
||||
Mockito.when(rss.getOnlineRegions(TABLENAME)).thenReturn(regions);
|
||||
Mockito.when(mockRegion.getRegionServerServices()).thenReturn(rss);
|
||||
// Set max size for this 'table'.
|
||||
long maxSplitSize = 1024L;
|
||||
htd.setMaxFileSize(maxSplitSize);
|
||||
// Set flush size to 1/4. IncreasingToUpperBoundRegionSplitPolicy
|
||||
// grows by the square of the number of regions times flushsize each time.
|
||||
long flushSize = maxSplitSize/4;
|
||||
conf.setLong(HConstants.HREGION_MEMSTORE_FLUSH_SIZE, flushSize);
|
||||
htd.setMemStoreFlushSize(flushSize);
|
||||
// If RegionServerService with no regions in it -- 'online regions' == 0 --
|
||||
// then IncreasingToUpperBoundRegionSplitPolicy should act like a
|
||||
// ConstantSizePolicy
|
||||
IncreasingToUpperBoundRegionSplitPolicy policy =
|
||||
(IncreasingToUpperBoundRegionSplitPolicy)RegionSplitPolicy.create(mockRegion, conf);
|
||||
doConstantSizePolicyTests(policy);
|
||||
|
||||
// Add a store in excess of split size. Because there are "no regions"
|
||||
// on this server -- rss.getOnlineRegions is 0 -- then we should split
|
||||
// like a constantsizeregionsplitpolicy would
|
||||
Store mockStore = Mockito.mock(Store.class);
|
||||
Mockito.doReturn(2000L).when(mockStore).getSize();
|
||||
Mockito.doReturn(true).when(mockStore).canSplit();
|
||||
stores.put(new byte[]{1}, mockStore);
|
||||
// It should split
|
||||
assertTrue(policy.shouldSplit());
|
||||
|
||||
// Now test that we increase our split size as online regions for a table
|
||||
// grows. With one region, split size should be flushsize.
|
||||
regions.add(mockRegion);
|
||||
Mockito.doReturn(flushSize/2).when(mockStore).getSize();
|
||||
// Should not split since store is 1/2 flush size.
|
||||
assertFalse(policy.shouldSplit());
|
||||
// Set size of store to be > flush size and we should split
|
||||
Mockito.doReturn(flushSize + 1).when(mockStore).getSize();
|
||||
assertTrue(policy.shouldSplit());
|
||||
// Add another region to the 'online regions' on this server and we should
|
||||
// now be no longer be splittable since split size has gone up.
|
||||
regions.add(mockRegion);
|
||||
assertFalse(policy.shouldSplit());
|
||||
// Quadruple (2 squared) the store size and make sure its just over; verify it'll split
|
||||
Mockito.doReturn((flushSize * 2 * 2) + 1).when(mockStore).getSize();
|
||||
assertTrue(policy.shouldSplit());
|
||||
|
||||
// Finally assert that even if loads of regions, we'll split at max size
|
||||
assertEquals(maxSplitSize, policy.getSizeToCheck(1000));
|
||||
// Assert same is true if count of regions is zero.
|
||||
assertEquals(maxSplitSize, policy.getSizeToCheck(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateDefault() throws IOException {
|
||||
conf.setLong(HConstants.HREGION_MAX_FILESIZE, 1234L);
|
||||
|
@ -110,10 +176,16 @@ public class TestRegionSplitPolicy {
|
|||
@Test
|
||||
public void testConstantSizePolicy() throws IOException {
|
||||
htd.setMaxFileSize(1024L);
|
||||
|
||||
ConstantSizeRegionSplitPolicy policy =
|
||||
(ConstantSizeRegionSplitPolicy)RegionSplitPolicy.create(mockRegion, conf);
|
||||
doConstantSizePolicyTests(policy);
|
||||
}
|
||||
|
||||
/**
|
||||
* Run through tests for a ConstantSizeRegionSplitPolicy
|
||||
* @param policy
|
||||
*/
|
||||
private void doConstantSizePolicyTests(final ConstantSizeRegionSplitPolicy policy) {
|
||||
// For no stores, should not split
|
||||
assertFalse(policy.shouldSplit());
|
||||
|
||||
|
@ -141,6 +213,9 @@ public class TestRegionSplitPolicy {
|
|||
// Turn off forceSplit, should not split
|
||||
Mockito.doReturn(false).when(mockRegion).shouldForceSplit();
|
||||
assertFalse(policy.shouldSplit());
|
||||
|
||||
// Clear families we added above
|
||||
stores.clear();
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -178,5 +253,4 @@ public class TestRegionSplitPolicy {
|
|||
@org.junit.Rule
|
||||
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
|
||||
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue