HBASE-24664 Some changing of split region by overall region size rather than a single store size (#2011)

Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
Signed-off-by: Duo Zhang <zhangduo@apache.org>
This commit is contained in:
bsglz 2020-07-13 17:00:48 +08:00 committed by GitHub
parent af1cc2fc44
commit 3bd54217a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 110 additions and 37 deletions

View File

@ -382,6 +382,13 @@ public final class HConstants {
/** Default maximum file size */ /** Default maximum file size */
public static final long DEFAULT_MAX_FILE_SIZE = 10 * 1024 * 1024 * 1024L; public static final long DEFAULT_MAX_FILE_SIZE = 10 * 1024 * 1024 * 1024L;
/** Conf key for if we should sum overall region files size when check to split */
public static final String OVERALL_HREGION_FILES =
"hbase.hregion.split.overallfiles";
/** Default overall region files */
public static final boolean DEFAULT_OVERALL_HREGION_FILES = true;
/** /**
* Max size of single row for Get's or Scan's without in-row scanning flag set. * Max size of single row for Get's or Scan's without in-row scanning flag set.
*/ */

View File

@ -772,6 +772,11 @@ possible configurations would overwhelm and obscure the important.
Maximum HFile size. If the sum of the sizes of a region's HFiles has grown to exceed this Maximum HFile size. If the sum of the sizes of a region's HFiles has grown to exceed this
value, the region is split in two.</description> value, the region is split in two.</description>
</property> </property>
<property>
<name>hbase.hregion.split.overallfiles</name>
<value>true</value>
<description>If we should sum overall region files size when check to split.</description>
</property>
<property> <property>
<name>hbase.hregion.majorcompaction</name> <name>hbase.hregion.majorcompaction</name>
<value>604800000</value> <value>604800000</value>

View File

@ -26,6 +26,9 @@ import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.procedure2.util.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** /**
* A {@link RegionSplitPolicy} implementation which splits a region * A {@link RegionSplitPolicy} implementation which splits a region
@ -38,10 +41,13 @@ import org.apache.hadoop.hbase.client.TableDescriptor;
*/ */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG) @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy { public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
private static final Logger LOG =
LoggerFactory.getLogger(ConstantSizeRegionSplitPolicy.class);
private static final Random RANDOM = new Random(); private static final Random RANDOM = new Random();
private long desiredMaxFileSize; private long desiredMaxFileSize;
private double jitterRate; private double jitterRate;
protected boolean overallHRegionFiles;
@Override @Override
protected void configureForRegion(HRegion region) { protected void configureForRegion(HRegion region) {
@ -55,6 +61,8 @@ public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
this.desiredMaxFileSize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, this.desiredMaxFileSize = conf.getLong(HConstants.HREGION_MAX_FILESIZE,
HConstants.DEFAULT_MAX_FILE_SIZE); HConstants.DEFAULT_MAX_FILE_SIZE);
} }
this.overallHRegionFiles = conf.getBoolean(HConstants.OVERALL_HREGION_FILES,
HConstants.DEFAULT_OVERALL_HREGION_FILES);
double jitter = conf.getDouble("hbase.hregion.max.filesize.jitter", 0.25D); double jitter = conf.getDouble("hbase.hregion.max.filesize.jitter", 0.25D);
this.jitterRate = (RANDOM.nextFloat() - 0.5D) * jitter; this.jitterRate = (RANDOM.nextFloat() - 0.5D) * jitter;
long jitterValue = (long) (this.desiredMaxFileSize * this.jitterRate); long jitterValue = (long) (this.desiredMaxFileSize * this.jitterRate);
@ -68,22 +76,10 @@ public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
@Override @Override
protected boolean shouldSplit() { protected boolean shouldSplit() {
boolean foundABigStore = false; if (!canSplit()) {
for (HStore store : region.getStores()) {
// If any of the stores are unable to split (eg they contain reference files)
// then don't split
if ((!store.canSplit())) {
return false; return false;
} }
return isExceedSize(desiredMaxFileSize);
// Mark if any store is big enough
if (store.getSize() > desiredMaxFileSize) {
foundABigStore = true;
}
}
return foundABigStore;
} }
long getDesiredMaxFileSize() { long getDesiredMaxFileSize() {
@ -94,4 +90,33 @@ public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy {
public boolean positiveJitterRate() { public boolean positiveJitterRate() {
return this.jitterRate > 0; return this.jitterRate > 0;
} }
/**
 * Check whether the region's data has grown past the given size threshold.
 * <p>
 * When {@code hbase.hregion.split.overallfiles} is true (the default), the sizes of all
 * stores in the region are summed and compared against {@code sizeToCheck}; otherwise the
 * region is considered big enough as soon as any single store exceeds {@code sizeToCheck}.
 * @param sizeToCheck the size limit, in bytes, to compare against
 * @return true if the region size exceeds {@code sizeToCheck}
 */
protected final boolean isExceedSize(long sizeToCheck) {
  if (overallHRegionFiles) {
    // Sum every store's size and compare the total against the threshold.
    long sumSize = 0;
    for (HStore store : region.getStores()) {
      sumSize += store.getSize();
    }
    if (sumSize > sizeToCheck) {
      // Note: the format string previously ended with a stray "{}" that had no
      // matching argument and rendered literally in the log output.
      LOG.debug("ShouldSplit because region size is big enough "
        + "size={}, sizeToCheck={}", StringUtils.humanSize(sumSize),
        StringUtils.humanSize(sizeToCheck));
      return true;
    }
  } else {
    // Legacy behavior: any single store crossing the threshold triggers a split.
    for (HStore store : region.getStores()) {
      long size = store.getSize();
      if (size > sizeToCheck) {
        LOG.debug("ShouldSplit because {} size={}, sizeToCheck={}",
          store.getColumnFamilyName(), StringUtils.humanSize(size),
          StringUtils.humanSize(sizeToCheck));
        return true;
      }
    }
  }
  return false;
}
} }

View File

@ -28,7 +28,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.procedure2.util.StringUtils;
/** /**
* Split size is the number of regions that are on this server that all are * Split size is the number of regions that are on this server that all are
@ -70,31 +69,18 @@ public class IncreasingToUpperBoundRegionSplitPolicy extends ConstantSizeRegionS
@Override @Override
protected boolean shouldSplit() { protected boolean shouldSplit() {
boolean foundABigStore = false; if (!canSplit()) {
return false;
}
// Get count of regions that have the same common table as this.region // Get count of regions that have the same common table as this.region
int tableRegionsCount = getCountOfCommonTableRegions(); int tableRegionsCount = getCountOfCommonTableRegions();
// Get size to check // Get size to check
long sizeToCheck = getSizeToCheck(tableRegionsCount); long sizeToCheck = getSizeToCheck(tableRegionsCount);
boolean shouldSplit = isExceedSize(sizeToCheck);
for (HStore store : region.getStores()) { if (shouldSplit) {
// If any of the stores is unable to split (eg they contain reference files) LOG.debug("regionsWithCommonTable={}", tableRegionsCount);
// then don't split
if (!store.canSplit()) {
return false;
} }
return shouldSplit;
// Mark if any store is big enough
long size = store.getSize();
if (size > sizeToCheck) {
LOG.debug("ShouldSplit because " + store.getColumnFamilyName() +
" size=" + StringUtils.humanSize(size) +
", sizeToCheck=" + StringUtils.humanSize(sizeToCheck) +
", regionsWithCommonTable=" + tableRegionsCount);
foundABigStore = true;
}
}
return foundABigStore;
} }
/** /**
@ -129,4 +115,5 @@ public class IncreasingToUpperBoundRegionSplitPolicy extends ConstantSizeRegionS
: Math.min(getDesiredMaxFileSize(), : Math.min(getDesiredMaxFileSize(),
initialSize * tableRegionsCount * tableRegionsCount * tableRegionsCount); initialSize * tableRegionsCount * tableRegionsCount * tableRegionsCount);
} }
} }

View File

@ -75,7 +75,7 @@ public abstract class RegionSplitPolicy extends Configured {
*/ */
protected boolean canSplit() { protected boolean canSplit() {
return !region.getRegionInfo().isMetaRegion() && region.isAvailable() && return !region.getRegionInfo().isMetaRegion() && region.isAvailable() &&
!region.hasReferences(); region.getStores().stream().allMatch(HStore::canSplit);
} }
/** /**

View File

@ -65,6 +65,7 @@ public class TestRegionSplitPolicy {
RegionInfo hri = RegionInfoBuilder.newBuilder(TABLENAME).build(); RegionInfo hri = RegionInfoBuilder.newBuilder(TABLENAME).build();
mockRegion = mock(HRegion.class); mockRegion = mock(HRegion.class);
doReturn(hri).when(mockRegion).getRegionInfo(); doReturn(hri).when(mockRegion).getRegionInfo();
doReturn(true).when(mockRegion).isAvailable();
stores = new ArrayList<>(); stores = new ArrayList<>();
doReturn(stores).when(mockRegion).getStores(); doReturn(stores).when(mockRegion).getStores();
} }
@ -153,6 +154,43 @@ public class TestRegionSplitPolicy {
assertWithinJitter(maxSplitSize, policy.getSizeToCheck(0)); assertWithinJitter(maxSplitSize, policy.getSizeToCheck(0));
} }
@Test
public void testIsExceedSize() throws IOException {
// Use ConstantSizeRegionSplitPolicy, whose isExceedSize() is under test here.
// (NOTE(review): the original comment named SteppingAllStoresSizeSplitPolicy,
// which is not the class configured below.)
conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
ConstantSizeRegionSplitPolicy.class.getName());
// Now make it so the mock region has a RegionServerService that will
// return 'online regions'.
RegionServerServices rss = mock(RegionServerServices.class);
final List<HRegion> regions = new ArrayList<>();
doReturn(regions).when(rss).getRegions(TABLENAME);
when(mockRegion.getRegionServerServices()).thenReturn(rss);
TableDescriptor td = TableDescriptorBuilder.newBuilder(TABLENAME).build();
doReturn(td).when(mockRegion).getTableDescriptor();
ConstantSizeRegionSplitPolicy policy =
(ConstantSizeRegionSplitPolicy) RegionSplitPolicy.create(mockRegion, conf);
regions.add(mockRegion);
// Three mock stores: 100 + 924 == 1024 (not greater than the threshold),
// while 100 + 925 == 1025 (greater than the threshold).
HStore mockStore1 = mock(HStore.class);
doReturn(100L).when(mockStore1).getSize();
HStore mockStore2 = mock(HStore.class);
doReturn(924L).when(mockStore2).getSize();
HStore mockStore3 = mock(HStore.class);
doReturn(925L).when(mockStore3).getSize();
// test sum of store's size not greater than sizeToCheck
// (hbase.hregion.split.overallfiles defaults to true, so sizes are summed)
stores.add(mockStore1);
stores.add(mockStore2);
assertFalse(policy.isExceedSize(1024));
stores.clear();
// test sum of store's size greater than sizeToCheck
stores.add(mockStore1);
stores.add(mockStore3);
assertTrue(policy.isExceedSize(1024));
}
@Test @Test
public void testBusyRegionSplitPolicy() throws Exception { public void testBusyRegionSplitPolicy() throws Exception {
doReturn(TableDescriptorBuilder.newBuilder(TABLENAME).build()).when(mockRegion) doReturn(TableDescriptorBuilder.newBuilder(TABLENAME).build()).when(mockRegion)

View File

@ -894,6 +894,17 @@ Time to sleep in between searches for work (in milliseconds).
`10737418240` `10737418240`
[[hbase.hregion.split.overallfiles]]
*`hbase.hregion.split.overallfiles`*::
+
.Description
If we should sum overall region files size when check to split.
+
.Default
`true`
[[hbase.hregion.majorcompaction]] [[hbase.hregion.majorcompaction]]
*`hbase.hregion.majorcompaction`*:: *`hbase.hregion.majorcompaction`*::
+ +