From df4a9d4ad3ad7b3b530be92c8f5c5942aa8d0093 Mon Sep 17 00:00:00 2001 From: larsh Date: Thu, 2 Feb 2012 23:00:06 +0000 Subject: [PATCH] HBASE-5304 Pluggable split key policy git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1239909 13f79535-47bb-0310-9956-ffa450edef68 --- src/docbkx/book.xml | 12 ++++ .../apache/hadoop/hbase/HTableDescriptor.java | 2 +- .../ConstantSizeRegionSplitPolicy.java | 7 ++- .../hadoop/hbase/regionserver/HRegion.java | 4 -- .../hbase/regionserver/RegionSplitPolicy.java | 12 ++-- .../regionserver/PrefixSplitKeyPolicy.java | 59 +++++++++++++++++++ .../regionserver/TestRegionSplitPolicy.java | 36 +++++++++++ 7 files changed, 120 insertions(+), 12 deletions(-) create mode 100644 src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java diff --git a/src/docbkx/book.xml b/src/docbkx/book.xml index b05ffbe4b76..ce478bca469 100644 --- a/src/docbkx/book.xml +++ b/src/docbkx/book.xml @@ -2002,6 +2002,18 @@ rs.close(); the parent's hosting RegionServer and then reports the split to the Master. See for how to manually manage splits (and for why you might do this) +
+        <title>Custom Split Policies</title>
+        <para>The default split policy can be overridden using a custom RegionSplitPolicy (HBase 0.94+).
+        Typically a custom split policy should extend HBase's default split policy: ConstantSizeRegionSplitPolicy.
+        </para>
+        <para>The policy can be set globally through the HBaseConfiguration used, or on a per-table basis:
+<programlisting>
+HTableDescriptor myHtd = ...;
+myHtd.setValue(HTableDescriptor.SPLIT_POLICY, MyCustomSplitPolicy.class.getName());
+</programlisting>
+        </para>
+      </section>
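The docbook addition above shows the per-table route. As a minimal sketch of the global route it also mentions (not part of the patch): it assumes the configuration key "hbase.regionserver.region.split.policy" (HConstants.HBASE_REGION_SPLIT_POLICY_KEY) is what RegionSplitPolicy.create() falls back to when the table descriptor carries no SPLIT_POLICY value; verify the key against the HBase release in use. MyCustomSplitPolicy, SplitPolicyConfigSketch and the table name are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTableDescriptor;

public class SplitPolicyConfigSketch {
  public static void main(String[] args) {
    // Cluster-wide default, used by every table that does not name its own policy.
    // The key name is an assumption; check HConstants/RegionSplitPolicy in your version.
    Configuration conf = HBaseConfiguration.create();
    conf.set("hbase.regionserver.region.split.policy",
        "org.example.MyCustomSplitPolicy");                     // hypothetical policy class

    // Per-table override: stored in the table descriptor and wins over the global key.
    HTableDescriptor myHtd = new HTableDescriptor("mytable");   // hypothetical table name
    myHtd.setValue(HTableDescriptor.SPLIT_POLICY, "org.example.MyCustomSplitPolicy");
  }
}

The per-table value travels with the table metadata, so different tables on one cluster can use different policies; the configuration key only supplies the cluster default.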
diff --git a/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java b/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java index 133759db6b7..472a22e4a8d 100644 --- a/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java +++ b/src/main/java/org/apache/hadoop/hbase/HTableDescriptor.java @@ -69,7 +69,7 @@ public class HTableDescriptor implements WritableComparable { private static final String FAMILIES = "FAMILIES"; - private static final String SPLIT_POLICY = "SPLIT_POLICY"; + public static final String SPLIT_POLICY = "SPLIT_POLICY"; /** * INTERNAL Used by HBase Shell interface to access this metadata diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java b/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java index 13b5bbf1ac1..e0c27f099e4 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/ConstantSizeRegionSplitPolicy.java @@ -23,12 +23,13 @@ import org.apache.hadoop.hbase.HConstants; * A {@link RegionSplitPolicy} implementation which splits a region * as soon as any of its store files exceeds a maximum configurable * size. + *

This is the default split policy.

*/ -class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy { +public class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy { private long desiredMaxFileSize; @Override - void configureForRegion(HRegion region) { + protected void configureForRegion(HRegion region) { super.configureForRegion(region); long maxFileSize = region.getTableDesc().getMaxFileSize(); @@ -41,7 +42,7 @@ class ConstantSizeRegionSplitPolicy extends RegionSplitPolicy { } @Override - boolean shouldSplit() { + protected boolean shouldSplit() { boolean force = region.shouldForceSplit(); boolean foundABigStore = false; diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 1d12bd1fb59..25cb31d5313 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -4863,10 +4863,6 @@ public class HRegion implements HeapSize { // , Writable{ return null; } - if (this.explicitSplitPoint != null) { - return this.explicitSplitPoint; - } - if (!splitPolicy.shouldSplit()) { return null; } diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java index 4b350f4da1f..8f1d1af57ec 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/RegionSplitPolicy.java @@ -32,7 +32,7 @@ import com.google.common.base.Preconditions; * A split policy determines when a region should be split. * {@see ConstantSizeRegionSplitPolicy} */ -abstract class RegionSplitPolicy extends Configured { +public abstract class RegionSplitPolicy extends Configured { private static final Class DEFAULT_SPLIT_POLICY_CLASS = ConstantSizeRegionSplitPolicy.class; @@ -45,7 +45,7 @@ abstract class RegionSplitPolicy extends Configured { * Upon construction, this method will be called with the region * to be governed. It will be called once and only once. */ - void configureForRegion(HRegion region) { + protected void configureForRegion(HRegion region) { Preconditions.checkState( this.region == null, "Policy already configured for region {}", @@ -57,14 +57,18 @@ abstract class RegionSplitPolicy extends Configured { /** * @return true if the specified region should be split. */ - abstract boolean shouldSplit(); + protected abstract boolean shouldSplit(); /** * @return the key at which the region should be split, or null * if it cannot be split. This will only be called if shouldSplit * previously returned true. */ - byte[] getSplitPoint() { + protected byte[] getSplitPoint() { + byte[] explicitSplitPoint = this.region.getExplicitSplitPoint(); + if (explicitSplitPoint != null) { + return explicitSplitPoint; + } Map stores = region.getStores(); byte[] splitPointFromLargestStore = null; diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java b/src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java new file mode 100644 index 00000000000..6b90bd637d1 --- /dev/null +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/PrefixSplitKeyPolicy.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.Arrays; + +/** + * A custom RegionSplitPolicy for testing. + * This class also demonstrates how to implement a SplitPolicy that groups + * rows by a prefix of the row-key + * + * This ensures that a region is not split "inside" + * a prefix of a row key. I.e. rows can be co-located by + * their prefix. + */ +public class PrefixSplitKeyPolicy extends ConstantSizeRegionSplitPolicy { + public static String PREFIX_LENGTH_KEY = "prefix_split_key_policy.prefix_length"; + + private int prefix_length; + + @Override + protected void configureForRegion(HRegion region) { + super.configureForRegion(region); + + if (region != null) { + // this demonstrates how a RegionSplitPolicy can be configured + // through HTableDescriptor values + prefix_length = Integer.parseInt(region.getTableDesc().getValue( + PREFIX_LENGTH_KEY)); + } + } + + @Override + protected byte[] getSplitPoint() { + byte[] splitPoint = super.getSplitPoint(); + if (splitPoint != null && splitPoint.length > 0) { + // group split keys by a prefix + return Arrays.copyOf(splitPoint, + Math.min(prefix_length, splitPoint.length)); + } else { + return splitPoint; + } + } +} diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java index 47d07d6d52d..332791f8464 100644 --- a/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.regionserver; import static org.junit.Assert.*; import java.io.IOException; +import java.util.Arrays; import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; @@ -71,6 +72,41 @@ public class TestRegionSplitPolicy { assertEquals(9999L, policy.getDesiredMaxFileSize()); } + /** + * Test setting up a customized split policy + */ + @Test + public void testCustomPolicy() throws IOException { + HTableDescriptor myHtd = new HTableDescriptor(); + myHtd.setValue(HTableDescriptor.SPLIT_POLICY, + PrefixSplitKeyPolicy.class.getName()); + myHtd.setValue(PrefixSplitKeyPolicy.PREFIX_LENGTH_KEY, String.valueOf(2)); + + HRegion myMockRegion = Mockito.mock(HRegion.class); + Mockito.doReturn(myHtd).when(myMockRegion).getTableDesc(); + Mockito.doReturn(stores).when(myMockRegion).getStores(); + + Store mockStore = Mockito.mock(Store.class); + Mockito.doReturn(2000L).when(mockStore).getSize(); + Mockito.doReturn(true).when(mockStore).canSplit(); + Mockito.doReturn(Bytes.toBytes("abcd")).when(mockStore).getSplitPoint(); + stores.put(new byte[] { 1 }, mockStore); + + PrefixSplitKeyPolicy policy = (PrefixSplitKeyPolicy) RegionSplitPolicy + .create(myMockRegion, conf); + + assertEquals("ab", Bytes.toString(policy.getSplitPoint())); + + Mockito.doReturn(true).when(myMockRegion).shouldForceSplit(); + 
Mockito.doReturn(Bytes.toBytes("efgh")).when(myMockRegion) + .getExplicitSplitPoint(); + + policy = (PrefixSplitKeyPolicy) RegionSplitPolicy + .create(myMockRegion, conf); + + assertEquals("ef", Bytes.toString(policy.getSplitPoint())); + } + @Test public void testConstantSizePolicy() throws IOException { htd.setMaxFileSize(1024L);
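As a usage sketch (not part of the patch), the PrefixSplitKeyPolicy above could be wired onto a new table as shown below. The table and column family names are made up, and because the policy class lives under src/test in this patch, a real deployment would ship its own copy of the class on the RegionServer classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.regionserver.PrefixSplitKeyPolicy;

public class CreatePrefixSplitTable {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();

    HTableDescriptor htd = new HTableDescriptor("events");    // hypothetical table name
    htd.addFamily(new HColumnDescriptor("d"));                 // hypothetical column family

    // Use the prefix-aware policy from this patch and keep all rows that share
    // the first 4 bytes of their row key in the same region.
    htd.setValue(HTableDescriptor.SPLIT_POLICY, PrefixSplitKeyPolicy.class.getName());
    htd.setValue(PrefixSplitKeyPolicy.PREFIX_LENGTH_KEY, String.valueOf(4));

    HBaseAdmin admin = new HBaseAdmin(conf);
    admin.createTable(htd);
    admin.close();
  }
}

With this setup, shouldSplit() in ConstantSizeRegionSplitPolicy triggers once a store exceeds the configured maximum size, RegionSplitPolicy.getSplitPoint() proposes the split key of the largest store (unless an explicit split point was requested), and PrefixSplitKeyPolicy truncates that key to the first 4 bytes, so rows sharing the prefix stay co-located after the split.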