diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DelimitedKeyPrefixRegionSplitPolicy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DelimitedKeyPrefixRegionSplitPolicy.java
new file mode 100644
index 00000000000..c0940edb356
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DelimitedKeyPrefixRegionSplitPolicy.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.Arrays;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * A custom RegionSplitPolicy implementing a SplitPolicy that groups
+ * rows by a prefix of the row-key with a delimiter. Only the first delimiter
+ * for the row key will define the prefix of the row key that is used for grouping.
+ *
+ * This ensures that a region is not split "inside" a prefix of a row key.
+ * I.e. rows can be co-located in a region by their prefix.
+ *
+ * As an example, if you have row keys delimited with <code>_</code>, like
+ * <code>userid_eventtype_eventid</code>, and use prefix delimiter _, this split policy
+ * ensures that all rows starting with the same userid belong to the same region.
+ * @see KeyPrefixRegionSplitPolicy
+ */
+@InterfaceAudience.Private
+public class DelimitedKeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
+
+  private static final Log LOG = LogFactory
+      .getLog(DelimitedKeyPrefixRegionSplitPolicy.class);
+
+  /** Table descriptor key whose value is the delimiter that ends the row-key prefix. */
+  public static final String DELIMITER_KEY = "DelimitedKeyPrefixRegionSplitPolicy.delimiter";
+
+  /** Delimiter bytes read from the table descriptor; null until one has been configured. */
+  private byte[] delimiter = null;
+
+  /**
+   * Reads the delimiter from the table descriptor of the given region. If
+   * {@link #DELIMITER_KEY} is missing or empty, an error is logged and no delimiter is set,
+   * so {@link #getSplitPoint()} returns the parent policy's split point unchanged.
+   * @param region the region being configured; a null region leaves the delimiter unset
+   */
+  @Override
+  protected void configureForRegion(HRegion region) {
+    super.configureForRegion(region);
+    if (region != null) {
+
+      // read the delimiter from the table descriptor
+      String delimiterString = region.getTableDesc().getValue(DELIMITER_KEY);
+      if (delimiterString == null || delimiterString.length() == 0) {
+        LOG.error(DELIMITER_KEY + " not specified for table "
+            + region.getTableDesc().getNameAsString()
+            + ". Using default RegionSplitPolicy");
+        return;
+      }
+
+      delimiter = Bytes.toBytes(delimiterString);
+    }
+  }
+
+  /**
+   * Returns the parent policy's split point truncated just before the first occurrence of
+   * the configured delimiter, so that a split never falls inside a row-key prefix.
+   * @return the bytes preceding the first delimiter; or the parent's split point unchanged
+   *         (possibly null) when no delimiter is configured, the parent has no split point,
+   *         or the delimiter does not occur in the split point
+   */
+  @Override
+  protected byte[] getSplitPoint() {
+    byte[] splitPoint = super.getSplitPoint();
+
+    // Nothing to truncate without both a split point and a delimiter. The null check on
+    // splitPoint matters: the indexOf call below does not accept a null array.
+    if (splitPoint == null || delimiter == null) {
+      return splitPoint;
+    }
+
+    // find the first occurrence of delimiter in split point
+    int index = com.google.common.primitives.Bytes.indexOf(splitPoint, delimiter);
+    if (index < 0) {
+      LOG.warn("Delimiter " + Bytes.toString(delimiter) + " not found for split key "
+          + Bytes.toString(splitPoint));
+      return splitPoint;
+    }
+
+    // group split keys by a prefix: keep only the bytes in front of the delimiter
+    // (a found index is always smaller than splitPoint.length, so no clamping is needed)
+    return Arrays.copyOf(splitPoint, index);
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyPrefixRegionSplitPolicy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyPrefixRegionSplitPolicy.java
index 313b7626c63..3548a62cd2b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyPrefixRegionSplitPolicy.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyPrefixRegionSplitPolicy.java
@@ -28,13 +28,15 @@ import org.apache.hadoop.classification.InterfaceAudience;
* rows by a prefix of the row-key
*
* This ensures that a region is not split "inside" a prefix of a row key.
- * I.e. rows can be co-located in a regionb by their prefix.
+ * I.e. rows can be co-located in a region by their prefix.
*/
@InterfaceAudience.Private
public class KeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSplitPolicy {
private static final Log LOG = LogFactory
.getLog(KeyPrefixRegionSplitPolicy.class);
- public static final String PREFIX_LENGTH_KEY = "prefix_split_key_policy.prefix_length";
+ @Deprecated
+ public static final String PREFIX_LENGTH_KEY_DEPRECATED = "prefix_split_key_policy.prefix_length";
+ public static final String PREFIX_LENGTH_KEY = "KeyPrefixRegionSplitPolicy.prefix_length";
private int prefixLength = 0;
@@ -48,10 +50,14 @@ public class KeyPrefixRegionSplitPolicy extends IncreasingToUpperBoundRegionSpli
String prefixLengthString = region.getTableDesc().getValue(
PREFIX_LENGTH_KEY);
if (prefixLengthString == null) {
- LOG.error(PREFIX_LENGTH_KEY + " not specified for table "
- + region.getTableDesc().getNameAsString()
- + ". Using default RegionSplitPolicy");
- return;
+ //read the deprecated value
+ prefixLengthString = region.getTableDesc().getValue(PREFIX_LENGTH_KEY_DEPRECATED);
+ if (prefixLengthString == null) {
+ LOG.error(PREFIX_LENGTH_KEY + " not specified for table "
+ + region.getTableDesc().getNameAsString()
+ + ". Using default RegionSplitPolicy");
+ return;
+ }
}
try {
prefixLength = Integer.parseInt(prefixLengthString);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
index 766057b4591..d1633192d45 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionSplitPolicy.java
@@ -28,7 +28,6 @@ import java.util.List;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hbase.CompoundConfiguration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
@@ -251,4 +250,40 @@ public class TestRegionSplitPolicy {
Bytes.toString(policy.getSplitPoint()));
}
+  /**
+   * Checks that DelimitedKeyPrefixRegionSplitPolicy cuts the split point off in front of the
+   * first "," delimiter, and hands back a split point without a delimiter as it is.
+   */
+  @Test
+  public void testDelimitedKeyPrefixRegionSplitPolicy() throws IOException {
+    // table descriptor that selects the policy under test and uses "," as delimiter
+    HTableDescriptor delimitedHtd = new HTableDescriptor();
+    delimitedHtd.setValue(DelimitedKeyPrefixRegionSplitPolicy.DELIMITER_KEY, ",");
+    delimitedHtd.setValue(HTableDescriptor.SPLIT_POLICY,
+        DelimitedKeyPrefixRegionSplitPolicy.class.getName());
+
+    // one splittable store whose split point contains the delimiter
+    HStore delimitedStore = Mockito.mock(HStore.class);
+    Mockito.doReturn(Bytes.toBytes("ab,cd")).when(delimitedStore).getSplitPoint();
+    Mockito.doReturn(true).when(delimitedStore).canSplit();
+    Mockito.doReturn(2000L).when(delimitedStore).getSize();
+    stores.put(new byte[] { 1 }, delimitedStore);
+
+    HRegion delimitedRegion = Mockito.mock(HRegion.class);
+    Mockito.doReturn(stores).when(delimitedRegion).getStores();
+    Mockito.doReturn(delimitedHtd).when(delimitedRegion).getTableDesc();
+
+    DelimitedKeyPrefixRegionSplitPolicy policy =
+        (DelimitedKeyPrefixRegionSplitPolicy) RegionSplitPolicy.create(delimitedRegion, conf);
+    assertEquals("ab", Bytes.toString(policy.getSplitPoint()));
+
+    // stub a forced split with an explicit split point that contains the delimiter
+    Mockito.doReturn(Bytes.toBytes("efg,h")).when(delimitedRegion).getExplicitSplitPoint();
+    Mockito.doReturn(true).when(delimitedRegion).shouldForceSplit();
+
+    policy =
+        (DelimitedKeyPrefixRegionSplitPolicy) RegionSplitPolicy.create(delimitedRegion, conf);
+    assertEquals("efg", Bytes.toString(policy.getSplitPoint()));
+
+    // an explicit split point without the delimiter is expected back unmodified
+    Mockito.doReturn(Bytes.toBytes("ijk")).when(delimitedRegion).getExplicitSplitPoint();
+    assertEquals("ijk", Bytes.toString(policy.getSplitPoint()));
+  }
+
}