diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
index 3ee593afca8..06bccd13abd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
@@ -274,6 +274,12 @@ public class RegionSplitter {
*
bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs
* myTable HexStringSplit
*
+ * create a table named 'myTable' with 50 pre-split regions,
+ * assuming the keys are decimal-encoded ASCII:
+ *
+ * - bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 50
+ * myTable DecimalStringSplit
+ *
* perform a rolling split of 'myTable' (i.e. 60 => 120 regions), # 2
* outstanding splits at a time, assuming keys are uniformly distributed
* bytes:
@@ -283,9 +289,9 @@ public class RegionSplitter {
*
*
*
- * There are two SplitAlgorithms built into RegionSplitter, HexStringSplit
- * and UniformSplit. These are different strategies for choosing region
- * boundaries. See their source code for details.
+ * There are three SplitAlgorithms built into RegionSplitter, HexStringSplit,
+ * DecimalStringSplit, and UniformSplit. These are different strategies for
+ * choosing region boundaries. See their source code for details.
*
* @param args
* Usage: RegionSplitter <TABLE> <SPLITALGORITHM>
@@ -353,9 +359,10 @@ public class RegionSplitter {
if (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
new HelpFormatter().printHelp("RegionSplitter \n"+
"SPLITALGORITHM is a java class name of a class implementing " +
- "SplitAlgorithm, or one of the special strings HexStringSplit " +
- "or UniformSplit, which are built-in split algorithms. " +
+ "SplitAlgorithm, or one of the special strings HexStringSplit or " +
+ "DecimalStringSplit or UniformSplit, which are built-in split algorithms. " +
"HexStringSplit treats keys as hexadecimal ASCII, and " +
+ "DecimalStringSplit treats keys as decimal ASCII, and " +
"UniformSplit treats keys as arbitrary bytes.", opt);
return;
}
@@ -660,6 +667,8 @@ public class RegionSplitter {
// their simple class name instead of a fully qualified class name.
if(splitClassName.equals(HexStringSplit.class.getSimpleName())) {
splitClass = HexStringSplit.class;
+ } else if (splitClassName.equals(DecimalStringSplit.class.getSimpleName())) {
+ splitClass = DecimalStringSplit.class;
} else if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
splitClass = UniformSplit.class;
} else {
@@ -893,15 +902,52 @@ public class RegionSplitter {
* Since this split algorithm uses hex strings as keys, it is easy to read &
* write in the shell but takes up more space and may be non-intuitive.
*/
- public static class HexStringSplit implements SplitAlgorithm {
+ public static class HexStringSplit extends NumberStringSplit {
final static String DEFAULT_MIN_HEX = "00000000";
final static String DEFAULT_MAX_HEX = "FFFFFFFF";
+ final static int RADIX_HEX = 16;
- String firstRow = DEFAULT_MIN_HEX;
- BigInteger firstRowInt = BigInteger.ZERO;
- String lastRow = DEFAULT_MAX_HEX;
- BigInteger lastRowInt = new BigInteger(lastRow, 16);
- int rowComparisonLength = lastRow.length();
+ public HexStringSplit() {
+ super(DEFAULT_MIN_HEX, DEFAULT_MAX_HEX, RADIX_HEX);
+ }
+
+ }
+
+ /**
+ * The format of a DecimalStringSplit region boundary is the ASCII representation of
+ * reversed sequential number, or any other uniformly distributed decimal value.
+ * Row are decimal-encoded long values in the range
+ * "00000000" => "99999999" and are left-padded with zeros to keep the
+ * same order lexicographically as if they were binary.
+ */
+ public static class DecimalStringSplit extends NumberStringSplit {
+ final static String DEFAULT_MIN_DEC = "00000000";
+ final static String DEFAULT_MAX_DEC = "99999999";
+ final static int RADIX_DEC = 10;
+
+ public DecimalStringSplit() {
+ super(DEFAULT_MIN_DEC, DEFAULT_MAX_DEC, RADIX_DEC);
+ }
+
+ }
+
+ public abstract static class NumberStringSplit implements SplitAlgorithm {
+
+ String firstRow;
+ BigInteger firstRowInt;
+ String lastRow;
+ BigInteger lastRowInt;
+ int rowComparisonLength;
+ int radix;
+
+ NumberStringSplit(String minRow, String maxRow, int radix) {
+ this.firstRow = minRow;
+ this.lastRow = maxRow;
+ this.radix = radix;
+ this.firstRowInt = BigInteger.ZERO;
+ this.lastRowInt = new BigInteger(lastRow, this.radix);
+ this.rowComparisonLength = lastRow.length();
+ }
public byte[] split(byte[] start, byte[] end) {
BigInteger s = convertToBigInteger(start);
@@ -973,18 +1019,18 @@ public class RegionSplitter {
public void setFirstRow(String userInput) {
firstRow = userInput;
- firstRowInt = new BigInteger(firstRow, 16);
+ firstRowInt = new BigInteger(firstRow, radix);
}
public void setLastRow(String userInput) {
lastRow = userInput;
- lastRowInt = new BigInteger(lastRow, 16);
+ lastRowInt = new BigInteger(lastRow, radix);
// Precondition: lastRow > firstRow, so last's length is the greater
rowComparisonLength = lastRow.length();
}
public byte[] strToRow(String in) {
- return convertToByte(new BigInteger(in, 16));
+ return convertToByte(new BigInteger(in, radix));
}
public String rowToStr(byte[] row) {
@@ -1037,8 +1083,8 @@ public class RegionSplitter {
* @param pad padding length
* @return byte corresponding to input BigInteger
*/
- public static byte[] convertToByte(BigInteger bigInteger, int pad) {
- String bigIntegerString = bigInteger.toString(16);
+ public byte[] convertToByte(BigInteger bigInteger, int pad) {
+ String bigIntegerString = bigInteger.toString(radix);
bigIntegerString = StringUtils.leftPad(bigIntegerString, pad, '0');
return Bytes.toBytes(bigIntegerString);
}
@@ -1060,7 +1106,7 @@ public class RegionSplitter {
* @return the corresponding BigInteger
*/
public BigInteger convertToBigInteger(byte[] row) {
- return (row.length > 0) ? new BigInteger(Bytes.toString(row), 16)
+ return (row.length > 0) ? new BigInteger(Bytes.toString(row), radix)
: BigInteger.ZERO;
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
index aa42616832f..9643443471b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
@@ -40,6 +40,7 @@ import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;
+import org.apache.hadoop.hbase.util.RegionSplitter.DecimalStringSplit;
import org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm;
import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;
import org.junit.AfterClass;
@@ -143,7 +144,7 @@ public class TestRegionSplitter {
byte[][] twoRegionsSplits = splitter.split(2);
assertEquals(1, twoRegionsSplits.length);
- assertArrayEquals(twoRegionsSplits[0], "80000000".getBytes());
+ assertArrayEquals("80000000".getBytes(), twoRegionsSplits[0]);
byte[][] threeRegionsSplits = splitter.split(3);
assertEquals(2, threeRegionsSplits.length);
@@ -163,21 +164,73 @@ public class TestRegionSplitter {
// Halfway between 00... and 20... should be 10...
splitPoint = splitter.split(firstRow, "20000000".getBytes());
- assertArrayEquals(splitPoint, "10000000".getBytes());
+ assertArrayEquals("10000000".getBytes(), splitPoint);
// Halfway between df... and ff... should be ef....
splitPoint = splitter.split("dfffffff".getBytes(), lastRow);
- assertArrayEquals(splitPoint,"efffffff".getBytes());
+ assertArrayEquals("efffffff".getBytes(), splitPoint);
// Check splitting region with multiple mappers per region
byte[][] splits = splitter.split("00000000".getBytes(), "30000000".getBytes(), 3, false);
assertEquals(2, splits.length);
- assertArrayEquals(splits[0], "10000000".getBytes());
- assertArrayEquals(splits[1], "20000000".getBytes());
+ assertArrayEquals("10000000".getBytes(), splits[0]);
+ assertArrayEquals("20000000".getBytes(), splits[1]);
splits = splitter.split("00000000".getBytes(), "20000000".getBytes(), 2, true);
assertEquals(3, splits.length);
- assertArrayEquals(splits[1], "10000000".getBytes());
+ assertArrayEquals("10000000".getBytes(), splits[1]);
+ }
+
+ /**
+ * Unit tests for the DecimalStringSplit algorithm. Makes sure it divides up the
+ * space of keys in the way that we expect.
+ */
+ @Test
+ public void unitTestDecimalStringSplit() {
+ DecimalStringSplit splitter = new DecimalStringSplit();
+ // Check splitting while starting from scratch
+
+ byte[][] twoRegionsSplits = splitter.split(2);
+ assertEquals(1, twoRegionsSplits.length);
+ assertArrayEquals("50000000".getBytes(), twoRegionsSplits[0]);
+
+ byte[][] threeRegionsSplits = splitter.split(3);
+ assertEquals(2, threeRegionsSplits.length);
+ byte[] expectedSplit0 = "33333333".getBytes();
+ assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
+ byte[] expectedSplit1 = "66666666".getBytes();
+ assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
+
+ // Check splitting existing regions that have start and end points
+ byte[] splitPoint = splitter.split("10000000".getBytes(), "30000000".getBytes());
+ assertArrayEquals("20000000".getBytes(), splitPoint);
+
+ byte[] lastRow = "99999999".getBytes();
+ assertArrayEquals(lastRow, splitter.lastRow());
+ byte[] firstRow = "00000000".getBytes();
+ assertArrayEquals(firstRow, splitter.firstRow());
+
+ // Halfway between 00... and 20... should be 10...
+ splitPoint = splitter.split(firstRow, "20000000".getBytes());
+ assertArrayEquals("10000000".getBytes(), splitPoint);
+
+ // Halfway between 00... and 19... should be 09...
+ splitPoint = splitter.split(firstRow, "19999999".getBytes());
+ assertArrayEquals("09999999".getBytes(), splitPoint);
+
+ // Halfway between 79... and 99... should be 89....
+ splitPoint = splitter.split("79999999".getBytes(), lastRow);
+ assertArrayEquals("89999999".getBytes(), splitPoint);
+
+ // Check splitting region with multiple mappers per region
+ byte[][] splits = splitter.split("00000000".getBytes(), "30000000".getBytes(), 3, false);
+ assertEquals(2, splits.length);
+ assertArrayEquals("10000000".getBytes(), splits[0]);
+ assertArrayEquals("20000000".getBytes(), splits[1]);
+
+ splits = splitter.split("00000000".getBytes(), "20000000".getBytes(), 2, true);
+ assertEquals(3, splits.length);
+ assertArrayEquals("10000000".getBytes(), splits[1]);
}
/**
@@ -248,6 +301,15 @@ public class TestRegionSplitter {
assertFalse(splitFailsPrecondition(algo, "0", "A", 11)); // should be fine
assertTrue(splitFailsPrecondition(algo, "0", "A", 12)); // too granular
+ algo = new DecimalStringSplit();
+ assertFalse(splitFailsPrecondition(algo)); // default settings are fine
+ assertFalse(splitFailsPrecondition(algo, "00", "99")); // custom is fine
+ assertTrue(splitFailsPrecondition(algo, "99", "00")); // range error
+ assertTrue(splitFailsPrecondition(algo, "99", "99")); // range error
+ assertFalse(splitFailsPrecondition(algo, "0", "2", 3)); // should be fine
+ assertFalse(splitFailsPrecondition(algo, "0", "9", 10)); // should be fine
+ assertTrue(splitFailsPrecondition(algo, "0", "9", 11)); // too granular
+
algo = new UniformSplit();
assertFalse(splitFailsPrecondition(algo)); // default settings are fine
assertFalse(splitFailsPrecondition(algo, "\\x00", "\\xAA")); // custom is fine