HBASE-19051 Add new split algorithm for num string
Signed-off-by: tedyu <yuzhihong@gmail.com>
This commit is contained in:
parent
4bf71c3a1a
commit
ae6a951658
|
@ -258,6 +258,12 @@ public class RegionSplitter {
|
||||||
* <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs
|
* <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs
|
||||||
* myTable HexStringSplit
|
* myTable HexStringSplit
|
||||||
* </ul>
|
* </ul>
|
||||||
|
* <li>create a table named 'myTable' with 50 pre-split regions,
|
||||||
|
* assuming the keys are decimal-encoded ASCII:
|
||||||
|
* <ul>
|
||||||
|
* <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 50
|
||||||
|
* myTable DecimalStringSplit
|
||||||
|
* </ul>
|
||||||
* <li>perform a rolling split of 'myTable' (i.e. 60 => 120 regions), # 2
|
* <li>perform a rolling split of 'myTable' (i.e. 60 => 120 regions), # 2
|
||||||
* outstanding splits at a time, assuming keys are uniformly distributed
|
* outstanding splits at a time, assuming keys are uniformly distributed
|
||||||
* bytes:
|
* bytes:
|
||||||
|
@ -267,9 +273,9 @@ public class RegionSplitter {
|
||||||
* </ul>
|
* </ul>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* There are two SplitAlgorithms built into RegionSplitter, HexStringSplit
|
* There are three SplitAlgorithms built into RegionSplitter, HexStringSplit,
|
||||||
* and UniformSplit. These are different strategies for choosing region
|
* DecimalStringSplit, and UniformSplit. These are different strategies for
|
||||||
* boundaries. See their source code for details.
|
* choosing region boundaries. See their source code for details.
|
||||||
*
|
*
|
||||||
* @param args
|
* @param args
|
||||||
* Usage: RegionSplitter <TABLE> <SPLITALGORITHM>
|
* Usage: RegionSplitter <TABLE> <SPLITALGORITHM>
|
||||||
|
@ -337,9 +343,10 @@ public class RegionSplitter {
|
||||||
if (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
|
if (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
|
||||||
new HelpFormatter().printHelp("RegionSplitter <TABLE> <SPLITALGORITHM>\n"+
|
new HelpFormatter().printHelp("RegionSplitter <TABLE> <SPLITALGORITHM>\n"+
|
||||||
"SPLITALGORITHM is a java class name of a class implementing " +
|
"SPLITALGORITHM is a java class name of a class implementing " +
|
||||||
"SplitAlgorithm, or one of the special strings HexStringSplit " +
|
"SplitAlgorithm, or one of the special strings HexStringSplit or " +
|
||||||
"or UniformSplit, which are built-in split algorithms. " +
|
"DecimalStringSplit or UniformSplit, which are built-in split algorithms. " +
|
||||||
"HexStringSplit treats keys as hexadecimal ASCII, and " +
|
"HexStringSplit treats keys as hexadecimal ASCII, and " +
|
||||||
|
"DecimalStringSplit treats keys as decimal ASCII, and " +
|
||||||
"UniformSplit treats keys as arbitrary bytes.", opt);
|
"UniformSplit treats keys as arbitrary bytes.", opt);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -644,6 +651,8 @@ public class RegionSplitter {
|
||||||
// their simple class name instead of a fully qualified class name.
|
// their simple class name instead of a fully qualified class name.
|
||||||
if(splitClassName.equals(HexStringSplit.class.getSimpleName())) {
|
if(splitClassName.equals(HexStringSplit.class.getSimpleName())) {
|
||||||
splitClass = HexStringSplit.class;
|
splitClass = HexStringSplit.class;
|
||||||
|
} else if (splitClassName.equals(DecimalStringSplit.class.getSimpleName())) {
|
||||||
|
splitClass = DecimalStringSplit.class;
|
||||||
} else if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
|
} else if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
|
||||||
splitClass = UniformSplit.class;
|
splitClass = UniformSplit.class;
|
||||||
} else {
|
} else {
|
||||||
|
@ -877,15 +886,52 @@ public class RegionSplitter {
|
||||||
* Since this split algorithm uses hex strings as keys, it is easy to read &
|
* Since this split algorithm uses hex strings as keys, it is easy to read &
|
||||||
* write in the shell but takes up more space and may be non-intuitive.
|
* write in the shell but takes up more space and may be non-intuitive.
|
||||||
*/
|
*/
|
||||||
public static class HexStringSplit implements SplitAlgorithm {
|
public static class HexStringSplit extends NumberStringSplit {
|
||||||
final static String DEFAULT_MIN_HEX = "00000000";
|
final static String DEFAULT_MIN_HEX = "00000000";
|
||||||
final static String DEFAULT_MAX_HEX = "FFFFFFFF";
|
final static String DEFAULT_MAX_HEX = "FFFFFFFF";
|
||||||
|
final static int RADIX_HEX = 16;
|
||||||
|
|
||||||
String firstRow = DEFAULT_MIN_HEX;
|
public HexStringSplit() {
|
||||||
BigInteger firstRowInt = BigInteger.ZERO;
|
super(DEFAULT_MIN_HEX, DEFAULT_MAX_HEX, RADIX_HEX);
|
||||||
String lastRow = DEFAULT_MAX_HEX;
|
}
|
||||||
BigInteger lastRowInt = new BigInteger(lastRow, 16);
|
|
||||||
int rowComparisonLength = lastRow.length();
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The format of a DecimalStringSplit region boundary is the ASCII representation of
|
||||||
|
* a reversed sequential number, or any other uniformly distributed decimal value.
|
||||||
|
* Rows are decimal-encoded long values in the range
|
||||||
|
* <b>"00000000" => "99999999"</b> and are left-padded with zeros to keep the
|
||||||
|
* same order lexicographically as if they were binary.
|
||||||
|
*/
|
||||||
|
public static class DecimalStringSplit extends NumberStringSplit {
|
||||||
|
final static String DEFAULT_MIN_DEC = "00000000";
|
||||||
|
final static String DEFAULT_MAX_DEC = "99999999";
|
||||||
|
final static int RADIX_DEC = 10;
|
||||||
|
|
||||||
|
public DecimalStringSplit() {
|
||||||
|
super(DEFAULT_MIN_DEC, DEFAULT_MAX_DEC, RADIX_DEC);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract static class NumberStringSplit implements SplitAlgorithm {
|
||||||
|
|
||||||
|
String firstRow;
|
||||||
|
BigInteger firstRowInt;
|
||||||
|
String lastRow;
|
||||||
|
BigInteger lastRowInt;
|
||||||
|
int rowComparisonLength;
|
||||||
|
int radix;
|
||||||
|
|
||||||
|
NumberStringSplit(String minRow, String maxRow, int radix) {
|
||||||
|
this.firstRow = minRow;
|
||||||
|
this.lastRow = maxRow;
|
||||||
|
this.radix = radix;
|
||||||
|
this.firstRowInt = BigInteger.ZERO;
|
||||||
|
this.lastRowInt = new BigInteger(lastRow, this.radix);
|
||||||
|
this.rowComparisonLength = lastRow.length();
|
||||||
|
}
|
||||||
|
|
||||||
public byte[] split(byte[] start, byte[] end) {
|
public byte[] split(byte[] start, byte[] end) {
|
||||||
BigInteger s = convertToBigInteger(start);
|
BigInteger s = convertToBigInteger(start);
|
||||||
|
@ -924,18 +970,18 @@ public class RegionSplitter {
|
||||||
|
|
||||||
public void setFirstRow(String userInput) {
|
public void setFirstRow(String userInput) {
|
||||||
firstRow = userInput;
|
firstRow = userInput;
|
||||||
firstRowInt = new BigInteger(firstRow, 16);
|
firstRowInt = new BigInteger(firstRow, radix);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setLastRow(String userInput) {
|
public void setLastRow(String userInput) {
|
||||||
lastRow = userInput;
|
lastRow = userInput;
|
||||||
lastRowInt = new BigInteger(lastRow, 16);
|
lastRowInt = new BigInteger(lastRow, radix);
|
||||||
// Precondition: lastRow > firstRow, so last's length is the greater
|
// Precondition: lastRow > firstRow, so last's length is the greater
|
||||||
rowComparisonLength = lastRow.length();
|
rowComparisonLength = lastRow.length();
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] strToRow(String in) {
|
public byte[] strToRow(String in) {
|
||||||
return convertToByte(new BigInteger(in, 16));
|
return convertToByte(new BigInteger(in, radix));
|
||||||
}
|
}
|
||||||
|
|
||||||
public String rowToStr(byte[] row) {
|
public String rowToStr(byte[] row) {
|
||||||
|
@ -988,8 +1034,8 @@ public class RegionSplitter {
|
||||||
* @param pad padding length
|
* @param pad padding length
|
||||||
* @return byte corresponding to input BigInteger
|
* @return byte corresponding to input BigInteger
|
||||||
*/
|
*/
|
||||||
public static byte[] convertToByte(BigInteger bigInteger, int pad) {
|
public byte[] convertToByte(BigInteger bigInteger, int pad) {
|
||||||
String bigIntegerString = bigInteger.toString(16);
|
String bigIntegerString = bigInteger.toString(radix);
|
||||||
bigIntegerString = StringUtils.leftPad(bigIntegerString, pad, '0');
|
bigIntegerString = StringUtils.leftPad(bigIntegerString, pad, '0');
|
||||||
return Bytes.toBytes(bigIntegerString);
|
return Bytes.toBytes(bigIntegerString);
|
||||||
}
|
}
|
||||||
|
@ -1011,7 +1057,7 @@ public class RegionSplitter {
|
||||||
* @return the corresponding BigInteger
|
* @return the corresponding BigInteger
|
||||||
*/
|
*/
|
||||||
public BigInteger convertToBigInteger(byte[] row) {
|
public BigInteger convertToBigInteger(byte[] row) {
|
||||||
return (row.length > 0) ? new BigInteger(Bytes.toString(row), 16)
|
return (row.length > 0) ? new BigInteger(Bytes.toString(row), radix)
|
||||||
: BigInteger.ZERO;
|
: BigInteger.ZERO;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.client.HTable;
|
import org.apache.hadoop.hbase.client.HTable;
|
||||||
import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;
|
import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;
|
||||||
|
import org.apache.hadoop.hbase.util.RegionSplitter.DecimalStringSplit;
|
||||||
import org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm;
|
import org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm;
|
||||||
import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;
|
import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
|
@ -137,7 +138,7 @@ public class TestRegionSplitter {
|
||||||
|
|
||||||
byte[][] twoRegionsSplits = splitter.split(2);
|
byte[][] twoRegionsSplits = splitter.split(2);
|
||||||
assertEquals(1, twoRegionsSplits.length);
|
assertEquals(1, twoRegionsSplits.length);
|
||||||
assertArrayEquals(twoRegionsSplits[0], "80000000".getBytes());
|
assertArrayEquals("80000000".getBytes(), twoRegionsSplits[0]);
|
||||||
|
|
||||||
byte[][] threeRegionsSplits = splitter.split(3);
|
byte[][] threeRegionsSplits = splitter.split(3);
|
||||||
assertEquals(2, threeRegionsSplits.length);
|
assertEquals(2, threeRegionsSplits.length);
|
||||||
|
@ -157,11 +158,53 @@ public class TestRegionSplitter {
|
||||||
|
|
||||||
// Halfway between 00... and 20... should be 10...
|
// Halfway between 00... and 20... should be 10...
|
||||||
splitPoint = splitter.split(firstRow, "20000000".getBytes());
|
splitPoint = splitter.split(firstRow, "20000000".getBytes());
|
||||||
assertArrayEquals(splitPoint, "10000000".getBytes());
|
assertArrayEquals("10000000".getBytes(), splitPoint);
|
||||||
|
|
||||||
// Halfway between df... and ff... should be ef....
|
// Halfway between df... and ff... should be ef....
|
||||||
splitPoint = splitter.split("dfffffff".getBytes(), lastRow);
|
splitPoint = splitter.split("dfffffff".getBytes(), lastRow);
|
||||||
assertArrayEquals(splitPoint,"efffffff".getBytes());
|
assertArrayEquals("efffffff".getBytes(), splitPoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unit tests for the DecimalStringSplit algorithm. Makes sure it divides up the
|
||||||
|
* space of keys in the way that we expect.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void unitTestDecimalStringSplit() {
|
||||||
|
DecimalStringSplit splitter = new DecimalStringSplit();
|
||||||
|
// Check splitting while starting from scratch
|
||||||
|
|
||||||
|
byte[][] twoRegionsSplits = splitter.split(2);
|
||||||
|
assertEquals(1, twoRegionsSplits.length);
|
||||||
|
assertArrayEquals("50000000".getBytes(), twoRegionsSplits[0]);
|
||||||
|
|
||||||
|
byte[][] threeRegionsSplits = splitter.split(3);
|
||||||
|
assertEquals(2, threeRegionsSplits.length);
|
||||||
|
byte[] expectedSplit0 = "33333333".getBytes();
|
||||||
|
assertArrayEquals(expectedSplit0, threeRegionsSplits[0]);
|
||||||
|
byte[] expectedSplit1 = "66666666".getBytes();
|
||||||
|
assertArrayEquals(expectedSplit1, threeRegionsSplits[1]);
|
||||||
|
|
||||||
|
// Check splitting existing regions that have start and end points
|
||||||
|
byte[] splitPoint = splitter.split("10000000".getBytes(), "30000000".getBytes());
|
||||||
|
assertArrayEquals("20000000".getBytes(), splitPoint);
|
||||||
|
|
||||||
|
byte[] lastRow = "99999999".getBytes();
|
||||||
|
assertArrayEquals(lastRow, splitter.lastRow());
|
||||||
|
byte[] firstRow = "00000000".getBytes();
|
||||||
|
assertArrayEquals(firstRow, splitter.firstRow());
|
||||||
|
|
||||||
|
// Halfway between 00... and 20... should be 10...
|
||||||
|
splitPoint = splitter.split(firstRow, "20000000".getBytes());
|
||||||
|
assertArrayEquals("10000000".getBytes(), splitPoint);
|
||||||
|
|
||||||
|
// Halfway between 00... and 19... should be 09...
|
||||||
|
splitPoint = splitter.split(firstRow, "19999999".getBytes());
|
||||||
|
assertArrayEquals("09999999".getBytes(), splitPoint);
|
||||||
|
|
||||||
|
// Halfway between 79... and 99... should be 89...
|
||||||
|
splitPoint = splitter.split("79999999".getBytes(), lastRow);
|
||||||
|
assertArrayEquals("89999999".getBytes(), splitPoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -223,6 +266,15 @@ public class TestRegionSplitter {
|
||||||
assertFalse(splitFailsPrecondition(algo, "0", "A", 11)); // should be fine
|
assertFalse(splitFailsPrecondition(algo, "0", "A", 11)); // should be fine
|
||||||
assertTrue(splitFailsPrecondition(algo, "0", "A", 12)); // too granular
|
assertTrue(splitFailsPrecondition(algo, "0", "A", 12)); // too granular
|
||||||
|
|
||||||
|
algo = new DecimalStringSplit();
|
||||||
|
assertFalse(splitFailsPrecondition(algo)); // default settings are fine
|
||||||
|
assertFalse(splitFailsPrecondition(algo, "00", "99")); // custom is fine
|
||||||
|
assertTrue(splitFailsPrecondition(algo, "99", "00")); // range error
|
||||||
|
assertTrue(splitFailsPrecondition(algo, "99", "99")); // range error
|
||||||
|
assertFalse(splitFailsPrecondition(algo, "0", "2", 3)); // should be fine
|
||||||
|
assertFalse(splitFailsPrecondition(algo, "0", "9", 10)); // should be fine
|
||||||
|
assertTrue(splitFailsPrecondition(algo, "0", "9", 11)); // too granular
|
||||||
|
|
||||||
algo = new UniformSplit();
|
algo = new UniformSplit();
|
||||||
assertFalse(splitFailsPrecondition(algo)); // default settings are fine
|
assertFalse(splitFailsPrecondition(algo)); // default settings are fine
|
||||||
assertFalse(splitFailsPrecondition(algo, "\\x00", "\\xAA")); // custom is fine
|
assertFalse(splitFailsPrecondition(algo, "\\x00", "\\xAA")); // custom is fine
|
||||||
|
|
Loading…
Reference in New Issue