[jira] [HBASE-4627] Ability to specify a custom start/end to RegionSplitter

Summary:
HBASE-4627 added a custom start/end row to RegionSplitter. It also fixed an
off-by-one error caused by the end row being prefix-inclusive rather than
exclusive.

HBASE-4489 (Better key splitting in RegionSplitter, https://issues.apache.org/jira/browse/HBASE-4489) changed the default endKey on HexStringSplit from 7FFF... to FFFF...  While this is correct, existing users of the 0.90 RegionSplitter have 7FFF... as the end key in their schema, and the last region will not split properly under this new code. We need to let the user specify a custom start/end key range for situations like this. Optimally, we should also write the start/end key into META so we could figure this out implicitly instead of requiring the user to specify it explicitly.
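For a table that was pre-split with the 0.90 tool, the new --firstrow/--lastrow options added in this change let a rolling split keep the legacy key range. A hypothetical invocation is sketched below; the table name is a placeholder, and -r/-o are assumed to be the tool's existing rolling-split and outstanding-split flags:

    hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 \
        --lastrow 7FFFFFFF myLegacyTable HexStringSplit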

Test Plan:
 - mvn test -Dtest=TestRegionSplitter

CC: JIRA

Reviewers: DUMMY_REVIEWER

Differential Revision: 39

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1196772 13f79535-47bb-0310-9956-ffa450edef68
Nicolas Spiegelberg, 2011-11-02 19:24:18 +00:00
commit 119ae0c4ac
parent 5721a5ac53
3 changed files with 308 additions and 111 deletions

Bytes.java

@@ -1337,21 +1337,48 @@ public class Bytes {
    * @return Array of dividing values
    */
   public static byte [][] split(final byte [] a, final byte [] b, final int num) {
-    byte[][] ret = new byte[num+2][];
+    return split(a, b, false, num);
+  }
+
+  /**
+   * Split passed range. Expensive operation relatively. Uses BigInteger math.
+   * Useful splitting ranges for MapReduce jobs.
+   * @param a Beginning of range
+   * @param b End of range
+   * @param inclusive Whether the end of range is prefix-inclusive or is
+   * considered an exclusive boundary. Automatic splits are generally exclusive
+   * and manual splits with an explicit range utilize an inclusive end of range.
+   * @param num Number of times to split range. Pass 1 if you want to split
+   * the range in two; i.e. one split.
+   * @return Array of dividing values
+   */
+  public static byte[][] split(final byte[] a, final byte[] b,
+      boolean inclusive, final int num) {
+    byte[][] ret = new byte[num + 2][];
     int i = 0;
-    Iterable<byte[]> iter = iterateOnSplits(a, b, num);
-    if (iter == null) return null;
+    Iterable<byte[]> iter = iterateOnSplits(a, b, inclusive, num);
+    if (iter == null)
+      return null;
     for (byte[] elem : iter) {
       ret[i++] = elem;
     }
     return ret;
   }

   /**
-   * Iterate over keys within the passed inclusive range.
+   * Iterate over keys within the passed range, splitting at an [a,b) boundary.
+   */
+  public static Iterable<byte[]> iterateOnSplits(final byte[] a,
+      final byte[] b, final int num)
+  {
+    return iterateOnSplits(a, b, false, num);
+  }
+
+  /**
+   * Iterate over keys within the passed range.
    */
   public static Iterable<byte[]> iterateOnSplits(
-      final byte[] a, final byte[]b, final int num)
+      final byte[] a, final byte[]b, boolean inclusive, final int num)
   {
     byte [] aPadded;
     byte [] bPadded;
@@ -1374,7 +1401,10 @@ public class Bytes {
     byte [] prependHeader = {1, 0};
     final BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
     final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
-    final BigInteger diffBI = stopBI.subtract(startBI);
+    BigInteger diffBI = stopBI.subtract(startBI);
+    if (inclusive) {
+      diffBI = diffBI.add(BigInteger.ONE);
+    }
     final BigInteger splitsBI = BigInteger.valueOf(num + 1);
     if(diffBI.compareTo(splitsBI) < 0) {
       return null;
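The off-by-one fix is easiest to see with plain BigInteger arithmetic. Below is a minimal, self-contained sketch (JDK only, not the HBase code itself) that mirrors the diffBI computation above: with an exclusive end, the first boundary of a 16-way split over 00000000..FFFFFFFF lands on 0FFFFFFF, while the inclusive +1 moves it to 10000000, which is what the updated tests further down expect.

    import java.math.BigInteger;

    public class InclusiveSplitSketch {
      // Mirrors the diffBI arithmetic in iterateOnSplits: the range size is
      // (stop - start), plus one when the end of the range is inclusive.
      static BigInteger firstBoundary(BigInteger start, BigInteger stop,
          boolean inclusive, int num) {
        BigInteger diff = stop.subtract(start);
        if (inclusive) {
          diff = diff.add(BigInteger.ONE);        // count the end key itself
        }
        BigInteger step = diff.divide(BigInteger.valueOf(num + 1));
        return start.add(step);                   // first split boundary
      }

      public static void main(String[] args) {
        BigInteger start = BigInteger.ZERO;                // "00000000"
        BigInteger stop = new BigInteger("FFFFFFFF", 16);  // "FFFFFFFF"
        // Exclusive end (old behavior): prints fffffff, i.e. 0x0FFFFFFF.
        System.out.println(firstBoundary(start, stop, false, 15).toString(16));
        // Inclusive end (new behavior): prints 10000000.
        System.out.println(firstBoundary(start, stop, true, 15).toString(16));
      }
    }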

RegionSplitter.java

@@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.util;
 import java.io.IOException;
 import java.math.BigInteger;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.LinkedList;
@@ -150,9 +151,9 @@ public class RegionSplitter {
      * Split a pre-existing region into 2 regions.
      *
      * @param start
-     *          row
+     *          first row (inclusive)
      * @param end
-     *          row
+     *          last row (exclusive)
      * @return the split row to use
      */
     byte[] split(byte[] start, byte[] end);
@@ -163,8 +164,11 @@ public class RegionSplitter {
      * @param numRegions
      *          number of regions to split the table into
      *
-     * @return array of split keys for the initial regions of the table. The length of the
-     *         returned array should be numRegions-1.
+     * @throws RuntimeException
+     *           user input is validated at this time. may throw a runtime
+     *           exception in response to a parse failure
+     * @return array of split keys for the initial regions of the table. The
+     *         length of the returned array should be numRegions-1.
      */
     byte[][] split(int numRegions);
@@ -186,6 +190,27 @@ public class RegionSplitter {
      */
     byte[] lastRow();

+    /**
+     * In HBase, the last row is represented by an empty byte array. Set this
+     * value to help the split code understand how to evenly divide the first
+     * region.
+     *
+     * @param userInput
+     *          raw user input (may throw RuntimeException on parse failure)
+     */
+    void setFirstRow(String userInput);
+
+    /**
+     * In HBase, the last row is represented by an empty byte array. Set this
+     * value to help the split code understand how to evenly divide the last
+     * region. Note that this last row is inclusive for all rows sharing the
+     * same prefix.
+     *
+     * @param userInput
+     *          raw user input (may throw RuntimeException on parse failure)
+     */
+    void setLastRow(String userInput);
+
     /**
      * @param input
      *          user or file input for row
@@ -263,6 +288,10 @@ public class RegionSplitter {
     opt.addOption(OptionBuilder.withArgName("count").hasArg().withDescription(
         "Max outstanding splits that have unfinished major compactions")
         .create("o"));
+    opt.addOption(null, "firstrow", true,
+        "First Row in Table for Split Algorithm");
+    opt.addOption(null, "lastrow", true,
+        "Last Row in Table for Split Algorithm");
     opt.addOption(null, "risky", false,
         "Skip verification steps to complete quickly."
         + "STRONGLY DISCOURAGED for production systems. ");
@@ -299,24 +328,31 @@ public class RegionSplitter {
     }
     String tableName = cmd.getArgs()[0];
     String splitClass = cmd.getArgs()[1];
+    SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClass);
+
+    if (cmd.hasOption("firstrow")) {
+      splitAlgo.setFirstRow(cmd.getOptionValue("firstrow"));
+    }
+    if (cmd.hasOption("lastrow")) {
+      splitAlgo.setLastRow(cmd.getOptionValue("lastrow"));
+    }

     if (createTable) {
       conf.set("split.count", cmd.getOptionValue("c"));
-      createPresplitTable(tableName, splitClass, cmd.getOptionValue("f").split(":"), conf);
+      createPresplitTable(tableName, splitAlgo, cmd.getOptionValue("f").split(":"), conf);
     }

     if (rollingSplit) {
       if (cmd.hasOption("o")) {
         conf.set("split.outstanding", cmd.getOptionValue("o"));
       }
-      rollingSplit(tableName, splitClass, conf);
+      rollingSplit(tableName, splitAlgo, conf);
     }
   }

-  static void createPresplitTable(String tableName, String splitClassName,
+  static void createPresplitTable(String tableName, SplitAlgorithm splitAlgo,
       String[] columnFamilies, Configuration conf) throws IOException,
       InterruptedException {
-    SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClassName);
     final int splitCount = conf.getInt("split.count", 0);
     Preconditions.checkArgument(splitCount > 1, "Split count must be > 1");
@@ -351,9 +387,8 @@ public class RegionSplitter {
     LOG.debug("Finished creating table with " + splitCount + " regions");
   }

-  static void rollingSplit(String tableName, String splitClassName,
+  static void rollingSplit(String tableName, SplitAlgorithm splitAlgo,
       Configuration conf) throws IOException, InterruptedException {
-    SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClassName);
     final int minOS = conf.getInt("split.outstanding", 2);

     HTable table = new HTable(conf, tableName);
@@ -753,20 +788,25 @@ public class RegionSplitter {
   }

   /**
-   * HexStringSplit is one possible {@link SplitAlgorithm} for choosing region
-   * boundaries. The format of a HexStringSplit region boundary is the
-   * ASCII representation of an MD5 checksum, or any other uniformly distributed
-   * bytes. Row are hex-encoded long values in the range <b>"00000000" =>
-   * "FFFFFFFF"</b> and are left-padded with zeros to keep the same order
-   * lexicographically as if they were binary.
+   * HexStringSplit is a well-known {@link SplitAlgorithm} for choosing region
+   * boundaries. The format of a HexStringSplit region boundary is the ASCII
+   * representation of an MD5 checksum, or any other uniformly distributed
+   * hexadecimal value. Row are hex-encoded long values in the range
+   * <b>"00000000" => "FFFFFFFF"</b> and are left-padded with zeros to keep the
+   * same order lexicographically as if they were binary.
    *
-   * This split algorithm is only appropriate if you will use hex strings as
-   * keys.
+   * Since this split algorithm uses hex strings as keys, it is easy to read &
+   * write in the shell but takes up more space and may be non-intuitive.
    */
   public static class HexStringSplit implements SplitAlgorithm {
-    final static String MAXHEX = "FFFFFFFF";
-    final static BigInteger MAXHEX_INT = new BigInteger(MAXHEX, 16);
-    final static int rowComparisonLength = MAXHEX.length();
+    final static String DEFAULT_MIN_HEX = "00000000";
+    final static String DEFAULT_MAX_HEX = "FFFFFFFF";
+
+    String firstRow = DEFAULT_MIN_HEX;
+    BigInteger firstRowInt = BigInteger.ZERO;
+    String lastRow = DEFAULT_MAX_HEX;
+    BigInteger lastRowInt = new BigInteger(lastRow, 16);
+    int rowComparisonLength = lastRow.length();

     public byte[] split(byte[] start, byte[] end) {
       BigInteger s = convertToBigInteger(start);
@@ -776,22 +816,43 @@ public class RegionSplitter {
     }

     public byte[][] split(int n) {
+      Preconditions.checkArgument(lastRowInt.compareTo(firstRowInt) > 0,
+          "last row (%s) is configured less than first row (%s)", lastRow,
+          firstRow);
+      // +1 to range because the last row is inclusive
+      BigInteger range = lastRowInt.subtract(firstRowInt).add(BigInteger.ONE);
+      Preconditions.checkState(range.compareTo(BigInteger.valueOf(n)) >= 0,
+          "split granularity (%s) is greater than the range (%s)", n, range);
+
       BigInteger[] splits = new BigInteger[n - 1];
-      BigInteger sizeOfEachSplit = MAXHEX_INT.divide(BigInteger.valueOf(n));
+      BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(n));
       for (int i = 1; i < n; i++) {
         // NOTE: this means the last region gets all the slop.
         // This is not a big deal if we're assuming n << MAXHEX
-        splits[i - 1] = sizeOfEachSplit.multiply(BigInteger.valueOf(i));
+        splits[i - 1] = firstRowInt.add(sizeOfEachSplit.multiply(BigInteger
+            .valueOf(i)));
       }
       return convertToBytes(splits);
     }

     public byte[] firstRow() {
-      return convertToByte(BigInteger.ZERO);
+      return convertToByte(firstRowInt);
     }

     public byte[] lastRow() {
-      return convertToByte(MAXHEX_INT);
+      return convertToByte(lastRowInt);
+    }
+
+    public void setFirstRow(String userInput) {
+      firstRow = userInput;
+      firstRowInt = new BigInteger(firstRow, 16);
+    }
+
+    public void setLastRow(String userInput) {
+      lastRow = userInput;
+      lastRowInt = new BigInteger(lastRow, 16);
+      // Precondition: lastRow > firstRow, so last's length is the greater
+      rowComparisonLength = lastRow.length();
     }

     public byte[] strToRow(String in) {
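With the new setters in place, the patched HexStringSplit can also be driven programmatically against a legacy 7FFF... end key. A hypothetical snippet is below; it assumes the patched classes from this change are on the classpath, and the expected boundaries follow from the range arithmetic in split(int n) above:

    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;

    public class LegacyRangeSketch {
      public static void main(String[] args) {
        HexStringSplit algo = new HexStringSplit();
        algo.setFirstRow("00000000");
        algo.setLastRow("7FFFFFFF");   // 0.90-era end key
        // range = 0x80000000, so an 8-way split should yield boundaries
        // 10000000, 20000000, ..., 70000000
        for (byte[] boundary : algo.split(8)) {
          System.out.println(Bytes.toString(boundary));
        }
      }
    }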
@@ -806,17 +867,24 @@ public class RegionSplitter {
       return " ";
     }

-    static BigInteger split2(BigInteger minValue, BigInteger maxValue) {
-      return maxValue.add(minValue).divide(BigInteger.valueOf(2));
+    /**
+     * Divide 2 numbers in half (for split algorithm)
+     *
+     * @param a number #1
+     * @param b number #2
+     * @return the midpoint of the 2 numbers
+     */
+    public BigInteger split2(BigInteger a, BigInteger b) {
+      return a.add(b).divide(BigInteger.valueOf(2)).abs();
     }

     /**
      * Returns an array of bytes corresponding to an array of BigIntegers
      *
-     * @param bigIntegers
+     * @param bigIntegers numbers to convert
      * @return bytes corresponding to the bigIntegers
      */
-    static byte[][] convertToBytes(BigInteger[] bigIntegers) {
+    public byte[][] convertToBytes(BigInteger[] bigIntegers) {
       byte[][] returnBytes = new byte[bigIntegers.length][];
       for (int i = 0; i < bigIntegers.length; i++) {
         returnBytes[i] = convertToByte(bigIntegers[i]);
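As a quick sanity check of split2() above, the midpoint of 0xdfffffff and 0xffffffff is 0xefffffff, the same value the updated comment and assertion in TestRegionSplitter verify further below:

    import java.math.BigInteger;

    public class MidpointCheck {
      public static void main(String[] args) {
        BigInteger a = new BigInteger("dfffffff", 16);
        BigInteger b = new BigInteger("ffffffff", 16);
        // Same formula as split2(): (a + b) / 2; prints efffffff
        System.out.println(a.add(b).divide(BigInteger.valueOf(2)).abs().toString(16));
      }
    }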
@@ -827,38 +895,56 @@ public class RegionSplitter {
     /**
      * Returns the bytes corresponding to the BigInteger
      *
-     * @param bigInteger
+     * @param bigInteger number to convert
+     * @param pad padding length
      * @return byte corresponding to input BigInteger
      */
-    static byte[] convertToByte(BigInteger bigInteger) {
+    public static byte[] convertToByte(BigInteger bigInteger, int pad) {
       String bigIntegerString = bigInteger.toString(16);
-      bigIntegerString = StringUtils.leftPad(bigIntegerString,
-          rowComparisonLength, '0');
+      bigIntegerString = StringUtils.leftPad(bigIntegerString, pad, '0');
       return Bytes.toBytes(bigIntegerString);
     }

     /**
-     * Returns the BigInteger represented by thebyte array
+     * Returns the bytes corresponding to the BigInteger
      *
-     * @param row
+     * @param bigInteger number to convert
+     * @return corresponding bytes
+     */
+    public byte[] convertToByte(BigInteger bigInteger) {
+      return convertToByte(bigInteger, rowComparisonLength);
+    }
+
+    /**
+     * Returns the BigInteger represented by the byte array
+     *
+     * @param row byte array representing row
      * @return the corresponding BigInteger
      */
-    static BigInteger convertToBigInteger(byte[] row) {
+    public BigInteger convertToBigInteger(byte[] row) {
       return (row.length > 0) ? new BigInteger(Bytes.toString(row), 16)
           : BigInteger.ZERO;
     }
+
+    @Override
+    public String toString() {
+      return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
+          + "," + rowToStr(lastRow()) + "]";
+    }
   }

   /**
    * A SplitAlgorithm that divides the space of possible keys evenly. Useful
-   * when the keys are approximately uniform random bytes (e.g. hashes).
-   * You probably shouldn't use this if your keys are ASCII, or if your keys
-   * tend to have similar prefixes.
+   * when the keys are approximately uniform random bytes (e.g. hashes). Rows
+   * are raw byte values in the range <b>00 => FF</b> and are right-padded with
+   * zeros to keep the same memcmp() order. This is the natural algorithm to use
+   * for a byte[] environment and saves space, but is not necessarily the
+   * easiest for readability.
    */
   public static class UniformSplit implements SplitAlgorithm {
-    static final byte xFF = (byte)0xFF;
-    static final byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
-    static final byte[] lastRowBytes =
+    static final byte xFF = (byte) 0xFF;
+    byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
+    byte[] lastRowBytes =
             new byte[] {xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};

     public byte[] split(byte[] start, byte[] end) {
       return Bytes.split(start, end, 1)[1];
@@ -866,12 +952,19 @@ public class RegionSplitter {
     @Override
     public byte[][] split(int numRegions) {
-      byte[][] splitKeysPlusEndpoints = Bytes.split(firstRowBytes, lastRowBytes,
-          numRegions-1);
-      byte[][] splitAtKeys = new byte[splitKeysPlusEndpoints.length-2][];
-      System.arraycopy(splitKeysPlusEndpoints, 1, splitAtKeys, 0,
-          splitKeysPlusEndpoints.length-2);
-      return splitAtKeys;
+      Preconditions.checkArgument(
+          Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
+          "last row (%s) is configured less than first row (%s)",
+          Bytes.toStringBinary(lastRowBytes),
+          Bytes.toStringBinary(firstRowBytes));
+
+      byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
+          numRegions - 1);
+      Preconditions.checkState(splits != null,
+          "Could not split region with given user input: " + this);
+
+      // remove endpoints, which are included in the splits list
+      return Arrays.copyOfRange(splits, 1, splits.length - 1);
     }

     @Override
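UniformSplit gains the same configurability for raw byte keys; first and last rows are parsed with Bytes.toBytesBinary, so escaped input such as \x00 works from the command line as well. A hypothetical programmatic use, similar to the new testUserInput() cases in the test file below (the boundary values themselves come from Bytes.split and are not asserted here):

    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;

    public class UniformRangeSketch {
      public static void main(String[] args) {
        UniformSplit algo = new UniformSplit();
        algo.setFirstRow("\\x00");     // parsed by Bytes.toBytesBinary
        algo.setLastRow("\\xAA");
        // 4 regions -> 3 boundaries between 0x00 and 0xAA (inclusive end)
        for (byte[] boundary : algo.split(4)) {
          System.out.println(Bytes.toStringBinary(boundary));
        }
      }
    }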
@@ -884,6 +977,16 @@ public class RegionSplitter {
       return lastRowBytes;
     }

+    @Override
+    public void setFirstRow(String userInput) {
+      firstRowBytes = Bytes.toBytesBinary(userInput);
+    }
+
+    @Override
+    public void setLastRow(String userInput) {
+      lastRowBytes = Bytes.toBytesBinary(userInput);
+    }
+
     @Override
     public byte[] strToRow(String input) {
       return Bytes.toBytesBinary(input);
@@ -898,5 +1001,11 @@ public class RegionSplitter {
     public String separator() {
       return ",";
     }
+
+    @Override
+    public String toString() {
+      return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
+          + "," + rowToStr(lastRow()) + "]";
+    }
   }
 }

TestRegionSplitter.java

@@ -19,17 +19,18 @@
  */
 package org.apache.hadoop.hbase.util;

-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.*;

 import java.io.IOException;
+import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;

 import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HRegionInfo;
@@ -37,6 +38,7 @@ import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;
+import org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm;
 import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -47,13 +49,10 @@ import org.junit.Test;
  * rolling split of an existing table.
  */
 public class TestRegionSplitter {
+  private final static Log LOG = LogFactory.getLog(TestRegionSplitter.class);
   private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
-  private final static String HEX_SPLIT_CLASS_NAME =
-          "org.apache.hadoop.hbase.util.RegionSplitter$HexStringSplit";
-  private final static String UNIFORM_SPLIT_CLASS_NAME =
-          "org.apache.hadoop.hbase.util.RegionSplitter$UniformSplit";
   private final static String CF_NAME = "SPLIT_TEST_CF";
-  private final static byte xFF = (byte)0xff;
+  private final static byte xFF = (byte) 0xff;

   @BeforeClass
   public static void setup() throws Exception {
@@ -70,28 +69,28 @@
    */
   @Test
   public void testCreatePresplitTableHex() throws Exception {
     final List<byte[]> expectedBounds = new ArrayList<byte[]>();
     expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
-    expectedBounds.add("0fffffff".getBytes());
-    expectedBounds.add("1ffffffe".getBytes());
-    expectedBounds.add("2ffffffd".getBytes());
-    expectedBounds.add("3ffffffc".getBytes());
-    expectedBounds.add("4ffffffb".getBytes());
-    expectedBounds.add("5ffffffa".getBytes());
-    expectedBounds.add("6ffffff9".getBytes());
-    expectedBounds.add("7ffffff8".getBytes());
-    expectedBounds.add("8ffffff7".getBytes());
-    expectedBounds.add("9ffffff6".getBytes());
-    expectedBounds.add("affffff5".getBytes());
-    expectedBounds.add("bffffff4".getBytes());
-    expectedBounds.add("cffffff3".getBytes());
-    expectedBounds.add("dffffff2".getBytes());
-    expectedBounds.add("effffff1".getBytes());
+    expectedBounds.add("10000000".getBytes());
+    expectedBounds.add("20000000".getBytes());
+    expectedBounds.add("30000000".getBytes());
+    expectedBounds.add("40000000".getBytes());
+    expectedBounds.add("50000000".getBytes());
+    expectedBounds.add("60000000".getBytes());
+    expectedBounds.add("70000000".getBytes());
+    expectedBounds.add("80000000".getBytes());
+    expectedBounds.add("90000000".getBytes());
+    expectedBounds.add("a0000000".getBytes());
+    expectedBounds.add("b0000000".getBytes());
+    expectedBounds.add("c0000000".getBytes());
+    expectedBounds.add("d0000000".getBytes());
+    expectedBounds.add("e0000000".getBytes());
+    expectedBounds.add("f0000000".getBytes());
     expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);

     // Do table creation/pre-splitting and verification of region boundaries
-    preSplitTableAndVerify(expectedBounds, HEX_SPLIT_CLASS_NAME,
-        "NewHexPresplitTable");
+    preSplitTableAndVerify(expectedBounds,
+        HexStringSplit.class.getSimpleName(), "NewHexPresplitTable");
   }

   /**
@@ -99,29 +98,28 @@
    */
   @Test
   public void testCreatePresplitTableUniform() throws Exception {
     List<byte[]> expectedBounds = new ArrayList<byte[]>();
     expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
-    expectedBounds.add(new byte[] { 0x0f, xFF, xFF, xFF, xFF, xFF, xFF, xFF});
-    expectedBounds.add(new byte[] { 0x1f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfe});
-    expectedBounds.add(new byte[] { 0x2f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfd});
-    expectedBounds.add(new byte[] { 0x3f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfc});
-    expectedBounds.add(new byte[] { 0x4f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfb});
-    expectedBounds.add(new byte[] { 0x5f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfa});
-    expectedBounds.add(new byte[] { 0x6f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf9});
-    expectedBounds.add(new byte[] { 0x7f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf8});
-    expectedBounds.add(new byte[] {(byte)0x8f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf7});
-    expectedBounds.add(new byte[] {(byte)0x9f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf6});
-    expectedBounds.add(new byte[] {(byte)0xaf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf5});
-    expectedBounds.add(new byte[] {(byte)0xbf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf4});
-    expectedBounds.add(new byte[] {(byte)0xcf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf3});
-    expectedBounds.add(new byte[] {(byte)0xdf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf2});
-    expectedBounds.add(new byte[] {(byte)0xef, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf1});
+    expectedBounds.add(new byte[] { 0x10, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x20, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x30, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x40, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x50, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x60, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x70, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0x80, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0x90, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xa0, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xb0, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xc0, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xd0, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xe0, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xf0, 0, 0, 0, 0, 0, 0, 0});
     expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);

     // Do table creation/pre-splitting and verification of region boundaries
-    preSplitTableAndVerify(expectedBounds,
-        "org.apache.hadoop.hbase.util.RegionSplitter$UniformSplit",
-        "NewUniformPresplitTable");
+    preSplitTableAndVerify(expectedBounds, UniformSplit.class.getSimpleName(),
+        "NewUniformPresplitTable");
   }

   /**
@@ -135,7 +133,7 @@
     byte[][] twoRegionsSplits = splitter.split(2);
     assertEquals(1, twoRegionsSplits.length);
-    assertArrayEquals(twoRegionsSplits[0], "7fffffff".getBytes());
+    assertArrayEquals(twoRegionsSplits[0], "80000000".getBytes());

     byte[][] threeRegionsSplits = splitter.split(3);
     assertEquals(2, threeRegionsSplits.length);
@@ -157,7 +155,7 @@
     splitPoint = splitter.split(firstRow, "20000000".getBytes());
     assertArrayEquals(splitPoint, "10000000".getBytes());

-    // Halfway between 5f... and 7f... should be 6f....
+    // Halfway between df... and ff... should be ef....
     splitPoint = splitter.split("dfffffff".getBytes(), lastRow);
     assertArrayEquals(splitPoint,"efffffff".getBytes());
   }
@@ -179,7 +177,7 @@
     byte[][] twoRegionsSplits = splitter.split(2);
     assertEquals(1, twoRegionsSplits.length);
     assertArrayEquals(twoRegionsSplits[0],
-        new byte[] {0x7f, xFF, xFF, xFF, xFF, xFF, xFF, xFF});
+        new byte[] { (byte) 0x80, 0, 0, 0, 0, 0, 0, 0 });

     byte[][] threeRegionsSplits = splitter.split(3);
     assertEquals(2, threeRegionsSplits.length);
@@ -207,6 +205,64 @@
         new byte[] {(byte)0xef, xFF, xFF, xFF, xFF, xFF, xFF, xFF});
   }

+  @Test
+  public void testUserInput() {
+    SplitAlgorithm algo = new HexStringSplit();
+    assertFalse(splitFailsPrecondition(algo)); // default settings are fine
+    assertFalse(splitFailsPrecondition(algo, "00", "AA")); // custom is fine
+    assertTrue(splitFailsPrecondition(algo, "AA", "00")); // range error
+    assertTrue(splitFailsPrecondition(algo, "AA", "AA")); // range error
+    assertFalse(splitFailsPrecondition(algo, "0", "2", 3)); // should be fine
+    assertFalse(splitFailsPrecondition(algo, "0", "A", 11)); // should be fine
+    assertTrue(splitFailsPrecondition(algo, "0", "A", 12)); // too granular
+
+    algo = new UniformSplit();
+    assertFalse(splitFailsPrecondition(algo)); // default settings are fine
+    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\xAA")); // custom is fine
+    assertTrue(splitFailsPrecondition(algo, "\\xAA", "\\x00")); // range error
+    assertTrue(splitFailsPrecondition(algo, "\\xAA", "\\xAA")); // range error
+    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\x02", 3)); // should be fine
+    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\x0A", 11)); // should be fine
+    assertTrue(splitFailsPrecondition(algo, "\\x00", "\\x0A", 12)); // too granular
+  }
+
+  private boolean splitFailsPrecondition(SplitAlgorithm algo) {
+    return splitFailsPrecondition(algo, 100);
+  }
+
+  private boolean splitFailsPrecondition(SplitAlgorithm algo, String firstRow,
+      String lastRow) {
+    return splitFailsPrecondition(algo, firstRow, lastRow, 100);
+  }
+
+  private boolean splitFailsPrecondition(SplitAlgorithm algo, String firstRow,
+      String lastRow, int numRegions) {
+    algo.setFirstRow(firstRow);
+    algo.setLastRow(lastRow);
+    return splitFailsPrecondition(algo, numRegions);
+  }
+
+  private boolean splitFailsPrecondition(SplitAlgorithm algo, int numRegions) {
+    try {
+      byte[][] s = algo.split(numRegions);
+      LOG.debug("split algo = " + algo);
+      if (s != null) {
+        StringBuilder sb = new StringBuilder();
+        for (byte[] b : s) {
+          sb.append(Bytes.toStringBinary(b) + " ");
+        }
+        LOG.debug(sb.toString());
+      }
+      return false;
+    } catch (IllegalArgumentException e) {
+      return true;
+    } catch (IllegalStateException e) {
+      return true;
+    } catch (IndexOutOfBoundsException e) {
+      return true;
+    }
+  }
+
   /**
    * Creates a pre-split table with expectedBounds.size()+1 regions, then
    * verifies that the region boundaries are the same as the expected
@@ -214,21 +270,23 @@
    * @throws Various junit assertions
    */
   private void preSplitTableAndVerify(List<byte[]> expectedBounds,
-      String splitAlgo, String tableName) throws Exception {
+      String splitClass, String tableName) throws Exception {
     final int numRegions = expectedBounds.size()-1;
     final Configuration conf = UTIL.getConfiguration();
     conf.setInt("split.count", numRegions);
+    SplitAlgorithm splitAlgo = RegionSplitter.newSplitAlgoInstance(conf, splitClass);
     RegionSplitter.createPresplitTable(tableName, splitAlgo,
         new String[] {CF_NAME}, conf);
     verifyBounds(expectedBounds, tableName);
   }

-  private void rollingSplitAndVerify(String tableName, String splitAlgo,
+  private void rollingSplitAndVerify(String tableName, String splitClass,
       List<byte[]> expectedBounds) throws Exception {
     final Configuration conf = UTIL.getConfiguration();
     // Set this larger than the number of splits so RegionSplitter won't block
     conf.setInt("split.outstanding", 5);
+    SplitAlgorithm splitAlgo = RegionSplitter.newSplitAlgoInstance(conf, splitClass);
     RegionSplitter.rollingSplit(tableName, splitAlgo, conf);
     verifyBounds(expectedBounds, tableName);
   }