diff --git a/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
index 403f0e6c0c0..0e79609f83c 100644
--- a/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
+++ b/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
@@ -1337,21 +1337,48 @@ public class Bytes {
    * @return Array of dividing values
    */
   public static byte [][] split(final byte [] a, final byte [] b, final int num) {
-    byte[][] ret = new byte[num+2][];
+    return split(a, b, false, num);
+  }
+
+  /**
+   * Split passed range. Relatively expensive operation. Uses BigInteger math.
+   * Useful for splitting ranges in MapReduce jobs.
+   * @param a Beginning of range
+   * @param b End of range
+   * @param inclusive Whether the end of range is prefix-inclusive or is
+   * considered an exclusive boundary. Automatic splits are generally exclusive
+   * and manual splits with an explicit range utilize an inclusive end of range.
+   * @param num Number of times to split range. Pass 1 if you want to split
+   * the range in two; i.e. one split.
+   * @return Array of dividing values
+   */
+  public static byte[][] split(final byte[] a, final byte[] b,
+      boolean inclusive, final int num) {
+    byte[][] ret = new byte[num + 2][];
     int i = 0;
-    Iterable<byte[]> iter = iterateOnSplits(a, b, num);
-    if (iter == null) return null;
+    Iterable<byte[]> iter = iterateOnSplits(a, b, inclusive, num);
+    if (iter == null)
+      return null;
     for (byte[] elem : iter) {
       ret[i++] = elem;
     }
     return ret;
   }
-
+
   /**
-   * Iterate over keys within the passed inclusive range.
+   * Iterate over keys within the passed range, splitting at an [a,b) boundary.
+   */
+  public static Iterable<byte[]> iterateOnSplits(final byte[] a,
+      final byte[] b, final int num)
+  {
+    return iterateOnSplits(a, b, false, num);
+  }
+
+  /**
+   * Iterate over keys within the passed range.
    */
   public static Iterable<byte[]> iterateOnSplits(
-    final byte[] a, final byte[]b, final int num)
+    final byte[] a, final byte[]b, boolean inclusive, final int num)
   {
     byte [] aPadded;
     byte [] bPadded;
@@ -1374,7 +1401,10 @@ public class Bytes {
     byte [] prependHeader = {1, 0};
     final BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
     final BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
-    final BigInteger diffBI = stopBI.subtract(startBI);
+    BigInteger diffBI = stopBI.subtract(startBI);
+    if (inclusive) {
+      diffBI = diffBI.add(BigInteger.ONE);
+    }
     final BigInteger splitsBI = BigInteger.valueOf(num + 1);
     if(diffBI.compareTo(splitsBI) < 0) {
       return null;
diff --git a/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java b/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
index ce166415071..d1fa294cdd0 100644
--- a/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
+++ b/src/main/java/org/apache/hadoop/hbase/util/RegionSplitter.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hbase.util;
 
 import java.io.IOException;
 import java.math.BigInteger;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.LinkedList;
@@ -150,9 +151,9 @@
      * Split a pre-existing region into 2 regions.
      *
      * @param start
-     *          row
+     *          first row (inclusive)
     * @param end
-     *          row
+     *          last row (exclusive)
     * @return the split row to use
      */
     byte[] split(byte[] start, byte[] end);
@@ -163,8 +164,11 @@
     * @param numRegions
     *          number of regions to split the table into
     *
-     * @return array of split keys for the initial regions of the table. The length of the
-     *         returned array should be numRegions-1.
+     * @throws RuntimeException
+     *           user input is validated at this time. May throw a runtime
+     *           exception in response to a parse failure
+     * @return array of split keys for the initial regions of the table. The
+     *         length of the returned array should be numRegions-1.
      */
     byte[][] split(int numRegions);
 
@@ -186,6 +190,27 @@
      */
     byte[] lastRow();
 
+    /**
+     * In HBase, the first row is represented by an empty byte array. Set this
+     * value to help the split code understand how to evenly divide the first
+     * region.
+     *
+     * @param userInput
+     *          raw user input (may throw RuntimeException on parse failure)
+     */
+    void setFirstRow(String userInput);
+
+    /**
+     * In HBase, the last row is represented by an empty byte array. Set this
+     * value to help the split code understand how to evenly divide the last
+     * region. Note that this last row is inclusive for all rows sharing the
+     * same prefix.
+     *
+     * @param userInput
+     *          raw user input (may throw RuntimeException on parse failure)
+     */
+    void setLastRow(String userInput);
+
     /**
      * @param input
      *          user or file input for row
@@ -263,6 +288,10 @@
     opt.addOption(OptionBuilder.withArgName("count").hasArg().withDescription(
         "Max outstanding splits that have unfinished major compactions")
         .create("o"));
+    opt.addOption(null, "firstrow", true,
+        "First Row in Table for Split Algorithm");
+    opt.addOption(null, "lastrow", true,
+        "Last Row in Table for Split Algorithm");
     opt.addOption(null, "risky", false,
         "Skip verification steps to complete quickly."
            + "STRONGLY DISCOURAGED for production systems.  ");
@@ -299,24 +328,31 @@
     }
     String tableName = cmd.getArgs()[0];
     String splitClass = cmd.getArgs()[1];
+    SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClass);
+
+    if (cmd.hasOption("firstrow")) {
+      splitAlgo.setFirstRow(cmd.getOptionValue("firstrow"));
+    }
+    if (cmd.hasOption("lastrow")) {
+      splitAlgo.setLastRow(cmd.getOptionValue("lastrow"));
+    }
 
     if (createTable) {
       conf.set("split.count", cmd.getOptionValue("c"));
-      createPresplitTable(tableName, splitClass, cmd.getOptionValue("f").split(":"), conf);
+      createPresplitTable(tableName, splitAlgo, cmd.getOptionValue("f").split(":"), conf);
     }
 
     if (rollingSplit) {
       if (cmd.hasOption("o")) {
         conf.set("split.outstanding", cmd.getOptionValue("o"));
       }
-      rollingSplit(tableName, splitClass, conf);
+      rollingSplit(tableName, splitAlgo, conf);
     }
   }
 
-  static void createPresplitTable(String tableName, String splitClassName,
+  static void createPresplitTable(String tableName, SplitAlgorithm splitAlgo,
       String[] columnFamilies, Configuration conf) throws IOException,
       InterruptedException {
-    SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClassName);
     final int splitCount = conf.getInt("split.count", 0);
     Preconditions.checkArgument(splitCount > 1, "Split count must be > 1");
 
@@ -351,9 +387,8 @@
     LOG.debug("Finished creating table with " + splitCount + " regions");
   }
 
-  static void rollingSplit(String tableName, String splitClassName,
+  static void rollingSplit(String tableName, SplitAlgorithm splitAlgo,
       Configuration conf) throws IOException, InterruptedException {
-    SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClassName);
     final int minOS = conf.getInt("split.outstanding", 2);
 
     HTable table = new HTable(conf, tableName);
@@ -753,20 +788,25 @@
   }
 
   /**
-   * HexStringSplit is one possible {@link SplitAlgorithm} for choosing region
-   * boundaries. The format of a HexStringSplit region boundary is the
-   * ASCII representation of an MD5 checksum, or any other uniformly distributed
-   * bytes. Row are hex-encoded long values in the range "00000000" =>
-   * "FFFFFFFF" and are left-padded with zeros to keep the same order
-   * lexicographically as if they were binary.
+   * HexStringSplit is a well-known {@link SplitAlgorithm} for choosing region
+   * boundaries. The format of a HexStringSplit region boundary is the ASCII
+   * representation of an MD5 checksum, or any other uniformly distributed
+   * hexadecimal value. Rows are hex-encoded long values in the range
+   * "00000000" => "FFFFFFFF" and are left-padded with zeros to keep the
+   * same order lexicographically as if they were binary.
    *
-   * This split algorithm is only appropriate if you will use hex strings as
-   * keys.
+   * Since this split algorithm uses hex strings as keys, it is easy to read &
+   * write in the shell but takes up more space and may be non-intuitive.
    */
   public static class HexStringSplit implements SplitAlgorithm {
-    final static String MAXHEX = "FFFFFFFF";
-    final static BigInteger MAXHEX_INT = new BigInteger(MAXHEX, 16);
-    final static int rowComparisonLength = MAXHEX.length();
+    final static String DEFAULT_MIN_HEX = "00000000";
+    final static String DEFAULT_MAX_HEX = "FFFFFFFF";
+
+    String firstRow = DEFAULT_MIN_HEX;
+    BigInteger firstRowInt = BigInteger.ZERO;
+    String lastRow = DEFAULT_MAX_HEX;
+    BigInteger lastRowInt = new BigInteger(lastRow, 16);
+    int rowComparisonLength = lastRow.length();
 
     public byte[] split(byte[] start, byte[] end) {
       BigInteger s = convertToBigInteger(start);
@@ -776,22 +816,43 @@
     }
 
     public byte[][] split(int n) {
+      Preconditions.checkArgument(lastRowInt.compareTo(firstRowInt) > 0,
+          "last row (%s) is configured less than first row (%s)", lastRow,
+          firstRow);
+      // +1 to range because the last row is inclusive
+      BigInteger range = lastRowInt.subtract(firstRowInt).add(BigInteger.ONE);
+      Preconditions.checkState(range.compareTo(BigInteger.valueOf(n)) >= 0,
+          "split granularity (%s) is greater than the range (%s)", n, range);
+
       BigInteger[] splits = new BigInteger[n - 1];
-      BigInteger sizeOfEachSplit = MAXHEX_INT.divide(BigInteger.valueOf(n));
+      BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(n));
       for (int i = 1; i < n; i++) {
         // NOTE: this means the last region gets all the slop.
        // This is not a big deal if we're assuming n << MAXHEX
-        splits[i - 1] = sizeOfEachSplit.multiply(BigInteger.valueOf(i));
+        splits[i - 1] = firstRowInt.add(sizeOfEachSplit.multiply(BigInteger
+            .valueOf(i)));
       }
       return convertToBytes(splits);
     }
 
     public byte[] firstRow() {
-      return convertToByte(BigInteger.ZERO);
+      return convertToByte(firstRowInt);
     }
 
     public byte[] lastRow() {
-      return convertToByte(MAXHEX_INT);
+      return convertToByte(lastRowInt);
+    }
+
+    public void setFirstRow(String userInput) {
+      firstRow = userInput;
+      firstRowInt = new BigInteger(firstRow, 16);
+    }
+
+    public void setLastRow(String userInput) {
+      lastRow = userInput;
+      lastRowInt = new BigInteger(lastRow, 16);
+      // Precondition: lastRow > firstRow, so last's length is the greater
+      rowComparisonLength = lastRow.length();
     }
 
     public byte[] strToRow(String in) {
@@ -806,17 +867,24 @@
       return " ";
     }
 
-    static BigInteger split2(BigInteger minValue, BigInteger maxValue) {
-      return maxValue.add(minValue).divide(BigInteger.valueOf(2));
+    /**
+     * Compute the midpoint of two numbers (for the split algorithm)
+     *
+     * @param a number #1
+     * @param b number #2
+     * @return the midpoint of the 2 numbers
+     */
+    public BigInteger split2(BigInteger a, BigInteger b) {
+      return a.add(b).divide(BigInteger.valueOf(2)).abs();
     }
 
     /**
     * Returns an array of bytes corresponding to an array of BigIntegers
     *
-     * @param bigIntegers
+     * @param bigIntegers numbers to convert
     * @return bytes corresponding to the bigIntegers
      */
-    static byte[][] convertToBytes(BigInteger[] bigIntegers) {
+    public byte[][] convertToBytes(BigInteger[] bigIntegers) {
       byte[][] returnBytes = new byte[bigIntegers.length][];
       for (int i = 0; i < bigIntegers.length; i++) {
         returnBytes[i] = convertToByte(bigIntegers[i]);
@@ -827,38 +895,56 @@
     /**
     * Returns the bytes corresponding to the BigInteger
     *
-     * @param bigInteger
+     * @param bigInteger number to convert
+     * @param pad padding length
     * @return byte corresponding to input BigInteger
      */
-    static byte[] convertToByte(BigInteger bigInteger) {
+    public static byte[] convertToByte(BigInteger bigInteger, int pad) {
       String bigIntegerString = bigInteger.toString(16);
-      bigIntegerString = StringUtils.leftPad(bigIntegerString,
-          rowComparisonLength, '0');
+      bigIntegerString = StringUtils.leftPad(bigIntegerString, pad, '0');
       return Bytes.toBytes(bigIntegerString);
     }
 
     /**
-     * Returns the BigInteger represented by thebyte array
+     * Returns the bytes corresponding to the BigInteger
     *
-     * @param row
+     * @param bigInteger number to convert
+     * @return corresponding bytes
+     */
+    public byte[] convertToByte(BigInteger bigInteger) {
+      return convertToByte(bigInteger, rowComparisonLength);
+    }
+
+    /**
+     * Returns the BigInteger represented by the byte array
+     *
+     * @param row byte array representing row
     * @return the corresponding BigInteger
      */
-    static BigInteger convertToBigInteger(byte[] row) {
+    public BigInteger convertToBigInteger(byte[] row) {
       return (row.length > 0) ? new BigInteger(Bytes.toString(row), 16)
           : BigInteger.ZERO;
     }
+
+    @Override
+    public String toString() {
+      return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
+          + "," + rowToStr(lastRow()) + "]";
+    }
   }
 
   /**
   * A SplitAlgorithm that divides the space of possible keys evenly. Useful
-   * when the keys are approximately uniform random bytes (e.g. hashes).
-   * You probably shouldn't use this if your keys are ASCII, or if your keys
-   * tend to have similar prefixes.
+   * when the keys are approximately uniform random bytes (e.g. hashes). Rows
+   * are raw byte values in the range 00 => FF and are right-padded with
+   * zeros to keep the same memcmp() order. This is the natural algorithm to use
+   * for a byte[] environment and saves space, but is not necessarily the
+   * easiest for readability.
    */
   public static class UniformSplit implements SplitAlgorithm {
-    static final byte xFF = (byte)0xFF;
-    static final byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
-    static final byte[] lastRowBytes =
+    static final byte xFF = (byte) 0xFF;
+    byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
+    byte[] lastRowBytes =
            new byte[] {xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};
     public byte[] split(byte[] start, byte[] end) {
       return Bytes.split(start, end, 1)[1];
@@ -866,12 +952,19 @@
 
     @Override
     public byte[][] split(int numRegions) {
-      byte[][] splitKeysPlusEndpoints = Bytes.split(firstRowBytes, lastRowBytes,
-          numRegions-1);
-      byte[][] splitAtKeys = new byte[splitKeysPlusEndpoints.length-2][];
-      System.arraycopy(splitKeysPlusEndpoints, 1, splitAtKeys, 0,
-          splitKeysPlusEndpoints.length-2);
-      return splitAtKeys;
+      Preconditions.checkArgument(
+          Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
+          "last row (%s) is configured less than first row (%s)",
+          Bytes.toStringBinary(lastRowBytes),
+          Bytes.toStringBinary(firstRowBytes));
+
+      byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
+          numRegions - 1);
+      Preconditions.checkState(splits != null,
+          "Could not split region with given user input: " + this);
+
+      // remove endpoints, which are included in the splits list
+      return Arrays.copyOfRange(splits, 1, splits.length - 1);
     }
 
     @Override
@@ -884,6 +977,16 @@
       return lastRowBytes;
     }
 
+    @Override
+    public void setFirstRow(String userInput) {
+      firstRowBytes = Bytes.toBytesBinary(userInput);
+    }
+
+    @Override
+    public void setLastRow(String userInput) {
+      lastRowBytes = Bytes.toBytesBinary(userInput);
+    }
+
     @Override
     public byte[] strToRow(String input) {
       return Bytes.toBytesBinary(input);
@@ -898,5 +1001,11 @@
     public String separator() {
       return ",";
     }
+
+    @Override
+    public String toString() {
+      return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
+          + "," + rowToStr(lastRow()) + "]";
+    }
   }
 }
diff --git a/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java b/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
index 8479d9b6c30..34f82acc98d 100644
--- a/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
+++ b/src/test/java/org/apache/hadoop/hbase/util/TestRegionSplitter.java
@@ -19,17 +19,18 @@
  */
 package org.apache.hadoop.hbase.util;
 
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotSame;
+import static org.junit.Assert.*;
 
 import java.io.IOException;
+import java.math.BigInteger;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 
 import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HRegionInfo;
@@ -37,6 +38,7 @@ import org.apache.hadoop.hbase.HServerAddress;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;
+import org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm;
 import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
@@ -47,13 +49,10 @@ import org.junit.Test;
 * rolling split of an existing table.
 */
 public class TestRegionSplitter {
+  private final static Log LOG = LogFactory.getLog(TestRegionSplitter.class);
   private final static HBaseTestingUtility UTIL = new HBaseTestingUtility();
-  private final static String HEX_SPLIT_CLASS_NAME =
-      "org.apache.hadoop.hbase.util.RegionSplitter$HexStringSplit";
-  private final static String UNIFORM_SPLIT_CLASS_NAME =
-      "org.apache.hadoop.hbase.util.RegionSplitter$UniformSplit";
   private final static String CF_NAME = "SPLIT_TEST_CF";
-  private final static byte xFF = (byte)0xff;
+  private final static byte xFF = (byte) 0xff;
 
   @BeforeClass
   public static void setup() throws Exception {
@@ -70,28 +69,28 @@
   */
   @Test
   public void testCreatePresplitTableHex() throws Exception {
-        final List<byte[]> expectedBounds = new ArrayList<byte[]>();
-        expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
-        expectedBounds.add("0fffffff".getBytes());
-        expectedBounds.add("1ffffffe".getBytes());
-        expectedBounds.add("2ffffffd".getBytes());
-        expectedBounds.add("3ffffffc".getBytes());
-        expectedBounds.add("4ffffffb".getBytes());
-        expectedBounds.add("5ffffffa".getBytes());
-        expectedBounds.add("6ffffff9".getBytes());
-        expectedBounds.add("7ffffff8".getBytes());
-        expectedBounds.add("8ffffff7".getBytes());
-        expectedBounds.add("9ffffff6".getBytes());
-        expectedBounds.add("affffff5".getBytes());
-        expectedBounds.add("bffffff4".getBytes());
-        expectedBounds.add("cffffff3".getBytes());
-        expectedBounds.add("dffffff2".getBytes());
-        expectedBounds.add("effffff1".getBytes());
-        expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+    final List<byte[]> expectedBounds = new ArrayList<byte[]>();
+    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+    expectedBounds.add("10000000".getBytes());
+    expectedBounds.add("20000000".getBytes());
+    expectedBounds.add("30000000".getBytes());
+    expectedBounds.add("40000000".getBytes());
+    expectedBounds.add("50000000".getBytes());
+    expectedBounds.add("60000000".getBytes());
+    expectedBounds.add("70000000".getBytes());
+    expectedBounds.add("80000000".getBytes());
+    expectedBounds.add("90000000".getBytes());
+    expectedBounds.add("a0000000".getBytes());
+    expectedBounds.add("b0000000".getBytes());
+    expectedBounds.add("c0000000".getBytes());
+    expectedBounds.add("d0000000".getBytes());
+    expectedBounds.add("e0000000".getBytes());
+    expectedBounds.add("f0000000".getBytes());
+    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
 
-        // Do table creation/pre-splitting and verification of region boundaries
-        preSplitTableAndVerify(expectedBounds, HEX_SPLIT_CLASS_NAME,
-            "NewHexPresplitTable");
+    // Do table creation/pre-splitting and verification of region boundaries
+    preSplitTableAndVerify(expectedBounds,
+        HexStringSplit.class.getSimpleName(), "NewHexPresplitTable");
   }
 
   /**
@@ -99,29 +98,28 @@ }
   @Test
   public void testCreatePresplitTableUniform() throws Exception {
-        List<byte[]> expectedBounds = new ArrayList<byte[]>();
-        expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
-        expectedBounds.add(new byte[] { 0x0f, xFF, xFF, xFF, xFF, xFF, xFF, xFF});
-        expectedBounds.add(new byte[] { 0x1f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfe});
-        expectedBounds.add(new byte[] { 0x2f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfd});
-        expectedBounds.add(new byte[] { 0x3f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfc});
-        expectedBounds.add(new byte[] { 0x4f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfb});
-        expectedBounds.add(new byte[] { 0x5f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xfa});
-        expectedBounds.add(new byte[] { 0x6f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf9});
-        expectedBounds.add(new byte[] { 0x7f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf8});
-        expectedBounds.add(new byte[] {(byte)0x8f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf7});
-        expectedBounds.add(new byte[] {(byte)0x9f, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf6});
-        expectedBounds.add(new byte[] {(byte)0xaf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf5});
-        expectedBounds.add(new byte[] {(byte)0xbf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf4});
-        expectedBounds.add(new byte[] {(byte)0xcf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf3});
-        expectedBounds.add(new byte[] {(byte)0xdf, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf2});
-        expectedBounds.add(new byte[] {(byte)0xef, xFF, xFF, xFF, xFF, xFF, xFF, (byte)0xf1});
-        expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+    List<byte[]> expectedBounds = new ArrayList<byte[]>();
+    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
+    expectedBounds.add(new byte[] { 0x10, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x20, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x30, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x40, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x50, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x60, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] { 0x70, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0x80, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0x90, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xa0, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xb0, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xc0, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xd0, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xe0, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(new byte[] {(byte)0xf0, 0, 0, 0, 0, 0, 0, 0});
+    expectedBounds.add(ArrayUtils.EMPTY_BYTE_ARRAY);
 
-        // Do table creation/pre-splitting and verification of region boundaries
-        preSplitTableAndVerify(expectedBounds,
-            "org.apache.hadoop.hbase.util.RegionSplitter$UniformSplit",
-            "NewUniformPresplitTable");
+    // Do table creation/pre-splitting and verification of region boundaries
+    preSplitTableAndVerify(expectedBounds, UniformSplit.class.getSimpleName(),
+        "NewUniformPresplitTable");
   }
 
   /**
@@ -135,7 +133,7 @@
 
     byte[][] twoRegionsSplits = splitter.split(2);
     assertEquals(1, twoRegionsSplits.length);
-    assertArrayEquals(twoRegionsSplits[0], "7fffffff".getBytes());
+    assertArrayEquals(twoRegionsSplits[0], "80000000".getBytes());
 
     byte[][] threeRegionsSplits = splitter.split(3);
     assertEquals(2, threeRegionsSplits.length);
@@ -157,7 +155,7 @@
     splitPoint = splitter.split(firstRow, "20000000".getBytes());
     assertArrayEquals(splitPoint, "10000000".getBytes());
 
-    // Halfway between 5f... and 7f... should be 6f....
+    // Halfway between df... and ff... should be ef....
     splitPoint = splitter.split("dfffffff".getBytes(), lastRow);
     assertArrayEquals(splitPoint,"efffffff".getBytes());
   }
@@ -179,7 +177,7 @@
     byte[][] twoRegionsSplits = splitter.split(2);
     assertEquals(1, twoRegionsSplits.length);
     assertArrayEquals(twoRegionsSplits[0],
-        new byte[] {0x7f, xFF, xFF, xFF, xFF, xFF, xFF, xFF});
+        new byte[] { (byte) 0x80, 0, 0, 0, 0, 0, 0, 0 });
 
     byte[][] threeRegionsSplits = splitter.split(3);
     assertEquals(2, threeRegionsSplits.length);
@@ -207,6 +205,64 @@
         new byte[] {(byte)0xef, xFF, xFF, xFF, xFF, xFF, xFF, xFF});
   }
 
+  @Test
+  public void testUserInput() {
+    SplitAlgorithm algo = new HexStringSplit();
+    assertFalse(splitFailsPrecondition(algo)); // default settings are fine
+    assertFalse(splitFailsPrecondition(algo, "00", "AA")); // custom is fine
+    assertTrue(splitFailsPrecondition(algo, "AA", "00")); // range error
+    assertTrue(splitFailsPrecondition(algo, "AA", "AA")); // range error
+    assertFalse(splitFailsPrecondition(algo, "0", "2", 3)); // should be fine
+    assertFalse(splitFailsPrecondition(algo, "0", "A", 11)); // should be fine
+    assertTrue(splitFailsPrecondition(algo, "0", "A", 12)); // too granular
+
+    algo = new UniformSplit();
+    assertFalse(splitFailsPrecondition(algo)); // default settings are fine
+    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\xAA")); // custom is fine
+    assertTrue(splitFailsPrecondition(algo, "\\xAA", "\\x00")); // range error
+    assertTrue(splitFailsPrecondition(algo, "\\xAA", "\\xAA")); // range error
+    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\x02", 3)); // should be fine
+    assertFalse(splitFailsPrecondition(algo, "\\x00", "\\x0A", 11)); // should be fine
+    assertTrue(splitFailsPrecondition(algo, "\\x00", "\\x0A", 12)); // too granular
+  }
+
+  private boolean splitFailsPrecondition(SplitAlgorithm algo) {
+    return splitFailsPrecondition(algo, 100);
+  }
+
+  private boolean splitFailsPrecondition(SplitAlgorithm algo, String firstRow,
+      String lastRow) {
+    return splitFailsPrecondition(algo, firstRow, lastRow, 100);
+  }
+
+  private boolean splitFailsPrecondition(SplitAlgorithm algo, String firstRow,
+      String lastRow, int numRegions) {
+    algo.setFirstRow(firstRow);
+    algo.setLastRow(lastRow);
+    return splitFailsPrecondition(algo, numRegions);
+  }
+
+  private boolean splitFailsPrecondition(SplitAlgorithm algo, int numRegions) {
+    try {
+      byte[][] s = algo.split(numRegions);
+      LOG.debug("split algo = " + algo);
+      if (s != null) {
+        StringBuilder sb = new StringBuilder();
+        for (byte[] b : s) {
+          sb.append(Bytes.toStringBinary(b) + " ");
+        }
+        LOG.debug(sb.toString());
+      }
+      return false;
+    } catch (IllegalArgumentException e) {
+      return true;
+    } catch (IllegalStateException e) {
+      return true;
+    } catch (IndexOutOfBoundsException e) {
+      return true;
+    }
+  }
+
   /**
   * Creates a pre-split table with expectedBounds.size()+1 regions, then
   * verifies that the region boundaries are the same as the expected
@@ -214,21 +270,23 @@
   * @throws Various junit assertions
   */
   private void preSplitTableAndVerify(List<byte[]> expectedBounds,
-      String splitAlgo, String tableName) throws Exception {
+      String splitClass, String tableName) throws Exception {
     final int numRegions = expectedBounds.size()-1;
     final Configuration conf = UTIL.getConfiguration();
     conf.setInt("split.count", numRegions);
+    SplitAlgorithm splitAlgo = RegionSplitter.newSplitAlgoInstance(conf, splitClass);
     RegionSplitter.createPresplitTable(tableName, splitAlgo, new String[]
         {CF_NAME}, conf);
     verifyBounds(expectedBounds, tableName);
   }
 
-  private void rollingSplitAndVerify(String tableName, String splitAlgo,
+  private void rollingSplitAndVerify(String tableName, String splitClass,
      List<byte[]> expectedBounds) throws Exception {
     final Configuration conf = UTIL.getConfiguration();
     // Set this larger than the number of splits so RegionSplitter won't block
     conf.setInt("split.outstanding", 5);
+    SplitAlgorithm splitAlgo = RegionSplitter.newSplitAlgoInstance(conf, splitClass);
     RegionSplitter.rollingSplit(tableName, splitAlgo, conf);
     verifyBounds(expectedBounds, tableName);
  }
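
Usage sketch (reviewer note, not part of the patch): the snippet below exercises the new setFirstRow/setLastRow hooks and the numRegions-1 split contract described in the SplitAlgorithm javadoc above. The class and method names come from the diff; the example class name SplitBoundsExample, the row bounds, and the region count are illustrative only. The same bounds could equally be supplied on the command line through the new --firstrow/--lastrow options added to RegionSplitter.

    import org.apache.hadoop.hbase.util.RegionSplitter.HexStringSplit;
    import org.apache.hadoop.hbase.util.RegionSplitter.UniformSplit;

    public class SplitBoundsExample {
      public static void main(String[] args) {
        // Hex keyspace restricted to ["10000000", "9fffffff"]; the end of the
        // range is treated as inclusive, via Bytes.split(a, b, true, num).
        HexStringSplit hex = new HexStringSplit();
        hex.setFirstRow("10000000");
        hex.setLastRow("9fffffff");
        for (byte[] boundary : hex.split(4)) {   // 3 boundaries for 4 regions
          System.out.println(hex.rowToStr(boundary));
        }

        // Raw-byte keyspace with explicit bounds in Bytes.toBytesBinary format,
        // matching the "\\x00"/"\\xAA" style used in testUserInput above.
        UniformSplit uniform = new UniformSplit();
        uniform.setFirstRow("\\x01\\x00");
        uniform.setLastRow("\\x7F\\xFF");
        for (byte[] boundary : uniform.split(4)) {
          System.out.println(uniform.rowToStr(boundary));
        }
      }
    }

Setting a first/last row that violates the new Preconditions checks (for example a last row not greater than the first row, or more regions than distinct keys in the range) fails fast with IllegalArgumentException or IllegalStateException, which is exactly what testUserInput asserts.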