HBASE-1183 New MR splitting algorithm and other new features need a way to split a key range in N chunks
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@769076 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b5da244412
commit
4a05b0693a
|
@ -167,6 +167,8 @@ Release 0.20.0 - Unreleased
|
|||
(Evgeny Ryabitskiy via Stack)
|
||||
HBASE-1260 Bytes utility class changes: remove usage of ByteBuffer and
|
||||
provide additional ByteBuffer primitives (Jon Gray via Stack)
|
||||
HBASE-1183 New MR splitting algorithm and other new features need a way to
|
||||
split a key range in N chunks (Jon Gray via Stack)
|
||||
|
||||
Release 0.19.0 - 01/21/2009
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.io.IOException;
|
|||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Comparator;
|
||||
import java.math.BigInteger;
|
||||
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
|
||||
|
@ -749,7 +750,107 @@ public class Bytes {
|
|||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param a
|
||||
* @param length
|
||||
* @return First <code>length</code> bytes from <code>a</code>
|
||||
*/
|
||||
public static byte [] head(final byte [] a, final int length) {
|
||||
if(a.length < length) return null;
|
||||
byte [] result = new byte[length];
|
||||
System.arraycopy(a, 0, result, 0, length);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param a
|
||||
* @param length
|
||||
* @return Last <code>length</code> bytes from <code>a</code>
|
||||
*/
|
||||
public static byte [] tail(final byte [] a, final int length) {
|
||||
if(a.length < length) return null;
|
||||
byte [] result = new byte[length];
|
||||
System.arraycopy(a, a.length - length, result, 0, length);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param a
|
||||
* @param length
|
||||
* @return Value in <code>a</code> plus <code>length</code> prepended 0 bytes
|
||||
*/
|
||||
public static byte [] padHead(final byte [] a, final int length) {
|
||||
byte [] padding = new byte[length];
|
||||
for(int i=0;i<length;i++) padding[i] = 0;
|
||||
return add(padding,a);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param a
|
||||
* @param length
|
||||
* @return Value in <code>a</code> plus <code>length</code> appended 0 bytes
|
||||
*/
|
||||
public static byte [] padTail(final byte [] a, final int length) {
|
||||
byte [] padding = new byte[length];
|
||||
for(int i=0;i<length;i++) padding[i] = 0;
|
||||
return add(a,padding);
|
||||
}
|
||||
|
||||
/**
|
||||
* Split passed range. Expensive operation relatively. Uses BigInteger math.
|
||||
* Useful splitting ranges for MapReduce jobs.
|
||||
* @param a Beginning of range
|
||||
* @param b End of range
|
||||
* @param num Number of times to split range. Pass 1 if you want to split
|
||||
* the range in two; i.e. one split.
|
||||
* @return Array of dividing values
|
||||
*/
|
||||
public static byte [][] split(final byte [] a, final byte [] b, final int num) {
|
||||
byte [] aPadded = null;
|
||||
byte [] bPadded = null;
|
||||
if (a.length < b.length) {
|
||||
aPadded = padTail(a,b.length-a.length);
|
||||
bPadded = b;
|
||||
} else if (b.length < a.length) {
|
||||
aPadded = a;
|
||||
bPadded = padTail(b,a.length-b.length);
|
||||
} else {
|
||||
aPadded = a;
|
||||
bPadded = b;
|
||||
}
|
||||
if (compareTo(aPadded,bPadded) > 1) {
|
||||
throw new IllegalArgumentException("b > a");
|
||||
}
|
||||
if (num <= 0) throw new IllegalArgumentException("num cannot be < 0");
|
||||
byte [] prependHeader = {1, 0};
|
||||
BigInteger startBI = new BigInteger(add(prependHeader, aPadded));
|
||||
BigInteger stopBI = new BigInteger(add(prependHeader, bPadded));
|
||||
BigInteger diffBI = stopBI.subtract(startBI);
|
||||
BigInteger splitsBI = BigInteger.valueOf(num + 1);
|
||||
if(diffBI.compareTo(splitsBI) <= 0) return null;
|
||||
BigInteger intervalBI = null;
|
||||
try {
|
||||
intervalBI = diffBI.divide(splitsBI);
|
||||
} catch(Exception e) {
|
||||
return null;
|
||||
}
|
||||
|
||||
byte [][] result = new byte[num+2][];
|
||||
result[0] = a;
|
||||
|
||||
for (int i = 1; i <= num; i++) {
|
||||
BigInteger curBI = startBI.add(intervalBI.multiply(BigInteger.valueOf(i)));
|
||||
byte [] padded = curBI.toByteArray();
|
||||
if (padded[1] == 0)
|
||||
padded = tail(padded,padded.length-2);
|
||||
else
|
||||
padded = tail(padded,padded.length-1);
|
||||
result[i] = padded;
|
||||
}
|
||||
result[num+1] = b;
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param t
|
||||
* @return Array of byte arrays made from passed array of Text
|
||||
|
|
|
@ -24,6 +24,40 @@ import java.util.Arrays;
|
|||
import junit.framework.TestCase;
|
||||
|
||||
public class TestBytes extends TestCase {
|
||||
public void testSplit() throws Exception {
|
||||
byte [] lowest = Bytes.toBytes("AAA");
|
||||
byte [] middle = Bytes.toBytes("CCC");
|
||||
byte [] highest = Bytes.toBytes("EEE");
|
||||
byte [][] parts = Bytes.split(lowest, highest, 1);
|
||||
for (int i = 0; i < parts.length; i++) {
|
||||
System.out.println(Bytes.toString(parts[i]));
|
||||
}
|
||||
assertEquals(3, parts.length);
|
||||
assertTrue(Bytes.equals(parts[1], middle));
|
||||
// Now divide into three parts. Change highest so split is even.
|
||||
highest = Bytes.toBytes("DDD");
|
||||
parts = Bytes.split(lowest, highest, 2);
|
||||
for (int i = 0; i < parts.length; i++) {
|
||||
System.out.println(Bytes.toString(parts[i]));
|
||||
}
|
||||
assertEquals(4, parts.length);
|
||||
// Assert that 3rd part is 'CCC'.
|
||||
assertTrue(Bytes.equals(parts[2], middle));
|
||||
}
|
||||
|
||||
public void testSplit2() throws Exception {
|
||||
// More split tests.
|
||||
byte [] lowest = Bytes.toBytes("http://A");
|
||||
byte [] highest = Bytes.toBytes("http://z");
|
||||
byte [] middle = Bytes.toBytes("http://[");
|
||||
byte [][] parts = Bytes.split(lowest, highest, 1);
|
||||
for (int i = 0; i < parts.length; i++) {
|
||||
System.out.println(Bytes.toString(parts[i]));
|
||||
}
|
||||
assertEquals(2, parts.length);
|
||||
assertTrue(Bytes.equals(parts[1], middle));
|
||||
}
|
||||
|
||||
public void testToLong() throws Exception {
|
||||
long [] longs = {-1l, 123l, 122232323232l};
|
||||
for (int i = 0; i < longs.length; i++) {
|
||||
|
|
Loading…
Reference in New Issue