HBASE-9369 Add support for 1- and 2-byte integers in OrderedBytes and provide types (He Liangliang)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1524297 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
ndimiduk 2013-09-18 06:07:30 +00:00
parent d36860258e
commit 886a96aed8
6 changed files with 550 additions and 15 deletions

View File

@ -0,0 +1,73 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.types;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.Order;
import org.apache.hadoop.hbase.util.OrderedBytes;
import org.apache.hadoop.hbase.util.PositionedByteRange;
/**
 * A {@code short} of 16-bits using a fixed-length encoding. Built on
 * {@link OrderedBytes#encodeInt16(PositionedByteRange, short, Order)}.
 * <p>
 * {@code null} values are not supported and every value reports an encoded
 * length of three bytes.
 * </p>
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class OrderedInt16 extends OrderedBytesBase<Short> {

  /** Shared instance constructed with {@link Order#ASCENDING}. */
  public static final OrderedInt16 ASCENDING = new OrderedInt16(Order.ASCENDING);

  /** Shared instance constructed with {@link Order#DESCENDING}. */
  public static final OrderedInt16 DESCENDING = new OrderedInt16(Order.DESCENDING);

  /**
   * Create an instance that encodes using the given {@code order}.
   */
  protected OrderedInt16(Order order) {
    super(order);
  }

  /**
   * @return always {@code false}; see {@link #encode(PositionedByteRange, Short)}.
   */
  @Override
  public boolean isNullable() {
    return false;
  }

  /**
   * @return always {@code 3}, independent of {@code val}.
   */
  @Override
  public int encodedLength(Short val) {
    return 3;
  }

  @Override
  public Class<Short> encodedClass() {
    return Short.class;
  }

  /**
   * Read a boxed {@code Short} from {@code src} via
   * {@link OrderedBytes#decodeInt16(PositionedByteRange)}.
   */
  @Override
  public Short decode(PositionedByteRange src) {
    final short decoded = OrderedBytes.decodeInt16(src);
    return decoded;
  }

  /**
   * Write {@code val} into {@code dst} using this instance's order.
   * @return the value returned by
   *   {@link OrderedBytes#encodeInt16(PositionedByteRange, short, Order)}.
   * @throws IllegalArgumentException when {@code val} is {@code null}.
   */
  @Override
  public int encode(PositionedByteRange dst, Short val) {
    if (val == null) {
      throw new IllegalArgumentException("Null values not supported.");
    }
    return OrderedBytes.encodeInt16(dst, val, order);
  }

  /**
   * Read a {@code short} value from the buffer {@code src}. Primitive
   * counterpart of {@link #decode(PositionedByteRange)}.
   */
  public short decodeShort(PositionedByteRange src) {
    return OrderedBytes.decodeInt16(src);
  }

  /**
   * Write instance {@code val} into buffer {@code dst}. Primitive counterpart
   * of {@link #encode(PositionedByteRange, Short)}.
   */
  public int encodeShort(PositionedByteRange dst, short val) {
    return OrderedBytes.encodeInt16(dst, val, order);
  }
}

View File

@ -0,0 +1,73 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.types;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.Order;
import org.apache.hadoop.hbase.util.OrderedBytes;
import org.apache.hadoop.hbase.util.PositionedByteRange;
/**
 * A {@code byte} of 8-bits using a fixed-length encoding. Built on
 * {@link OrderedBytes#encodeInt8(PositionedByteRange, byte, Order)}.
 * <p>
 * {@code null} values are not supported and every value reports an encoded
 * length of two bytes.
 * </p>
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class OrderedInt8 extends OrderedBytesBase<Byte> {

  /** Shared instance constructed with {@link Order#ASCENDING}. */
  public static final OrderedInt8 ASCENDING = new OrderedInt8(Order.ASCENDING);

  /** Shared instance constructed with {@link Order#DESCENDING}. */
  public static final OrderedInt8 DESCENDING = new OrderedInt8(Order.DESCENDING);

  /**
   * Create an instance that encodes using the given {@code order}.
   */
  protected OrderedInt8(Order order) {
    super(order);
  }

  /**
   * @return always {@code false}; see {@link #encode(PositionedByteRange, Byte)}.
   */
  @Override
  public boolean isNullable() {
    return false;
  }

  /**
   * @return always {@code 2}, independent of {@code val}.
   */
  @Override
  public int encodedLength(Byte val) {
    return 2;
  }

  @Override
  public Class<Byte> encodedClass() {
    return Byte.class;
  }

  /**
   * Read a boxed {@code Byte} from {@code src} via
   * {@link OrderedBytes#decodeInt8(PositionedByteRange)}.
   */
  @Override
  public Byte decode(PositionedByteRange src) {
    final byte decoded = OrderedBytes.decodeInt8(src);
    return decoded;
  }

  /**
   * Write {@code val} into {@code dst} using this instance's order.
   * @return the value returned by
   *   {@link OrderedBytes#encodeInt8(PositionedByteRange, byte, Order)}.
   * @throws IllegalArgumentException when {@code val} is {@code null}.
   */
  @Override
  public int encode(PositionedByteRange dst, Byte val) {
    if (val == null) {
      throw new IllegalArgumentException("Null values not supported.");
    }
    return OrderedBytes.encodeInt8(dst, val, order);
  }

  /**
   * Read a {@code byte} value from the buffer {@code src}. Primitive
   * counterpart of {@link #decode(PositionedByteRange)}.
   */
  public byte decodeByte(PositionedByteRange src) {
    return OrderedBytes.decodeInt8(src);
  }

  /**
   * Write instance {@code val} into buffer {@code dst}. Primitive counterpart
   * of {@link #encode(PositionedByteRange, Byte)}.
   */
  public int encodeByte(PositionedByteRange dst, byte val) {
    return OrderedBytes.encodeInt8(dst, val, order);
  }
}

View File

@ -0,0 +1,86 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.types;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Order;
import org.apache.hadoop.hbase.util.PositionedByteRange;
/**
 * A {@code DataType} for interacting with values encoded using
 * {@link Bytes#putByte(byte[], int, byte)}. Intended to make it easier to
 * transition away from direct use of {@link Bytes}.
 * <p>
 * This type reports itself as not order-preserving, not nullable, and
 * skippable; it has no associated {@link Order}.
 * </p>
 * @see Bytes#putByte(byte[], int, byte)
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class RawByte implements DataType<Byte> {

  @Override
  public boolean isOrderPreserving() {
    return false;
  }

  /**
   * @return always {@code null}; no {@link Order} is associated with this type.
   */
  @Override
  public Order getOrder() {
    return null;
  }

  @Override
  public boolean isNullable() {
    return false;
  }

  @Override
  public boolean isSkippable() {
    return true;
  }

  /**
   * @return always {@link Bytes#SIZEOF_BYTE}, independent of {@code val}.
   */
  @Override
  public int encodedLength(Byte val) {
    return Bytes.SIZEOF_BYTE;
  }

  @Override
  public Class<Byte> encodedClass() {
    return Byte.class;
  }

  /**
   * Advance the position of {@code src} by {@link Bytes#SIZEOF_BYTE}.
   * @return {@link Bytes#SIZEOF_BYTE}.
   */
  @Override
  public int skip(PositionedByteRange src) {
    final int advanced = src.getPosition() + Bytes.SIZEOF_BYTE;
    src.setPosition(advanced);
    return Bytes.SIZEOF_BYTE;
  }

  /**
   * Read the byte at the current position of {@code src} directly from its
   * backing array, then advance the position past it.
   */
  @Override
  public Byte decode(PositionedByteRange src) {
    final byte[] backing = src.getBytes();
    final int index = src.getOffset() + src.getPosition();
    final byte val = backing[index];
    skip(src);
    return val;
  }

  /**
   * Write {@code val} at the current position of {@code dst} directly into
   * its backing array, then advance the position past it.
   * @return the number of bytes skipped, as reported by
   *   {@link #skip(PositionedByteRange)}.
   */
  @Override
  public int encode(PositionedByteRange dst, Byte val) {
    final byte[] backing = dst.getBytes();
    final int index = dst.getOffset() + dst.getPosition();
    Bytes.putByte(backing, index, val);
    return skip(dst);
  }

  /**
   * Read a {@code byte} value from the buffer {@code buff} at index
   * {@code offset}.
   */
  public byte decodeByte(byte[] buff, int offset) {
    return buff[offset];
  }

  /**
   * Write instance {@code val} into buffer {@code buff} at index
   * {@code offset}.
   * @return the value returned by {@link Bytes#putByte(byte[], int, byte)}
   *   (NOTE(review): presumably the offset following the written byte rather
   *   than a byte count; confirm against {@link Bytes}).
   */
  public int encodeByte(byte[] buff, int offset, byte val) {
    return Bytes.putByte(buff, offset, val);
  }
}

View File

@ -0,0 +1,87 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.types;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Order;
import org.apache.hadoop.hbase.util.PositionedByteRange;
/**
 * A {@code DataType} for interacting with values encoded using
 * {@link Bytes#putShort(byte[], int, short)}. Intended to make it easier to
 * transition away from direct use of {@link Bytes}.
 * <p>
 * This type reports itself as not order-preserving, not nullable, and
 * skippable; it has no associated {@link Order}.
 * </p>
 * @see Bytes#putShort(byte[], int, short)
 * @see Bytes#toShort(byte[])
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class RawShort implements DataType<Short> {

  @Override
  public boolean isOrderPreserving() {
    return false;
  }

  /**
   * @return always {@code null}; no {@link Order} is associated with this type.
   */
  @Override
  public Order getOrder() {
    return null;
  }

  @Override
  public boolean isNullable() {
    return false;
  }

  @Override
  public boolean isSkippable() {
    return true;
  }

  /**
   * @return always {@link Bytes#SIZEOF_SHORT}, independent of {@code val}.
   */
  @Override
  public int encodedLength(Short val) {
    return Bytes.SIZEOF_SHORT;
  }

  @Override
  public Class<Short> encodedClass() {
    return Short.class;
  }

  /**
   * Advance the position of {@code src} by {@link Bytes#SIZEOF_SHORT}.
   * @return {@link Bytes#SIZEOF_SHORT}.
   */
  @Override
  public int skip(PositionedByteRange src) {
    final int advanced = src.getPosition() + Bytes.SIZEOF_SHORT;
    src.setPosition(advanced);
    return Bytes.SIZEOF_SHORT;
  }

  /**
   * Read a {@code short} at the current position of {@code src} from its
   * backing array using {@link Bytes#toShort(byte[], int)}, then advance the
   * position past it.
   */
  @Override
  public Short decode(PositionedByteRange src) {
    final byte[] backing = src.getBytes();
    final int index = src.getOffset() + src.getPosition();
    final short val = Bytes.toShort(backing, index);
    skip(src);
    return val;
  }

  /**
   * Write {@code val} at the current position of {@code dst} into its backing
   * array using {@link Bytes#putShort(byte[], int, short)}, then advance the
   * position past it.
   * @return the number of bytes skipped, as reported by
   *   {@link #skip(PositionedByteRange)}.
   */
  @Override
  public int encode(PositionedByteRange dst, Short val) {
    final byte[] backing = dst.getBytes();
    final int index = dst.getOffset() + dst.getPosition();
    Bytes.putShort(backing, index, val);
    return skip(dst);
  }

  /**
   * Read a {@code short} value from the buffer {@code buff} starting at
   * index {@code offset}.
   */
  public short decodeShort(byte[] buff, int offset) {
    return Bytes.toShort(buff, offset);
  }

  /**
   * Write instance {@code val} into buffer {@code buff} starting at index
   * {@code offset}.
   * @return the value returned by {@link Bytes#putShort(byte[], int, short)}
   *   (NOTE(review): presumably the offset following the written bytes rather
   *   than a byte count; confirm against {@link Bytes}).
   */
  public int encodeShort(byte[] buff, int offset, short val) {
    return Bytes.putShort(buff, offset, val);
  }
}

View File

@ -278,6 +278,7 @@ public class OrderedBytes {
* implementations can be inserted into the total ordering enforced here. * implementations can be inserted into the total ordering enforced here.
*/ */
private static final byte NULL = 0x05; private static final byte NULL = 0x05;
// room for 1 expansion type
private static final byte NEG_INF = 0x07; private static final byte NEG_INF = 0x07;
private static final byte NEG_LARGE = 0x08; private static final byte NEG_LARGE = 0x08;
private static final byte NEG_MED_MIN = 0x09; private static final byte NEG_MED_MIN = 0x09;
@ -289,14 +290,21 @@ public class OrderedBytes {
private static final byte POS_MED_MAX = 0x21; private static final byte POS_MED_MAX = 0x21;
private static final byte POS_LARGE = 0x22; private static final byte POS_LARGE = 0x22;
private static final byte POS_INF = 0x23; private static final byte POS_INF = 0x23;
private static final byte NAN = 0x25; // room for 2 expansion types
private static final byte FIXED_INT32 = 0x27; private static final byte NAN = 0x26;
private static final byte FIXED_INT64 = 0x28; // room for 2 expansion types
private static final byte FIXED_INT8 = 0x29;
private static final byte FIXED_INT16 = 0x2a;
private static final byte FIXED_INT32 = 0x2b;
private static final byte FIXED_INT64 = 0x2c;
// room for 3 expansion types
private static final byte FIXED_FLOAT32 = 0x30; private static final byte FIXED_FLOAT32 = 0x30;
private static final byte FIXED_FLOAT64 = 0x31; private static final byte FIXED_FLOAT64 = 0x31;
private static final byte TEXT = 0x33; // room for 2 expansion types
private static final byte BLOB_VAR = 0x35; private static final byte TEXT = 0x34;
private static final byte BLOB_COPY = 0x36; // room for 2 expansion types
private static final byte BLOB_VAR = 0x37;
private static final byte BLOB_COPY = 0x38;
/* /*
* The following constant values are used by encoding implementations * The following constant values are used by encoding implementations
@ -1198,6 +1206,59 @@ public class OrderedBytes {
return 1; return 1;
} }
/**
 * Encode an {@code int8} value using the fixed-length encoding: the
 * {@code FIXED_INT8} header byte followed by {@code val} with its sign bit
 * flipped, so that unsigned byte comparison follows signed numeric order.
 * {@code ord} is then applied to both written bytes.
 * @param dst the range to write into, starting at its current position.
 * @param val the value to encode.
 * @param ord the sort order applied to the encoded bytes.
 * @return the number of bytes written, always {@code 2}.
 * @see #encodeInt64(PositionedByteRange, long, Order)
 * @see #decodeInt8(PositionedByteRange)
 */
public static int encodeInt8(PositionedByteRange dst, byte val, Order ord) {
  // capture the absolute start index before put() advances the position
  final int begin = dst.getOffset() + dst.getPosition();
  final byte flipped = (byte) (val ^ 0x80);
  dst.put(FIXED_INT8).put(flipped);
  ord.apply(dst.getBytes(), begin, 2);
  return 2;
}
/**
 * Decode an {@code int8} value. Consumes the header byte and one payload
 * byte from {@code src}. An unmodified {@code FIXED_INT8} header selects
 * ascending order; any other header is treated as descending (the header is
 * validated only by an {@code assert}).
 * @param src the range to read from, starting at its current position.
 * @return the decoded value.
 * @see #encodeInt8(PositionedByteRange, byte, Order)
 */
public static byte decodeInt8(PositionedByteRange src) {
  final byte header = src.get();
  assert header == FIXED_INT8 || header == DESCENDING.apply(FIXED_INT8);
  final Order ord;
  if (header == FIXED_INT8) {
    ord = ASCENDING;
  } else {
    ord = DESCENDING;
  }
  // undo the order transform, then restore the sign bit flipped by the encoder
  final int restored = ord.apply(src.get()) ^ 0x80;
  return (byte) (restored & 0xff);
}
/**
 * Encode an {@code int16} value using the fixed-length encoding: the
 * {@code FIXED_INT16} header byte, then the high byte of {@code val} with
 * its sign bit flipped, then the low byte. {@code ord} is then applied to
 * all three written bytes.
 * @param dst the range to write into, starting at its current position.
 * @param val the value to encode.
 * @param ord the sort order applied to the encoded bytes.
 * @return the number of bytes written, always {@code 3}.
 * @see #encodeInt64(PositionedByteRange, long, Order)
 * @see #decodeInt16(PositionedByteRange)
 */
public static int encodeInt16(PositionedByteRange dst, short val, Order ord) {
  // capture the absolute start index before put() advances the position
  final int begin = dst.getOffset() + dst.getPosition();
  final byte high = (byte) ((val >> 8) ^ 0x80);
  final byte low = (byte) val;
  dst.put(FIXED_INT16).put(high).put(low);
  ord.apply(dst.getBytes(), begin, 3);
  return 3;
}
/**
 * Decode an {@code int16} value. Consumes the header byte and two payload
 * bytes from {@code src}. An unmodified {@code FIXED_INT16} header selects
 * ascending order; any other header is treated as descending (the header is
 * validated only by an {@code assert}).
 * @param src the range to read from, starting at its current position.
 * @return the decoded value.
 * @see #encodeInt16(PositionedByteRange, short, Order)
 */
public static short decodeInt16(PositionedByteRange src) {
  final byte header = src.get();
  assert header == FIXED_INT16 || header == DESCENDING.apply(FIXED_INT16);
  final Order ord = header != FIXED_INT16 ? DESCENDING : ASCENDING;
  // high byte first: undo the order transform and restore the flipped sign bit
  final int high = (ord.apply(src.get()) ^ 0x80) & 0xff;
  final int low = ord.apply(src.get()) & 0xff;
  // the two bytes occupy disjoint bit ranges, so OR-ing them assembles the value
  return (short) ((high << 8) | low);
}
/** /**
* Encode an {@code int32} value using the fixed-length encoding. * Encode an {@code int32} value using the fixed-length encoding.
* @return the number of bytes written. * @return the number of bytes written.
@ -1270,14 +1331,14 @@ public class OrderedBytes {
public static int encodeInt64(PositionedByteRange dst, long val, Order ord) { public static int encodeInt64(PositionedByteRange dst, long val, Order ord) {
final int offset = dst.getOffset(), start = dst.getPosition(); final int offset = dst.getOffset(), start = dst.getPosition();
dst.put(FIXED_INT64) dst.put(FIXED_INT64)
.put((byte) ((val >> 56) ^ 0x80)) .put((byte) ((val >> 56) ^ 0x80))
.put((byte) (val >> 48)) .put((byte) (val >> 48))
.put((byte) (val >> 40)) .put((byte) (val >> 40))
.put((byte) (val >> 32)) .put((byte) (val >> 32))
.put((byte) (val >> 24)) .put((byte) (val >> 24))
.put((byte) (val >> 16)) .put((byte) (val >> 16))
.put((byte) (val >> 8)) .put((byte) (val >> 8))
.put((byte) val); .put((byte) val);
ord.apply(dst.getBytes(), offset + start, 9); ord.apply(dst.getBytes(), offset + start, 9);
return 9; return 9;
} }
@ -1611,6 +1672,12 @@ public class OrderedBytes {
return 1; return 1;
case NAN: case NAN:
return 1; return 1;
case FIXED_INT8:
src.setPosition(src.getPosition() + 1);
return src.getPosition() - start;
case FIXED_INT16:
src.setPosition(src.getPosition() + 2);
return src.getPosition() - start;
case FIXED_INT32: case FIXED_INT32:
src.setPosition(src.getPosition() + 4); src.setPosition(src.getPosition() + 4);
return src.getPosition() - start; return src.getPosition() - start;

View File

@ -358,6 +358,142 @@ public class TestOrderedBytes {
} }
} }
/**
 * Test int8 encoding: round-trips a set of boundary values through
 * encode/skip/decode in both orders, then checks that sorting the encoded
 * bytes yields the same sequence as sorting the values themselves.
 */
@Test
public void testInt8() {
  final Byte[] vals =
    { Byte.MIN_VALUE, Byte.MIN_VALUE / 2, 0, Byte.MAX_VALUE / 2, Byte.MAX_VALUE };
  final Order[] orders = { Order.ASCENDING, Order.DESCENDING };

  /*
   * assert encoded values match decoded values. encode into target buffer
   * starting at an offset to detect over/underflow conditions.
   */
  for (Order ord : orders) {
    for (Byte val : vals) {
      // allocate a buffer 3-bytes larger than necessary to detect over/underflow
      final byte[] backing = new byte[2 + 3];
      final PositionedByteRange range = new SimplePositionedByteRange(backing, 1, 2 + 1);
      range.setPosition(1);

      // verify encode
      assertEquals("Surprising return value.",
        2, OrderedBytes.encodeInt8(range, val, ord));
      assertEquals("Broken test: serialization did not consume entire buffer.",
        range.getLength(), range.getPosition());
      assertEquals("Surprising serialized length.", 2, range.getPosition() - 1);
      assertEquals("Buffer underflow.", 0, backing[0]);
      assertEquals("Buffer underflow.", 0, backing[1]);
      assertEquals("Buffer overflow.", 0, backing[backing.length - 1]);

      // verify skip
      range.setPosition(1);
      assertEquals("Surprising return value.", 2, OrderedBytes.skip(range));
      assertEquals("Did not skip enough bytes.", 2, range.getPosition() - 1);

      // verify decode
      range.setPosition(1);
      assertEquals("Deserialization failed.",
        val.byteValue(), OrderedBytes.decodeInt8(range));
      assertEquals("Did not consume enough bytes.", 2, range.getPosition() - 1);
    }
  }

  /*
   * assert natural sort order is preserved by the codec.
   */
  for (Order ord : orders) {
    final byte[][] encoded = new byte[vals.length][2];
    final PositionedByteRange pbr = new SimplePositionedByteRange();
    int slot = 0;
    for (Byte val : vals) {
      OrderedBytes.encodeInt8(pbr.set(encoded[slot]), val, ord);
      slot++;
    }
    Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);

    // expected sequence: natural order for ASCENDING, reversed otherwise
    final Byte[] sortedVals = Arrays.copyOf(vals, vals.length);
    if (ord != Order.ASCENDING) {
      Arrays.sort(sortedVals, Collections.reverseOrder());
    } else {
      Arrays.sort(sortedVals);
    }

    for (int i = 0; i < sortedVals.length; i++) {
      final int decoded = OrderedBytes.decodeInt8(pbr.set(encoded[i]));
      final String message = String.format(
        "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
        sortedVals[i], decoded, ord);
      assertEquals(message, sortedVals[i].byteValue(), decoded);
    }
  }
}
/**
 * Test int16 encoding: round-trips a set of boundary values through
 * encode/skip/decode in both orders, then checks that sorting the encoded
 * bytes yields the same sequence as sorting the values themselves.
 */
@Test
public void testInt16() {
  final Short[] vals =
    { Short.MIN_VALUE, Short.MIN_VALUE / 2, 0, Short.MAX_VALUE / 2, Short.MAX_VALUE };
  final Order[] orders = { Order.ASCENDING, Order.DESCENDING };

  /*
   * assert encoded values match decoded values. encode into target buffer
   * starting at an offset to detect over/underflow conditions.
   */
  for (Order ord : orders) {
    for (Short val : vals) {
      // allocate a buffer 3-bytes larger than necessary to detect over/underflow
      final byte[] backing = new byte[3 + 3];
      final PositionedByteRange range = new SimplePositionedByteRange(backing, 1, 3 + 1);
      range.setPosition(1);

      // verify encode
      assertEquals("Surprising return value.",
        3, OrderedBytes.encodeInt16(range, val, ord));
      assertEquals("Broken test: serialization did not consume entire buffer.",
        range.getLength(), range.getPosition());
      assertEquals("Surprising serialized length.", 3, range.getPosition() - 1);
      assertEquals("Buffer underflow.", 0, backing[0]);
      assertEquals("Buffer underflow.", 0, backing[1]);
      assertEquals("Buffer overflow.", 0, backing[backing.length - 1]);

      // verify skip
      range.setPosition(1);
      assertEquals("Surprising return value.", 3, OrderedBytes.skip(range));
      assertEquals("Did not skip enough bytes.", 3, range.getPosition() - 1);

      // verify decode
      range.setPosition(1);
      assertEquals("Deserialization failed.",
        val.shortValue(), OrderedBytes.decodeInt16(range));
      assertEquals("Did not consume enough bytes.", 3, range.getPosition() - 1);
    }
  }

  /*
   * assert natural sort order is preserved by the codec.
   */
  for (Order ord : orders) {
    final byte[][] encoded = new byte[vals.length][3];
    final PositionedByteRange pbr = new SimplePositionedByteRange();
    int slot = 0;
    for (Short val : vals) {
      OrderedBytes.encodeInt16(pbr.set(encoded[slot]), val, ord);
      slot++;
    }
    Arrays.sort(encoded, Bytes.BYTES_COMPARATOR);

    // expected sequence: natural order for ASCENDING, reversed otherwise
    final Short[] sortedVals = Arrays.copyOf(vals, vals.length);
    if (ord != Order.ASCENDING) {
      Arrays.sort(sortedVals, Collections.reverseOrder());
    } else {
      Arrays.sort(sortedVals);
    }

    for (int i = 0; i < sortedVals.length; i++) {
      final int decoded = OrderedBytes.decodeInt16(pbr.set(encoded[i]));
      final String message = String.format(
        "Encoded representations do not preserve natural order: <%s>, <%s>, %s",
        sortedVals[i], decoded, ord);
      assertEquals(message, sortedVals[i].shortValue(), decoded);
    }
  }
}
/** /**
* Test int32 encoding. * Test int32 encoding.
*/ */
@ -898,7 +1034,8 @@ public class TestOrderedBytes {
@Test(expected = IllegalArgumentException.class) @Test(expected = IllegalArgumentException.class)
public void testBlobCopyNoZeroBytes() { public void testBlobCopyNoZeroBytes() {
byte[] val = { 0x01, 0x02, 0x00, 0x03 }; byte[] val = { 0x01, 0x02, 0x00, 0x03 };
byte[] ascExpected = { 0x36, 0x01, 0x02, 0x00, 0x03 }; // TODO: implementation detail leaked here.
byte[] ascExpected = { 0x38, 0x01, 0x02, 0x00, 0x03 };
PositionedByteRange buf = new SimplePositionedByteRange(val.length + 1); PositionedByteRange buf = new SimplePositionedByteRange(val.length + 1);
OrderedBytes.encodeBlobCopy(buf, val, Order.ASCENDING); OrderedBytes.encodeBlobCopy(buf, val, Order.ASCENDING);
assertArrayEquals(ascExpected, buf.getBytes()); assertArrayEquals(ascExpected, buf.getBytes());
@ -923,6 +1060,8 @@ public class TestOrderedBytes {
BigDecimal posLarge = negLarge.negate(); BigDecimal posLarge = negLarge.negate();
double posInf = Double.POSITIVE_INFINITY; double posInf = Double.POSITIVE_INFINITY;
double nan = Double.NaN; double nan = Double.NaN;
byte int8 = 100;
short int16 = 100;
int int32 = 100; int int32 = 100;
long int64 = 100l; long int64 = 100l;
float float32 = 100.0f; float float32 = 100.0f;
@ -988,6 +1127,16 @@ public class TestOrderedBytes {
buff.setPosition(0); buff.setPosition(0);
assertEquals(o, OrderedBytes.skip(buff)); assertEquals(o, OrderedBytes.skip(buff));
buff.setPosition(0);
o = OrderedBytes.encodeInt8(buff, int8, ord);
buff.setPosition(0);
assertEquals(o, OrderedBytes.skip(buff));
buff.setPosition(0);
o = OrderedBytes.encodeInt16(buff, int16, ord);
buff.setPosition(0);
assertEquals(o, OrderedBytes.skip(buff));
buff.setPosition(0); buff.setPosition(0);
o = OrderedBytes.encodeInt32(buff, int32, ord); o = OrderedBytes.encodeInt32(buff, int32, ord);
buff.setPosition(0); buff.setPosition(0);