HBASE-26566 Optimize encodeNumeric in OrderedBytes (#3940)
Signed-off-by: Reid Chan <reidchan@apache.org>
This commit is contained in:
parent
92ff404059
commit
543582e7eb
|
@ -309,10 +309,6 @@ public class OrderedBytes {
|
||||||
|
|
||||||
public static final Charset UTF8 = Charset.forName("UTF-8");
|
public static final Charset UTF8 = Charset.forName("UTF-8");
|
||||||
private static final byte TERM = 0x00;
|
private static final byte TERM = 0x00;
|
||||||
private static final BigDecimal E8 = BigDecimal.valueOf(1e8);
|
|
||||||
private static final BigDecimal E32 = BigDecimal.valueOf(1e32);
|
|
||||||
private static final BigDecimal EN2 = BigDecimal.valueOf(1e-2);
|
|
||||||
private static final BigDecimal EN10 = BigDecimal.valueOf(1e-10);
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Max precision guaranteed to fit into a {@code long}.
|
* Max precision guaranteed to fit into a {@code long}.
|
||||||
|
@ -632,7 +628,7 @@ public class OrderedBytes {
|
||||||
byte[] a = dst.getBytes();
|
byte[] a = dst.getBytes();
|
||||||
boolean isNeg = val.signum() == -1;
|
boolean isNeg = val.signum() == -1;
|
||||||
final int offset = dst.getOffset(), start = dst.getPosition();
|
final int offset = dst.getOffset(), start = dst.getPosition();
|
||||||
int e = 0, d, startM;
|
int e = 0, startM;
|
||||||
|
|
||||||
if (isNeg) { /* Small negative number: 0x14, -E, ~M */
|
if (isNeg) { /* Small negative number: 0x14, -E, ~M */
|
||||||
dst.put(NEG_SMALL);
|
dst.put(NEG_SMALL);
|
||||||
|
@ -641,21 +637,17 @@ public class OrderedBytes {
|
||||||
}
|
}
|
||||||
|
|
||||||
// normalize abs(val) to determine E
|
// normalize abs(val) to determine E
|
||||||
while (abs.compareTo(EN10) < 0) { abs = abs.movePointRight(8); e += 4; }
|
int zerosBeforeFirstNonZero = abs.scale() - abs.precision();
|
||||||
while (abs.compareTo(EN2) < 0) { abs = abs.movePointRight(2); e++; }
|
int lengthToMoveRight = zerosBeforeFirstNonZero % 2 ==
|
||||||
|
0 ? zerosBeforeFirstNonZero : zerosBeforeFirstNonZero - 1;
|
||||||
|
e = lengthToMoveRight / 2;
|
||||||
|
abs = abs.movePointRight(lengthToMoveRight);
|
||||||
|
|
||||||
putVaruint64(dst, e, !isNeg); // encode appropriate E value.
|
putVaruint64(dst, e, !isNeg); // encode appropriate E value.
|
||||||
|
|
||||||
// encode M by peeling off centimal digits, encoding x as 2x+1
|
// encode M by peeling off centimal digits, encoding x as 2x+1
|
||||||
startM = dst.getPosition();
|
startM = dst.getPosition();
|
||||||
// TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
|
encodeToCentimal(dst, abs);
|
||||||
// numeric scale.
|
|
||||||
for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
|
|
||||||
abs = abs.movePointRight(2);
|
|
||||||
d = abs.intValue();
|
|
||||||
dst.put((byte) ((2 * d + 1) & 0xff));
|
|
||||||
abs = abs.subtract(BigDecimal.valueOf(d));
|
|
||||||
}
|
|
||||||
// terminal digit should be 2x
|
// terminal digit should be 2x
|
||||||
a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
|
a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
|
||||||
if (isNeg) {
|
if (isNeg) {
|
||||||
|
@ -707,7 +699,7 @@ public class OrderedBytes {
|
||||||
byte[] a = dst.getBytes();
|
byte[] a = dst.getBytes();
|
||||||
boolean isNeg = val.signum() == -1;
|
boolean isNeg = val.signum() == -1;
|
||||||
final int start = dst.getPosition(), offset = dst.getOffset();
|
final int start = dst.getPosition(), offset = dst.getOffset();
|
||||||
int e = 0, d, startM;
|
int e = 0, startM;
|
||||||
|
|
||||||
if (isNeg) { /* Large negative number: 0x08, ~E, ~M */
|
if (isNeg) { /* Large negative number: 0x08, ~E, ~M */
|
||||||
dst.put(NEG_LARGE);
|
dst.put(NEG_LARGE);
|
||||||
|
@ -716,9 +708,10 @@ public class OrderedBytes {
|
||||||
}
|
}
|
||||||
|
|
||||||
// normalize abs(val) to determine E
|
// normalize abs(val) to determine E
|
||||||
while (abs.compareTo(E32) >= 0 && e <= 350) { abs = abs.movePointLeft(32); e +=16; }
|
int integerDigits = abs.precision() - abs.scale();
|
||||||
while (abs.compareTo(E8) >= 0 && e <= 350) { abs = abs.movePointLeft(8); e+= 4; }
|
int lengthToMoveLeft = integerDigits % 2 == 0 ? integerDigits : integerDigits + 1;
|
||||||
while (abs.compareTo(BigDecimal.ONE) >= 0 && e <= 350) { abs = abs.movePointLeft(2); e++; }
|
e = lengthToMoveLeft / 2;
|
||||||
|
abs = abs.movePointLeft(lengthToMoveLeft);
|
||||||
|
|
||||||
// encode appropriate header byte and/or E value.
|
// encode appropriate header byte and/or E value.
|
||||||
if (e > 10) { /* large number, write out {~,}E */
|
if (e > 10) { /* large number, write out {~,}E */
|
||||||
|
@ -733,14 +726,7 @@ public class OrderedBytes {
|
||||||
|
|
||||||
// encode M by peeling off centimal digits, encoding x as 2x+1
|
// encode M by peeling off centimal digits, encoding x as 2x+1
|
||||||
startM = dst.getPosition();
|
startM = dst.getPosition();
|
||||||
// TODO: 18 is an arbitrary encoding limit. Reevaluate once we have a better handling of
|
encodeToCentimal(dst, abs);
|
||||||
// numeric scale.
|
|
||||||
for (int i = 0; i < 18 && abs.compareTo(BigDecimal.ZERO) != 0; i++) {
|
|
||||||
abs = abs.movePointRight(2);
|
|
||||||
d = abs.intValue();
|
|
||||||
dst.put((byte) (2 * d + 1));
|
|
||||||
abs = abs.subtract(BigDecimal.valueOf(d));
|
|
||||||
}
|
|
||||||
// terminal digit should be 2x
|
// terminal digit should be 2x
|
||||||
a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
|
a[offset + dst.getPosition() - 1] = (byte) (a[offset + dst.getPosition() - 1] & 0xfe);
|
||||||
if (isNeg) {
|
if (isNeg) {
|
||||||
|
@ -750,6 +736,32 @@ public class OrderedBytes {
|
||||||
return dst.getPosition() - start;
|
return dst.getPosition() - start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encode a value val in [0.01, 1.0) into Centimals.
|
||||||
|
* Util function for {@link this.encodeNumericLarge()} and {@link this.encodeNumericSmall()}
|
||||||
|
* @param dst The destination to which encoded digits are written.
|
||||||
|
* @param val A BigDecimal after the normalization. The value must be in [0.01, 1.0).
|
||||||
|
*/
|
||||||
|
private static void encodeToCentimal(PositionedByteRange dst, BigDecimal val) {
|
||||||
|
// The input value val must be in [0.01, 1.0)
|
||||||
|
String stringOfAbs = val.stripTrailingZeros().toPlainString();
|
||||||
|
String value = stringOfAbs.substring(stringOfAbs.indexOf('.') + 1);
|
||||||
|
int d;
|
||||||
|
|
||||||
|
// If the first float digit is 0, we will encode one digit more than MAX_PRECISION
|
||||||
|
// We encode at most MAX_PRECISION significant digits into centimals,
|
||||||
|
// because the input value, has been already normalized.
|
||||||
|
int maxPrecision = value.charAt(0) == '0' ? MAX_PRECISION + 1 : MAX_PRECISION;
|
||||||
|
maxPrecision = Math.min(maxPrecision, value.length());
|
||||||
|
for (int i = 0; i < maxPrecision; i += 2) {
|
||||||
|
d = (value.charAt(i) - '0') * 10;
|
||||||
|
if (i + 1 < maxPrecision) {
|
||||||
|
d += (value.charAt(i + 1) - '0');
|
||||||
|
}
|
||||||
|
dst.put((byte) (2 * d + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encode a numerical value using the variable-length encoding.
|
* Encode a numerical value using the variable-length encoding.
|
||||||
* @param dst The destination to which encoded digits are written.
|
* @param dst The destination to which encoded digits are written.
|
||||||
|
@ -790,6 +802,8 @@ public class OrderedBytes {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encode a numerical value using the variable-length encoding.
|
* Encode a numerical value using the variable-length encoding.
|
||||||
|
* If the number of significant digits of the value exceeds the
|
||||||
|
* {@link OrderedBytes#MAX_PRECISION}, the exceeding part will be lost.
|
||||||
* @param dst The destination to which encoded digits are written.
|
* @param dst The destination to which encoded digits are written.
|
||||||
* @param val The value to encode.
|
* @param val The value to encode.
|
||||||
* @param ord The {@link Order} to respect while encoding {@code val}.
|
* @param ord The {@link Order} to respect while encoding {@code val}.
|
||||||
|
|
|
@ -70,9 +70,14 @@ public class TestOrderedBytes {
|
||||||
static final BigDecimal[] BD_VALS =
|
static final BigDecimal[] BD_VALS =
|
||||||
{ null, BigDecimal.valueOf(Long.MAX_VALUE), BigDecimal.valueOf(Long.MIN_VALUE),
|
{ null, BigDecimal.valueOf(Long.MAX_VALUE), BigDecimal.valueOf(Long.MIN_VALUE),
|
||||||
BigDecimal.valueOf(Double.MAX_VALUE), BigDecimal.valueOf(Double.MIN_VALUE),
|
BigDecimal.valueOf(Double.MAX_VALUE), BigDecimal.valueOf(Double.MIN_VALUE),
|
||||||
BigDecimal.valueOf(Long.MAX_VALUE).multiply(BigDecimal.valueOf(100)) };
|
BigDecimal.valueOf(Long.MAX_VALUE).multiply(BigDecimal.valueOf(100)),
|
||||||
|
BigDecimal.valueOf(Long.MAX_VALUE).pow(64),
|
||||||
|
BigDecimal.valueOf(Long.MAX_VALUE).pow(64).negate(),
|
||||||
|
new BigDecimal("0." + String.join("", Collections.nCopies(500, "123"))),
|
||||||
|
new BigDecimal("-0." + String.join("", Collections.nCopies(500, "123")))
|
||||||
|
};
|
||||||
static final int[] BD_LENGTHS =
|
static final int[] BD_LENGTHS =
|
||||||
{ 1, 11, 11, 11, 4, 12 };
|
{ 1, 11, 11, 11, 4, 12, 19, 19, 18, 18 };
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is the smallest difference between two doubles in D_VALS
|
* This is the smallest difference between two doubles in D_VALS
|
||||||
|
@ -335,7 +340,11 @@ public class TestOrderedBytes {
|
||||||
if (null == BD_VALS[i]) {
|
if (null == BD_VALS[i]) {
|
||||||
assertEquals(BD_VALS[i], decoded);
|
assertEquals(BD_VALS[i], decoded);
|
||||||
} else {
|
} else {
|
||||||
assertEquals("Deserialization failed.", 0, BD_VALS[i].compareTo(decoded));
|
// The num will be rounded to a specific precision in the encoding phase.
|
||||||
|
// So that big value will lose precision here. Need to add a normalization here to
|
||||||
|
// make the test pass.
|
||||||
|
assertEquals("Deserialization failed.", 0,
|
||||||
|
OrderedBytes.normalize(BD_VALS[i]).compareTo(decoded));
|
||||||
}
|
}
|
||||||
assertEquals("Did not consume enough bytes.", BD_LENGTHS[i], buf1.getPosition() - 1);
|
assertEquals("Did not consume enough bytes.", BD_LENGTHS[i], buf1.getPosition() - 1);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue