HBASE-26635 Optimize decodeNumeric in OrderedBytes (#3986)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
This commit is contained in:
Yutong Xiao 2022-01-01 20:42:57 +08:00 committed by GitHub
parent 9e469e4a2a
commit 96be585324
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 37 additions and 10 deletions

View File

@ -21,7 +21,6 @@ import static org.apache.hadoop.hbase.util.Order.ASCENDING;
import static org.apache.hadoop.hbase.util.Order.DESCENDING;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.MathContext;
import java.math.RoundingMode;
import java.nio.charset.Charset;
@ -556,8 +555,8 @@ public class OrderedBytes {
byte[] a = src.getBytes();
final int start = src.getPosition(), offset = src.getOffset(), remaining = src.getRemaining();
Order ord = comp ? DESCENDING : ASCENDING;
BigDecimal m = BigDecimal.ZERO;
e--;
BigDecimal m;
StringBuilder sb = new StringBuilder();
for (int i = 0;; i++) {
if (i > remaining) {
// we've exceeded this range's window
@ -566,18 +565,23 @@ public class OrderedBytes {
"Read exceeds range before termination byte found. offset: " + offset + " position: "
+ (start + i));
}
// one byte -> 2 digits
// base-100 digits are encoded as val * 2 + 1 except for the termination digit.
m = m.add( // m +=
new BigDecimal(BigInteger.ONE, e * -2).multiply( // 100 ^ p * [decoded digit]
BigDecimal.valueOf((ord.apply(a[offset + start + i]) & 0xff) / 2)));
e--;
int twoDigits = (ord.apply(a[offset + start + i]) & 0xff) / 2;
sb.append(String.format("%02d", twoDigits));
// detect termination digit
if ((ord.apply(a[offset + start + i]) & 1) == 0) {
// Besides, as we will normalise the return value at last,
// we only need to decode at most MAX_PRECISION + 2 digits here.
if ((ord.apply(a[offset + start + i]) & 1) == 0 || sb.length() > MAX_PRECISION + 1) {
src.setPosition(start + i + 1);
break;
}
}
return normalize(m);
m = new BigDecimal(sb.toString());
int stepsMoveLeft = sb.charAt(0) != '0' ? m.precision() : m.precision() + 1;
stepsMoveLeft -= e * 2;
return normalize(m.movePointLeft(stepsMoveLeft));
}
/**
@ -738,7 +742,8 @@ public class OrderedBytes {
/**
* Encode a value val in [0.01, 1.0) into Centimals.
* Util function for {@link this.encodeNumericLarge()} and {@link this.encodeNumericSmall()}
* Util function for {@link OrderedBytes#encodeNumericLarge(PositionedByteRange, BigDecimal)
* and {@link OrderedBytes#encodeNumericSmall(PositionedByteRange, BigDecimal)}
* @param dst The destination to which encoded digits are written.
* @param val A BigDecimal after the normalization. The value must be in [0.01, 1.0).
*/

View File

@ -25,6 +25,7 @@ import static org.junit.Assert.fail;
import java.math.BigDecimal;
import java.util.Arrays;
import java.util.Collections;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
@ -1319,4 +1320,25 @@ public class TestOrderedBytes {
OrderedBytes.skip(buff);
}
}
/**
* Test if the data encoded by our encoding function can be decoded correctly.
*/
@Test
public void testEncodeDecodeMatch() {
int samplesQuantity = 200;
for (int i = 0; i < samplesQuantity; i++) {
BigDecimal randomData = BigDecimal.valueOf(ThreadLocalRandom.current().nextDouble() +
ThreadLocalRandom.current().nextLong());
PositionedByteRange tmp = new SimplePositionedMutableByteRange(100);
Order ord = ThreadLocalRandom.current().nextBoolean() ? Order.DESCENDING : Order.ASCENDING;
OrderedBytes.encodeNumeric(tmp, randomData, ord);
tmp.setPosition(0);
BigDecimal left = OrderedBytes.normalize(randomData);
BigDecimal right = OrderedBytes.decodeNumericAsBigDecimal(tmp);
assertEquals(0, left.compareTo(right));
}
}
}