HBASE-16658 Optimize UTF8 string/byte conversions (binlijin)

This commit is contained in:
tedyu 2016-09-20 13:08:04 -07:00
parent 66821206b8
commit 6624c676fe
1 changed files with 29 additions and 9 deletions

View File

@ -24,10 +24,12 @@ import static com.google.common.base.Preconditions.checkPositionIndex;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.security.SecureRandom;
import java.util.Arrays;
import java.util.Collection;
@ -35,15 +37,13 @@ import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import com.google.protobuf.ByteString;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.io.WritableUtils;
@ -52,6 +52,7 @@ import sun.misc.Unsafe;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Lists;
import com.google.protobuf.ByteString;
/**
* Utility class that handles byte arrays, conversions to/from other types,
@ -73,6 +74,10 @@ public class Bytes implements Comparable<Bytes> {
/** When we encode strings, we always specify UTF8 encoding */
private static final Charset UTF8_CHARSET = Charset.forName(UTF8_ENCODING);
// Using the charset canonical name for String/byte[] conversions is much
// more efficient due to use of cached encoders/decoders.
private static final String UTF8_CSN = StandardCharsets.UTF_8.name();
//HConstants.EMPTY_BYTE_ARRAY should be updated if this is changed
private static final byte [] EMPTY_BYTE_ARRAY = new byte [0];
@ -563,7 +568,7 @@ public class Bytes implements Comparable<Bytes> {
* @param off offset into array
* @return String made from <code>b</code> or null
*/
public static String toString(final byte [] b, int off) {
public static String toString(final byte[] b, int off) {
if (b == null) {
return null;
}
@ -571,7 +576,12 @@ public class Bytes implements Comparable<Bytes> {
if (len <= 0) {
return "";
}
return new String(b, off, len, UTF8_CHARSET);
try {
return new String(b, off, len, UTF8_CSN);
} catch (UnsupportedEncodingException e) {
// should never happen!
throw new IllegalArgumentException("UTF8 encoding is not supported", e);
}
}
/**
@ -583,14 +593,19 @@ public class Bytes implements Comparable<Bytes> {
* @param len length of utf-8 sequence
* @return String made from <code>b</code> or null
*/
public static String toString(final byte [] b, int off, int len) {
public static String toString(final byte[] b, int off, int len) {
if (b == null) {
return null;
}
if (len == 0) {
return "";
}
return new String(b, off, len, UTF8_CHARSET);
try {
return new String(b, off, len, UTF8_CSN);
} catch (UnsupportedEncodingException e) {
// should never happen!
throw new IllegalArgumentException("UTF8 encoding is not supported", e);
}
}
/**
@ -715,7 +730,12 @@ public class Bytes implements Comparable<Bytes> {
* @return the byte array
*/
public static byte[] toBytes(String s) {
return s.getBytes(UTF8_CHARSET);
try {
return s.getBytes(UTF8_CSN);
} catch (UnsupportedEncodingException e) {
// should never happen!
throw new IllegalArgumentException("UTF8 decoding is not supported", e);
}
}
/**