HADOOP-16951: Tidy Up Text and ByteWritables Classes.

1. Remove superfluous code
2. Remove superfluous comments
3. Checkstyle fixes
4. Remove methods that simply call super.method()
5. Use Java 8 facilities to streamline code where applicable
6. Simplify and unify some of the constructs between the two classes
7. Expanding of the arrays be 1.5x instead of 2x per expansion.
This commit is contained in:
belugabehr 2020-04-17 11:16:12 -04:00 committed by GitHub
parent 2fe122e322
commit eca05917d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 147 additions and 143 deletions

View File

@ -19,6 +19,9 @@
package org.apache.hadoop.io;
import java.io.IOException;
import java.util.Arrays;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.io.DataInput;
import java.io.DataOutput;
@ -36,7 +39,8 @@ import org.apache.hadoop.classification.InterfaceStability;
public class BytesWritable extends BinaryComparable
implements WritableComparable<BinaryComparable> {
private static final int LENGTH_BYTES = 4;
private static final byte[] EMPTY_BYTES = {};
private static final byte[] EMPTY_BYTES = new byte[0];
private int size;
private byte[] bytes;
@ -44,7 +48,10 @@ public class BytesWritable extends BinaryComparable
/**
* Create a zero-size sequence.
*/
public BytesWritable() {this(EMPTY_BYTES);}
public BytesWritable() {
this.bytes = EMPTY_BYTES;
this.size = 0;
}
/**
* Create a BytesWritable using the byte array as the initial value.
@ -71,9 +78,7 @@ public class BytesWritable extends BinaryComparable
* See {@link #getBytes()} for faster access to the underlying array.
*/
public byte[] copyBytes() {
byte[] result = new byte[size];
System.arraycopy(bytes, 0, result, 0, size);
return result;
return Arrays.copyOf(bytes, size);
}
/**
@ -130,6 +135,7 @@ public class BytesWritable extends BinaryComparable
/**
* Get the capacity, which is the maximum size that could handled without
* resizing the backing storage.
*
* @return The number of bytes
*/
public int getCapacity() {
@ -137,25 +143,20 @@ public class BytesWritable extends BinaryComparable
}
/**
* Change the capacity of the backing storage.
* The data is preserved.
* @param new_cap The new capacity in bytes.
* Change the capacity of the backing storage. The data is preserved.
*
* @param capacity The new capacity in bytes.
*/
public void setCapacity(int new_cap) {
if (new_cap != getCapacity()) {
byte[] new_data = new byte[new_cap];
if (new_cap < size) {
size = new_cap;
}
if (size != 0) {
System.arraycopy(bytes, 0, new_data, 0, size);
}
bytes = new_data;
public void setCapacity(final int capacity) {
if (capacity != getCapacity()) {
this.size = Math.min(size, capacity);
this.bytes = Arrays.copyOf(this.bytes, capacity);
}
}
/**
* Set the BytesWritable to the contents of the given newData.
*
* @param newData the value to set this BytesWritable to.
*/
public void set(BytesWritable newData) {
@ -163,7 +164,8 @@ public class BytesWritable extends BinaryComparable
}
/**
* Set the value to a copy of the given byte range
* Set the value to a copy of the given byte range.
*
* @param newData the new values to copy in
* @param offset the offset in newData to start at
* @param length the number of bytes to copy
@ -174,7 +176,6 @@ public class BytesWritable extends BinaryComparable
System.arraycopy(newData, offset, bytes, 0, size);
}
// inherit javadoc
@Override
public void readFields(DataInput in) throws IOException {
setSize(0); // clear the old data
@ -182,18 +183,12 @@ public class BytesWritable extends BinaryComparable
in.readFully(bytes, 0, size);
}
// inherit javadoc
@Override
public void write(DataOutput out) throws IOException {
out.writeInt(size);
out.write(bytes, 0, size);
}
@Override
public int hashCode() {
return super.hashCode();
}
/**
* Are the two byte sequences equal?
*/
@ -204,25 +199,19 @@ public class BytesWritable extends BinaryComparable
return false;
}
@Override
public int hashCode() {
return super.hashCode();
}
/**
* Generate the stream of bytes as hex pairs separated by ' '.
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder(3*size);
for (int idx = 0; idx < size; idx++) {
// if not the first, put a blank separator in
if (idx != 0) {
sb.append(' ');
}
String num = Integer.toHexString(0xff & bytes[idx]);
// if it is only one digit, add a leading 0.
if (num.length() < 2) {
sb.append('0');
}
sb.append(num);
}
return sb.toString();
return IntStream.range(0, size)
.mapToObj(idx -> String.format("%02x", bytes[idx]))
.collect(Collectors.joining(" "));
}
/** A Comparator optimized for BytesWritable. */

View File

@ -24,11 +24,11 @@ import java.io.DataOutput;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.MalformedInputException;
import java.nio.charset.StandardCharsets;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Arrays;
@ -57,7 +57,7 @@ public class Text extends BinaryComparable
new ThreadLocal<CharsetEncoder>() {
@Override
protected CharsetEncoder initialValue() {
return Charset.forName("UTF-8").newEncoder().
return StandardCharsets.UTF_8.newEncoder().
onMalformedInput(CodingErrorAction.REPORT).
onUnmappableCharacter(CodingErrorAction.REPORT);
}
@ -67,7 +67,7 @@ public class Text extends BinaryComparable
new ThreadLocal<CharsetDecoder>() {
@Override
protected CharsetDecoder initialValue() {
return Charset.forName("UTF-8").newDecoder().
return StandardCharsets.UTF_8.newDecoder().
onMalformedInput(CodingErrorAction.REPORT).
onUnmappableCharacter(CodingErrorAction.REPORT);
}
@ -75,25 +75,31 @@ public class Text extends BinaryComparable
private static final byte[] EMPTY_BYTES = new byte[0];
private byte[] bytes;
private int length;
private byte[] bytes = EMPTY_BYTES;
private int length = 0;
/**
* Construct an empty text string.
*/
public Text() {
bytes = EMPTY_BYTES;
}
/** Construct from a string.
/**
* Construct from a string.
*/
public Text(String string) {
set(string);
}
/** Construct from another text. */
/**
* Construct from another text.
*/
public Text(Text utf8) {
set(utf8);
}
/** Construct from a byte array.
/**
* Construct from a byte array.
*/
public Text(byte[] utf8) {
set(utf8);
@ -104,9 +110,7 @@ public class Text extends BinaryComparable
* See {@link #getBytes()} for faster access to the underlying array.
*/
public byte[] copyBytes() {
byte[] result = new byte[length];
System.arraycopy(bytes, 0, result, 0, length);
return result;
return Arrays.copyOf(bytes, length);
}
/**
@ -119,7 +123,9 @@ public class Text extends BinaryComparable
return bytes;
}
/** Returns the number of bytes in the byte array */
/**
* Returns the number of bytes in the byte array.
*/
@Override
public int getLength() {
return length;
@ -186,12 +192,12 @@ public class Text extends BinaryComparable
}
return -1; // not found
} catch (CharacterCodingException e) {
// can't get here
e.printStackTrace();
return -1;
throw new RuntimeException("Should not have happened", e);
}
}
/** Set to contain the contents of a string.
/**
* Set to contain the contents of a string.
*/
public void set(String string) {
try {
@ -203,37 +209,46 @@ public class Text extends BinaryComparable
}
}
/** Set to a utf8 byte array
/**
* Set to a utf8 byte array.
*/
public void set(byte[] utf8) {
set(utf8, 0, utf8.length);
}
/** copy a text. */
/**
* Copy a text.
*/
public void set(Text other) {
set(other.getBytes(), 0, other.getLength());
}
/**
* Set the Text to range of bytes
* Set the Text to range of bytes.
*
* @param utf8 the data to copy from
* @param start the first position of the new string
* @param len the number of bytes of the new string
*/
public void set(byte[] utf8, int start, int len) {
setCapacity(len, false);
ensureCapacity(len);
System.arraycopy(utf8, start, bytes, 0, len);
this.length = len;
}
/**
* Append a range of bytes to the end of the given text
* Append a range of bytes to the end of the given text.
*
* @param utf8 the data to copy from
* @param start the first position to append from utf8
* @param len the number of bytes to append
*/
public void append(byte[] utf8, int start, int len) {
setCapacity(length + len, true);
byte[] original = bytes;
int capacity = Math.max(length + len, length + (length >> 1));
if (ensureCapacity(capacity)) {
System.arraycopy(original, 0, bytes, 0, length);
}
System.arraycopy(utf8, start, bytes, length, len);
length += len;
}
@ -250,30 +265,24 @@ public class Text extends BinaryComparable
length = 0;
}
/*
/**
* Sets the capacity of this Text object to <em>at least</em>
* <code>len</code> bytes. If the current buffer is longer,
* then the capacity and existing content of the buffer are
* unchanged. If <code>len</code> is larger
* than the current capacity, the Text object's capacity is
* increased to match.
* @param len the number of bytes we need
* @param keepData should the old data be kept
* <code>capacity</code> bytes. If the current buffer is longer, then the
* capacity and existing content of the buffer are unchanged. If
* <code>capacity</code> is larger than the current capacity, the Text
* object's capacity is increased to match and any existing data is lost.
*
* @param capacity the number of bytes we need
* @return true if the internal array was resized or false otherwise
*/
private void setCapacity(int len, boolean keepData) {
if (bytes == null || bytes.length < len) {
if (bytes != null && keepData) {
bytes = Arrays.copyOf(bytes, Math.max(len,length << 1));
} else {
bytes = new byte[len];
}
private boolean ensureCapacity(final int capacity) {
if (bytes.length < capacity) {
bytes = new byte[capacity];
return true;
}
return false;
}
/**
* Convert text back to string
* @see java.lang.Object#toString()
*/
@Override
public String toString() {
try {
@ -283,8 +292,6 @@ public class Text extends BinaryComparable
}
}
/** deserialize
*/
@Override
public void readFields(DataInput in) throws IOException {
int newLength = WritableUtils.readVInt(in);
@ -303,7 +310,9 @@ public class Text extends BinaryComparable
readWithKnownLength(in, newLength);
}
/** Skips over one Text in the input. */
/**
* Skips over one Text in the input.
*/
public static void skip(DataInput in) throws IOException {
int length = WritableUtils.readVInt(in);
WritableUtils.skipFully(in, length);
@ -315,14 +324,14 @@ public class Text extends BinaryComparable
* format.
*/
public void readWithKnownLength(DataInput in, int len) throws IOException {
setCapacity(len, false);
ensureCapacity(len);
in.readFully(bytes, 0, len);
length = len;
}
/** serialize
* write this object to out
* length uses zero-compressed encoding
/**
* Serialize. Write this object to out length uses zero-compressed encoding.
*
* @see Writable#write(DataOutput)
*/
@Override
@ -341,7 +350,10 @@ public class Text extends BinaryComparable
out.write(bytes, 0, length);
}
/** Returns true iff <code>o</code> is a Text with the same contents. */
/**
* Returns true iff <code>o</code> is a Text with the same length and same
* contents.
*/
@Override
public boolean equals(Object o) {
if (o instanceof Text)
@ -474,7 +486,8 @@ public class Text extends BinaryComparable
return decode(bytes);
}
/** Write a UTF8 encoded string to out
/**
* Write a UTF8 encoded string to out.
*/
public static int writeString(DataOutput out, String s) throws IOException {
ByteBuffer bytes = encode(s);
@ -484,7 +497,8 @@ public class Text extends BinaryComparable
return length;
}
/** Write a UTF8 encoded string with a maximum size to out
/**
* Write a UTF8 encoded string with a maximum size to out.
*/
public static int writeString(DataOutput out, String s, int maxLength)
throws IOException {
@ -509,16 +523,18 @@ public class Text extends BinaryComparable
private static final int TRAIL_BYTE = 2;
/**
* Check if a byte array contains valid utf-8
* Check if a byte array contains valid UTF-8.
*
* @param utf8 byte array
* @throws MalformedInputException if the byte array contains invalid utf-8
* @throws MalformedInputException if the byte array contains invalid UTF-8
*/
public static void validateUTF8(byte[] utf8) throws MalformedInputException {
validateUTF8(utf8, 0, utf8.length);
}
/**
* Check to see if a byte array is valid utf-8
* Check to see if a byte array is valid UTF-8.
*
* @param utf8 the array of bytes
* @param start the offset of the first byte in the array
* @param len the length of the byte sequence
@ -641,7 +657,6 @@ public class Text extends BinaryComparable
return ch;
}
static final int offsetsFromUTF8[] =
{ 0x00000000, 0x00003080,
0x000E2080, 0x03C82080, 0xFA082080, 0x82082080 };

View File

@ -291,9 +291,9 @@ public class TestText {
a.append("xdefgxxx".getBytes(), 1, 4);
assertEquals("modified aliased string", "abc", b.toString());
assertEquals("appended string incorrectly", "abcdefg", a.toString());
// add an extra byte so that capacity = 14 and length = 8
// add an extra byte so that capacity = 10 and length = 8
a.append(new byte[]{'d'}, 0, 1);
assertEquals(14, a.getBytes().length);
assertEquals(10, a.getBytes().length);
assertEquals(8, a.copyBytes().length);
}