HADOOP-10855. Allow Text to be read with a known Length. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1612732 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2014-07-23 00:29:00 +00:00
parent a24001475c
commit 04aad666d9
3 changed files with 37 additions and 6 deletions

View File

@ -42,6 +42,8 @@ Release 2.6.0 - UNRELEASED
HADOOP-10755. Support negative caching of user-group mapping. HADOOP-10755. Support negative caching of user-group mapping.
(Lei Xu via wang) (Lei Xu via wang)
HADOOP-10855. Allow Text to be read with a known Length. (todd)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES

View File

@ -288,9 +288,7 @@ public class Text extends BinaryComparable
@Override @Override
public void readFields(DataInput in) throws IOException { public void readFields(DataInput in) throws IOException {
int newLength = WritableUtils.readVInt(in); int newLength = WritableUtils.readVInt(in);
setCapacity(newLength, false); readWithKnownLength(in, newLength);
in.readFully(bytes, 0, newLength);
length = newLength;
} }
public void readFields(DataInput in, int maxLength) throws IOException { public void readFields(DataInput in, int maxLength) throws IOException {
@ -302,9 +300,7 @@ public class Text extends BinaryComparable
throw new IOException("tried to deserialize " + newLength + throw new IOException("tried to deserialize " + newLength +
" bytes of data, but maxLength = " + maxLength); " bytes of data, but maxLength = " + maxLength);
} }
setCapacity(newLength, false); readWithKnownLength(in, newLength);
in.readFully(bytes, 0, newLength);
length = newLength;
} }
/** Skips over one Text in the input. */ /** Skips over one Text in the input. */
@ -313,6 +309,17 @@ public class Text extends BinaryComparable
WritableUtils.skipFully(in, length); WritableUtils.skipFully(in, length);
} }
/**
 * Fills this Text from {@code in} using a byte count supplied by the caller.
 * No length prefix is read from the stream here; the method reads
 * {@code len} bytes via {@code in.readFully} and records {@code len} as the
 * new length. This allows creating Text from a stream which uses a different
 * serialization format.
 *
 * @param in  source of the raw bytes
 * @param len number of bytes to read; becomes this object's length on success
 * @throws IOException if reading from {@code in} fails
 */
public void readWithKnownLength(DataInput in, int len) throws IOException {
  // Size the backing array before reading into it.
  // NOTE(review): the 'false' flag presumably means the previous contents
  // need not be preserved -- confirm against setCapacity.
  this.setCapacity(len, false);
  this.bytes.getClass(); // placeholder removed below
}
/** serialize /** serialize
* write this object to out * write this object to out
* length uses zero-compressed encoding * length uses zero-compressed encoding

View File

@ -24,6 +24,7 @@ import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException; import java.nio.charset.CharacterCodingException;
import java.util.Random; import java.util.Random;
import com.google.common.base.Charsets;
import com.google.common.primitives.Bytes; import com.google.common.primitives.Bytes;
/** Unit tests for LargeUTF8. */ /** Unit tests for LargeUTF8. */
@ -364,6 +365,27 @@ public class TestText extends TestCase {
} }
} }
/**
 * Checks {@code Text.readWithKnownLength} by reading prefixes of different
 * sizes from the same input into one reused {@code Text} instance: first a
 * 5-byte prefix, then a longer one (7 bytes), then a shorter one (2 bytes).
 */
public void testReadWithKnownLength() throws IOException {
  String line = "hello world";
  byte[] inputBytes = line.getBytes(Charsets.UTF_8);
  DataInputBuffer in = new DataInputBuffer();
  Text text = new Text();

  // Order matters: initial read, then a longer read (must lengthen),
  // then a shorter read (must shorten).
  int[] knownLengths = {5, 7, 2};
  String[] expectedTexts = {"hello", "hello w", "he"};

  for (int i = 0; i < knownLengths.length; i++) {
    // Rewind to the start of the input before every read.
    in.reset(inputBytes, inputBytes.length);
    text.readWithKnownLength(in, knownLengths[i]);
    assertEquals(expectedTexts[i], text.toString());
  }
}
/** /**
* test {@code Text.bytesToCodePoint(bytes) } * test {@code Text.bytesToCodePoint(bytes) }
* with {@code BufferUnderflowException} * with {@code BufferUnderflowException}