From 04aad666d94ab7f07ec338d3ef66c2877b1a18d2 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Wed, 23 Jul 2014 00:29:00 +0000 Subject: [PATCH] HADOOP-10855. Allow Text to be read with a known Length. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1612732 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 2 ++ .../main/java/org/apache/hadoop/io/Text.java | 19 +++++++++++----- .../java/org/apache/hadoop/io/TestText.java | 22 +++++++++++++++++++ 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 3a590169e90..d311aa42ebc 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -42,6 +42,8 @@ Release 2.6.0 - UNRELEASED HADOOP-10755. Support negative caching of user-group mapping. (Lei Xu via wang) + HADOOP-10855. Allow Text to be read with a known Length. (todd) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java index e4490f1e34e..3dc507687f5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java @@ -288,9 +288,7 @@ public class Text extends BinaryComparable @Override public void readFields(DataInput in) throws IOException { int newLength = WritableUtils.readVInt(in); - setCapacity(newLength, false); - in.readFully(bytes, 0, newLength); - length = newLength; + readWithKnownLength(in, newLength); } public void readFields(DataInput in, int maxLength) throws IOException { @@ -302,9 +300,7 @@ public class Text extends BinaryComparable throw new IOException("tried to deserialize " + newLength + " bytes of data, but maxLength = " + maxLength); } - setCapacity(newLength, false); - in.readFully(bytes, 0, newLength); - length = newLength; + readWithKnownLength(in, newLength); } /** Skips over one Text in the input. */ @@ -313,6 +309,17 @@ public class Text extends BinaryComparable WritableUtils.skipFully(in, length); } + /** + * Read a Text object whose length is already known. + * This allows creating Text from a stream which uses a different serialization + * format. + */ + public void readWithKnownLength(DataInput in, int len) throws IOException { + setCapacity(len, false); + in.readFully(bytes, 0, len); + length = len; + } + /** serialize * write this object to out * length uses zero-compressed encoding diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java index 4b04931f70a..56b199a4223 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java @@ -24,6 +24,7 @@ import java.nio.BufferUnderflowException; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; import java.util.Random; +import com.google.common.base.Charsets; import com.google.common.primitives.Bytes; /** Unit tests for LargeUTF8. */ @@ -363,6 +364,27 @@ public class TestText extends TestCase { fail("testReadWriteOperations error !!!"); } } + + public void testReadWithKnownLength() throws IOException { + String line = "hello world"; + byte[] inputBytes = line.getBytes(Charsets.UTF_8); + DataInputBuffer in = new DataInputBuffer(); + Text text = new Text(); + + in.reset(inputBytes, inputBytes.length); + text.readWithKnownLength(in, 5); + assertEquals("hello", text.toString()); + + // Read longer length, make sure it lengthens + in.reset(inputBytes, inputBytes.length); + text.readWithKnownLength(in, 7); + assertEquals("hello w", text.toString()); + + // Read shorter length, make sure it shortens + in.reset(inputBytes, inputBytes.length); + text.readWithKnownLength(in, 2); + assertEquals("he", text.toString()); + } /** * test {@code Text.bytesToCodePoint(bytes) }