HADOOP-10855. Allow Text to be read with a known Length. Contributed by Todd Lipcon.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1612732 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2014-07-23 00:29:00 +00:00
parent a24001475c
commit 04aad666d9
3 changed files with 37 additions and 6 deletions

View File

@ -42,6 +42,8 @@ Release 2.6.0 - UNRELEASED
HADOOP-10755. Support negative caching of user-group mapping.
(Lei Xu via wang)
HADOOP-10855. Allow Text to be read with a known Length. (todd)
OPTIMIZATIONS
BUG FIXES

View File

@ -288,9 +288,7 @@ public class Text extends BinaryComparable
@Override
public void readFields(DataInput in) throws IOException {
int newLength = WritableUtils.readVInt(in);
setCapacity(newLength, false);
in.readFully(bytes, 0, newLength);
length = newLength;
readWithKnownLength(in, newLength);
}
public void readFields(DataInput in, int maxLength) throws IOException {
@ -302,9 +300,7 @@ public class Text extends BinaryComparable
throw new IOException("tried to deserialize " + newLength +
" bytes of data, but maxLength = " + maxLength);
}
setCapacity(newLength, false);
in.readFully(bytes, 0, newLength);
length = newLength;
readWithKnownLength(in, newLength);
}
/** Skips over one Text in the input. */
@ -313,6 +309,17 @@ public class Text extends BinaryComparable
WritableUtils.skipFully(in, length);
}
/**
 * Populate this Text from {@code len} raw bytes of {@code in}, for callers
 * that already know the length.
 * Unlike {@code readFields}, no length is decoded from the stream here, so
 * this can be used when the surrounding stream uses a different
 * serialization format for the length.
 *
 * @param in  stream positioned at the first byte of the text data
 * @param len number of bytes to read; becomes the length of this Text
 * @throws IOException if {@code len} bytes cannot be read from {@code in}
 */
public void readWithKnownLength(DataInput in, int len) throws IOException {
  // Size the backing array before reading into it.
  // NOTE(review): the 'false' flag presumably means the previous contents
  // need not be preserved -- confirm against setCapacity.
  this.setCapacity(len, false);
  in.readFully(this.bytes, 0, len);
  // Only publish the new length once the read has succeeded.
  this.length = len;
}
/** serialize
* write this object to out
* length uses zero-compressed encoding

View File

@ -24,6 +24,7 @@ import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.util.Random;
import com.google.common.base.Charsets;
import com.google.common.primitives.Bytes;
/** Unit tests for Text. */
@ -363,6 +364,27 @@ public class TestText extends TestCase {
fail("testReadWriteOperations error !!!");
}
}
/**
 * Checks that {@code Text.readWithKnownLength} yields the first {@code len}
 * bytes of the input, and that one Text instance can be reused for a longer
 * and then a shorter read.
 */
public void testReadWithKnownLength() throws IOException {
  final String source = "hello world";
  final byte[] encoded = source.getBytes(Charsets.UTF_8);
  final DataInputBuffer buffer = new DataInputBuffer();
  final Text target = new Text();

  // First read: take only the leading five bytes.
  buffer.reset(encoded, encoded.length);
  target.readWithKnownLength(buffer, 5);
  assertEquals("hello", target.toString());

  // A longer read into the same instance must extend its contents.
  buffer.reset(encoded, encoded.length);
  target.readWithKnownLength(buffer, 7);
  assertEquals("hello w", target.toString());

  // A shorter read into the same instance must truncate its contents.
  buffer.reset(encoded, encoded.length);
  target.readWithKnownLength(buffer, 2);
  assertEquals("he", target.toString());
}
/**
* test {@code Text.bytesToCodePoint(bytes) }