diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 323aff0bbf4..afcc219ce58 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -167,6 +167,9 @@ Release 2.0.0 - UNRELEASED HADOOP-8340. SNAPSHOT build versions should compare as less than their eventual final release. (todd) + HADOOP-8361. Avoid out-of-memory problems when deserializing strings. + (Colin Patrick McCabe via eli) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java index b37fd93add3..1946dc23d9a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java @@ -254,7 +254,7 @@ public void setSymlink(final Path p) { // Writable ////////////////////////////////////////////////// public void write(DataOutput out) throws IOException { - Text.writeString(out, getPath().toString()); + Text.writeString(out, getPath().toString(), Text.ONE_MEGABYTE); out.writeLong(getLen()); out.writeBoolean(isDirectory()); out.writeShort(getReplication()); @@ -262,16 +262,16 @@ public void write(DataOutput out) throws IOException { out.writeLong(getModificationTime()); out.writeLong(getAccessTime()); getPermission().write(out); - Text.writeString(out, getOwner()); - Text.writeString(out, getGroup()); + Text.writeString(out, getOwner(), Text.ONE_MEGABYTE); + Text.writeString(out, getGroup(), Text.ONE_MEGABYTE); out.writeBoolean(isSymlink()); if (isSymlink()) { - Text.writeString(out, getSymlink().toString()); + Text.writeString(out, getSymlink().toString(), Text.ONE_MEGABYTE); } } public void readFields(DataInput in) throws IOException { - String strPath = Text.readString(in); + String strPath = Text.readString(in, Text.ONE_MEGABYTE); this.path = new Path(strPath); this.length = in.readLong(); this.isdir = in.readBoolean(); @@ -280,10 +280,10 @@ public void readFields(DataInput in) throws IOException { modification_time = in.readLong(); access_time = in.readLong(); permission.readFields(in); - owner = Text.readString(in); - group = Text.readString(in); + owner = Text.readString(in, Text.ONE_MEGABYTE); + group = Text.readString(in, Text.ONE_MEGABYTE); if (in.readBoolean()) { - this.symlink = new Path(Text.readString(in)); + this.symlink = new Path(Text.readString(in, Text.ONE_MEGABYTE)); } else { this.symlink = null; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java index a26d2f422a9..5642d0f5b92 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/permission/PermissionStatus.java @@ -84,8 +84,8 @@ public PermissionStatus applyUMask(FsPermission umask) { /** {@inheritDoc} */ public void readFields(DataInput in) throws IOException { - username = Text.readString(in); - groupname = Text.readString(in); + username = Text.readString(in, Text.ONE_MEGABYTE); + groupname = Text.readString(in, Text.ONE_MEGABYTE); permission = FsPermission.read(in); } @@ -110,8 +110,8 @@ public static void write(DataOutput out, String username, String groupname, FsPermission permission) throws IOException { - Text.writeString(out, username); - Text.writeString(out, groupname); + Text.writeString(out, username, Text.ONE_MEGABYTE); + Text.writeString(out, groupname, Text.ONE_MEGABYTE); permission.write(out); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java index e38dd3c79a5..0bee33236d6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/Text.java @@ -412,6 +412,8 @@ public static ByteBuffer encode(String string, boolean replace) return bytes; } + static final public int ONE_MEGABYTE = 1024 * 1024; + /** Read a UTF8 encoded string from in */ public static String readString(DataInput in) throws IOException { @@ -420,7 +422,17 @@ public static String readString(DataInput in) throws IOException { in.readFully(bytes, 0, length); return decode(bytes); } - + + /** Read a UTF8 encoded string with a maximum size + */ + public static String readString(DataInput in, int maxLength) + throws IOException { + int length = WritableUtils.readVIntInRange(in, 0, maxLength - 1); + byte [] bytes = new byte[length]; + in.readFully(bytes, 0, length); + return decode(bytes); + } + /** Write a UTF8 encoded string to out */ public static int writeString(DataOutput out, String s) throws IOException { @@ -431,6 +443,22 @@ public static int writeString(DataOutput out, String s) throws IOException { return length; } + /** Write a UTF8 encoded string with a maximum size to out + */ + public static int writeString(DataOutput out, String s, int maxLength) + throws IOException { + ByteBuffer bytes = encode(s); + int length = bytes.limit(); + if (length >= maxLength) { + throw new IOException("string was too long to write! Expected " + + "less than " + maxLength + " bytes, but got " + + length + " bytes."); + } + WritableUtils.writeVInt(out, length); + out.write(bytes.array(), 0, length); + return length; + } + ////// states for validateUTF8 private static final int LEAD_BYTE = 0; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java index a7718bfba70..a86c532badc 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestText.java @@ -20,6 +20,7 @@ import junit.framework.TestCase; +import java.io.IOException; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; import java.util.Random; @@ -107,7 +108,6 @@ public void testCoding() throws Exception { } } - public void testIO() throws Exception { DataOutputBuffer out = new DataOutputBuffer(); DataInputBuffer in = new DataInputBuffer(); @@ -136,6 +136,40 @@ public void testIO() throws Exception { assertTrue(before.equals(after2)); } } + + public void doTestLimitedIO(String str, int strLen) throws IOException { + DataOutputBuffer out = new DataOutputBuffer(); + DataInputBuffer in = new DataInputBuffer(); + + out.reset(); + try { + Text.writeString(out, str, strLen); + fail("expected writeString to fail when told to write a string " + + "that was too long! The string was '" + str + "'"); + } catch (IOException e) { + } + Text.writeString(out, str, strLen + 1); + + // test that it reads correctly + in.reset(out.getData(), out.getLength()); + in.mark(strLen); + String after; + try { + after = Text.readString(in, strLen); + fail("expected readString to fail when told to read a string " + + "that was too long! The string was '" + str + "'"); + } catch (IOException e) { + } + in.reset(); + after = Text.readString(in, strLen + 1); + assertTrue(str.equals(after)); + } + + public void testLimitedIO() throws Exception { + doTestLimitedIO("abcd", 4); + doTestLimitedIO("", 0); + doTestLimitedIO("1", 1); + } public void testCompare() throws Exception { DataOutputBuffer out1 = new DataOutputBuffer();