From f4ed787669bc2d9337f503f0a5188167b490a961 Mon Sep 17 00:00:00 2001 From: cnauroth Date: Tue, 4 Nov 2014 10:27:41 -0800 Subject: [PATCH] HADOOP-11165. TestUTF8 fails when run against java 8. Contributed by Stephen Chu. (cherry picked from commit 85da71c2d3c565a8920e47fe3925e8e0bef353a5) --- .../hadoop-common/CHANGES.txt | 3 +++ .../java/org/apache/hadoop/io/TestUTF8.java | 23 +++++++++++++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index bcafb120af8..9d73cf079da 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -35,6 +35,9 @@ Release 2.7.0 - UNRELEASED HADOOP-11186. documentation should talk about hadoop.htrace.spanreceiver.classes, not hadoop.trace.spanreceiver.classes (cmccabe) + HADOOP-11165. TestUTF8 fails when run against java 8. + (Stephen Chu via cnauroth) + Release 2.6.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java index b3872248327..ede59406768 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java @@ -19,8 +19,11 @@ package org.apache.hadoop.io; import junit.framework.TestCase; +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; import java.io.IOException; import java.io.UTFDataFormatException; +import java.nio.ByteBuffer; import java.util.Random; import org.apache.hadoop.test.GenericTestUtils; @@ -54,11 +57,22 @@ public void testGetBytes() throws Exception { // generate a random string String before = getTestString(); - // check its utf8 - assertEquals(before, new String(UTF8.getBytes(before), "UTF-8")); + // Check that the bytes are stored correctly in Modified-UTF8 format. + // Note that the DataInput and DataOutput interfaces convert between + // bytes and Strings using the Modified-UTF8 format. + assertEquals(before, readModifiedUTF(UTF8.getBytes(before))); } } + private String readModifiedUTF(byte[] bytes) throws IOException { + final short lengthBytes = (short)2; + ByteBuffer bb = ByteBuffer.allocate(bytes.length + lengthBytes); + bb.putShort((short)bytes.length).put(bytes); + ByteArrayInputStream bis = new ByteArrayInputStream(bb.array()); + DataInputStream dis = new DataInputStream(bis); + return dis.readUTF(); + } + public void testIO() throws Exception { DataOutputBuffer out = new DataOutputBuffer(); DataInputBuffer in = new DataInputBuffer(); @@ -80,11 +94,6 @@ public void testIO() throws Exception { in.reset(out.getData(), out.getLength()); String after2 = in.readUTF(); assertEquals(before, after2); - - // test that it is compatible with Java's other decoder - String after3 = new String(out.getData(), 2, out.getLength()-2, "UTF-8"); - assertEquals(before, after3); - } }