From dc119ef7da055d4f5d19367bba6454511129a89a Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Thu, 1 Aug 2013 20:03:31 +0000 Subject: [PATCH] HADOOP-9801. Configuration#writeXml uses platform default encoding, which may mishandle multi-byte characters. Contributed by Chris Nauroth. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1509405 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 ++ .../org/apache/hadoop/conf/Configuration.java | 4 +- .../apache/hadoop/conf/TestConfiguration.java | 44 +++++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 882ab5ea7bc..3c0737ca94b 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -333,6 +333,9 @@ Release 2.1.1-beta - UNRELEASED HADOOP-9768. chown and chgrp reject users and groups with spaces on platforms where spaces are otherwise acceptable. (cnauroth) + HADOOP-9801. Configuration#writeXml uses platform default encoding, which + may mishandle multi-byte characters. (cnauroth) + Release 2.1.0-beta - 2013-08-06 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 24eaea4e196..9bc7472da8a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -2181,12 +2181,12 @@ private void loadProperty(Properties properties, String name, String attr, /** * Write out the non-default properties in this configuration to the given - * {@link OutputStream}. + * {@link OutputStream} using UTF-8 encoding. * * @param out the output stream to write to. 
*/ public void writeXml(OutputStream out) throws IOException { - writeXml(new OutputStreamWriter(out)); + writeXml(new OutputStreamWriter(out, "UTF-8")); } /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index 91c81b75c74..3bb211c54eb 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -21,9 +21,11 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStreamWriter; import java.io.StringWriter; import java.net.InetAddress; import java.net.InetSocketAddress; @@ -44,6 +46,7 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration.IntegerRanges; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.NetUtils; import static org.apache.hadoop.util.PlatformName.IBM_JAVA; import org.codehaus.jackson.map.ObjectMapper; @@ -53,6 +56,10 @@ public class TestConfiguration extends TestCase { private Configuration conf; final static String CONFIG = new File("./test-config-TestConfiguration.xml").getAbsolutePath(); final static String CONFIG2 = new File("./test-config2-TestConfiguration.xml").getAbsolutePath(); + private static final String CONFIG_MULTI_BYTE = new File( + "./test-config-multi-byte-TestConfiguration.xml").getAbsolutePath(); + private static final String CONFIG_MULTI_BYTE_SAVED = new File( + "./test-config-multi-byte-saved-TestConfiguration.xml").getAbsolutePath(); final static Random RAN = new Random(); final static String XMLHEADER = IBM_JAVA?"": @@ -69,6 +76,8 @@ protected void 
tearDown() throws Exception { super.tearDown(); new File(CONFIG).delete(); new File(CONFIG2).delete(); + new File(CONFIG_MULTI_BYTE).delete(); + new File(CONFIG_MULTI_BYTE_SAVED).delete(); } private void startConfig() throws IOException{ @@ -101,6 +110,41 @@ public void testInputStreamResource() throws Exception { assertEquals("A", conf.get("prop")); } + /** + * Tests use of multi-byte characters in property names and values. This test + * round-trips multi-byte string literals through saving and loading of config + * and asserts that the same values were read. + */ + public void testMultiByteCharacters() throws IOException { + String priorDefaultEncoding = System.getProperty("file.encoding"); + try { + System.setProperty("file.encoding", "US-ASCII"); + String name = "multi_byte_\u611b_name"; + String value = "multi_byte_\u0641_value"; + out = new BufferedWriter(new OutputStreamWriter( + new FileOutputStream(CONFIG_MULTI_BYTE), "UTF-8")); + startConfig(); + declareProperty(name, value, value); + endConfig(); + + Configuration conf = new Configuration(false); + conf.addResource(new Path(CONFIG_MULTI_BYTE)); + assertEquals(value, conf.get(name)); + FileOutputStream fos = new FileOutputStream(CONFIG_MULTI_BYTE_SAVED); + try { + conf.writeXml(fos); + } finally { + IOUtils.closeStream(fos); + } + + conf = new Configuration(false); + conf.addResource(new Path(CONFIG_MULTI_BYTE_SAVED)); + assertEquals(value, conf.get(name)); + } finally { + System.setProperty("file.encoding", priorDefaultEncoding); + } + } + public void testVariableSubstitution() throws IOException { out=new BufferedWriter(new FileWriter(CONFIG)); startConfig();