HADOOP-9801. Configuration#writeXml uses the platform default encoding, which may mishandle multi-byte characters. Contributed by Chris Nauroth.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1509405 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris Nauroth 2013-08-01 20:03:31 +00:00
parent bfe5a528d8
commit dc119ef7da
3 changed files with 49 additions and 2 deletions

View File

@ -333,6 +333,9 @@ Release 2.1.1-beta - UNRELEASED
HADOOP-9768. chown and chgrp reject users and groups with spaces on platforms HADOOP-9768. chown and chgrp reject users and groups with spaces on platforms
where spaces are otherwise acceptable. (cnauroth) where spaces are otherwise acceptable. (cnauroth)
HADOOP-9801. Configuration#writeXml uses the platform default encoding, which
may mishandle multi-byte characters. (cnauroth)
Release 2.1.0-beta - 2013-08-06 Release 2.1.0-beta - 2013-08-06
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -2181,12 +2181,12 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
/** /**
* Write out the non-default properties in this configuration to the given * Write out the non-default properties in this configuration to the given
* {@link OutputStream}. * {@link OutputStream} using UTF-8 encoding.
* *
* @param out the output stream to write to. * @param out the output stream to write to.
*/ */
public void writeXml(OutputStream out) throws IOException { public void writeXml(OutputStream out) throws IOException {
writeXml(new OutputStreamWriter(out)); writeXml(new OutputStreamWriter(out, "UTF-8"));
} }
/** /**

View File

@ -21,9 +21,11 @@ import java.io.BufferedWriter;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File; import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter; import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter; import java.io.StringWriter;
import java.net.InetAddress; import java.net.InetAddress;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
@ -44,6 +46,7 @@ import static org.junit.Assert.assertArrayEquals;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration.IntegerRanges; import org.apache.hadoop.conf.Configuration.IntegerRanges;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import static org.apache.hadoop.util.PlatformName.IBM_JAVA; import static org.apache.hadoop.util.PlatformName.IBM_JAVA;
import org.codehaus.jackson.map.ObjectMapper; import org.codehaus.jackson.map.ObjectMapper;
@ -53,6 +56,10 @@ public class TestConfiguration extends TestCase {
private Configuration conf; private Configuration conf;
final static String CONFIG = new File("./test-config-TestConfiguration.xml").getAbsolutePath(); final static String CONFIG = new File("./test-config-TestConfiguration.xml").getAbsolutePath();
final static String CONFIG2 = new File("./test-config2-TestConfiguration.xml").getAbsolutePath(); final static String CONFIG2 = new File("./test-config2-TestConfiguration.xml").getAbsolutePath();
private static final String CONFIG_MULTI_BYTE = new File(
"./test-config-multi-byte-TestConfiguration.xml").getAbsolutePath();
private static final String CONFIG_MULTI_BYTE_SAVED = new File(
"./test-config-multi-byte-saved-TestConfiguration.xml").getAbsolutePath();
final static Random RAN = new Random(); final static Random RAN = new Random();
final static String XMLHEADER = final static String XMLHEADER =
IBM_JAVA?"<?xml version=\"1.0\" encoding=\"UTF-8\"?><configuration>": IBM_JAVA?"<?xml version=\"1.0\" encoding=\"UTF-8\"?><configuration>":
@ -69,6 +76,8 @@ public class TestConfiguration extends TestCase {
super.tearDown(); super.tearDown();
new File(CONFIG).delete(); new File(CONFIG).delete();
new File(CONFIG2).delete(); new File(CONFIG2).delete();
new File(CONFIG_MULTI_BYTE).delete();
new File(CONFIG_MULTI_BYTE_SAVED).delete();
} }
private void startConfig() throws IOException{ private void startConfig() throws IOException{
@ -101,6 +110,41 @@ public class TestConfiguration extends TestCase {
assertEquals("A", conf.get("prop")); assertEquals("A", conf.get("prop"));
} }
/**
 * Tests use of multi-byte characters in property names and values. This test
 * round-trips multi-byte string literals through saving and loading of config
 * and asserts that the same values were read.
 *
 * The "file.encoding" system property is set to US-ASCII for the duration of
 * the test and restored to its prior state afterwards, so that a save path
 * relying on the platform default encoding would not be able to represent the
 * multi-byte characters.
 *
 * NOTE(review): the JVM typically resolves its default charset at startup, so
 * overriding "file.encoding" at runtime may not change the charset already in
 * use -- confirm that this override has the intended effect on the target JVMs.
 *
 * @throws IOException if writing or reading either config file fails
 */
public void testMultiByteCharacters() throws IOException {
  String priorDefaultEncoding = System.getProperty("file.encoding");
  try {
    System.setProperty("file.encoding", "US-ASCII");
    String name = "multi_byte_\u611b_name";
    String value = "multi_byte_\u0641_value";

    // Write the input resource explicitly as UTF-8 so that the source file
    // itself is independent of the default encoding.
    out = new BufferedWriter(new OutputStreamWriter(
      new FileOutputStream(CONFIG_MULTI_BYTE), "UTF-8"));
    startConfig();
    declareProperty(name, value, value);
    endConfig();

    // First leg: the hand-written resource must load with the value intact.
    Configuration conf = new Configuration(false);
    conf.addResource(new Path(CONFIG_MULTI_BYTE));
    assertEquals(value, conf.get(name));

    // Second leg: save through writeXml(OutputStream), then reload the saved
    // file and expect the same value.
    FileOutputStream fos = new FileOutputStream(CONFIG_MULTI_BYTE_SAVED);
    try {
      conf.writeXml(fos);
    } finally {
      IOUtils.closeStream(fos);
    }

    conf = new Configuration(false);
    conf.addResource(new Path(CONFIG_MULTI_BYTE_SAVED));
    assertEquals(value, conf.get(name));
  } finally {
    // System.setProperty rejects a null value with a NullPointerException,
    // which would mask any failure raised above and leave the US-ASCII
    // override in place. Clear the property when it was originally unset.
    if (priorDefaultEncoding != null) {
      System.setProperty("file.encoding", priorDefaultEncoding);
    } else {
      System.clearProperty("file.encoding");
    }
  }
}
public void testVariableSubstitution() throws IOException { public void testVariableSubstitution() throws IOException {
out=new BufferedWriter(new FileWriter(CONFIG)); out=new BufferedWriter(new FileWriter(CONFIG));
startConfig(); startConfig();