From a81916ea89d59c1642b3462e3d7c6c1acb1e7109 Mon Sep 17 00:00:00 2001 From: Jonathan Eagles Date: Mon, 12 Jun 2017 17:07:53 -0500 Subject: [PATCH] HADOOP-14501. Switch from aalto-xml to woodstox to handle odd XML features (jeagles) --- hadoop-common-project/hadoop-common/pom.xml | 4 +- .../org/apache/hadoop/conf/Configuration.java | 6 +- .../apache/hadoop/conf/TestConfiguration.java | 82 +++++++++++++++++++ hadoop-project/pom.xml | 6 +- 4 files changed, 90 insertions(+), 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 7ef04623386..87b4dc91ac0 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -314,8 +314,8 @@ compile - com.fasterxml - aalto-xml + com.fasterxml.woodstox + woodstox-core compile diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 1a6679b1855..d3dd822d05a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -18,7 +18,7 @@ package org.apache.hadoop.conf; -import com.fasterxml.aalto.stax.InputFactoryImpl; +import com.ctc.wstx.stax.WstxInputFactory; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.google.common.annotations.VisibleForTesting; @@ -284,7 +284,7 @@ public class Configuration implements Iterable>, * Specify exact input factory to avoid time finding correct one. * Factory is reusable across un-synchronized threads once initialized */ - private static final XMLInputFactory2 factory = new InputFactoryImpl(); + private static final XMLInputFactory2 XML_INPUT_FACTORY = new WstxInputFactory(); /** * Class to keep the information about the keys which replace the deprecated @@ -2646,7 +2646,7 @@ public class Configuration implements Iterable>, if (is == null) { return null; } - return factory.createXMLStreamReader(systemId, is); + return XML_INPUT_FACTORY.createXMLStreamReader(systemId, is); } private void loadResources(Properties properties, diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index 0c664705675..5ced541af3b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -30,6 +30,7 @@ import java.io.StringWriter; import java.net.InetAddress; import java.net.InetSocketAddress; import java.net.URI; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -99,6 +100,18 @@ public class TestConfiguration extends TestCase { out.write("\n"); } + private void writeHeader() throws IOException{ + out.write("\n"); + } + + private void writeHeader(String encoding) throws IOException{ + out.write("\n"); + } + + private void writeConfiguration() throws IOException{ + out.write("\n"); + } + private void endConfig() throws IOException{ out.write("\n"); out.close(); @@ -120,6 +133,18 @@ public class TestConfiguration extends TestCase { out.write("\n "); } + private void declareEntity(String root, String entity, String value) + throws IOException { + out.write("\n]>"); + } + + private void declareSystemEntity(String root, String entity, String value) + throws IOException { + out.write("\n]>"); + } + public void testInputStreamResource() throws Exception { StringWriter writer = new StringWriter(); out = new BufferedWriter(writer); @@ -550,6 +575,63 @@ public class TestConfiguration extends TestCase { tearDown(); } + public void testCharsetInDocumentEncoding() throws Exception { + tearDown(); + out=new BufferedWriter(new OutputStreamWriter(new FileOutputStream(CONFIG), + StandardCharsets.ISO_8859_1)); + writeHeader(StandardCharsets.ISO_8859_1.displayName()); + writeConfiguration(); + appendProperty("a", "b"); + appendProperty("c", "Müller"); + endConfig(); + + // verify that the includes file contains all properties + Path fileResource = new Path(CONFIG); + conf.addResource(fileResource); + assertEquals(conf.get("a"), "b"); + assertEquals(conf.get("c"), "Müller"); + tearDown(); + } + + public void testEntityReference() throws Exception { + tearDown(); + out=new BufferedWriter(new FileWriter(CONFIG)); + writeHeader(); + declareEntity("configuration", "d", "d"); + writeConfiguration(); + appendProperty("a", "b"); + appendProperty("c", "&d;"); + endConfig(); + + // verify that the includes file contains all properties + Path fileResource = new Path(CONFIG); + conf.addResource(fileResource); + assertEquals(conf.get("a"), "b"); + assertEquals(conf.get("c"), "d"); + tearDown(); + } + + public void testSystemEntityReference() throws Exception { + tearDown(); + out=new BufferedWriter(new FileWriter(CONFIG2)); + out.write("d"); + out.close(); + out=new BufferedWriter(new FileWriter(CONFIG)); + writeHeader(); + declareSystemEntity("configuration", "d", CONFIG2); + writeConfiguration(); + appendProperty("a", "b"); + appendProperty("c", "&d;"); + endConfig(); + + // verify that the includes file contains all properties + Path fileResource = new Path(CONFIG); + conf.addResource(fileResource); + assertEquals(conf.get("a"), "b"); + assertEquals(conf.get("c"), "d"); + tearDown(); + } + public void testIncludesWithFallback() throws Exception { tearDown(); out=new BufferedWriter(new FileWriter(CONFIG2)); diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 306dce535d0..afd2006968c 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -881,9 +881,9 @@ 3.1.4 - com.fasterxml - aalto-xml - 1.0.0 + com.fasterxml.woodstox + woodstox-core + 5.0.3 org.codehaus.jackson