From fe8cf32493c66e88b7cc26adb002950531b9a870 Mon Sep 17 00:00:00 2001 From: Ravi Prakash Date: Fri, 31 Oct 2014 11:22:25 -0700 Subject: [PATCH] HDFS-7309. XMLUtils.mangleXmlString doesn't seem to handle less than sign. (Colin Patrick McCabe via raviprak) (cherry picked from commit c7f81dad30c391822eed7273278cf5885fa59264) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../OfflineEditsXmlLoader.java | 4 +- .../offlineImageViewer/PBImageXmlWriter.java | 3 +- .../offlineImageViewer/XmlImageVisitor.java | 3 +- .../org/apache/hadoop/hdfs/util/XMLUtils.java | 79 ++++++++++++++++--- .../apache/hadoop/hdfs/util/TestXMLUtils.java | 31 ++++++-- 6 files changed, 102 insertions(+), 21 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index f32ebf91211..b1d5ec4a550 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -446,6 +446,9 @@ Release 2.6.0 - UNRELEASED BUG FIXES + HDFS-7309. XMLUtils.mangleXmlString doesn't seem to handle less than sign + (Colin Patrick McCabe via raviprak) + HDFS-6823. dfs.web.authentication.kerberos.principal shows up in logs for insecure HDFS (Allen Wittenauer via raviprak) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java index cf761ccedd4..1882e58b3b3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java @@ -177,7 +177,7 @@ class OfflineEditsXmlLoader @Override public void endElement (String uri, String name, String qName) { - String str = XMLUtils.unmangleXmlString(cbuf.toString()).trim(); + String str = XMLUtils.unmangleXmlString(cbuf.toString(), false).trim(); cbuf = new StringBuffer(); switch (state) { case EXPECT_EDITS_TAG: @@ -260,4 +260,4 @@ class OfflineEditsXmlLoader public void characters (char ch[], int start, int length) { cbuf.append(ch, start, length); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java index df00499626b..fa8c59d2dc6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageXmlWriter.java @@ -411,7 +411,8 @@ public final class PBImageXmlWriter { } private PBImageXmlWriter o(final String e, final Object v) { - out.print("<" + e + ">" + XMLUtils.mangleXmlString(v.toString()) + ""); + out.print("<" + e + ">" + + XMLUtils.mangleXmlString(v.toString(), true) + ""); return this; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java index 2719109e9db..44593a3e7a5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/XmlImageVisitor.java @@ -84,6 +84,7 @@ public class XmlImageVisitor extends TextWriterImageVisitor { } private void writeTag(String tag, String value) throws IOException { - write("<" + tag + ">" + XMLUtils.mangleXmlString(value) + "\n"); + write("<" + tag + ">" + + XMLUtils.mangleXmlString(value, true) + "\n"); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java index a0324083451..f23b02185a2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/util/XMLUtils.java @@ -94,6 +94,23 @@ public class XMLUtils { return String.format("\\%0" + NUM_SLASH_POSITIONS + "x;", cp); } + private static String codePointToEntityRef(int cp) { + switch (cp) { + case '&': + return "&"; + case '\"': + return """; + case '\'': + return "'"; + case '<': + return "<"; + case '>': + return ">"; + default: + return null; + } + } + /** * Mangle a string so that it can be represented in an XML document. * @@ -117,7 +134,7 @@ public class XMLUtils { * * @return The mangled string. */ - public static String mangleXmlString(String str) { + public static String mangleXmlString(String str, boolean createEntityRefs) { final StringBuilder bld = new StringBuilder(); final int length = str.length(); for (int offset = 0; offset < length; ) { @@ -126,8 +143,16 @@ public class XMLUtils { if (codePointMustBeMangled(cp)) { bld.append(mangleCodePoint(cp)); } else { - for (int i = 0; i < len; i++) { - bld.append(str.charAt(offset + i)); + String entityRef = null; + if (createEntityRefs) { + entityRef = codePointToEntityRef(cp); + } + if (entityRef != null) { + bld.append(entityRef); + } else { + for (int i = 0; i < len; i++) { + bld.append(str.charAt(offset + i)); + } } } offset += len; @@ -137,22 +162,42 @@ public class XMLUtils { /** * Demangle a string from an XML document. - * See {@link #mangleXmlString(String)} for a description of the mangling - * format. + * See {@link #mangleXmlString(String, boolean)} for a description of the + * mangling format. * * @param str The string to be demangled. * * @return The unmangled string * @throws UnmanglingError if the input is malformed. */ - public static String unmangleXmlString(String str) + public static String unmangleXmlString(String str, boolean decodeEntityRefs) throws UnmanglingError { int slashPosition = -1; String escapedCp = ""; StringBuilder bld = new StringBuilder(); + StringBuilder entityRef = null; for (int i = 0; i < str.length(); i++) { char ch = str.charAt(i); - if ((slashPosition >= 0) && (slashPosition < NUM_SLASH_POSITIONS)) { + if (entityRef != null) { + entityRef.append(ch); + if (ch == ';') { + String e = entityRef.toString(); + if (e.equals(""")) { + bld.append("\""); + } else if (e.equals("'")) { + bld.append("\'"); + } else if (e.equals("&")) { + bld.append("&"); + } else if (e.equals("<")) { + bld.append("<"); + } else if (e.equals(">")) { + bld.append(">"); + } else { + throw new UnmanglingError("Unknown entity ref " + e); + } + entityRef = null; + } + } else if ((slashPosition >= 0) && (slashPosition < NUM_SLASH_POSITIONS)) { escapedCp += ch; ++slashPosition; } else if (slashPosition == NUM_SLASH_POSITIONS) { @@ -170,10 +215,22 @@ public class XMLUtils { } else if (ch == '\\') { slashPosition = 0; } else { - bld.append(ch); + boolean startingEntityRef = false; + if (decodeEntityRefs) { + startingEntityRef = (ch == '&'); + } + if (startingEntityRef) { + entityRef = new StringBuilder(); + entityRef.append("&"); + } else { + bld.append(ch); + } } } - if (slashPosition != -1) { + if (entityRef != null) { + throw new UnmanglingError("unterminated entity ref starting with " + + entityRef.toString()); + } else if (slashPosition != -1) { throw new UnmanglingError("unterminated code point escape: string " + "broke off in the middle"); } @@ -185,12 +242,12 @@ public class XMLUtils { * * @param contentHandler the SAX content handler * @param tag the element tag to use - * @param value the string to put inside the tag + * @param val the string to put inside the tag */ public static void addSaxString(ContentHandler contentHandler, String tag, String val) throws SAXException { contentHandler.startElement("", "", tag, new AttributesImpl()); - char c[] = mangleXmlString(val).toCharArray(); + char c[] = mangleXmlString(val, false).toCharArray(); contentHandler.characters(c, 0, c.length); contentHandler.endElement("", "", tag); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java index f3ab56c578c..16df254b0e5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestXMLUtils.java @@ -22,11 +22,21 @@ import org.junit.Assert; import org.junit.Test; public class TestXMLUtils { + private static void testRoundTripImpl(String str, String expectedMangled, + boolean encodeEntityRefs) { + String mangled = XMLUtils.mangleXmlString(str, encodeEntityRefs); + Assert.assertEquals(expectedMangled, mangled); + String unmangled = XMLUtils.unmangleXmlString(mangled, encodeEntityRefs); + Assert.assertEquals(str, unmangled); + } + private static void testRoundTrip(String str, String expectedMangled) { - String mangled = XMLUtils.mangleXmlString(str); - Assert.assertEquals(mangled, expectedMangled); - String unmangled = XMLUtils.unmangleXmlString(mangled); - Assert.assertEquals(unmangled, str); + testRoundTripImpl(str, expectedMangled, false); + } + + private static void testRoundTripWithEntityRefs(String str, + String expectedMangled) { + testRoundTripImpl(str, expectedMangled, true); } @Test @@ -54,16 +64,25 @@ public class TestXMLUtils { @Test public void testInvalidSequence() throws Exception { try { - XMLUtils.unmangleXmlString("\\000g;foo"); + XMLUtils.unmangleXmlString("\\000g;foo", false); Assert.fail("expected an unmangling error"); } catch (UnmanglingError e) { // pass through } try { - XMLUtils.unmangleXmlString("\\0"); + XMLUtils.unmangleXmlString("\\0", false); Assert.fail("expected an unmangling error"); } catch (UnmanglingError e) { // pass through } } + + @Test + public void testAddEntityRefs() throws Exception { + testRoundTripWithEntityRefs("The Itchy & Scratchy Show", + "The Itchy & Scratchy Show"); + testRoundTripWithEntityRefs("\"He said '1 < 2, but 2 > 1'\"", + ""He said '1 < 2, but 2 > 1'""); + testRoundTripWithEntityRefs("\u0001 < \u0002", "\\0001; < \\0002;"); + } }