HDFS-7309. XMLUtils.mangleXmlString doesn't seem to handle less than sign. (Colin Patrick McCabe via raviprak)

This commit is contained in:
Ravi Prakash 2014-10-31 11:22:25 -07:00
parent b6c1188b85
commit c7f81dad30
6 changed files with 102 additions and 21 deletions

View File

@ -694,6 +694,9 @@ Release 2.6.0 - UNRELEASED
BUG FIXES BUG FIXES
HDFS-7309. XMLUtils.mangleXmlString doesn't seem to handle less than sign
(Colin Patrick McCabe via raviprak)
HDFS-6823. dfs.web.authentication.kerberos.principal shows up in logs for HDFS-6823. dfs.web.authentication.kerberos.principal shows up in logs for
insecure HDFS (Allen Wittenauer via raviprak) insecure HDFS (Allen Wittenauer via raviprak)

View File

@ -177,7 +177,7 @@ class OfflineEditsXmlLoader
@Override @Override
public void endElement (String uri, String name, String qName) { public void endElement (String uri, String name, String qName) {
String str = XMLUtils.unmangleXmlString(cbuf.toString()).trim(); String str = XMLUtils.unmangleXmlString(cbuf.toString(), false).trim();
cbuf = new StringBuffer(); cbuf = new StringBuffer();
switch (state) { switch (state) {
case EXPECT_EDITS_TAG: case EXPECT_EDITS_TAG:
@ -260,4 +260,4 @@ class OfflineEditsXmlLoader
public void characters (char ch[], int start, int length) { public void characters (char ch[], int start, int length) {
cbuf.append(ch, start, length); cbuf.append(ch, start, length);
} }
} }

View File

@ -411,7 +411,8 @@ public final class PBImageXmlWriter {
} }
private PBImageXmlWriter o(final String e, final Object v) { private PBImageXmlWriter o(final String e, final Object v) {
out.print("<" + e + ">" + XMLUtils.mangleXmlString(v.toString()) + "</" + e + ">"); out.print("<" + e + ">" +
XMLUtils.mangleXmlString(v.toString(), true) + "</" + e + ">");
return this; return this;
} }
} }

View File

@ -84,6 +84,7 @@ public class XmlImageVisitor extends TextWriterImageVisitor {
} }
private void writeTag(String tag, String value) throws IOException { private void writeTag(String tag, String value) throws IOException {
write("<" + tag + ">" + XMLUtils.mangleXmlString(value) + "</" + tag + ">\n"); write("<" + tag + ">" +
XMLUtils.mangleXmlString(value, true) + "</" + tag + ">\n");
} }
} }

View File

@ -94,6 +94,23 @@ public class XMLUtils {
return String.format("\\%0" + NUM_SLASH_POSITIONS + "x;", cp); return String.format("\\%0" + NUM_SLASH_POSITIONS + "x;", cp);
} }
/**
 * Look up the XML entity reference that stands for a code point.
 *
 * Only the ampersand, double quote, single quote, less-than sign and
 * greater-than sign have an entity reference here.
 *
 * @param cp    The code point to look up.
 *
 * @return      The entity reference for the code point, or null if the
 *              code point is not one of the five characters above.
 */
private static String codePointToEntityRef(int cp) {
  if (cp == '&') {
    return "&amp;";
  }
  if (cp == '\"') {
    return "&quot;";
  }
  if (cp == '\'') {
    return "&apos;";
  }
  if (cp == '<') {
    return "&lt;";
  }
  if (cp == '>') {
    return "&gt;";
  }
  // Every other code point has no entity reference; callers treat null as
  // "emit the character unchanged".
  return null;
}
/** /**
* Mangle a string so that it can be represented in an XML document. * Mangle a string so that it can be represented in an XML document.
* *
@ -117,7 +134,7 @@ public class XMLUtils {
* *
* @return The mangled string. * @return The mangled string.
*/ */
public static String mangleXmlString(String str) { public static String mangleXmlString(String str, boolean createEntityRefs) {
final StringBuilder bld = new StringBuilder(); final StringBuilder bld = new StringBuilder();
final int length = str.length(); final int length = str.length();
for (int offset = 0; offset < length; ) { for (int offset = 0; offset < length; ) {
@ -126,8 +143,16 @@ public class XMLUtils {
if (codePointMustBeMangled(cp)) { if (codePointMustBeMangled(cp)) {
bld.append(mangleCodePoint(cp)); bld.append(mangleCodePoint(cp));
} else { } else {
for (int i = 0; i < len; i++) { String entityRef = null;
bld.append(str.charAt(offset + i)); if (createEntityRefs) {
entityRef = codePointToEntityRef(cp);
}
if (entityRef != null) {
bld.append(entityRef);
} else {
for (int i = 0; i < len; i++) {
bld.append(str.charAt(offset + i));
}
} }
} }
offset += len; offset += len;
@ -137,22 +162,42 @@ public class XMLUtils {
/** /**
* Demangle a string from an XML document. * Demangle a string from an XML document.
* See {@link #mangleXmlString(String)} for a description of the mangling * See {@link #mangleXmlString(String, boolean)} for a description of the
* format. * mangling format.
* *
* @param str The string to be demangled. * @param str The string to be demangled.
* *
* @return The unmangled string * @return The unmangled string
* @throws UnmanglingError if the input is malformed. * @throws UnmanglingError if the input is malformed.
*/ */
public static String unmangleXmlString(String str) public static String unmangleXmlString(String str, boolean decodeEntityRefs)
throws UnmanglingError { throws UnmanglingError {
int slashPosition = -1; int slashPosition = -1;
String escapedCp = ""; String escapedCp = "";
StringBuilder bld = new StringBuilder(); StringBuilder bld = new StringBuilder();
StringBuilder entityRef = null;
for (int i = 0; i < str.length(); i++) { for (int i = 0; i < str.length(); i++) {
char ch = str.charAt(i); char ch = str.charAt(i);
if ((slashPosition >= 0) && (slashPosition < NUM_SLASH_POSITIONS)) { if (entityRef != null) {
entityRef.append(ch);
if (ch == ';') {
String e = entityRef.toString();
if (e.equals("&quot;")) {
bld.append("\"");
} else if (e.equals("&apos;")) {
bld.append("\'");
} else if (e.equals("&amp;")) {
bld.append("&");
} else if (e.equals("&lt;")) {
bld.append("<");
} else if (e.equals("&gt;")) {
bld.append(">");
} else {
throw new UnmanglingError("Unknown entity ref " + e);
}
entityRef = null;
}
} else if ((slashPosition >= 0) && (slashPosition < NUM_SLASH_POSITIONS)) {
escapedCp += ch; escapedCp += ch;
++slashPosition; ++slashPosition;
} else if (slashPosition == NUM_SLASH_POSITIONS) { } else if (slashPosition == NUM_SLASH_POSITIONS) {
@ -170,10 +215,22 @@ public class XMLUtils {
} else if (ch == '\\') { } else if (ch == '\\') {
slashPosition = 0; slashPosition = 0;
} else { } else {
bld.append(ch); boolean startingEntityRef = false;
if (decodeEntityRefs) {
startingEntityRef = (ch == '&');
}
if (startingEntityRef) {
entityRef = new StringBuilder();
entityRef.append("&");
} else {
bld.append(ch);
}
} }
} }
if (slashPosition != -1) { if (entityRef != null) {
throw new UnmanglingError("unterminated entity ref starting with " +
entityRef.toString());
} else if (slashPosition != -1) {
throw new UnmanglingError("unterminated code point escape: string " + throw new UnmanglingError("unterminated code point escape: string " +
"broke off in the middle"); "broke off in the middle");
} }
@ -185,12 +242,12 @@ public class XMLUtils {
* *
* @param contentHandler the SAX content handler * @param contentHandler the SAX content handler
* @param tag the element tag to use * @param tag the element tag to use
* @param value the string to put inside the tag * @param val the string to put inside the tag
*/ */
public static void addSaxString(ContentHandler contentHandler, public static void addSaxString(ContentHandler contentHandler,
String tag, String val) throws SAXException { String tag, String val) throws SAXException {
contentHandler.startElement("", "", tag, new AttributesImpl()); contentHandler.startElement("", "", tag, new AttributesImpl());
char c[] = mangleXmlString(val).toCharArray(); char c[] = mangleXmlString(val, false).toCharArray();
contentHandler.characters(c, 0, c.length); contentHandler.characters(c, 0, c.length);
contentHandler.endElement("", "", tag); contentHandler.endElement("", "", tag);
} }

View File

@ -22,11 +22,21 @@ import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
public class TestXMLUtils { public class TestXMLUtils {
/**
 * Mangle a string, verify the mangled form, then unmangle it and verify
 * that the original string comes back.
 *
 * @param str               The string to send through the round trip.
 * @param expectedMangled   The exact output expected from mangling str.
 * @param encodeEntityRefs  Passed to both the mangle and the unmangle call,
 *                          so the same setting is used in each direction.
 */
private static void testRoundTripImpl(String str, String expectedMangled,
    boolean encodeEntityRefs) {
  final String actualMangled =
      XMLUtils.mangleXmlString(str, encodeEntityRefs);
  Assert.assertEquals(expectedMangled, actualMangled);
  // Unmangle what was actually produced, not the expected text.
  Assert.assertEquals(str,
      XMLUtils.unmangleXmlString(actualMangled, encodeEntityRefs));
}
private static void testRoundTrip(String str, String expectedMangled) { private static void testRoundTrip(String str, String expectedMangled) {
String mangled = XMLUtils.mangleXmlString(str); testRoundTripImpl(str, expectedMangled, false);
Assert.assertEquals(mangled, expectedMangled); }
String unmangled = XMLUtils.unmangleXmlString(mangled);
Assert.assertEquals(unmangled, str); private static void testRoundTripWithEntityRefs(String str,
String expectedMangled) {
testRoundTripImpl(str, expectedMangled, true);
} }
@Test @Test
@ -54,16 +64,25 @@ public class TestXMLUtils {
@Test @Test
public void testInvalidSequence() throws Exception { public void testInvalidSequence() throws Exception {
try { try {
XMLUtils.unmangleXmlString("\\000g;foo"); XMLUtils.unmangleXmlString("\\000g;foo", false);
Assert.fail("expected an unmangling error"); Assert.fail("expected an unmangling error");
} catch (UnmanglingError e) { } catch (UnmanglingError e) {
// pass through // pass through
} }
try { try {
XMLUtils.unmangleXmlString("\\0"); XMLUtils.unmangleXmlString("\\0", false);
Assert.fail("expected an unmangling error"); Assert.fail("expected an unmangling error");
} catch (UnmanglingError e) { } catch (UnmanglingError e) {
// pass through // pass through
} }
} }
/**
 * Round-trip strings containing the five characters that are written as
 * entity references, including one case that mixes them with code points
 * that are written as backslash escapes.
 */
@Test
public void testAddEntityRefs() throws Exception {
  // Each row is { original string, expected mangled form }.
  final String[][] cases = {
    { "The Itchy & Scratchy Show",
      "The Itchy &amp; Scratchy Show" },
    { "\"He said '1 < 2, but 2 > 1'\"",
      "&quot;He said &apos;1 &lt; 2, but 2 &gt; 1&apos;&quot;" },
    { "\u0001 < \u0002",
      "\\0001; &lt; \\0002;" },
  };
  for (String[] testCase : cases) {
    testRoundTripWithEntityRefs(testCase[0], testCase[1]);
  }
}
} }