HADOOP-18575: followup: try to avoid repeatedly hitting exceptions when transformer factories do not support attributes (#5253)

Part of HADOOP-18469 and the hardening of XML/XSL parsers.
Followup to the main HADOOP-18575 patch, to improve performance when
working with xml/xsl engines which don't support the relevant attributes.

Include this change when backporting.

Contributed by PJ Fanning.
This commit is contained in:
PJ Fanning 2023-01-16 14:15:37 +01:00 committed by Steve Loughran
parent c5cf845d78
commit ada06aa22e
No known key found for this signature in database
GPG Key ID: D22CF846DBB162A0
2 changed files with 45 additions and 17 deletions

View File

@ -34,6 +34,7 @@ import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
import java.io.*; import java.io.*;
import java.util.concurrent.atomic.AtomicBoolean;
/** /**
* General xml utilities. * General xml utilities.
@ -59,6 +60,11 @@ public class XMLUtils {
public static final String VALIDATION = public static final String VALIDATION =
"http://xml.org/sax/features/validation"; "http://xml.org/sax/features/validation";
private static final AtomicBoolean CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_DTD =
new AtomicBoolean(true);
private static final AtomicBoolean CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_STYLESHEET =
new AtomicBoolean(true);
/** /**
* Transform input xml given a stylesheet. * Transform input xml given a stylesheet.
* *
@ -143,8 +149,7 @@ public class XMLUtils {
throws TransformerConfigurationException { throws TransformerConfigurationException {
TransformerFactory trfactory = TransformerFactory.newInstance(); TransformerFactory trfactory = TransformerFactory.newInstance();
trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
bestEffortSetAttribute(trfactory, XMLConstants.ACCESS_EXTERNAL_DTD, ""); setOptionalSecureTransformerAttributes(trfactory);
bestEffortSetAttribute(trfactory, XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
return trfactory; return trfactory;
} }
@ -161,29 +166,45 @@ public class XMLUtils {
throws TransformerConfigurationException { throws TransformerConfigurationException {
SAXTransformerFactory trfactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance(); SAXTransformerFactory trfactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
bestEffortSetAttribute(trfactory, XMLConstants.ACCESS_EXTERNAL_DTD, ""); setOptionalSecureTransformerAttributes(trfactory);
bestEffortSetAttribute(trfactory, XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
return trfactory; return trfactory;
} }
/**
* These attributes are recommended for maximum security but some JAXP transformers do
* not support them. If at any stage, we fail to set these attributes, then we won't try again
* for subsequent transformers.
*
* @param transformerFactory to update
*/
private static void setOptionalSecureTransformerAttributes(
TransformerFactory transformerFactory) {
bestEffortSetAttribute(transformerFactory, CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_DTD,
XMLConstants.ACCESS_EXTERNAL_DTD, "");
bestEffortSetAttribute(transformerFactory, CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_STYLESHEET,
XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
}
/** /**
* Set an attribute value on a {@link TransformerFactory}. If the TransformerFactory * Set an attribute value on a {@link TransformerFactory}. If the TransformerFactory
* does not support the attribute, the method just returns <code>false</code> and * does not support the attribute, the method just returns <code>false</code> and
* logs the issue at debug level. * logs the issue at debug level.
* *
* @param transformerFactory to update * @param transformerFactory to update
* @param flag that indicates whether to do the update and the flag can be set to
* <code>false</code> if an update fails
* @param name of the attribute to set * @param name of the attribute to set
* @param value to set on the attribute * @param value to set on the attribute
* @return whether the attribute was successfully set
*/ */
static boolean bestEffortSetAttribute(TransformerFactory transformerFactory, static void bestEffortSetAttribute(TransformerFactory transformerFactory, AtomicBoolean flag,
String name, Object value) { String name, Object value) {
if (flag.get()) {
try { try {
transformerFactory.setAttribute(name, value); transformerFactory.setAttribute(name, value);
return true;
} catch (Throwable t) { } catch (Throwable t) {
flag.set(false);
LOG.debug("Issue setting TransformerFactory attribute {}: {}", name, t.toString()); LOG.debug("Issue setting TransformerFactory attribute {}: {}", name, t.toString());
} }
return false; }
} }
} }

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.util;
import java.io.InputStream; import java.io.InputStream;
import java.io.StringReader; import java.io.StringReader;
import java.io.StringWriter; import java.io.StringWriter;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.xml.XMLConstants; import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParser;
@ -134,10 +135,16 @@ public class TestXMLUtils extends AbstractHadoopTestBase {
@Test @Test
public void testBestEffortSetAttribute() throws Exception { public void testBestEffortSetAttribute() throws Exception {
TransformerFactory factory = TransformerFactory.newInstance(); TransformerFactory factory = TransformerFactory.newInstance();
Assert.assertFalse("unexpected attribute results in return of false", AtomicBoolean flag1 = new AtomicBoolean(true);
XMLUtils.bestEffortSetAttribute(factory, "unsupportedAttribute false", "abc")); XMLUtils.bestEffortSetAttribute(factory, flag1, "unsupportedAttribute false", "abc");
Assert.assertTrue("expected attribute results in return of false", Assert.assertFalse("unexpected attribute results in return of false?", flag1.get());
XMLUtils.bestEffortSetAttribute(factory, XMLConstants.ACCESS_EXTERNAL_DTD, "")); AtomicBoolean flag2 = new AtomicBoolean(true);
XMLUtils.bestEffortSetAttribute(factory, flag2, XMLConstants.ACCESS_EXTERNAL_DTD, "");
Assert.assertTrue("expected attribute results in return of true?", flag2.get());
AtomicBoolean flag3 = new AtomicBoolean(false);
XMLUtils.bestEffortSetAttribute(factory, flag3, XMLConstants.ACCESS_EXTERNAL_DTD, "");
Assert.assertFalse("expected attribute results in return of false if input flag is false?",
flag3.get());
} }
private static InputStream getResourceStream(final String filename) { private static InputStream getResourceStream(final String filename) {