HADOOP-18575: followup: try to avoid repeatedly hitting exceptions when transformer factories do not support attributes (#5253)

Part of HADOOP-18469 and the hardening of XML/XSL parsers.
Followup to the main HADOOP-18575 patch, to improve performance when
working with xml/xsl engines which don't support the relevant attributes.

Include this change when backporting.

Contributed by PJ Fanning.
This commit is contained in:
PJ Fanning 2023-01-16 14:15:37 +01:00 committed by GitHub
parent 38453f8589
commit d81d98388c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 45 additions and 17 deletions

View File

@ -34,6 +34,7 @@ import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import java.io.*;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* General xml utilities.
@ -59,6 +60,11 @@ public class XMLUtils {
public static final String VALIDATION =
"http://xml.org/sax/features/validation";
private static final AtomicBoolean CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_DTD =
new AtomicBoolean(true);
private static final AtomicBoolean CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_STYLESHEET =
new AtomicBoolean(true);
/**
* Transform input xml given a stylesheet.
*
@ -143,8 +149,7 @@ public class XMLUtils {
throws TransformerConfigurationException {
TransformerFactory trfactory = TransformerFactory.newInstance();
trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
bestEffortSetAttribute(trfactory, XMLConstants.ACCESS_EXTERNAL_DTD, "");
bestEffortSetAttribute(trfactory, XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
setOptionalSecureTransformerAttributes(trfactory);
return trfactory;
}
@ -161,29 +166,45 @@ public class XMLUtils {
throws TransformerConfigurationException {
SAXTransformerFactory trfactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
trfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
bestEffortSetAttribute(trfactory, XMLConstants.ACCESS_EXTERNAL_DTD, "");
bestEffortSetAttribute(trfactory, XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
setOptionalSecureTransformerAttributes(trfactory);
return trfactory;
}
/**
* These attributes are recommended for maximum security but some JAXP transformers do
* not support them. If at any stage, we fail to set these attributes, then we won't try again
* for subsequent transformers.
*
* @param transformerFactory to update
*/
private static void setOptionalSecureTransformerAttributes(
TransformerFactory transformerFactory) {
bestEffortSetAttribute(transformerFactory, CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_DTD,
XMLConstants.ACCESS_EXTERNAL_DTD, "");
bestEffortSetAttribute(transformerFactory, CAN_SET_TRANSFORMER_ACCESS_EXTERNAL_STYLESHEET,
XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
}
/**
* Set an attribute value on a {@link TransformerFactory}. If the TransformerFactory
* does not support the attribute, the method just returns <code>false</code> and
* logs the issue at debug level.
*
* @param transformerFactory to update
* @param flag that indicates whether to do the update and the flag can be set to
* <code>false</code> if an update fails
* @param name of the attribute to set
* @param value to set on the attribute
* @return whether the attribute was successfully set
*/
static boolean bestEffortSetAttribute(TransformerFactory transformerFactory,
String name, Object value) {
try {
transformerFactory.setAttribute(name, value);
return true;
} catch (Throwable t) {
LOG.debug("Issue setting TransformerFactory attribute {}: {}", name, t.toString());
static void bestEffortSetAttribute(TransformerFactory transformerFactory, AtomicBoolean flag,
String name, Object value) {
if (flag.get()) {
try {
transformerFactory.setAttribute(name, value);
} catch (Throwable t) {
flag.set(false);
LOG.debug("Issue setting TransformerFactory attribute {}: {}", name, t.toString());
}
}
return false;
}
}

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.util;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.SAXParser;
@ -134,10 +135,16 @@ public class TestXMLUtils extends AbstractHadoopTestBase {
@Test
public void testBestEffortSetAttribute() throws Exception {
TransformerFactory factory = TransformerFactory.newInstance();
Assert.assertFalse("unexpected attribute results in return of false",
XMLUtils.bestEffortSetAttribute(factory, "unsupportedAttribute false", "abc"));
Assert.assertTrue("expected attribute results in return of false",
XMLUtils.bestEffortSetAttribute(factory, XMLConstants.ACCESS_EXTERNAL_DTD, ""));
AtomicBoolean flag1 = new AtomicBoolean(true);
XMLUtils.bestEffortSetAttribute(factory, flag1, "unsupportedAttribute false", "abc");
Assert.assertFalse("unexpected attribute results in return of false?", flag1.get());
AtomicBoolean flag2 = new AtomicBoolean(true);
XMLUtils.bestEffortSetAttribute(factory, flag2, XMLConstants.ACCESS_EXTERNAL_DTD, "");
Assert.assertTrue("expected attribute results in return of true?", flag2.get());
AtomicBoolean flag3 = new AtomicBoolean(false);
XMLUtils.bestEffortSetAttribute(factory, flag3, XMLConstants.ACCESS_EXTERNAL_DTD, "");
Assert.assertFalse("expected attribute results in return of false if input flag is false?",
flag3.get());
}
private static InputStream getResourceStream(final String filename) {