MAPREDUCE-7411: use secure XML parsers in mapreduce modules (#4980)

Lockdown of parsers in hadoop-mapreduce.

Follow-on to HADOOP-18469. Add secure XML parser factories to XMLUtils

Contributed by P J Fanning
This commit is contained in:
PJ Fanning 2022-10-21 14:02:11 +01:00 committed by Steve Loughran
parent 36a0e818ec
commit bd276092b0
13 changed files with 50 additions and 34 deletions

View File

@ -34,6 +34,7 @@
import org.apache.hadoop.http.JettyUtils;
import org.apache.hadoop.mapreduce.v2.app.AppContext;
import org.apache.hadoop.mapreduce.v2.app.MockAppContext;
import org.apache.hadoop.util.XMLUtils;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
import org.apache.hadoop.yarn.webapp.JerseyTestBase;
@ -290,7 +291,7 @@ public void verifyAMInfo(JSONObject info, AppContext ctx)
public void verifyAMInfoXML(String xml, AppContext ctx)
throws JSONException, Exception {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -335,7 +336,7 @@ public void verifyBlacklistedNodesInfo(JSONObject blacklist, AppContext ctx)
public void verifyBlacklistedNodesInfoXML(String xml, AppContext ctx)
throws JSONException, Exception {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));

View File

@ -44,6 +44,7 @@
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.security.authentication.server.AuthenticationFilter;
import org.apache.hadoop.security.authentication.server.PseudoAuthenticationHandler;
import org.apache.hadoop.util.XMLUtils;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
import org.apache.hadoop.yarn.webapp.JerseyTestBase;
@ -185,7 +186,7 @@ public void testGetTaskAttemptIdXMLState() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -259,7 +260,7 @@ public void testPutTaskAttemptIdXMLState() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));

View File

@ -44,6 +44,7 @@
import org.apache.hadoop.mapreduce.v2.app.job.Task;
import org.apache.hadoop.mapreduce.v2.app.job.TaskAttempt;
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.util.XMLUtils;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
import org.apache.hadoop.yarn.webapp.JerseyTestBase;
@ -192,7 +193,7 @@ public void testTaskAttemptsXML() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -316,7 +317,7 @@ public void testTaskAttemptIdXML() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -600,7 +601,7 @@ public void testTaskAttemptIdXMLCounters() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));

View File

@ -44,6 +44,7 @@
import org.apache.hadoop.mapreduce.v2.app.MockAppContext;
import org.apache.hadoop.mapreduce.v2.app.job.Job;
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.util.XMLUtils;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
import org.apache.hadoop.yarn.webapp.JerseyTestBase;
@ -216,7 +217,7 @@ public void testJobConfXML() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));

View File

@ -44,6 +44,7 @@
import org.apache.hadoop.mapreduce.v2.app.job.Job;
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.security.authorize.AccessControlList;
import org.apache.hadoop.util.XMLUtils;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
@ -180,7 +181,7 @@ public void testJobsXML() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -342,7 +343,7 @@ public void testJobIdInvalidXML() throws JSONException, Exception {
response.getType().toString());
String msg = response.getEntity(String.class);
System.out.println(msg);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(msg));
@ -411,7 +412,7 @@ public void testJobIdXML() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -702,7 +703,7 @@ public void testJobCountersXML() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -859,7 +860,7 @@ public void testJobAttemptsXML() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));

View File

@ -42,6 +42,7 @@
import org.apache.hadoop.mapreduce.v2.app.job.Job;
import org.apache.hadoop.mapreduce.v2.app.job.Task;
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.util.XMLUtils;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
import org.apache.hadoop.yarn.webapp.JerseyTestBase;
@ -189,7 +190,7 @@ public void testTasksXML() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -533,7 +534,7 @@ public void testTaskIdXML() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -713,7 +714,7 @@ public void testJobTaskCountersXML() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));

View File

@ -21,7 +21,10 @@
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.QueueState;
import org.apache.hadoop.security.authorize.AccessControlList;
import org.apache.hadoop.util.XMLUtils;
import static org.apache.hadoop.mapred.QueueManager.toFullPropertyName;
import org.xml.sax.SAXException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
@ -88,7 +91,7 @@ class QueueConfigurationParser {
static final String VALUE_TAG = "value";
/**
* Default constructor for DeperacatedQueueConfigurationParser
* Default constructor for QueueConfigurationParser.
*/
QueueConfigurationParser() {
@ -158,8 +161,9 @@ void setRoot(Queue root) {
*/
protected Queue loadResource(InputStream resourceInput)
throws ParserConfigurationException, SAXException, IOException {
DocumentBuilderFactory docBuilderFactory
= DocumentBuilderFactory.newInstance();
DocumentBuilderFactory docBuilderFactory =
XMLUtils.newSecureDocumentBuilderFactory();
//ignore all comments inside the xml file
docBuilderFactory.setIgnoringComments(true);

View File

@ -37,6 +37,7 @@
import org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer;
import org.apache.hadoop.mapreduce.v2.hs.MockHistoryContext;
import org.apache.hadoop.util.VersionInfo;
import org.apache.hadoop.util.XMLUtils;
import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
@ -284,7 +285,7 @@ public void verifyHSInfo(JSONObject info, AppContext ctx)
public void verifyHSInfoXML(String xml, AppContext ctx)
throws JSONException, Exception {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));

View File

@ -47,6 +47,7 @@
import org.apache.hadoop.mapreduce.v2.hs.HistoryContext;
import org.apache.hadoop.mapreduce.v2.hs.MockHistoryContext;
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.util.XMLUtils;
import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
@ -207,7 +208,7 @@ public void testTaskAttemptsXML() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -331,7 +332,7 @@ public void testTaskAttemptIdXML() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -618,7 +619,7 @@ public void testTaskAttemptIdXMLCounters() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));

View File

@ -48,6 +48,7 @@
import org.apache.hadoop.mapreduce.v2.hs.HistoryContext;
import org.apache.hadoop.mapreduce.v2.hs.MockHistoryContext;
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.util.XMLUtils;
import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
@ -230,7 +231,7 @@ public void testJobConfXML() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));

View File

@ -45,6 +45,7 @@
import org.apache.hadoop.mapreduce.v2.hs.HistoryContext;
import org.apache.hadoop.mapreduce.v2.hs.MockHistoryContext;
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.util.XMLUtils;
import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
@ -190,7 +191,7 @@ public void testJobsXML() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -422,7 +423,7 @@ public void testJobIdInvalidXML() throws JSONException, Exception {
response.getType().toString());
String msg = response.getEntity(String.class);
System.out.println(msg);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(msg));
@ -489,7 +490,7 @@ public void testJobIdXML() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -612,7 +613,7 @@ public void testJobCountersXML() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -769,7 +770,7 @@ public void testJobAttemptsXML() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));

View File

@ -46,6 +46,7 @@
import org.apache.hadoop.mapreduce.v2.hs.HistoryContext;
import org.apache.hadoop.mapreduce.v2.hs.MockHistoryContext;
import org.apache.hadoop.mapreduce.v2.util.MRApps;
import org.apache.hadoop.util.XMLUtils;
import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
import org.apache.hadoop.yarn.webapp.GenericExceptionHandler;
import org.apache.hadoop.yarn.webapp.GuiceServletConfig;
@ -202,7 +203,7 @@ public void testTasksXML() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -549,7 +550,7 @@ public void testTaskIdXML() throws JSONException, Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
@ -729,7 +730,7 @@ public void testJobTaskCountersXML() throws Exception {
assertEquals(MediaType.APPLICATION_XML_TYPE + "; " + JettyUtils.UTF_8,
response.getType().toString());
String xml = response.getEntity(String.class);
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilderFactory dbf = XMLUtils.newSecureDocumentBuilderFactory();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));

View File

@ -28,13 +28,14 @@
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.hadoop.util.XMLUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import static org.junit.Assert.*;
import org.junit.Test;
public class TestQueueConfigurationParser {
/**
* test xml generation
@ -64,7 +65,7 @@ public void testQueueConfigurationParser()
DOMSource domSource = new DOMSource(e);
StringWriter writer = new StringWriter();
StreamResult result = new StreamResult(writer);
TransformerFactory tf = TransformerFactory.newInstance();
TransformerFactory tf = XMLUtils.newSecureTransformerFactory();
Transformer transformer = tf.newTransformer();
transformer.transform(domSource, result);
String str= writer.toString();