NIFI-4062 Provide an option to disable DTD validation for EvaluateXPath and EvaluateXQuery

This closes #2093.

Signed-off-by: Koji Kawamura <ijokarumawak@apache.org>
This commit is contained in:
Arun Manivannan 2017-08-12 13:41:18 +08:00 committed by Koji Kawamura
parent 02c05bc203
commit 6df112e4b1
6 changed files with 274 additions and 9 deletions

View File

@ -23,6 +23,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Collection;
@ -43,6 +44,7 @@ import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
@ -75,10 +77,14 @@ import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.stream.io.BufferedInputStream;
import org.apache.nifi.stream.io.BufferedOutputStream;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import net.sf.saxon.lib.NamespaceConstant;
import net.sf.saxon.xpath.XPathEvaluator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
@EventDriven
@SideEffectFree
@ -125,6 +131,14 @@ public class EvaluateXPath extends AbstractProcessor {
.defaultValue(RETURN_TYPE_AUTO)
.build();
/**
 * Required boolean-style property ("true"/"false", default "true") exposed to the user as "Validate DTD".
 * Elsewhere in this class, a value of false causes an EntityResolver that returns an empty source for
 * every entity lookup to be installed on the XMLReader used to parse the FlowFile content.
 * NOTE(review): the flag therefore looks like it controls whether external DTD/entity references are
 * loaded, rather than switching validation itself on or off - confirm before rewording the description.
 */
public static final PropertyDescriptor VALIDATE_DTD = new PropertyDescriptor.Builder()
.name("Validate DTD")
.description("Specifies whether or not the XML content should be validated against the DTD.")
.required(true)
.allowableValues("true", "false")
.defaultValue("true")
.build();
public static final Relationship REL_MATCH = new Relationship.Builder()
.name("matched")
.description("FlowFiles are routed to this relationship "
@ -162,6 +176,7 @@ public class EvaluateXPath extends AbstractProcessor {
final List<PropertyDescriptor> properties = new ArrayList<>();
properties.add(DESTINATION);
properties.add(RETURN_TYPE);
properties.add(VALIDATE_DTD);
this.properties = Collections.unmodifiableList(properties);
}
@ -219,6 +234,24 @@ public class EvaluateXPath extends AbstractProcessor {
}
final ComponentLog logger = getLogger();
// SAX reader that is later handed to the XPath evaluation (wrapped in a SAXSource) to parse the content.
final XMLReader xmlReader;
try {
    xmlReader = XMLReaderFactory.createXMLReader();
} catch (SAXException e) {
    logger.error("Error while constructing XMLReader {}", new Object[]{e});
    // Chain the SAXException as the cause so the original stack trace survives the rethrow.
    throw new ProcessException(e.getMessage(), e);
}

// With "Validate DTD" set to false, answer every entity lookup (the external DTD subset included)
// with an empty source, so a DOCTYPE that points at a missing or unreachable DTD cannot fail the parse.
if (!context.getProperty(VALIDATE_DTD).asBoolean()) {
    xmlReader.setEntityResolver(new EntityResolver() {
        @Override
        public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
            return new InputSource(new StringReader(""));
        }
    });
}
final XPathFactory factory = factoryRef.get();
final XPathEvaluator xpathEvaluator = (XPathEvaluator) factory.newXPath();
final Map<String, XPathExpression> attributeToXPathMap = new HashMap<>();
@ -277,7 +310,8 @@ public class EvaluateXPath extends AbstractProcessor {
@Override
public void process(final InputStream rawIn) throws IOException {
try (final InputStream in = new BufferedInputStream(rawIn)) {
final List<Source> rootList = (List<Source>) slashExpression.evaluate(new InputSource(in), NODESET);
final List<Source> rootList = (List<Source>) slashExpression.evaluate(new SAXSource(xmlReader,
new InputSource(in)), NODESET);
sourceRef.set(rootList.get(0));
} catch (final Exception e) {
error.set(e);

View File

@ -20,6 +20,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@ -68,6 +69,7 @@ import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.stream.io.BufferedInputStream;
import org.apache.nifi.stream.io.BufferedOutputStream;
import org.w3c.dom.Document;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import net.sf.saxon.s9api.DOMDestination;
@ -79,6 +81,9 @@ import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmValue;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
@EventDriven
@SideEffectFree
@ -148,6 +153,14 @@ public class EvaluateXQuery extends AbstractProcessor {
.defaultValue("false")
.build();
/**
 * Required boolean-style property ("true"/"false", default "true") exposed to the user as "Validate DTD".
 * Elsewhere in this class, a value of false causes an EntityResolver that returns an empty source for
 * every entity lookup to be installed on the XMLReader used as the XQuery evaluator's source.
 * NOTE(review): the flag therefore looks like it controls whether external DTD/entity references are
 * loaded, rather than switching validation itself on or off - confirm before rewording the description.
 */
public static final PropertyDescriptor VALIDATE_DTD = new PropertyDescriptor.Builder()
.name("Validate DTD")
.description("Specifies whether or not the XML content should be validated against the DTD.")
.required(true)
.allowableValues("true", "false")
.defaultValue("true")
.build();
public static final Relationship REL_MATCH = new Relationship.Builder()
.name("matched")
.description("FlowFiles are routed to this relationship when the XQuery is successfully evaluated and the FlowFile "
@ -182,6 +195,7 @@ public class EvaluateXQuery extends AbstractProcessor {
properties.add(XML_OUTPUT_METHOD);
properties.add(XML_OUTPUT_OMIT_XML_DECLARATION);
properties.add(XML_OUTPUT_INDENT);
properties.add(VALIDATE_DTD);
this.properties = Collections.unmodifiableList(properties);
}
@ -231,6 +245,24 @@ public class EvaluateXQuery extends AbstractProcessor {
final Map<String, XQueryExecutable> attributeToXQueryMap = new HashMap<>();
final Processor proc = new Processor(false);
// SAX reader that is later wrapped in a SAXSource and set as the source of the XQuery evaluator.
final XMLReader xmlReader;
try {
    xmlReader = XMLReaderFactory.createXMLReader();
} catch (SAXException e) {
    logger.error("Error while constructing XMLReader {}", new Object[]{e});
    // Chain the SAXException as the cause so the original stack trace survives the rethrow.
    throw new ProcessException(e.getMessage(), e);
}

// With "Validate DTD" set to false, answer every entity lookup (the external DTD subset included)
// with an empty source, so a DOCTYPE that points at a missing or unreachable DTD cannot fail the parse.
if (!context.getProperty(VALIDATE_DTD).asBoolean()) {
    xmlReader.setEntityResolver(new EntityResolver() {
        @Override
        public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
            return new InputSource(new StringReader(""));
        }
    });
}
final XQueryCompiler comp = proc.newXQueryCompiler();
for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
@ -272,7 +304,7 @@ public class EvaluateXQuery extends AbstractProcessor {
public void process(final InputStream rawIn) throws IOException {
try (final InputStream in = new BufferedInputStream(rawIn)) {
XQueryEvaluator qe = slashExpression.load();
qe.setSource(new SAXSource(new InputSource(in)));
qe.setSource(new SAXSource(xmlReader, new InputSource(in)));
DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
dfactory.setNamespaceAware(true);
Document dom = dfactory.newDocumentBuilder().newDocument();

View File

@ -33,6 +33,8 @@ import org.junit.Test;
public class TestEvaluateXPath {
private static final Path XML_SNIPPET = Paths.get("src/test/resources/TestXml/xml-snippet.xml");
private static final Path XML_SNIPPET_EMBEDDED_DOCTYPE = Paths.get("src/test/resources/TestXml/xml-snippet-embedded-doctype.xml");
private static final Path XML_SNIPPET_NONEXISTENT_DOCTYPE = Paths.get("src/test/resources/TestXml/xml-snippet-external-doctype.xml");
@Test
public void testAsAttribute() throws XPathFactoryConfigurationException, IOException {
@ -155,4 +157,72 @@ public class TestEvaluateXPath {
assertTrue(outXml.contains("subNode"));
assertTrue(outXml.contains("Hello"));
}
/**
 * A document carrying its DTD inline (embedded DOCTYPE fixture) must still be routed to "matched"
 * when "Validate DTD" is explicitly true, and the XPath must yield the first subNode value.
 */
@Test
public void testSuccessForEmbeddedDocTypeValidation() throws XPathFactoryConfigurationException, IOException {
    final TestRunner runner = TestRunners.newTestRunner(new EvaluateXPath());
    runner.setProperty(EvaluateXPath.DESTINATION, EvaluateXPath.DESTINATION_CONTENT);
    runner.setProperty(EvaluateXPath.RETURN_TYPE, EvaluateXPath.RETURN_TYPE_STRING);
    runner.setProperty(EvaluateXPath.VALIDATE_DTD, "true");
    runner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()");

    runner.enqueue(XML_SNIPPET_EMBEDDED_DOCTYPE);
    runner.run();

    runner.assertAllFlowFilesTransferred(EvaluateXPath.REL_MATCH, 1);
    final MockFlowFile matched = runner.getFlowFilesForRelationship(EvaluateXPath.REL_MATCH).get(0);
    // The FlowFile content was replaced by the XPath result; compare it ignoring surrounding whitespace.
    final String extracted = new String(runner.getContentAsByteArray(matched), "UTF-8");
    assertTrue(extracted.trim().equals("Hello"));
}
/**
 * Same embedded-DOCTYPE fixture as the enabled case, but with "Validate DTD" set to false:
 * the FlowFile must still be routed to "matched" with the first subNode value as content.
 */
@Test
public void testSuccessForEmbeddedDocTypeValidationDisabled() throws XPathFactoryConfigurationException, IOException {
    final TestRunner runner = TestRunners.newTestRunner(new EvaluateXPath());
    runner.setProperty(EvaluateXPath.DESTINATION, EvaluateXPath.DESTINATION_CONTENT);
    runner.setProperty(EvaluateXPath.RETURN_TYPE, EvaluateXPath.RETURN_TYPE_STRING);
    runner.setProperty(EvaluateXPath.VALIDATE_DTD, "false");
    runner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()");

    runner.enqueue(XML_SNIPPET_EMBEDDED_DOCTYPE);
    runner.run();

    runner.assertAllFlowFilesTransferred(EvaluateXPath.REL_MATCH, 1);
    final MockFlowFile matched = runner.getFlowFilesForRelationship(EvaluateXPath.REL_MATCH).get(0);
    // The FlowFile content was replaced by the XPath result; compare it ignoring surrounding whitespace.
    final String extracted = new String(runner.getContentAsByteArray(matched), "UTF-8");
    assertTrue(extracted.trim().equals("Hello"));
}
/**
 * A document whose DOCTYPE references an external DTD that does not exist must be routed to
 * "failure" while "Validate DTD" is enabled.
 */
@Test
public void testFailureForExternalDocTypeWithDocTypeValidationEnabled() throws XPathFactoryConfigurationException, IOException {
    final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXPath());
    testRunner.setProperty(EvaluateXPath.DESTINATION, EvaluateXPath.DESTINATION_CONTENT);
    testRunner.setProperty(EvaluateXPath.RETURN_TYPE, EvaluateXPath.RETURN_TYPE_STRING);
    // Set explicitly (it matches the default) so the test keeps exercising the "enabled" path named in
    // its title even if the property's default changes, and so it mirrors the sibling tests.
    testRunner.setProperty(EvaluateXPath.VALIDATE_DTD, "true");
    testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()");

    testRunner.enqueue(XML_SNIPPET_NONEXISTENT_DOCTYPE);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(EvaluateXPath.REL_FAILURE, 1);
}
/**
 * With "Validate DTD" set to false, a document whose DOCTYPE references a non-existent external DTD
 * must still be routed to "matched" and yield the first subNode value.
 */
@Test
public void testSuccessForExternalDocTypeWithDocTypeValidationDisabled() throws XPathFactoryConfigurationException, IOException {
    final TestRunner runner = TestRunners.newTestRunner(new EvaluateXPath());
    runner.setProperty(EvaluateXPath.DESTINATION, EvaluateXPath.DESTINATION_CONTENT);
    runner.setProperty(EvaluateXPath.RETURN_TYPE, EvaluateXPath.RETURN_TYPE_STRING);
    runner.setProperty(EvaluateXPath.VALIDATE_DTD, "false");
    runner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()");

    runner.enqueue(XML_SNIPPET_NONEXISTENT_DOCTYPE);
    runner.run();

    runner.assertAllFlowFilesTransferred(EvaluateXPath.REL_MATCH, 1);
    final MockFlowFile matched = runner.getFlowFilesForRelationship(EvaluateXPath.REL_MATCH).get(0);
    // The FlowFile content was replaced by the XPath result; compare it ignoring surrounding whitespace.
    final String extracted = new String(runner.getContentAsByteArray(matched), "UTF-8");
    assertTrue(extracted.trim().equals("Hello"));
}
}

View File

@ -37,13 +37,15 @@ import javax.xml.xpath.XPathFactoryConfigurationException;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Ignore;
import org.junit.Test;
public class TestEvaluateXQuery {
private static final Path XML_SNIPPET = Paths.get("src/test/resources/TestXml/fruit.xml");
private static final Path XML_SNIPPET_EMBEDDED_DOCTYPE = Paths.get("src/test/resources/TestXml/xml-snippet-embedded-doctype.xml");
private static final Path XML_SNIPPET_NONEXISTENT_DOCTYPE = Paths.get("src/test/resources/TestXml/xml-snippet-external-doctype.xml");
private static final String[] fruitNames = {"apple", "apple", "banana", "orange", "blueberry", "raspberry", "none"};
private static final String[] methods = {EvaluateXQuery.OUTPUT_METHOD_XML, EvaluateXQuery.OUTPUT_METHOD_HTML, EvaluateXQuery.OUTPUT_METHOD_TEXT};
@ -65,7 +67,6 @@ public class TestEvaluateXQuery {
}
}
@Ignore("this test is failing")
@Test
public void testFormatting() throws Exception {
@ -102,7 +103,7 @@ public class TestEvaluateXQuery {
+ " <name>apple</name>\n"
+ " <color>red</color>\n"
+ " </fruit>";
assertEquals(expectedXml, formattedResults.get(0));
assertEquals(spaceTrimmed(expectedXml), spaceTrimmed(formattedResults.get(0)));
}
{
formattedResults = getFormattedResult(XML_SNIPPET, singleElementNodeQuery, "html", false, false);
@ -113,7 +114,7 @@ public class TestEvaluateXQuery {
+ " <name>apple</name>\n"
+ " <color>red</color>\n"
+ " </fruit>";
assertEquals(expectedXml, formattedResults.get(0));
assertEquals(spaceTrimmed(expectedXml), spaceTrimmed(formattedResults.get(0)));
}
{
formattedResults = getFormattedResult(XML_SNIPPET, singleElementNodeQuery, "text", false, false);
@ -123,7 +124,7 @@ public class TestEvaluateXQuery {
+ " apple\n"
+ " red\n"
+ " ";
assertEquals(expectedXml, formattedResults.get(0));
assertEquals(spaceTrimmed(expectedXml), spaceTrimmed(formattedResults.get(0)));
}
{
formattedResults = getFormattedResult(XML_SNIPPET, singleElementNodeQuery, "xml", true, false);
@ -135,7 +136,7 @@ public class TestEvaluateXQuery {
+ " <name>apple</name>\n"
+ " <color>red</color>\n"
+ " </fruit>\n";
assertEquals(expectedXml, formattedResults.get(0));
assertEquals(spaceTrimmed(expectedXml), spaceTrimmed(formattedResults.get(0)));
}
{
formattedResults = getFormattedResult(XML_SNIPPET, singleElementNodeQuery, "xml", true, true);
@ -146,10 +147,14 @@ public class TestEvaluateXQuery {
+ " <name>apple</name>\n"
+ " <color>red</color>\n"
+ " </fruit>\n";
assertEquals(expectedXml, formattedResults.get(0));
assertEquals(spaceTrimmed(expectedXml), spaceTrimmed(formattedResults.get(0)));
}
}
/**
 * Collapses a multi-line string into a single line: the input is split on '\n', each piece is
 * trimmed, and the pieces are concatenated with no separator. Used to compare formatted output
 * without depending on indentation or line breaks.
 *
 * @param str text to normalise
 * @return the trimmed lines joined together
 */
private String spaceTrimmed(String str) {
    final StringBuilder joined = new StringBuilder();
    for (final String line : str.split("\n")) {
        joined.append(line.trim());
    }
    return joined.toString();
}
private List<String> getFormattedResult(Path xml, final String xQuery, final String method, final boolean indent, final boolean omitDeclaration) throws Exception {
Map<String, String> runnerProps = new HashMap<>();
@ -648,4 +653,69 @@ public class TestEvaluateXQuery {
}
testRunner.getFlowFilesForRelationship(EvaluateXQuery.REL_MATCH).get(0).assertContentEquals(XML_SNIPPET);
}
/**
 * A document carrying its DTD inline (embedded DOCTYPE fixture) must still be routed to "matched"
 * when "Validate DTD" is explicitly true, and the query must yield the first subNode value.
 */
@Test
public void testSuccessForEmbeddedDocTypeValidation() throws XPathFactoryConfigurationException, IOException {
    final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXQuery());
    testRunner.setProperty(EvaluateXQuery.DESTINATION, EvaluateXQuery.DESTINATION_CONTENT);
    testRunner.setProperty(EvaluateXQuery.VALIDATE_DTD, "true");
    testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()");

    testRunner.enqueue(XML_SNIPPET_EMBEDDED_DOCTYPE);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(EvaluateXQuery.REL_MATCH, 1);
    final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateXQuery.REL_MATCH).get(0);
    final byte[] outData = testRunner.getContentAsByteArray(out);
    final String outXml = new String(outData, "UTF-8");
    // assertEquals reports the actual content on failure, unlike assertTrue(a.equals(b)).
    assertEquals("Hello", outXml.trim());
}
/**
 * Same embedded-DOCTYPE fixture as the enabled case, but with "Validate DTD" set to false:
 * the FlowFile must still be routed to "matched" with the first subNode value as content.
 */
@Test
public void testSuccessForEmbeddedDocTypeValidationDisabled() throws XPathFactoryConfigurationException, IOException {
    final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXQuery());
    testRunner.setProperty(EvaluateXQuery.DESTINATION, EvaluateXQuery.DESTINATION_CONTENT);
    testRunner.setProperty(EvaluateXQuery.VALIDATE_DTD, "false");
    testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()");

    testRunner.enqueue(XML_SNIPPET_EMBEDDED_DOCTYPE);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(EvaluateXQuery.REL_MATCH, 1);
    final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateXQuery.REL_MATCH).get(0);
    final byte[] outData = testRunner.getContentAsByteArray(out);
    final String outXml = new String(outData, "UTF-8");
    // assertEquals reports the actual content on failure, unlike assertTrue(a.equals(b)).
    assertEquals("Hello", outXml.trim());
}
/**
 * A document whose DOCTYPE references an external DTD that does not exist must be routed to
 * "failure" while "Validate DTD" is enabled.
 */
@Test
public void testFailureForExternalDocTypeWithDocTypeValidationEnabled() throws XPathFactoryConfigurationException, IOException {
    final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXQuery());
    testRunner.setProperty(EvaluateXQuery.DESTINATION, EvaluateXQuery.DESTINATION_CONTENT);
    // Set explicitly (it matches the default) so the test keeps exercising the "enabled" path named in
    // its title even if the property's default changes, and so it mirrors the sibling tests.
    testRunner.setProperty(EvaluateXQuery.VALIDATE_DTD, "true");
    testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()");

    testRunner.enqueue(XML_SNIPPET_NONEXISTENT_DOCTYPE);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(EvaluateXQuery.REL_FAILURE, 1);
}
/**
 * With "Validate DTD" set to false, a document whose DOCTYPE references a non-existent external DTD
 * must still be routed to "matched" and yield the first subNode value.
 */
@Test
public void testSuccessForExternalDocTypeWithDocTypeValidationDisabled() throws XPathFactoryConfigurationException, IOException {
    final TestRunner testRunner = TestRunners.newTestRunner(new EvaluateXQuery());
    testRunner.setProperty(EvaluateXQuery.DESTINATION, EvaluateXQuery.DESTINATION_CONTENT);
    testRunner.setProperty(EvaluateXQuery.VALIDATE_DTD, "false");
    testRunner.setProperty("some.property", "/*:bundle/node/subNode[1]/value/text()");

    testRunner.enqueue(XML_SNIPPET_NONEXISTENT_DOCTYPE);
    testRunner.run();

    testRunner.assertAllFlowFilesTransferred(EvaluateXQuery.REL_MATCH, 1);
    final MockFlowFile out = testRunner.getFlowFilesForRelationship(EvaluateXQuery.REL_MATCH).get(0);
    final byte[] outData = testRunner.getContentAsByteArray(out);
    final String outXml = new String(outData, "UTF-8");
    // assertEquals reports the actual content on failure, unlike assertTrue(a.equals(b)).
    assertEquals("Hello", outXml.trim());
}
}

View File

@ -0,0 +1,33 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE bundle
[
<!ELEMENT bundle (node)>
<!ELEMENT node (subNode*)>
<!ELEMENT subNode (value)>
<!ELEMENT value (#PCDATA)>
]>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<bundle>
<node>
<subNode>
<value>Hello</value>
</subNode>
<subNode>
<value>World!</value>
</subNode>
</node>
</bundle>

View File

@ -0,0 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE bundle SYSTEM "non-existent-doctype.dtd">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<bundle>
<node>
<subNode>
<value>Hello</value>
</subNode>
<subNode>
<value>World!</value>
</subNode>
</node>
</bundle>