NIFI-4869 Added SAX utility method for SplitXML processor.

Added unit tests.
Added test resources to RAT exclude list.
This closes #2466
This commit is contained in:
Andy LoPresto 2018-02-12 21:10:16 -08:00 committed by Matt Gilman
parent a3edd34db0
commit 28067a29fd
No known key found for this signature in database
GPG Key ID: DF61EC19432AEE37
6 changed files with 354 additions and 16 deletions

View File

@ -16,11 +16,17 @@
*/ */
package org.apache.nifi.security.xml; package org.apache.nifi.security.xml;
import java.io.InputStream;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader; import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.stream.StreamSource; import javax.xml.transform.stream.StreamSource;
import java.io.InputStream; import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
public class XmlUtils { public class XmlUtils {
@ -41,4 +47,23 @@ public class XmlUtils {
xif.setProperty(XMLInputFactory.SUPPORT_DTD, false); xif.setProperty(XMLInputFactory.SUPPORT_DTD, false);
return xif.createXMLStreamReader(source); return xif.createXMLStreamReader(source);
} }
public static XMLReader createSafeSaxReader(SAXParserFactory saxParserFactory, ContentHandler contentHandler) throws SAXException, ParserConfigurationException {
if (saxParserFactory == null) {
throw new IllegalArgumentException("The provided SAX parser factory cannot be null");
}
if (contentHandler == null) {
throw new IllegalArgumentException("The provided SAX content handler cannot be null");
}
saxParserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
saxParserFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
SAXParser saxParser = saxParserFactory.newSAXParser();
XMLReader xmlReader = saxParser.getXMLReader();
xmlReader.setContentHandler(contentHandler);
return xmlReader;
}
} }

View File

@ -515,6 +515,8 @@
<!-- This file is copied from https://github.com/jeremyh/jBCrypt <!-- This file is copied from https://github.com/jeremyh/jBCrypt
because the binary is compiled for Java 8 and we must support Java 7 --> because the binary is compiled for Java 8 and we must support Java 7 -->
<exclude>src/main/java/org/apache/nifi/security/util/crypto/bcrypt/BCrypt.java</exclude> <exclude>src/main/java/org/apache/nifi/security/util/crypto/bcrypt/BCrypt.java</exclude>
<exclude>src/test/resources/xxe_template.xml</exclude>
<exclude>src/test/resources/xxe_from_report.xml</exclude>
</excludes> </excludes>
</configuration> </configuration>
</plugin> </plugin>

View File

@ -16,6 +16,12 @@
*/ */
package org.apache.nifi.processors.standard; package org.apache.nifi.processors.standard;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.copyAttributesToOriginal;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
@ -28,11 +34,8 @@ import java.util.TreeMap;
import java.util.UUID; import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory; import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.lang3.StringEscapeUtils; import org.apache.commons.lang3.StringEscapeUtils;
import org.apache.nifi.annotation.behavior.EventDriven; import org.apache.nifi.annotation.behavior.EventDriven;
import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement;
@ -54,7 +57,7 @@ import org.apache.nifi.processor.ProcessorInitializationContext;
import org.apache.nifi.processor.Relationship; import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.util.StandardValidators; import org.apache.nifi.processor.util.StandardValidators;
import org.apache.nifi.processors.standard.util.XmlElementNotifier; import org.apache.nifi.processors.standard.util.XmlElementNotifier;
import org.apache.nifi.stream.io.BufferedInputStream; import org.apache.nifi.security.xml.XmlUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes; import org.xml.sax.Attributes;
@ -64,12 +67,6 @@ import org.xml.sax.Locator;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
import org.xml.sax.XMLReader; import org.xml.sax.XMLReader;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME;
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.copyAttributesToOriginal;
@EventDriven @EventDriven
@SideEffectFree @SideEffectFree
@SupportsBatching @SupportsBatching
@ -175,12 +172,9 @@ public class SplitXml extends AbstractProcessor {
final AtomicBoolean failed = new AtomicBoolean(false); final AtomicBoolean failed = new AtomicBoolean(false);
session.read(original, rawIn -> { session.read(original, rawIn -> {
try (final InputStream in = new BufferedInputStream(rawIn)) { try (final InputStream in = new java.io.BufferedInputStream(rawIn)) {
SAXParser saxParser = null;
try { try {
saxParser = saxParserFactory.newSAXParser(); final XMLReader reader = XmlUtils.createSafeSaxReader(saxParserFactory, parser);
final XMLReader reader = saxParser.getXMLReader();
reader.setContentHandler(parser);
reader.parse(new InputSource(in)); reader.parse(new InputSource(in));
} catch (final ParserConfigurationException | SAXException e) { } catch (final ParserConfigurationException | SAXException e) {
logger.error("Unable to parse {} due to {}", new Object[]{original, e}); logger.error("Unable to parse {} due to {}", new Object[]{original, e});

View File

@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.processors.standard
import org.apache.nifi.util.TestRunner
import org.apache.nifi.util.TestRunners
import org.junit.After
import org.junit.Before
import org.junit.BeforeClass
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.JUnit4
import org.slf4j.Logger
import org.slf4j.LoggerFactory
import java.nio.file.Paths
@RunWith(JUnit4.class)
class SplitXmlTest extends GroovyTestCase {
private static final Logger logger = LoggerFactory.getLogger(SplitXmlTest.class)
@BeforeClass
static void setUpOnce() throws Exception {
logger.metaClass.methodMissing = { String name, args ->
logger.info("[${name?.toUpperCase()}] ${(args as List).join(" ")}")
}
}
@Before
void setUp() throws Exception {
}
@After
void tearDown() throws Exception {
}
@Test
void testShouldHandleXXEInTemplate() {
// Arrange
final String XXE_TEMPLATE_FILEPATH = "src/test/resources/xxe_template.xml"
final TestRunner runner = TestRunners.newTestRunner(new SplitXml())
runner.setProperty(SplitXml.SPLIT_DEPTH, "3")
runner.enqueue(Paths.get(XXE_TEMPLATE_FILEPATH))
// Act
runner.run()
logger.info("SplitXML processor ran")
// Assert
runner.assertAllFlowFilesTransferred(SplitXml.REL_FAILURE)
}
@Test
void testShouldHandleRemoteCallXXE() {
// Arrange
final String XXE_TEMPLATE_FILEPATH = "src/test/resources/xxe_from_report.xml"
final TestRunner runner = TestRunners.newTestRunner(new SplitXml())
runner.setProperty(SplitXml.SPLIT_DEPTH, "3")
runner.enqueue(Paths.get(XXE_TEMPLATE_FILEPATH))
// Act
runner.run()
logger.info("SplitXML processor ran")
// Assert
runner.assertAllFlowFilesTransferred(SplitXml.REL_FAILURE)
}
}

View File

@ -0,0 +1,2 @@
<!DOCTYPE ANY SYSTEM "http://some.external.nifi.apache.org:8888/xxe">
<a>1</b>

View File

@ -0,0 +1,230 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?><!DOCTYPE netspi [<!ENTITY xxe SYSTEM "file:///etc/passwd" >]>
<template>
<name>&xxe;</name>
<description>A simple template which generates flowfiles and logs them. </description>
<groupId>3a204982-015e-1000-eaa2-19d352ec8394</groupId>
<snippet>
<connections>
<id>0fbe8be5-306c-3b6c-0000-000000000000</id>
<parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId>
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
<destination>
<groupId>21ae0bd6-5db6-3a47-0000-000000000000</groupId>
<id>fd90023d-a235-30f6-0000-000000000000</id>
<type>PROCESSOR</type>
</destination>
<flowFileExpiration>0 sec</flowFileExpiration>
<labelIndex>1</labelIndex>
<name></name>
<selectedRelationships>success</selectedRelationships>
<source>
<groupId>21ae0bd6-5db6-3a47-0000-000000000000</groupId>
<id>ff49910d-06bb-37ee-0000-000000000000</id>
<type>PROCESSOR</type>
</source>
<zIndex>0</zIndex>
</connections>
<processors>
<id>fd90023d-a235-30f6-0000-000000000000</id>
<parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId>
<position>
<x>0.0</x>
<y>318.3128613789876</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.4.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>Log Level</key>
<value>
<name>Log Level</name>
</value>
</entry>
<entry>
<key>Log Payload</key>
<value>
<name>Log Payload</name>
</value>
</entry>
<entry>
<key>Attributes to Log</key>
<value>
<name>Attributes to Log</name>
</value>
</entry>
<entry>
<key>attributes-to-log-regex</key>
<value>
<name>attributes-to-log-regex</name>
</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
<value>
<name>Attributes to Ignore</name>
</value>
</entry>
<entry>
<key>attributes-to-ignore-regex</key>
<value>
<name>attributes-to-ignore-regex</name>
</value>
</entry>
<entry>
<key>Log prefix</key>
<value>
<name>Log prefix</name>
</value>
</entry>
<entry>
<key>character-set</key>
<value>
<name>character-set</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>Log Level</key>
<value>info</value>
</entry>
<entry>
<key>Log Payload</key>
<value>true</value>
</entry>
<entry>
<key>Attributes to Log</key>
</entry>
<entry>
<key>attributes-to-log-regex</key>
<value>.*</value>
</entry>
<entry>
<key>Attributes to Ignore</key>
</entry>
<entry>
<key>attributes-to-ignore-regex</key>
</entry>
<entry>
<key>Log prefix</key>
</entry>
<entry>
<key>character-set</key>
<value>UTF-8</value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>0 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>LogAttribute</name>
<relationships>
<autoTerminate>true</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.LogAttribute</type>
</processors>
<processors>
<id>ff49910d-06bb-37ee-0000-000000000000</id>
<parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId>
<position>
<x>1.1368683772161603E-13</x>
<y>0.0</y>
</position>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
<version>1.4.0-SNAPSHOT</version>
</bundle>
<config>
<bulletinLevel>WARN</bulletinLevel>
<comments></comments>
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
<descriptors>
<entry>
<key>File Size</key>
<value>
<name>File Size</name>
</value>
</entry>
<entry>
<key>Batch Size</key>
<value>
<name>Batch Size</name>
</value>
</entry>
<entry>
<key>Data Format</key>
<value>
<name>Data Format</name>
</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>
<name>Unique FlowFiles</name>
</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>
<name>generate-ff-custom-text</name>
</value>
</entry>
</descriptors>
<executionNode>ALL</executionNode>
<lossTolerant>false</lossTolerant>
<penaltyDuration>30 sec</penaltyDuration>
<properties>
<entry>
<key>File Size</key>
<value>0B</value>
</entry>
<entry>
<key>Batch Size</key>
<value>1</value>
</entry>
<entry>
<key>Data Format</key>
<value>Text</value>
</entry>
<entry>
<key>Unique FlowFiles</key>
<value>false</value>
</entry>
<entry>
<key>generate-ff-custom-text</key>
<value>This is a plaintext message. </value>
</entry>
</properties>
<runDurationMillis>0</runDurationMillis>
<schedulingPeriod>1 sec</schedulingPeriod>
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
<yieldDuration>1 sec</yieldDuration>
</config>
<name>GenerateFlowFile</name>
<relationships>
<autoTerminate>false</autoTerminate>
<name>success</name>
</relationships>
<state>STOPPED</state>
<style></style>
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
</processors>
</snippet>
<timestamp>09/05/2017 14:51:01 PDT</timestamp>
</template>