mirror of https://github.com/apache/nifi.git
NIFI-4869 Added SAX utility method for SplitXML processor.
Added unit tests. Added test resources to RAT exclude list. This closes #2466
This commit is contained in:
parent
a3edd34db0
commit
28067a29fd
|
@ -16,11 +16,17 @@
|
|||
*/
|
||||
package org.apache.nifi.security.xml;
|
||||
|
||||
import java.io.InputStream;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.parsers.SAXParser;
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
import javax.xml.transform.stream.StreamSource;
|
||||
import java.io.InputStream;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.XMLReader;
|
||||
|
||||
public class XmlUtils {
|
||||
|
||||
|
@ -41,4 +47,23 @@ public class XmlUtils {
|
|||
xif.setProperty(XMLInputFactory.SUPPORT_DTD, false);
|
||||
return xif.createXMLStreamReader(source);
|
||||
}
|
||||
|
||||
public static XMLReader createSafeSaxReader(SAXParserFactory saxParserFactory, ContentHandler contentHandler) throws SAXException, ParserConfigurationException {
|
||||
if (saxParserFactory == null) {
|
||||
throw new IllegalArgumentException("The provided SAX parser factory cannot be null");
|
||||
}
|
||||
|
||||
if (contentHandler == null) {
|
||||
throw new IllegalArgumentException("The provided SAX content handler cannot be null");
|
||||
}
|
||||
|
||||
saxParserFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
|
||||
saxParserFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
|
||||
|
||||
SAXParser saxParser = saxParserFactory.newSAXParser();
|
||||
XMLReader xmlReader = saxParser.getXMLReader();
|
||||
xmlReader.setContentHandler(contentHandler);
|
||||
|
||||
return xmlReader;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -515,6 +515,8 @@
|
|||
<!-- This file is copied from https://github.com/jeremyh/jBCrypt
|
||||
because the binary is compiled for Java 8 and we must support Java 7 -->
|
||||
<exclude>src/main/java/org/apache/nifi/security/util/crypto/bcrypt/BCrypt.java</exclude>
|
||||
<exclude>src/test/resources/xxe_template.xml</exclude>
|
||||
<exclude>src/test/resources/xxe_from_report.xml</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
|
|
@ -16,6 +16,12 @@
|
|||
*/
|
||||
package org.apache.nifi.processors.standard;
|
||||
|
||||
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT;
|
||||
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID;
|
||||
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX;
|
||||
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME;
|
||||
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.copyAttributesToOriginal;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
|
@ -28,11 +34,8 @@ import java.util.TreeMap;
|
|||
import java.util.UUID;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.parsers.SAXParser;
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
|
||||
import org.apache.commons.lang3.StringEscapeUtils;
|
||||
import org.apache.nifi.annotation.behavior.EventDriven;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
|
@ -54,7 +57,7 @@ import org.apache.nifi.processor.ProcessorInitializationContext;
|
|||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.apache.nifi.processors.standard.util.XmlElementNotifier;
|
||||
import org.apache.nifi.stream.io.BufferedInputStream;
|
||||
import org.apache.nifi.security.xml.XmlUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.xml.sax.Attributes;
|
||||
|
@ -64,12 +67,6 @@ import org.xml.sax.Locator;
|
|||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.XMLReader;
|
||||
|
||||
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT;
|
||||
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID;
|
||||
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX;
|
||||
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.SEGMENT_ORIGINAL_FILENAME;
|
||||
import static org.apache.nifi.flowfile.attributes.FragmentAttributes.copyAttributesToOriginal;
|
||||
|
||||
@EventDriven
|
||||
@SideEffectFree
|
||||
@SupportsBatching
|
||||
|
@ -175,12 +172,9 @@ public class SplitXml extends AbstractProcessor {
|
|||
|
||||
final AtomicBoolean failed = new AtomicBoolean(false);
|
||||
session.read(original, rawIn -> {
|
||||
try (final InputStream in = new BufferedInputStream(rawIn)) {
|
||||
SAXParser saxParser = null;
|
||||
try (final InputStream in = new java.io.BufferedInputStream(rawIn)) {
|
||||
try {
|
||||
saxParser = saxParserFactory.newSAXParser();
|
||||
final XMLReader reader = saxParser.getXMLReader();
|
||||
reader.setContentHandler(parser);
|
||||
final XMLReader reader = XmlUtils.createSafeSaxReader(saxParserFactory, parser);
|
||||
reader.parse(new InputSource(in));
|
||||
} catch (final ParserConfigurationException | SAXException e) {
|
||||
logger.error("Unable to parse {} due to {}", new Object[]{original, e});
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi.processors.standard
|
||||
|
||||
import org.apache.nifi.util.TestRunner
|
||||
import org.apache.nifi.util.TestRunners
|
||||
import org.junit.After
|
||||
import org.junit.Before
|
||||
import org.junit.BeforeClass
|
||||
import org.junit.Test
|
||||
import org.junit.runner.RunWith
|
||||
import org.junit.runners.JUnit4
|
||||
import org.slf4j.Logger
|
||||
import org.slf4j.LoggerFactory
|
||||
|
||||
import java.nio.file.Paths
|
||||
|
||||
|
||||
@RunWith(JUnit4.class)
|
||||
class SplitXmlTest extends GroovyTestCase {
|
||||
private static final Logger logger = LoggerFactory.getLogger(SplitXmlTest.class)
|
||||
|
||||
@BeforeClass
|
||||
static void setUpOnce() throws Exception {
|
||||
logger.metaClass.methodMissing = { String name, args ->
|
||||
logger.info("[${name?.toUpperCase()}] ${(args as List).join(" ")}")
|
||||
}
|
||||
}
|
||||
|
||||
@Before
|
||||
void setUp() throws Exception {
|
||||
|
||||
}
|
||||
|
||||
@After
|
||||
void tearDown() throws Exception {
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
void testShouldHandleXXEInTemplate() {
|
||||
// Arrange
|
||||
final String XXE_TEMPLATE_FILEPATH = "src/test/resources/xxe_template.xml"
|
||||
final TestRunner runner = TestRunners.newTestRunner(new SplitXml())
|
||||
runner.setProperty(SplitXml.SPLIT_DEPTH, "3")
|
||||
runner.enqueue(Paths.get(XXE_TEMPLATE_FILEPATH))
|
||||
|
||||
// Act
|
||||
runner.run()
|
||||
logger.info("SplitXML processor ran")
|
||||
|
||||
// Assert
|
||||
runner.assertAllFlowFilesTransferred(SplitXml.REL_FAILURE)
|
||||
}
|
||||
|
||||
@Test
|
||||
void testShouldHandleRemoteCallXXE() {
|
||||
// Arrange
|
||||
final String XXE_TEMPLATE_FILEPATH = "src/test/resources/xxe_from_report.xml"
|
||||
final TestRunner runner = TestRunners.newTestRunner(new SplitXml())
|
||||
runner.setProperty(SplitXml.SPLIT_DEPTH, "3")
|
||||
runner.enqueue(Paths.get(XXE_TEMPLATE_FILEPATH))
|
||||
|
||||
// Act
|
||||
runner.run()
|
||||
logger.info("SplitXML processor ran")
|
||||
|
||||
// Assert
|
||||
runner.assertAllFlowFilesTransferred(SplitXml.REL_FAILURE)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,2 @@
|
|||
<!DOCTYPE ANY SYSTEM "http://some.external.nifi.apache.org:8888/xxe">
|
||||
<a>1</b>
|
|
@ -0,0 +1,230 @@
|
|||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?><!DOCTYPE netspi [<!ENTITY xxe SYSTEM "file:///etc/passwd" >]>
|
||||
<template>
|
||||
<name>&xxe;</name>
|
||||
<description>A simple template which generates flowfiles and logs them. </description>
|
||||
<groupId>3a204982-015e-1000-eaa2-19d352ec8394</groupId>
|
||||
<snippet>
|
||||
<connections>
|
||||
<id>0fbe8be5-306c-3b6c-0000-000000000000</id>
|
||||
<parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId>
|
||||
<backPressureDataSizeThreshold>1 GB</backPressureDataSizeThreshold>
|
||||
<backPressureObjectThreshold>10000</backPressureObjectThreshold>
|
||||
<destination>
|
||||
<groupId>21ae0bd6-5db6-3a47-0000-000000000000</groupId>
|
||||
<id>fd90023d-a235-30f6-0000-000000000000</id>
|
||||
<type>PROCESSOR</type>
|
||||
</destination>
|
||||
<flowFileExpiration>0 sec</flowFileExpiration>
|
||||
<labelIndex>1</labelIndex>
|
||||
<name></name>
|
||||
<selectedRelationships>success</selectedRelationships>
|
||||
<source>
|
||||
<groupId>21ae0bd6-5db6-3a47-0000-000000000000</groupId>
|
||||
<id>ff49910d-06bb-37ee-0000-000000000000</id>
|
||||
<type>PROCESSOR</type>
|
||||
</source>
|
||||
<zIndex>0</zIndex>
|
||||
</connections>
|
||||
<processors>
|
||||
<id>fd90023d-a235-30f6-0000-000000000000</id>
|
||||
<parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId>
|
||||
<position>
|
||||
<x>0.0</x>
|
||||
<y>318.3128613789876</y>
|
||||
</position>
|
||||
<bundle>
|
||||
<artifact>nifi-standard-nar</artifact>
|
||||
<group>org.apache.nifi</group>
|
||||
<version>1.4.0-SNAPSHOT</version>
|
||||
</bundle>
|
||||
<config>
|
||||
<bulletinLevel>WARN</bulletinLevel>
|
||||
<comments></comments>
|
||||
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
|
||||
<descriptors>
|
||||
<entry>
|
||||
<key>Log Level</key>
|
||||
<value>
|
||||
<name>Log Level</name>
|
||||
</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Log Payload</key>
|
||||
<value>
|
||||
<name>Log Payload</name>
|
||||
</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Attributes to Log</key>
|
||||
<value>
|
||||
<name>Attributes to Log</name>
|
||||
</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>attributes-to-log-regex</key>
|
||||
<value>
|
||||
<name>attributes-to-log-regex</name>
|
||||
</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Attributes to Ignore</key>
|
||||
<value>
|
||||
<name>Attributes to Ignore</name>
|
||||
</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>attributes-to-ignore-regex</key>
|
||||
<value>
|
||||
<name>attributes-to-ignore-regex</name>
|
||||
</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Log prefix</key>
|
||||
<value>
|
||||
<name>Log prefix</name>
|
||||
</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>character-set</key>
|
||||
<value>
|
||||
<name>character-set</name>
|
||||
</value>
|
||||
</entry>
|
||||
</descriptors>
|
||||
<executionNode>ALL</executionNode>
|
||||
<lossTolerant>false</lossTolerant>
|
||||
<penaltyDuration>30 sec</penaltyDuration>
|
||||
<properties>
|
||||
<entry>
|
||||
<key>Log Level</key>
|
||||
<value>info</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Log Payload</key>
|
||||
<value>true</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Attributes to Log</key>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>attributes-to-log-regex</key>
|
||||
<value>.*</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Attributes to Ignore</key>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>attributes-to-ignore-regex</key>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Log prefix</key>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>character-set</key>
|
||||
<value>UTF-8</value>
|
||||
</entry>
|
||||
</properties>
|
||||
<runDurationMillis>0</runDurationMillis>
|
||||
<schedulingPeriod>0 sec</schedulingPeriod>
|
||||
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
|
||||
<yieldDuration>1 sec</yieldDuration>
|
||||
</config>
|
||||
<name>LogAttribute</name>
|
||||
<relationships>
|
||||
<autoTerminate>true</autoTerminate>
|
||||
<name>success</name>
|
||||
</relationships>
|
||||
<state>STOPPED</state>
|
||||
<style></style>
|
||||
<type>org.apache.nifi.processors.standard.LogAttribute</type>
|
||||
</processors>
|
||||
<processors>
|
||||
<id>ff49910d-06bb-37ee-0000-000000000000</id>
|
||||
<parentGroupId>21ae0bd6-5db6-3a47-0000-000000000000</parentGroupId>
|
||||
<position>
|
||||
<x>1.1368683772161603E-13</x>
|
||||
<y>0.0</y>
|
||||
</position>
|
||||
<bundle>
|
||||
<artifact>nifi-standard-nar</artifact>
|
||||
<group>org.apache.nifi</group>
|
||||
<version>1.4.0-SNAPSHOT</version>
|
||||
</bundle>
|
||||
<config>
|
||||
<bulletinLevel>WARN</bulletinLevel>
|
||||
<comments></comments>
|
||||
<concurrentlySchedulableTaskCount>1</concurrentlySchedulableTaskCount>
|
||||
<descriptors>
|
||||
<entry>
|
||||
<key>File Size</key>
|
||||
<value>
|
||||
<name>File Size</name>
|
||||
</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Batch Size</key>
|
||||
<value>
|
||||
<name>Batch Size</name>
|
||||
</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Data Format</key>
|
||||
<value>
|
||||
<name>Data Format</name>
|
||||
</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Unique FlowFiles</key>
|
||||
<value>
|
||||
<name>Unique FlowFiles</name>
|
||||
</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>generate-ff-custom-text</key>
|
||||
<value>
|
||||
<name>generate-ff-custom-text</name>
|
||||
</value>
|
||||
</entry>
|
||||
</descriptors>
|
||||
<executionNode>ALL</executionNode>
|
||||
<lossTolerant>false</lossTolerant>
|
||||
<penaltyDuration>30 sec</penaltyDuration>
|
||||
<properties>
|
||||
<entry>
|
||||
<key>File Size</key>
|
||||
<value>0B</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Batch Size</key>
|
||||
<value>1</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Data Format</key>
|
||||
<value>Text</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>Unique FlowFiles</key>
|
||||
<value>false</value>
|
||||
</entry>
|
||||
<entry>
|
||||
<key>generate-ff-custom-text</key>
|
||||
<value>This is a plaintext message. </value>
|
||||
</entry>
|
||||
</properties>
|
||||
<runDurationMillis>0</runDurationMillis>
|
||||
<schedulingPeriod>1 sec</schedulingPeriod>
|
||||
<schedulingStrategy>TIMER_DRIVEN</schedulingStrategy>
|
||||
<yieldDuration>1 sec</yieldDuration>
|
||||
</config>
|
||||
<name>GenerateFlowFile</name>
|
||||
<relationships>
|
||||
<autoTerminate>false</autoTerminate>
|
||||
<name>success</name>
|
||||
</relationships>
|
||||
<state>STOPPED</state>
|
||||
<style></style>
|
||||
<type>org.apache.nifi.processors.standard.GenerateFlowFile</type>
|
||||
</processors>
|
||||
</snippet>
|
||||
<timestamp>09/05/2017 14:51:01 PDT</timestamp>
|
||||
</template>
|
Loading…
Reference in New Issue