mirror of https://github.com/apache/nifi.git
HTML Parsing Processors Bundle
NIFI-1156 HTML Parsing Processors Bundle
This commit is contained in:
parent
56ad22aea6
commit
c82fc18f8e
|
@ -162,6 +162,11 @@ language governing permissions and limitations under the License. -->
|
|||
<artifactId>nifi-http-context-map-nar</artifactId>
|
||||
<type>nar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-html-nar</artifactId>
|
||||
<type>nar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-kite-nar</artifactId>
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!-- Maven module that packages the HTML parsing processors as a NiFi archive (packaging "nar"). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<!-- Inherits from the nifi-html-bundle aggregator POM. -->
<parent>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-html-bundle</artifactId>
|
||||
<version>0.4.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>nifi-html-nar</artifactId>
|
||||
<packaging>nar</packaging>
|
||||
|
||||
<dependencies>
|
||||
<!-- NAR-typed dependency with no explicit version; presumably the version is managed by an ancestor POM (TODO confirm). -->
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-standard-services-api-nar</artifactId>
|
||||
<type>nar</type>
|
||||
</dependency>
|
||||
<!-- The processors jar bundled into this NAR; its version is pinned here to the same value as the parent version. -->
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-html-processors</artifactId>
|
||||
<version>0.4.0-SNAPSHOT</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
|
@ -0,0 +1,59 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<!-- Maven module containing the HTML parsing processor classes (default jar packaging, since no packaging element is declared). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<!-- Inherits from the nifi-html-bundle aggregator POM. -->
<parent>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-html-bundle</artifactId>
|
||||
<version>0.4.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<artifactId>nifi-html-processors</artifactId>
|
||||
<description>Support for parsing HTML documents</description>
|
||||
|
||||
<dependencies>
|
||||
<!-- Jsoup HTML parser; the version is pinned explicitly in this module. -->
<dependency>
|
||||
<groupId>org.jsoup</groupId>
|
||||
<artifactId>jsoup</artifactId>
|
||||
<version>1.8.3</version>
|
||||
</dependency>
|
||||
<!-- NiFi dependencies below carry no version; presumably these are managed by an ancestor POM (TODO confirm). -->
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-api</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-processor-utils</artifactId>
|
||||
</dependency>
|
||||
<!-- Test-scoped dependencies: not part of the runtime classpath of the processors. -->
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-mock</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-simple</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- NOTE(review): junit is the only test dependency with an explicit version here; check whether an ancestor POM already manages it. -->
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.11</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.AbstractProcessor;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.io.InputStreamCallback;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
public abstract class AbstractHTMLProcessor extends AbstractProcessor {
|
||||
|
||||
protected static final String ELEMENT_HTML = "HTML";
|
||||
protected static final String ELEMENT_TEXT = "Text";
|
||||
protected static final String ELEMENT_DATA = "Data";
|
||||
protected static final String ELEMENT_ATTRIBUTE = "Attribute";
|
||||
|
||||
public static final PropertyDescriptor URL = new PropertyDescriptor
|
||||
.Builder().name("URL")
|
||||
.description("Base URL for the HTML page being parsed.")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor CSS_SELECTOR = new PropertyDescriptor
|
||||
.Builder().name("CSS Selector")
|
||||
.description("CSS selector syntax string used to extract the desired HTML element(s).")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(true)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HTML_CHARSET = new PropertyDescriptor
|
||||
.Builder().name("HTML character encoding")
|
||||
.description("Character encoding of the input HTML")
|
||||
.defaultValue("UTF-8")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_ORIGINAL = new Relationship.Builder()
|
||||
.name("original")
|
||||
.description("The original HTML input")
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_SUCCESS = new Relationship.Builder()
|
||||
.name("success")
|
||||
.description("Successfully parsed HTML element")
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("Failed to parse HTML content")
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_INVALID_HTML = new Relationship.Builder()
|
||||
.name("invalid html")
|
||||
.description("The input HTML syntax is invalid")
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_NOT_FOUND = new Relationship.Builder()
|
||||
.name("element not found")
|
||||
.description("Element could not be found in the HTML document. The original HTML input will remain " +
|
||||
"in the flowfile content unchanged. Relationship '" + REL_ORIGINAL + "' will not be invoked " +
|
||||
"in this scenario.")
|
||||
.build();
|
||||
|
||||
/**
|
||||
* Parses the Jsoup HTML document from the FlowFile input content.
|
||||
*
|
||||
* @param inputFlowFile
|
||||
* Input FlowFile containing the HTML
|
||||
*
|
||||
* @param context
|
||||
* ProcessContext
|
||||
*
|
||||
* @param session
|
||||
* ProcessSession
|
||||
*
|
||||
* @return
|
||||
* Jsoup Document
|
||||
*/
|
||||
protected Document parseHTMLDocumentFromFlowfile(FlowFile inputFlowFile,
|
||||
final ProcessContext context,
|
||||
final ProcessSession session) {
|
||||
final AtomicReference<Document> doc = new AtomicReference<>();
|
||||
session.read(inputFlowFile, new InputStreamCallback() {
|
||||
@Override
|
||||
public void process(InputStream inputStream) throws IOException {
|
||||
doc.set(Jsoup.parse(inputStream,
|
||||
context.getProperty(HTML_CHARSET).getValue(),
|
||||
context.getProperty(URL).getValue()));
|
||||
}
|
||||
});
|
||||
return doc.get();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,243 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.SeeAlso;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.io.StreamCallback;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
import java.util.Collections;
|
||||
|
||||
@Tags({"get", "html", "dom", "css", "element"})
|
||||
@CapabilityDescription("Parses HTML input using CSS selector syntax and creates a new flowfile containing the extracted" +
|
||||
" element content for each matching CSS selector.")
|
||||
@SeeAlso({ModifyHTMLElement.class, PutHTMLElement.class})
|
||||
@WritesAttributes({@WritesAttribute(attribute="HTMLElement", description="Flowfile attribute where the element result" +
|
||||
" parsed from the HTML using the CSS selector syntax are placed if the destination is a flowfile attribute.")})
|
||||
public class GetHTMLElement
|
||||
extends AbstractHTMLProcessor {
|
||||
|
||||
public static final String HTML_ELEMENT_ATTRIBUTE_NAME = "HTMLElement";
|
||||
public static final String DESTINATION_ATTRIBUTE = "flowfile-attribute";
|
||||
public static final String DESTINATION_CONTENT = "flowfile-content";
|
||||
|
||||
public static final PropertyDescriptor PREPEND_ELEMENT_VALUE = new PropertyDescriptor
|
||||
.Builder().name("Prepend Element value")
|
||||
.description("Prepends the specified value to the resulting Element")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(true)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor APPEND_ELEMENT_VALUE = new PropertyDescriptor
|
||||
.Builder().name("Append Element value")
|
||||
.description("Appends the specified value to the resulting Element")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(true)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor ATTRIBUTE_KEY = new PropertyDescriptor
|
||||
.Builder().name("Attribute Name")
|
||||
.description(("When getting the value of an element attribute this value is used as the key to determine" +
|
||||
" which attribute on the selected element should be retrieved."))
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(true)
|
||||
.build();
|
||||
|
||||
|
||||
public static final PropertyDescriptor OUTPUT_TYPE = new PropertyDescriptor.Builder()
|
||||
.name("Output Type")
|
||||
.description("Controls the type of value that is retrieved from the element. " +
|
||||
ELEMENT_HTML + "," + ELEMENT_TEXT + ", " + ELEMENT_ATTRIBUTE + " or " + ELEMENT_DATA)
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.allowableValues(ELEMENT_HTML, ELEMENT_TEXT, ELEMENT_ATTRIBUTE, ELEMENT_DATA)
|
||||
.defaultValue(ELEMENT_HTML)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor DESTINATION = new PropertyDescriptor.Builder()
|
||||
.name("Destination")
|
||||
.description("Control if element extracted is written as a flowfile attribute or " +
|
||||
"as flowfile content.")
|
||||
.required(true)
|
||||
.allowableValues(DESTINATION_ATTRIBUTE, DESTINATION_CONTENT)
|
||||
.defaultValue(DESTINATION_ATTRIBUTE)
|
||||
.build();
|
||||
|
||||
private List<PropertyDescriptor> descriptors;
|
||||
|
||||
private Set<Relationship> relationships;
|
||||
|
||||
@Override
|
||||
protected void init(final ProcessorInitializationContext context) {
|
||||
final List<PropertyDescriptor> descriptors = new ArrayList<>();
|
||||
descriptors.add(URL);
|
||||
descriptors.add(CSS_SELECTOR);
|
||||
descriptors.add(HTML_CHARSET);
|
||||
descriptors.add(OUTPUT_TYPE);
|
||||
descriptors.add(DESTINATION);
|
||||
descriptors.add(PREPEND_ELEMENT_VALUE);
|
||||
descriptors.add(APPEND_ELEMENT_VALUE);
|
||||
descriptors.add(ATTRIBUTE_KEY);
|
||||
this.descriptors = Collections.unmodifiableList(descriptors);
|
||||
|
||||
final Set<Relationship> relationships = new HashSet<>();
|
||||
relationships.add(REL_ORIGINAL);
|
||||
relationships.add(REL_SUCCESS);
|
||||
relationships.add(REL_FAILURE);
|
||||
relationships.add(REL_NOT_FOUND);
|
||||
this.relationships = Collections.unmodifiableSet(relationships);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return this.relationships;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return descriptors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
|
||||
final FlowFile flowFile = session.get();
|
||||
if ( flowFile == null ) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
|
||||
final Document doc = parseHTMLDocumentFromFlowfile(flowFile, context, session);
|
||||
final Elements eles = doc.select(context.getProperty(CSS_SELECTOR)
|
||||
.evaluateAttributeExpressions().getValue());
|
||||
final String prependValue = context.getProperty(PREPEND_ELEMENT_VALUE)
|
||||
.evaluateAttributeExpressions(flowFile).getValue();
|
||||
final String appendValue = context.getProperty(APPEND_ELEMENT_VALUE)
|
||||
.evaluateAttributeExpressions(flowFile).getValue();
|
||||
|
||||
if (eles == null || eles.size() == 0) {
|
||||
//No element found
|
||||
session.transfer(flowFile, REL_NOT_FOUND);
|
||||
} else {
|
||||
for (final Element ele : eles) {
|
||||
final FlowFile ff = session.create();
|
||||
|
||||
switch (context.getProperty(DESTINATION).getValue()) {
|
||||
case DESTINATION_ATTRIBUTE:
|
||||
final FlowFile atFlowfile = session.putAttribute(ff, HTML_ELEMENT_ATTRIBUTE_NAME,
|
||||
extractElementValue(
|
||||
prependValue,
|
||||
context.getProperty(OUTPUT_TYPE).getValue(),
|
||||
appendValue,
|
||||
ele,
|
||||
context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions()
|
||||
.getValue()));
|
||||
session.getProvenanceReporter().create(atFlowfile);
|
||||
session.transfer(atFlowfile, REL_SUCCESS);
|
||||
break;
|
||||
case DESTINATION_CONTENT:
|
||||
final FlowFile conFlowfile = session.write(ff, new StreamCallback() {
|
||||
@Override
|
||||
public void process(InputStream inputStream, OutputStream outputStream) throws IOException {
|
||||
try {
|
||||
outputStream.write(extractElementValue(
|
||||
prependValue,
|
||||
context.getProperty(OUTPUT_TYPE).getValue(),
|
||||
appendValue,
|
||||
ele,
|
||||
context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions()
|
||||
.getValue()).getBytes());
|
||||
} catch (Exception ex) {
|
||||
session.transfer(ff, REL_FAILURE);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
session.getProvenanceReporter().create(conFlowfile);
|
||||
session.transfer(conFlowfile, REL_SUCCESS);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//Transfer the original HTML
|
||||
session.transfer(flowFile, REL_ORIGINAL);
|
||||
}
|
||||
|
||||
} catch (Exception ex) {
|
||||
getLogger().error(ex.getMessage());
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Extracts the HTML value based on the configuration values.
|
||||
*
|
||||
* @return
|
||||
* value from the parsed HTML element
|
||||
*/
|
||||
private String extractElementValue(String prependValue, String outputType, String appendValue, Element ele,
|
||||
String attrKey) {
|
||||
if (StringUtils.isEmpty(prependValue)) {
|
||||
prependValue = "";
|
||||
}
|
||||
if (StringUtils.isEmpty(appendValue)) {
|
||||
appendValue = "";
|
||||
}
|
||||
|
||||
switch (outputType) {
|
||||
case ELEMENT_HTML:
|
||||
return prependValue + ele.html() + appendValue;
|
||||
case ELEMENT_TEXT:
|
||||
return prependValue + ele.text() + appendValue;
|
||||
case ELEMENT_DATA:
|
||||
return prependValue + ele.data() + appendValue;
|
||||
case ELEMENT_ATTRIBUTE:
|
||||
return prependValue + ele.attr(attrKey) + appendValue;
|
||||
default:
|
||||
return prependValue + ele.html() + appendValue;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,164 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.SeeAlso;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.io.StreamCallback;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
import java.util.Collections;
|
||||
|
||||
@Tags({"modify", "html", "dom", "css", "element"})
|
||||
@CapabilityDescription("Modifies the value of an existing HTML element in the original input HTML")
|
||||
@SeeAlso({GetHTMLElement.class, PutHTMLElement.class})
|
||||
@WritesAttributes({@WritesAttribute(attribute="NumElementsModified", description="Total number of HTML " +
|
||||
"element modifications made")})
|
||||
public class ModifyHTMLElement extends AbstractHTMLProcessor {
|
||||
|
||||
public static final String NUM_ELEMENTS_MODIFIED_ATTR = "NumElementsModified";
|
||||
|
||||
public static final PropertyDescriptor OUTPUT_TYPE = new PropertyDescriptor.Builder()
|
||||
.name("Output Type")
|
||||
.description("Controls whether the HTML element is output as " +
|
||||
ELEMENT_HTML + "," + ELEMENT_TEXT + " or " + ELEMENT_DATA)
|
||||
.required(true)
|
||||
.allowableValues(ELEMENT_HTML, ELEMENT_TEXT, ELEMENT_ATTRIBUTE)
|
||||
.defaultValue(ELEMENT_HTML)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor MODIFIED_VALUE = new PropertyDescriptor
|
||||
.Builder().name("Modified Value")
|
||||
.description("Value to update the found HTML elements with")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(true)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor ATTRIBUTE_KEY = new PropertyDescriptor
|
||||
.Builder().name("Attribute Name")
|
||||
.description(("When modifying the value of an element attribute this value is used as the key to determine" +
|
||||
" which attribute on the selected element will be modified with the new value."))
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(true)
|
||||
.build();
|
||||
|
||||
private List<PropertyDescriptor> descriptors;
|
||||
|
||||
private Set<Relationship> relationships;
|
||||
|
||||
@Override
|
||||
protected void init(final ProcessorInitializationContext context) {
|
||||
final List<PropertyDescriptor> descriptors = new ArrayList<>();
|
||||
descriptors.add(URL);
|
||||
descriptors.add(CSS_SELECTOR);
|
||||
descriptors.add(HTML_CHARSET);
|
||||
descriptors.add(OUTPUT_TYPE);
|
||||
descriptors.add(MODIFIED_VALUE);
|
||||
descriptors.add(ATTRIBUTE_KEY);
|
||||
this.descriptors = Collections.unmodifiableList(descriptors);
|
||||
|
||||
final Set<Relationship> relationships = new HashSet<Relationship>();
|
||||
relationships.add(REL_ORIGINAL);
|
||||
relationships.add(REL_SUCCESS);
|
||||
relationships.add(REL_FAILURE);
|
||||
relationships.add(REL_INVALID_HTML);
|
||||
relationships.add(REL_NOT_FOUND);
|
||||
this.relationships = Collections.unmodifiableSet(relationships);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return this.relationships;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return descriptors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
|
||||
final FlowFile flowFile = session.get();
|
||||
if (flowFile == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
final Document doc = parseHTMLDocumentFromFlowfile(flowFile, context, session);
|
||||
final Elements eles = doc.select(context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions().getValue());
|
||||
|
||||
if (eles == null || eles.size() == 0) {
|
||||
//No element found
|
||||
session.transfer(flowFile, REL_NOT_FOUND);
|
||||
} else {
|
||||
for (Element ele : eles) {
|
||||
switch (context.getProperty(OUTPUT_TYPE).getValue()) {
|
||||
case ELEMENT_HTML:
|
||||
ele.html(context.getProperty(MODIFIED_VALUE).evaluateAttributeExpressions().getValue());
|
||||
break;
|
||||
case ELEMENT_ATTRIBUTE:
|
||||
ele.attr(context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions().getValue(),
|
||||
context.getProperty(MODIFIED_VALUE).evaluateAttributeExpressions().getValue());
|
||||
break;
|
||||
case ELEMENT_TEXT:
|
||||
ele.text(context.getProperty(MODIFIED_VALUE).evaluateAttributeExpressions().getValue());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
FlowFile ff = session.write(session.create(flowFile), new StreamCallback() {
|
||||
@Override
|
||||
public void process(InputStream in, OutputStream out) throws IOException {
|
||||
out.write(doc.html().getBytes());
|
||||
}
|
||||
});
|
||||
ff = session.putAttribute(ff, NUM_ELEMENTS_MODIFIED_ATTR, new Integer(eles.size()).toString());
|
||||
session.transfer(ff, REL_SUCCESS);
|
||||
|
||||
//Transfer the original HTML
|
||||
session.transfer(flowFile, REL_ORIGINAL);
|
||||
}
|
||||
|
||||
} catch (Exception ex) {
|
||||
getLogger().error(ex.getMessage());
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,150 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.SeeAlso;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.Relationship;
|
||||
import org.apache.nifi.processor.ProcessorInitializationContext;
|
||||
import org.apache.nifi.processor.exception.ProcessException;
|
||||
import org.apache.nifi.processor.io.StreamCallback;
|
||||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
import java.util.Collections;
|
||||
|
||||
@Tags({"put", "html", "dom", "css", "element"})
|
||||
@CapabilityDescription("Creates a new HTML element in the input HTML")
|
||||
@SeeAlso({GetHTMLElement.class, ModifyHTMLElement.class})
|
||||
public class PutHTMLElement extends AbstractHTMLProcessor {
|
||||
|
||||
public static final String APPEND_ELEMENT = "append-html";
|
||||
public static final String PREPEND_ELEMENT = "prepend-html";
|
||||
|
||||
public static final PropertyDescriptor PUT_LOCATION_TYPE = new PropertyDescriptor.Builder()
|
||||
.name("Element Insert Location Type")
|
||||
.description("Controls whether the new element is prepended or appended to the children of the " +
|
||||
"Element located by the CSS selector. EX: prepended value '<b>Hi</b>' inside of " +
|
||||
"Element (using CSS Selector 'p') '<p>There</p>' would result in " +
|
||||
"'<p><b>Hi</b>There</p>'. Appending the value would result in '<p>There<b>Hi</b></p>'")
|
||||
.required(true)
|
||||
.allowableValues(APPEND_ELEMENT, PREPEND_ELEMENT)
|
||||
.defaultValue(APPEND_ELEMENT)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor PUT_VALUE = new PropertyDescriptor.Builder()
|
||||
.name("Put Value")
|
||||
.description("Value used when creating the new Element. Value should be a valid HTML element. " +
|
||||
"The text should be supplied unencoded: characters like '<', '>', etc will be properly HTML " +
|
||||
"encoded in the output.")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(true)
|
||||
.build();
|
||||
|
||||
private List<PropertyDescriptor> descriptors;
|
||||
|
||||
private Set<Relationship> relationships;
|
||||
|
||||
@Override
|
||||
protected void init(final ProcessorInitializationContext context) {
|
||||
final List<PropertyDescriptor> descriptors = new ArrayList<PropertyDescriptor>();
|
||||
descriptors.add(URL);
|
||||
descriptors.add(CSS_SELECTOR);
|
||||
descriptors.add(HTML_CHARSET);
|
||||
descriptors.add(PUT_LOCATION_TYPE);
|
||||
descriptors.add(PUT_VALUE);
|
||||
this.descriptors = Collections.unmodifiableList(descriptors);
|
||||
|
||||
final Set<Relationship> relationships = new HashSet<Relationship>();
|
||||
relationships.add(REL_ORIGINAL);
|
||||
relationships.add(REL_SUCCESS);
|
||||
relationships.add(REL_FAILURE);
|
||||
relationships.add(REL_INVALID_HTML);
|
||||
this.relationships = Collections.unmodifiableSet(relationships);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Relationship> getRelationships() {
|
||||
return this.relationships;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final List<PropertyDescriptor> getSupportedPropertyDescriptors() {
|
||||
return descriptors;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
|
||||
final FlowFile flowFile = session.get();
|
||||
if (flowFile == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
final Document doc = parseHTMLDocumentFromFlowfile(flowFile, context, session);
|
||||
final Elements eles = doc.select(context.getProperty(CSS_SELECTOR).evaluateAttributeExpressions().getValue());
|
||||
|
||||
if (eles == null || eles.size() == 0) {
|
||||
//No element found
|
||||
session.transfer(flowFile, REL_NOT_FOUND);
|
||||
} else {
|
||||
for (Element ele : eles) {
|
||||
switch (context.getProperty(PUT_LOCATION_TYPE).getValue()) {
|
||||
case APPEND_ELEMENT:
|
||||
ele.append(context.getProperty(PUT_VALUE).evaluateAttributeExpressions().getValue());
|
||||
break;
|
||||
case PREPEND_ELEMENT:
|
||||
ele.prepend(context.getProperty(PUT_VALUE).evaluateAttributeExpressions().getValue());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
FlowFile ff = session.write(session.create(flowFile), new StreamCallback() {
|
||||
@Override
|
||||
public void process(InputStream in, OutputStream out) throws IOException {
|
||||
out.write(doc.html().getBytes());
|
||||
}
|
||||
});
|
||||
session.transfer(ff, REL_SUCCESS);
|
||||
|
||||
//Transfer the original HTML
|
||||
session.transfer(flowFile, REL_ORIGINAL);
|
||||
}
|
||||
|
||||
} catch (Exception ex) {
|
||||
getLogger().error(ex.getMessage());
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
org.apache.nifi.GetHTMLElement
|
||||
org.apache.nifi.ModifyHTMLElement
|
||||
org.apache.nifi.PutHTMLElement
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.io.StreamCallback;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
||||
public class AbstractHTMLTest {
|
||||
|
||||
protected final String ATL_WEATHER_TEXT = "Atlanta Weather";
|
||||
protected final String GDR_WEATHER_TEXT = "<i>Grand Rapids Weather</i>";
|
||||
protected final String ATL_WEATHER_LINK = "http://w1.weather.gov/obhistory/KPDK.html";
|
||||
protected final String GR_WEATHER_LINK = "http://w1.weather.gov/obhistory/KGRR.html";
|
||||
protected final String AUTHOR_NAME = "Jeremy Dyer";
|
||||
protected final String ATL_ID = "ATL";
|
||||
protected final String GDR_ID = "GDR";
|
||||
|
||||
protected final String HTML = "<!doctype html>\n" +
|
||||
"\n" +
|
||||
"<html lang=\"en\">\n" +
|
||||
"<head>\n" +
|
||||
" <meta charset=\"utf-8\">\n" +
|
||||
"\n" +
|
||||
" <title>NiFi HTML Parsing Demo</title>\n" +
|
||||
" <meta name=\"description\" content=\"NiFi HTML Parsing Demo\">\n" +
|
||||
" <meta name=\"author\" content=\"" + AUTHOR_NAME + "\">\n" +
|
||||
"\n" +
|
||||
" <link rel=\"stylesheet\" href=\"css/styles.css?v=1.0\">\n" +
|
||||
"\n" +
|
||||
" <!--[if lt IE 9]>\n" +
|
||||
" <script src=\"http://html5shiv.googlecode.com/svn/trunk/html5.js\"></script>\n" +
|
||||
" <![endif]-->\n" +
|
||||
"</head>\n" +
|
||||
"\n" +
|
||||
"<body>\n" +
|
||||
" <script src=\"js/scripts.js\"></script>\n" +
|
||||
" <p>Check out this weather! <a id=\"" + ATL_ID + "\" href=\"" +
|
||||
ATL_WEATHER_LINK + "\">" + ATL_WEATHER_TEXT + "</a></p>\n" +
|
||||
" <p>I guess it could be colder ... <a id=\"" + GDR_ID + "\" href=\"" +
|
||||
GR_WEATHER_LINK + "\">" + GDR_WEATHER_TEXT + "</a></p>\n" +
|
||||
" <div id=\"put\"><a href=\"httpd://localhost\" /></div>\n" +
|
||||
"</body>\n" +
|
||||
"</html>";
|
||||
|
||||
|
||||
protected FlowFile writeContentToNewFlowFile(final byte[] content, ProcessSession session) {
|
||||
FlowFile ff = session.write(session.create(), new StreamCallback() {
|
||||
@Override
|
||||
public void process(InputStream in, OutputStream out) throws IOException {
|
||||
out.write(content);
|
||||
}
|
||||
});
|
||||
return ff;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,319 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.lang.Exception;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class TestGetHTMLElement extends AbstractHTMLTest {
|
||||
|
||||
private TestRunner testRunner;
|
||||
|
||||
@Before
|
||||
public void init() {
|
||||
testRunner = TestRunners.newTestRunner(GetHTMLElement.class);
|
||||
testRunner.setProperty(GetHTMLElement.URL, "http://localhost");
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_HTML);
|
||||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.HTML_CHARSET, "UTF-8");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoElementFound() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "b"); //Bold element is not present in sample HTML
|
||||
// testRunner.setProperty(GetHTMLElement.APPEND_ELEMENT_VALUE, "");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInvalidSelector() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "InvalidCSSSelectorSyntax");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSingleElementFound() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "head");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleElementFound() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "a");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 3);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testElementFoundWriteToAttribute() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
|
||||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_ATTRIBUTE);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
|
||||
testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
MockFlowFile fff = ffs.get(0);
|
||||
String atValue = fff.getAttribute(GetHTMLElement.HTML_ELEMENT_ATTRIBUTE_NAME);
|
||||
assertTrue(StringUtils.equals(ATL_WEATHER_LINK, atValue));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testElementFoundWriteToContent() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
|
||||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
|
||||
testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(ATL_WEATHER_LINK, data));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidPrependValueToFoundElement() throws Exception {
|
||||
final String PREPEND_VALUE = "TestPrepend";
|
||||
testRunner.setProperty(GetHTMLElement.PREPEND_ELEMENT_VALUE, PREPEND_VALUE);
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
|
||||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
|
||||
testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(PREPEND_VALUE + ATL_WEATHER_LINK, data));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidPrependValueToNotFoundElement() throws Exception {
|
||||
final String PREPEND_VALUE = "TestPrepend";
|
||||
testRunner.setProperty(GetHTMLElement.PREPEND_ELEMENT_VALUE, PREPEND_VALUE);
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "b");
|
||||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidAppendValueToFoundElement() throws Exception {
|
||||
final String APPEND_VALUE = "TestAppend";
|
||||
testRunner.setProperty(GetHTMLElement.APPEND_ELEMENT_VALUE, APPEND_VALUE);
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
|
||||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
|
||||
testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(ATL_WEATHER_LINK + APPEND_VALUE, data));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testValidAppendValueToNotFoundElement() throws Exception {
|
||||
final String APPEND_VALUE = "TestAppend";
|
||||
testRunner.setProperty(GetHTMLElement.APPEND_ELEMENT_VALUE, APPEND_VALUE);
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "b");
|
||||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtractAttributeFromElement() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "meta[name=author]");
|
||||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
|
||||
testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "Content");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(AUTHOR_NAME, data));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtractTextFromElement() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
|
||||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(ATL_WEATHER_TEXT, data));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtractHTMLFromElement() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "#" + GDR_ID);
|
||||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_HTML);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(GDR_WEATHER_TEXT, data));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,223 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class TestModifyHTMLElement extends AbstractHTMLTest {
|
||||
|
||||
private TestRunner testRunner;
|
||||
|
||||
@Before
|
||||
public void init() {
|
||||
testRunner = TestRunners.newTestRunner(ModifyHTMLElement.class);
|
||||
testRunner = TestRunners.newTestRunner(ModifyHTMLElement.class);
|
||||
testRunner.setProperty(ModifyHTMLElement.URL, "http://localhost");
|
||||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_HTML);
|
||||
testRunner.setProperty(ModifyHTMLElement.HTML_CHARSET, "UTF-8");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testModifyText() throws Exception {
|
||||
final String MOD_VALUE = "Newly modified value to replace " + ATL_WEATHER_TEXT;
|
||||
testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
|
||||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_TEXT);
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(ModifyHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
|
||||
//Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want.
|
||||
Document doc = Jsoup.parse(data);
|
||||
Elements eles = doc.select("#" + ATL_ID);
|
||||
Element ele = eles.get(0);
|
||||
|
||||
assertTrue(StringUtils.equals(MOD_VALUE, ele.text()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testModifyHTMLWithExpressionLanguage() throws Exception {
|
||||
|
||||
final String MOD_VALUE = "Newly modified value to replace " + ATL_WEATHER_TEXT;
|
||||
|
||||
testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + ATL_ID);
|
||||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_TEXT);
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, "${\" " + MOD_VALUE + " \":trim()}");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(ModifyHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
|
||||
//Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want.
|
||||
Document doc = Jsoup.parse(data);
|
||||
Elements eles = doc.select("#" + ATL_ID);
|
||||
Element ele = eles.get(0);
|
||||
|
||||
assertNotNull(ele.text());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testModifyHTML() throws Exception {
|
||||
final String MOD_VALUE = "Newly modified HTML to replace " + GDR_WEATHER_TEXT;
|
||||
testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + GDR_ID);
|
||||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML);
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(ModifyHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
|
||||
//Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want.
|
||||
Document doc = Jsoup.parse(data);
|
||||
Elements eles = doc.select("#" + GDR_ID);
|
||||
Element ele = eles.get(0);
|
||||
|
||||
assertTrue(StringUtils.equals(MOD_VALUE, ele.html()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testModifyAttribute() throws Exception {
|
||||
final String MOD_VALUE = "http://localhost/newlink";
|
||||
testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + GDR_ID);
|
||||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_ATTRIBUTE);
|
||||
testRunner.setProperty(ModifyHTMLElement.ATTRIBUTE_KEY, "href");
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(ModifyHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
|
||||
//Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want.
|
||||
Document doc = Jsoup.parse(data);
|
||||
Elements eles = doc.select("#" + GDR_ID);
|
||||
Element ele = eles.get(0);
|
||||
|
||||
assertTrue(StringUtils.equals(MOD_VALUE, ele.attr("href")));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testModifyElementNotFound() throws Exception {
|
||||
final String MOD_VALUE = "http://localhost/newlink";
|
||||
testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "b");
|
||||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML);
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testModifyValueContainsHTMLCharacters() throws Exception {
|
||||
final String MOD_VALUE = "Text that contains > and < characters";
|
||||
testRunner.setProperty(ModifyHTMLElement.CSS_SELECTOR, "#" + GDR_ID);
|
||||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML);
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(ModifyHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
|
||||
//Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want.
|
||||
Document doc = Jsoup.parse(data);
|
||||
Elements eles = doc.select("#" + GDR_ID);
|
||||
Element ele = eles.get(0);
|
||||
|
||||
assertTrue(StringUtils.equals(MOD_VALUE, ele.text()));
|
||||
assertTrue(StringUtils.equals(MOD_VALUE.replace(">", ">").replace("<", "<"), ele.html()));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,137 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.List;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
|
||||
public class TestPutHTMLElement extends AbstractHTMLTest {
|
||||
|
||||
private TestRunner testRunner;
|
||||
|
||||
@Before
|
||||
public void init() {
|
||||
testRunner = TestRunners.newTestRunner(PutHTMLElement.class);
|
||||
testRunner.setProperty(PutHTMLElement.URL, "http://localhost");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAddNewElementToRoot() throws Exception {
|
||||
final String MOD_VALUE = "<p>modified value</p>";
|
||||
testRunner.setProperty(PutHTMLElement.CSS_SELECTOR, "body");
|
||||
testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.PREPEND_ELEMENT);
|
||||
testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(PutHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
|
||||
//Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want.
|
||||
Document doc = Jsoup.parse(data);
|
||||
Elements eles = doc.select("body > p");
|
||||
Element ele = eles.get(0);
|
||||
|
||||
assertTrue(StringUtils.equals(MOD_VALUE.replace("<p>", "").replace("</p>", ""), ele.html()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPrependPElementToDiv() throws Exception {
|
||||
final String MOD_VALUE = "<p>modified value</p>";
|
||||
testRunner.setProperty(PutHTMLElement.CSS_SELECTOR, "#put");
|
||||
testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.PREPEND_ELEMENT);
|
||||
testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(PutHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
|
||||
//Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want.
|
||||
Document doc = Jsoup.parse(data);
|
||||
Elements eles = doc.select("#put");
|
||||
Element ele = eles.get(0);
|
||||
|
||||
assertTrue(StringUtils.equals("<p>modified value</p> \n<a href=\"httpd://localhost\"></a>", ele.html()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAppendPElementToDiv() throws Exception {
|
||||
final String MOD_VALUE = "<p>modified value</p>";
|
||||
testRunner.setProperty(PutHTMLElement.CSS_SELECTOR, "#put");
|
||||
testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.APPEND_ELEMENT);
|
||||
testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(PutHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
|
||||
//Contents will be the entire HTML doc. So lets use Jsoup again just the grab the element we want.
|
||||
Document doc = Jsoup.parse(data);
|
||||
Elements eles = doc.select("#put");
|
||||
Element ele = eles.get(0);
|
||||
|
||||
assertTrue(StringUtils.equals("<a href=\"httpd://localhost\"></a> \n" +
|
||||
"<p>modified value</p>", ele.html()));
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed to the Apache Software Foundation (ASF) under one or more
  contributor license agreements. See the NOTICE file distributed with
  this work for additional information regarding copyright ownership.
  The ASF licenses this file to You under the Apache License, Version 2.0
  (the "License"); you may not use this file except in compliance with
  the License. You may obtain a copy of the License at
  http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Aggregator POM for the HTML bundle; it inherits from the NAR bundles parent. -->
    <parent>
        <groupId>org.apache.nifi</groupId>
        <artifactId>nifi-nar-bundles</artifactId>
        <version>0.4.0-SNAPSHOT</version>
    </parent>

    <artifactId>nifi-html-bundle</artifactId>
    <packaging>pom</packaging>

    <!-- Child modules of this bundle. -->
    <modules>
        <module>nifi-html-processors</module>
        <module>nifi-html-nar</module>
    </modules>

    <!-- Managed dependency entry for the processors artifact. -->
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.apache.nifi</groupId>
                <artifactId>nifi-html-processors</artifactId>
                <type>nar</type>
            </dependency>
        </dependencies>
    </dependencyManagement>

</project>
|
|
@ -42,12 +42,13 @@
|
|||
<module>nifi-language-translation-bundle</module>
|
||||
<module>nifi-mongodb-bundle</module>
|
||||
<module>nifi-flume-bundle</module>
|
||||
<module>nifi-hbase-bundle</module>
|
||||
<module>nifi-hbase-bundle</module>
|
||||
<module>nifi-ambari-bundle</module>
|
||||
<module>nifi-image-bundle</module>
|
||||
<module>nifi-avro-bundle</module>
|
||||
<module>nifi-couchbase-bundle</module>
|
||||
<module>nifi-azure-bundle</module>
|
||||
<module>nifi-html-bundle</module>
|
||||
</modules>
|
||||
<dependencyManagement>
|
||||
<dependencies>
|
||||
|
|
8
pom.xml
8
pom.xml
|
@ -823,6 +823,12 @@
|
|||
<version>0.4.0-SNAPSHOT</version>
|
||||
<type>nar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-html-nar</artifactId>
|
||||
<version>0.4.0-SNAPSHOT</version>
|
||||
<type>nar</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.nifi</groupId>
|
||||
<artifactId>nifi-kite-nar</artifactId>
|
||||
|
@ -1375,4 +1381,4 @@
|
|||
</build>
|
||||
</profile>
|
||||
</profiles>
|
||||
</project>
|
||||
</project>
|
Loading…
Reference in New Issue