mirror of https://github.com/apache/nifi.git
NIFI-1156
This commit is contained in:
parent
c9d59fa819
commit
ee7400ef53
|
@ -1030,4 +1030,30 @@ information can be found here: http://www.adobe.com/devnet/xmp/library/eula-xmp-
|
|||
OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
This product bundles 'Jsoup' which is available under "The MIT license". More
|
||||
information can be found here: http://jsoup.org/license
|
||||
|
||||
The MIT License
|
||||
|
||||
Copyright (c) 2009-2015, Jonathan Hedley <jonathan@hedley.net>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -56,4 +56,18 @@
|
|||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.rat</groupId>
|
||||
<artifactId>apache-rat-plugin</artifactId>
|
||||
<configuration>
|
||||
<excludes combine.children="append">
|
||||
<exclude>src/test/resources/Weather.html</exclude>
|
||||
</excludes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
|
|
|
@ -17,6 +17,9 @@
|
|||
package org.apache.nifi;
|
||||
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.components.ValidationContext;
|
||||
import org.apache.nifi.components.ValidationResult;
|
||||
import org.apache.nifi.components.Validator;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.AbstractProcessor;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
|
@ -26,6 +29,7 @@ import org.apache.nifi.processor.io.InputStreamCallback;
|
|||
import org.apache.nifi.processor.util.StandardValidators;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Selector;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
@ -38,6 +42,25 @@ public abstract class AbstractHTMLProcessor extends AbstractProcessor {
|
|||
protected static final String ELEMENT_DATA = "Data";
|
||||
protected static final String ELEMENT_ATTRIBUTE = "Attribute";
|
||||
|
||||
protected static final Validator CSS_SELECTOR_VALIDATOR = new Validator() {
|
||||
@Override
|
||||
public ValidationResult validate(final String subject, final String value, final ValidationContext context) {
|
||||
if (context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(value)) {
|
||||
return new ValidationResult.Builder().subject(subject).input(value).explanation("Expression Language Present").valid(true).build();
|
||||
}
|
||||
|
||||
String reason = null;
|
||||
try {
|
||||
Document doc = Jsoup.parse("<html></html>");
|
||||
doc.select(value);
|
||||
} catch (final Selector.SelectorParseException e) {
|
||||
reason = "\"" + value + "\" is an invalid CSS selector";
|
||||
}
|
||||
|
||||
return new ValidationResult.Builder().subject(subject).input(value).explanation(reason).valid(reason == null).build();
|
||||
}
|
||||
};
|
||||
|
||||
public static final PropertyDescriptor URL = new PropertyDescriptor
|
||||
.Builder().name("URL")
|
||||
.description("Base URL for the HTML page being parsed.")
|
||||
|
@ -49,16 +72,16 @@ public abstract class AbstractHTMLProcessor extends AbstractProcessor {
|
|||
.Builder().name("CSS Selector")
|
||||
.description("CSS selector syntax string used to extract the desired HTML element(s).")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.addValidator(CSS_SELECTOR_VALIDATOR)
|
||||
.expressionLanguageSupported(true)
|
||||
.build();
|
||||
|
||||
public static final PropertyDescriptor HTML_CHARSET = new PropertyDescriptor
|
||||
.Builder().name("HTML character encoding")
|
||||
.Builder().name("HTML Character Encoding")
|
||||
.description("Character encoding of the input HTML")
|
||||
.defaultValue("UTF-8")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.addValidator(StandardValidators.CHARACTER_SET_VALIDATOR)
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_ORIGINAL = new Relationship.Builder()
|
||||
|
@ -71,11 +94,6 @@ public abstract class AbstractHTMLProcessor extends AbstractProcessor {
|
|||
.description("Successfully parsed HTML element")
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_FAILURE = new Relationship.Builder()
|
||||
.name("failure")
|
||||
.description("Failed to parse HTML content")
|
||||
.build();
|
||||
|
||||
public static final Relationship REL_INVALID_HTML = new Relationship.Builder()
|
||||
.name("invalid html")
|
||||
.description("The input HTML syntax is invalid")
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
package org.apache.nifi;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.components.PropertyDescriptor;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessContext;
|
||||
|
@ -45,8 +46,19 @@ import java.util.HashSet;
|
|||
import java.util.Collections;
|
||||
|
||||
@Tags({"get", "html", "dom", "css", "element"})
|
||||
@CapabilityDescription("Parses HTML input using CSS selector syntax and creates a new flowfile containing the extracted" +
|
||||
" element content for each matching CSS selector.")
|
||||
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
|
||||
@CapabilityDescription("Extracts HTML element values from the incoming flowfile's content using a CSS selector." +
|
||||
" The incoming HTML is first converted into a HTML Document Object Model so that HTML elements may be selected" +
|
||||
" in the similar manner that CSS selectors are used to apply styles to HTML. The resulting HTML DOM is then \"queried\"" +
|
||||
" using the user defined CSS selector string. The result of \"querying\" the HTML DOM may produce 0-N results." +
|
||||
" If no results are found the flowfile will be transferred to the \"element not found\" relationship to indicate" +
|
||||
" so to the end user. If N results are found a new flowfile will be created and emitted for each result. The query result will" +
|
||||
" either be placed in the content of the new flowfile or as an attribute of the new flowfile. By default the result is written to an" +
|
||||
" attribute. This can be controlled by the \"Destination\" property. Resulting query values may also have data" +
|
||||
" prepended or appended to them by setting the value of property \"Prepend Element Value\" or \"Append Element Value\"." +
|
||||
" Prepended and appended values are treated as string values and concatenated to the result retrieved from the" +
|
||||
" HTML DOM query operation. A more thorough reference for the CSS selector syntax can be found at" +
|
||||
" \"http://jsoup.org/apidocs/org/jsoup/select/Selector.html\"")
|
||||
@SeeAlso({ModifyHTMLElement.class, PutHTMLElement.class})
|
||||
@WritesAttributes({@WritesAttribute(attribute="HTMLElement", description="Flowfile attribute where the element result" +
|
||||
" parsed from the HTML using the CSS selector syntax are placed if the destination is a flowfile attribute.")})
|
||||
|
@ -58,7 +70,7 @@ public class GetHTMLElement
|
|||
public static final String DESTINATION_CONTENT = "flowfile-content";
|
||||
|
||||
public static final PropertyDescriptor PREPEND_ELEMENT_VALUE = new PropertyDescriptor
|
||||
.Builder().name("Prepend Element value")
|
||||
.Builder().name("Prepend Element Value")
|
||||
.description("Prepends the specified value to the resulting Element")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
|
@ -66,7 +78,7 @@ public class GetHTMLElement
|
|||
.build();
|
||||
|
||||
public static final PropertyDescriptor APPEND_ELEMENT_VALUE = new PropertyDescriptor
|
||||
.Builder().name("Append Element value")
|
||||
.Builder().name("Append Element Value")
|
||||
.description("Appends the specified value to the resulting Element")
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
|
@ -75,8 +87,9 @@ public class GetHTMLElement
|
|||
|
||||
public static final PropertyDescriptor ATTRIBUTE_KEY = new PropertyDescriptor
|
||||
.Builder().name("Attribute Name")
|
||||
.description(("When getting the value of an element attribute this value is used as the key to determine" +
|
||||
" which attribute on the selected element should be retrieved."))
|
||||
.description(("When getting the value of a HTML element attribute this value is used as the key to determine" +
|
||||
" which attribute on the selected element should be retrieved. This value is used when the \"Output Type\"" +
|
||||
" is set to \"" + ELEMENT_ATTRIBUTE + "\""))
|
||||
.required(false)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(true)
|
||||
|
@ -85,8 +98,7 @@ public class GetHTMLElement
|
|||
|
||||
public static final PropertyDescriptor OUTPUT_TYPE = new PropertyDescriptor.Builder()
|
||||
.name("Output Type")
|
||||
.description("Controls the type of value that is retrieved from the element. " +
|
||||
ELEMENT_HTML + "," + ELEMENT_TEXT + ", " + ELEMENT_ATTRIBUTE + " or " + ELEMENT_DATA)
|
||||
.description("Controls the type of DOM value that is retrieved from the HTML element.")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.allowableValues(ELEMENT_HTML, ELEMENT_TEXT, ELEMENT_ATTRIBUTE, ELEMENT_DATA)
|
||||
|
@ -122,7 +134,7 @@ public class GetHTMLElement
|
|||
final Set<Relationship> relationships = new HashSet<>();
|
||||
relationships.add(REL_ORIGINAL);
|
||||
relationships.add(REL_SUCCESS);
|
||||
relationships.add(REL_FAILURE);
|
||||
relationships.add(REL_INVALID_HTML);
|
||||
relationships.add(REL_NOT_FOUND);
|
||||
this.relationships = Collections.unmodifiableSet(relationships);
|
||||
}
|
||||
|
@ -148,7 +160,7 @@ public class GetHTMLElement
|
|||
|
||||
final Document doc = parseHTMLDocumentFromFlowfile(flowFile, context, session);
|
||||
final Elements eles = doc.select(context.getProperty(CSS_SELECTOR)
|
||||
.evaluateAttributeExpressions().getValue());
|
||||
.evaluateAttributeExpressions(flowFile).getValue());
|
||||
final String prependValue = context.getProperty(PREPEND_ELEMENT_VALUE)
|
||||
.evaluateAttributeExpressions(flowFile).getValue();
|
||||
final String appendValue = context.getProperty(APPEND_ELEMENT_VALUE)
|
||||
|
@ -159,7 +171,7 @@ public class GetHTMLElement
|
|||
session.transfer(flowFile, REL_NOT_FOUND);
|
||||
} else {
|
||||
for (final Element ele : eles) {
|
||||
final FlowFile ff = session.create();
|
||||
final FlowFile ff = session.create(flowFile);
|
||||
|
||||
switch (context.getProperty(DESTINATION).getValue()) {
|
||||
case DESTINATION_ATTRIBUTE:
|
||||
|
@ -171,7 +183,6 @@ public class GetHTMLElement
|
|||
ele,
|
||||
context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions()
|
||||
.getValue()));
|
||||
session.getProvenanceReporter().create(atFlowfile);
|
||||
session.transfer(atFlowfile, REL_SUCCESS);
|
||||
break;
|
||||
case DESTINATION_CONTENT:
|
||||
|
@ -187,12 +198,12 @@ public class GetHTMLElement
|
|||
context.getProperty(ATTRIBUTE_KEY).evaluateAttributeExpressions()
|
||||
.getValue()).getBytes());
|
||||
} catch (Exception ex) {
|
||||
session.transfer(ff, REL_FAILURE);
|
||||
getLogger().error(ex.getMessage());
|
||||
session.transfer(ff, REL_INVALID_HTML);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
session.getProvenanceReporter().create(conFlowfile);
|
||||
session.transfer(conFlowfile, REL_SUCCESS);
|
||||
break;
|
||||
}
|
||||
|
@ -205,7 +216,7 @@ public class GetHTMLElement
|
|||
|
||||
} catch (Exception ex) {
|
||||
getLogger().error(ex.getMessage());
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
session.transfer(flowFile, REL_INVALID_HTML);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.SupportsBatching;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
|
@ -44,7 +46,17 @@ import java.util.HashSet;
|
|||
import java.util.Collections;
|
||||
|
||||
@Tags({"modify", "html", "dom", "css", "element"})
|
||||
@CapabilityDescription("Modifies the value of an existing HTML element in the original input HTML")
|
||||
@SupportsBatching
|
||||
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
|
||||
@CapabilityDescription("Modifies the value of an existing HTML element. The desired element to be modified is located by" +
|
||||
" using CSS selector syntax. The incoming HTML is first converted into a HTML Document Object Model so that HTML elements may be selected" +
|
||||
" in the similar manner that CSS selectors are used to apply styles to HTML. The resulting HTML DOM is then \"queried\"" +
|
||||
" using the user defined CSS selector string to find the element the user desires to modify. If the HTML element is found" +
|
||||
" the element's value is updated in the DOM using the value specified \"Modified Value\" property. All DOM elements" +
|
||||
" that match the CSS selector will be updated. Once all of the DOM elements have been updated the DOM is rendered" +
|
||||
" to HTML and the result replaces the flowfile content with the updated HTML. A more thorough reference for the" +
|
||||
" CSS selector syntax can be found at" +
|
||||
" \"http://jsoup.org/apidocs/org/jsoup/select/Selector.html\"")
|
||||
@SeeAlso({GetHTMLElement.class, PutHTMLElement.class})
|
||||
@WritesAttributes({@WritesAttribute(attribute="NumElementsModified", description="Total number of HTML " +
|
||||
"element modifications made")})
|
||||
|
@ -96,7 +108,6 @@ public class ModifyHTMLElement extends AbstractHTMLProcessor {
|
|||
final Set<Relationship> relationships = new HashSet<Relationship>();
|
||||
relationships.add(REL_ORIGINAL);
|
||||
relationships.add(REL_SUCCESS);
|
||||
relationships.add(REL_FAILURE);
|
||||
relationships.add(REL_INVALID_HTML);
|
||||
relationships.add(REL_NOT_FOUND);
|
||||
this.relationships = Collections.unmodifiableSet(relationships);
|
||||
|
@ -157,7 +168,7 @@ public class ModifyHTMLElement extends AbstractHTMLProcessor {
|
|||
|
||||
} catch (Exception ex) {
|
||||
getLogger().error(ex.getMessage());
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
session.transfer(flowFile, REL_INVALID_HTML);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.SupportsBatching;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.SeeAlso;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
|
@ -42,7 +44,15 @@ import java.util.HashSet;
|
|||
import java.util.Collections;
|
||||
|
||||
@Tags({"put", "html", "dom", "css", "element"})
|
||||
@CapabilityDescription("Creates a new HTML element in the input HTML")
|
||||
@SupportsBatching
|
||||
@InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED)
|
||||
@CapabilityDescription("Places a new HTML element in the existing HTML DOM. The desired position for the new HTML element is specified by" +
|
||||
" using CSS selector syntax. The incoming HTML is first converted into a HTML Document Object Model so that HTML DOM location may be located" +
|
||||
" in a similar manner that CSS selectors are used to apply styles to HTML. The resulting HTML DOM is then \"queried\"" +
|
||||
" using the user defined CSS selector string to find the position where the user desires to add the new HTML element." +
|
||||
" Once the new HTML element is added to the DOM it is rendered to HTML and the result replaces the flowfile" +
|
||||
" content with the updated HTML. A more thorough reference for the CSS selector syntax can be found at" +
|
||||
" \"http://jsoup.org/apidocs/org/jsoup/select/Selector.html\"")
|
||||
@SeeAlso({GetHTMLElement.class, ModifyHTMLElement.class})
|
||||
public class PutHTMLElement extends AbstractHTMLProcessor {
|
||||
|
||||
|
@ -64,7 +74,7 @@ public class PutHTMLElement extends AbstractHTMLProcessor {
|
|||
.name("Put Value")
|
||||
.description("Value used when creating the new Element. Value should be a valid HTML element. " +
|
||||
"The text should be supplied unencoded: characters like '<', '>', etc will be properly HTML " +
|
||||
"encoded in the output.")
|
||||
"encoded in the resulting output.")
|
||||
.required(true)
|
||||
.addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
|
||||
.expressionLanguageSupported(true)
|
||||
|
@ -87,8 +97,8 @@ public class PutHTMLElement extends AbstractHTMLProcessor {
|
|||
final Set<Relationship> relationships = new HashSet<Relationship>();
|
||||
relationships.add(REL_ORIGINAL);
|
||||
relationships.add(REL_SUCCESS);
|
||||
relationships.add(REL_FAILURE);
|
||||
relationships.add(REL_INVALID_HTML);
|
||||
relationships.add(REL_NOT_FOUND);
|
||||
this.relationships = Collections.unmodifiableSet(relationships);
|
||||
}
|
||||
|
||||
|
@ -120,10 +130,10 @@ public class PutHTMLElement extends AbstractHTMLProcessor {
|
|||
for (Element ele : eles) {
|
||||
switch (context.getProperty(PUT_LOCATION_TYPE).getValue()) {
|
||||
case APPEND_ELEMENT:
|
||||
ele.append(context.getProperty(PUT_VALUE).evaluateAttributeExpressions().getValue());
|
||||
ele.append(context.getProperty(PUT_VALUE).evaluateAttributeExpressions(flowFile).getValue());
|
||||
break;
|
||||
case PREPEND_ELEMENT:
|
||||
ele.prepend(context.getProperty(PUT_VALUE).evaluateAttributeExpressions().getValue());
|
||||
ele.prepend(context.getProperty(PUT_VALUE).evaluateAttributeExpressions(flowFile).getValue());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -142,7 +152,7 @@ public class PutHTMLElement extends AbstractHTMLProcessor {
|
|||
|
||||
} catch (Exception ex) {
|
||||
getLogger().error(ex.getMessage());
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
session.transfer(flowFile, REL_INVALID_HTML);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -16,15 +16,7 @@
|
|||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.processor.io.StreamCallback;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
||||
public class AbstractHTMLTest {
|
||||
public abstract class AbstractHTMLTest {
|
||||
|
||||
protected final String ATL_WEATHER_TEXT = "Atlanta Weather";
|
||||
protected final String GDR_WEATHER_TEXT = "<i>Grand Rapids Weather</i>";
|
||||
|
@ -33,42 +25,4 @@ public class AbstractHTMLTest {
|
|||
protected final String AUTHOR_NAME = "Jeremy Dyer";
|
||||
protected final String ATL_ID = "ATL";
|
||||
protected final String GDR_ID = "GDR";
|
||||
|
||||
protected final String HTML = "<!doctype html>\n" +
|
||||
"\n" +
|
||||
"<html lang=\"en\">\n" +
|
||||
"<head>\n" +
|
||||
" <meta charset=\"utf-8\">\n" +
|
||||
"\n" +
|
||||
" <title>NiFi HTML Parsing Demo</title>\n" +
|
||||
" <meta name=\"description\" content=\"NiFi HTML Parsing Demo\">\n" +
|
||||
" <meta name=\"author\" content=\"" + AUTHOR_NAME + "\">\n" +
|
||||
"\n" +
|
||||
" <link rel=\"stylesheet\" href=\"css/styles.css?v=1.0\">\n" +
|
||||
"\n" +
|
||||
" <!--[if lt IE 9]>\n" +
|
||||
" <script src=\"http://html5shiv.googlecode.com/svn/trunk/html5.js\"></script>\n" +
|
||||
" <![endif]-->\n" +
|
||||
"</head>\n" +
|
||||
"\n" +
|
||||
"<body>\n" +
|
||||
" <script src=\"js/scripts.js\"></script>\n" +
|
||||
" <p>Check out this weather! <a id=\"" + ATL_ID + "\" href=\"" +
|
||||
ATL_WEATHER_LINK + "\">" + ATL_WEATHER_TEXT + "</a></p>\n" +
|
||||
" <p>I guess it could be colder ... <a id=\"" + GDR_ID + "\" href=\"" +
|
||||
GR_WEATHER_LINK + "\">" + GDR_WEATHER_TEXT + "</a></p>\n" +
|
||||
" <div id=\"put\"><a href=\"httpd://localhost\" /></div>\n" +
|
||||
"</body>\n" +
|
||||
"</html>";
|
||||
|
||||
|
||||
protected FlowFile writeContentToNewFlowFile(final byte[] content, ProcessSession session) {
|
||||
FlowFile ff = session.write(session.create(), new StreamCallback() {
|
||||
@Override
|
||||
public void process(InputStream in, OutputStream out) throws IOException {
|
||||
out.write(content);
|
||||
}
|
||||
});
|
||||
return ff;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,20 +16,21 @@
|
|||
*/
|
||||
package org.apache.nifi;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.select.Selector;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.Exception;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class TestGetHTMLElement extends AbstractHTMLTest {
|
||||
|
||||
private TestRunner testRunner;
|
||||
|
@ -43,19 +44,26 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(GetHTMLElement.HTML_CHARSET, "UTF-8");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCSSSelectorSyntaxValidator() throws IOException {
|
||||
Document doc = Jsoup.parse(new URL("http://www.google.com"), 5000);
|
||||
try {
|
||||
doc.select("---jeremy");
|
||||
} catch (Selector.SelectorParseException ex) {
|
||||
String mes = ex.getMessage();
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNoElementFound() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "b"); //Bold element is not present in sample HTML
|
||||
// testRunner.setProperty(GetHTMLElement.APPEND_ELEMENT_VALUE, "");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1);
|
||||
}
|
||||
|
||||
|
@ -63,14 +71,11 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
public void testInvalidSelector() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "InvalidCSSSelectorSyntax");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1);
|
||||
}
|
||||
|
||||
|
@ -78,14 +83,11 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
public void testSingleElementFound() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "head");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
}
|
||||
|
@ -94,14 +96,11 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
public void testMultipleElementFound() throws Exception {
|
||||
testRunner.setProperty(GetHTMLElement.CSS_SELECTOR, "a");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 3);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
}
|
||||
|
@ -113,22 +112,16 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
|
||||
testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
MockFlowFile fff = ffs.get(0);
|
||||
String atValue = fff.getAttribute(GetHTMLElement.HTML_ELEMENT_ATTRIBUTE_NAME);
|
||||
assertTrue(StringUtils.equals(ATL_WEATHER_LINK, atValue));
|
||||
ffs.get(0).assertAttributeEquals(GetHTMLElement.HTML_ELEMENT_ATTRIBUTE_NAME, ATL_WEATHER_LINK);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -138,21 +131,16 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
|
||||
testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(ATL_WEATHER_LINK, data));
|
||||
ffs.get(0).assertContentEquals(ATL_WEATHER_LINK);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -164,21 +152,16 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
|
||||
testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(PREPEND_VALUE + ATL_WEATHER_LINK, data));
|
||||
ffs.get(0).assertContentEquals(PREPEND_VALUE + ATL_WEATHER_LINK);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -189,14 +172,11 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1);
|
||||
}
|
||||
|
@ -210,21 +190,16 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
|
||||
testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "href");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(ATL_WEATHER_LINK + APPEND_VALUE, data));
|
||||
ffs.get(0).assertContentEquals(ATL_WEATHER_LINK + APPEND_VALUE);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -235,14 +210,11 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 1);
|
||||
}
|
||||
|
@ -254,21 +226,16 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_ATTRIBUTE);
|
||||
testRunner.setProperty(GetHTMLElement.ATTRIBUTE_KEY, "Content");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(AUTHOR_NAME, data));
|
||||
ffs.get(0).assertContentEquals(AUTHOR_NAME);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -277,21 +244,16 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_TEXT);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(ATL_WEATHER_TEXT, data));
|
||||
ffs.get(0).assertContentEquals(ATL_WEATHER_TEXT);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -300,20 +262,15 @@ public class TestGetHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(GetHTMLElement.DESTINATION, GetHTMLElement.DESTINATION_CONTENT);
|
||||
testRunner.setProperty(GetHTMLElement.OUTPUT_TYPE, GetHTMLElement.ELEMENT_HTML);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(GetHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
List<MockFlowFile> ffs = testRunner.getFlowFilesForRelationship(GetHTMLElement.REL_SUCCESS);
|
||||
assertTrue(ffs.size() == 1);
|
||||
String data = new String(testRunner.getContentAsByteArray(ffs.get(0)));
|
||||
assertTrue(StringUtils.equals(GDR_WEATHER_TEXT, data));
|
||||
ffs.get(0).assertContentEquals(GDR_WEATHER_TEXT);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
package org.apache.nifi;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
|
@ -29,6 +27,7 @@ import org.jsoup.select.Elements;
|
|||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
|
@ -54,14 +53,11 @@ public class TestModifyHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_TEXT);
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
|
@ -86,14 +82,11 @@ public class TestModifyHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_TEXT);
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, "${\" " + MOD_VALUE + " \":trim()}");
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
|
@ -116,14 +109,11 @@ public class TestModifyHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML);
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
|
@ -147,14 +137,11 @@ public class TestModifyHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(ModifyHTMLElement.ATTRIBUTE_KEY, "href");
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
|
@ -177,14 +164,11 @@ public class TestModifyHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML);
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 1);
|
||||
}
|
||||
|
@ -196,14 +180,11 @@ public class TestModifyHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(ModifyHTMLElement.OUTPUT_TYPE, ModifyHTMLElement.ELEMENT_HTML);
|
||||
testRunner.setProperty(ModifyHTMLElement.MODIFIED_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(ModifyHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
package org.apache.nifi;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.nifi.flowfile.FlowFile;
|
||||
import org.apache.nifi.processor.ProcessSession;
|
||||
import org.apache.nifi.util.MockFlowFile;
|
||||
import org.apache.nifi.util.TestRunner;
|
||||
import org.apache.nifi.util.TestRunners;
|
||||
|
@ -29,6 +27,7 @@ import org.jsoup.select.Elements;
|
|||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
|
@ -50,14 +49,11 @@ public class TestPutHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.PREPEND_ELEMENT);
|
||||
testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
|
@ -80,14 +76,11 @@ public class TestPutHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.PREPEND_ELEMENT);
|
||||
testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
|
@ -110,14 +103,11 @@ public class TestPutHTMLElement extends AbstractHTMLTest {
|
|||
testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.APPEND_ELEMENT);
|
||||
testRunner.setProperty(PutHTMLElement.PUT_VALUE, MOD_VALUE);
|
||||
|
||||
ProcessSession session = testRunner.getProcessSessionFactory().createSession();
|
||||
FlowFile ff = writeContentToNewFlowFile(HTML.getBytes(), session);
|
||||
|
||||
testRunner.enqueue(ff);
|
||||
testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
|
||||
testRunner.run();
|
||||
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_FAILURE, 0);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_INVALID_HTML, 0);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1);
|
||||
testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0);
|
||||
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
<!doctype html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>NiFi HTML Parsing Demo</title>
|
||||
<meta charset="utf-8">
|
||||
<meta name="description" content="NiFi HTML Parsing Demo">
|
||||
<meta name="author" content="Jeremy Dyer">
|
||||
<link rel="stylesheet" href="css/styles.css?v=1.0">
|
||||
|
||||
<!--[if lt IE 9]>
|
||||
<script src="http://html5shiv.googlecode.com/svn/trunk/html5.js"></script>
|
||||
<![endif]-->
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<script src="js/scripts.js"></script>
|
||||
<p>Check out this weather!
|
||||
<a id="ATL" href="http://w1.weather.gov/obhistory/KPDK.html">Atlanta Weather</a>
|
||||
</p>
|
||||
<p>I guess it could be colder ...
|
||||
<a id="GDR" href="http://w1.weather.gov/obhistory/KGRR.html"><i>Grand Rapids Weather</i></a>
|
||||
</p>
|
||||
<div id="put"><a href="httpd://localhost" /></div>
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in New Issue