Merge branch 'NIFI-USER-GUIDE' into develop
|
@ -41,6 +41,7 @@
|
||||||
<div class="header">Documents</div>
|
<div class="header">Documents</div>
|
||||||
<div class="component-links">
|
<div class="component-links">
|
||||||
<ul>
|
<ul>
|
||||||
|
<li class="component-item"><a class="component-link user-guide" href="user-guide/nifi-user-guide.html" target="component-usage">User Guide</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -256,6 +256,11 @@ $(document).ready(function () {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// listen for clicks on the rest api and user guide
|
||||||
|
$('a.rest-api a.user-guide').on('click', function() {
|
||||||
|
selectComponent($(this).text());
|
||||||
|
});
|
||||||
|
|
||||||
// get the initial selection
|
// get the initial selection
|
||||||
var initialComponentLink = $('a.component-link:first');
|
var initialComponentLink = $('a.component-link:first');
|
||||||
var initialSelection = $('#initial-selection').text();
|
var initialSelection = $('#initial-selection').text();
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||||
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
<parent>
|
||||||
|
<groupId>org.apache.nifi</groupId>
|
||||||
|
<artifactId>nifi-parent</artifactId>
|
||||||
|
<version>0.0.1-SNAPSHOT</version>
|
||||||
|
</parent>
|
||||||
|
|
||||||
|
<artifactId>nifi-docs</artifactId>
|
||||||
|
|
||||||
|
<name>nifi-docs</name>
|
||||||
|
|
||||||
|
<build>
|
||||||
|
<plugins>
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.asciidoctor</groupId>
|
||||||
|
<artifactId>asciidoctor-maven-plugin</artifactId>
|
||||||
|
<version>1.5.0</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<id>output-html</id>
|
||||||
|
<phase>generate-resources</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>process-asciidoc</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
<configuration>
|
||||||
|
<backend>html</backend>
|
||||||
|
<attributes>
|
||||||
|
<skipFrontMatter>true</skipFrontMatter>
|
||||||
|
</attributes>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
|
||||||
|
<!-- This plugin is used to insert the Apache License into the output HTML because
|
||||||
|
AsciiDoc doesn't appear to provide a mechanism for doing this. -->
|
||||||
|
<plugin>
|
||||||
|
<groupId>com.google.code.maven-replacer-plugin</groupId>
|
||||||
|
<artifactId>replacer</artifactId>
|
||||||
|
<version>1.5.3</version>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>prepare-package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>replace</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
<configuration>
|
||||||
|
<file>target/generated-docs/nifi-user-guide.html</file>
|
||||||
|
<regex>true</regex>
|
||||||
|
<regexFlags>
|
||||||
|
<regexFlag>DOTALL</regexFlag>
|
||||||
|
<regexFlag>MULTILINE</regexFlag>
|
||||||
|
</regexFlags>
|
||||||
|
<token>^(.*)$</token>
|
||||||
|
<value>
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
$1
|
||||||
|
</value>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
</plugins>
|
||||||
|
</build>
|
||||||
|
|
||||||
|
<dependencies>
|
||||||
|
</dependencies>
|
||||||
|
</project>
|
After Width: | Height: | Size: 22 KiB |
After Width: | Height: | Size: 31 KiB |
After Width: | Height: | Size: 1.9 KiB |
After Width: | Height: | Size: 6.3 KiB |
After Width: | Height: | Size: 8.8 KiB |
After Width: | Height: | Size: 15 KiB |
After Width: | Height: | Size: 9.2 KiB |
After Width: | Height: | Size: 14 KiB |
After Width: | Height: | Size: 17 KiB |
After Width: | Height: | Size: 1.4 KiB |
After Width: | Height: | Size: 1.5 KiB |
After Width: | Height: | Size: 670 B |
After Width: | Height: | Size: 764 B |
After Width: | Height: | Size: 493 B |
After Width: | Height: | Size: 667 B |
After Width: | Height: | Size: 1.2 KiB |
After Width: | Height: | Size: 550 B |
After Width: | Height: | Size: 1.8 KiB |
After Width: | Height: | Size: 838 B |
After Width: | Height: | Size: 137 B |
After Width: | Height: | Size: 1.6 KiB |
After Width: | Height: | Size: 1.4 KiB |
After Width: | Height: | Size: 1.4 KiB |
After Width: | Height: | Size: 674 B |
After Width: | Height: | Size: 538 B |
After Width: | Height: | Size: 133 B |
After Width: | Height: | Size: 402 B |
After Width: | Height: | Size: 970 B |
After Width: | Height: | Size: 1.3 KiB |
After Width: | Height: | Size: 1.2 KiB |
After Width: | Height: | Size: 6.4 KiB |
After Width: | Height: | Size: 3.1 KiB |
After Width: | Height: | Size: 8.0 KiB |
After Width: | Height: | Size: 53 KiB |
After Width: | Height: | Size: 84 KiB |
After Width: | Height: | Size: 71 KiB |
After Width: | Height: | Size: 36 KiB |
After Width: | Height: | Size: 9.7 KiB |
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 21 KiB |
After Width: | Height: | Size: 56 KiB |
After Width: | Height: | Size: 12 KiB |
After Width: | Height: | Size: 16 KiB |
After Width: | Height: | Size: 56 KiB |
After Width: | Height: | Size: 7.9 KiB |
|
@ -0,0 +1,782 @@
|
||||||
|
//
|
||||||
|
// Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
// contributor license agreements. See the NOTICE file distributed with
|
||||||
|
// this work for additional information regarding copyright ownership.
|
||||||
|
// The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
// (the "License"); you may not use this file except in compliance with
|
||||||
|
// the License. You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
NiFi User Guide (Draft - Preview Version)
|
||||||
|
=========================================
|
||||||
|
Apache_NiFi_Team
|
||||||
|
:toc:
|
||||||
|
:icons:
|
||||||
|
|
||||||
|
|
||||||
|
[template="glossary", id="terminology"]
|
||||||
|
Terminology
|
||||||
|
-----------
|
||||||
|
*DataFlow Manager*: A DataFlow Manager (DFM) is a NiFi user who has permissions to add, remove, and modify components of a NiFi dataflow.
|
||||||
|
|
||||||
|
*FlowFile*: The FlowFile represents a single piece of data in NiFi. A FlowFile is made up of two components:
|
||||||
|
FlowFile Attributes and FlowFile Content.
|
||||||
|
Content is the data that is represented by the FlowFile. Attributes are key-value pairs that provide information or
|
||||||
|
context about the data.
|
||||||
|
All FlowFiles have the following Standard Attributes:
|
||||||
|
|
||||||
|
- *uuid*: A unique identifier for the FlowFile
|
||||||
|
- *filename*: A human-readable filename that may be used when storing the data to disk or in an external service
|
||||||
|
- *path*: A hierarchically structured value that can be used when storing data to disk or an external service so that the data is not stored in a single directory
|
||||||
|
|
||||||
|
*Processor*: The Processor is the NiFi component that is used to listen for incoming data; pull data from external sources;
|
||||||
|
publish data to external sources; and route, transform, or extract information from FlowFiles.
|
||||||
|
|
||||||
|
*Relationship*: Each Processor has zero or more Relationships defined for it. These Relationships are named to indicate the result of processing a FlowFile.
|
||||||
|
After a Processor has finished processing a FlowFile, it will route (or “transfer”) the FlowFile to one of the Relationships.
|
||||||
|
A DataFlow Manager is then able to connect each of these Relationships to other components in order to specify where the FlowFile should
|
||||||
|
go next under each potential processing result.
|
||||||
|
|
||||||
|
*Connection*: A DataFlow Manager creates an automated dataflow by dragging components from the Components part of the NiFi toolbar to the canvas
|
||||||
|
and then connecting the components together via Connections. Each connection consists of one or more Relationships.
|
||||||
|
For each Connection that is drawn, a DataFlow Manager can determine which Relationships should be used for the Connection.
|
||||||
|
This allows data to be routed in different ways based on its processing outcome. Each connection houses a FlowFile Queue.
|
||||||
|
When a FlowFile is transferred to a particular Relationship, it is added to the queue belonging to the associated Connection.
|
||||||
|
|
||||||
|
*Funnel*: A funnel is a NiFi component that is used to combine the data from several Connections into a single Connection.
|
||||||
|
|
||||||
|
*Process Group*: When a dataflow becomes complex, it often is beneficial to reason about the dataflow at a higher, more abstract level.
|
||||||
|
NiFi allows multiple components, such as Processors, to be grouped together into a Process Group.
|
||||||
|
The NiFi User Interface then makes it easy for a DataFlow Manager to connect together multiple Process Groups into a logical dataflow,
|
||||||
|
as well as allowing the DataFlow Manager to enter a Process Group in order to see and manipulate the components within the Process Group.
|
||||||
|
|
||||||
|
*Port*: Dataflows that are constructed using one or more Process Groups need a way to connect a Process Group to other dataflow components.
|
||||||
|
This is achieved by using Ports. A DataFlow Manager can add any number of Input Ports and Output Ports to a Process Group and name these ports appropriately.
|
||||||
|
|
||||||
|
*Remote Process Group*: Just as data is transferred into and out of a Process Group, it is sometimes necessary to transfer data from one instance of NiFi to another.
|
||||||
|
While NiFi provides many different mechanisms for transferring data from one system to another, Remote Process Groups are often the easiest way to accomplish
|
||||||
|
this if transferring data to another instance of NiFi.
|
||||||
|
|
||||||
|
*Bulletin*: The NiFi User Interface provides a significant amount of monitoring and feedback about the current status of the application.
|
||||||
|
In addition to rolling statistics and the current status that are provided for each component, components are able to report Bulletins.
|
||||||
|
Whenever a component reports a Bulletin, an icon is displayed on that component (or on the Status bar near the top of the page, for System-Level Bulletins).
|
||||||
|
Using the mouse to hover over that icon will provide a tool-tip that shows the time and severity (Debug, Info, Warning, Error) of the bulletin,
|
||||||
|
as well as the message of the Bulletin.
|
||||||
|
Bulletins from all components can also be viewed and filtered in the Bulletins Page, available in the Management Toolbar.
|
||||||
|
|
||||||
|
*Template*: Often times, a dataflow is comprised of many sub-flows that could be reused. NiFi allows DataFlow Managers to select a part of the dataflow
|
||||||
|
(or the entire dataflow) and create a Template. This Template is given a name and can then be dragged onto the canvas just like the other components.
|
||||||
|
As a result, several components may be combined together to make a larger building block from which to create a dataflow.
|
||||||
|
These templates can also be exported as XML and imported into another NiFi instance, allowing these building blocks to be shared.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
NiFi User Interface
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
The NiFi User Interface (UI) provides mechanisms for creating automated dataflows, as well as visualizing,
|
||||||
|
editing, monitoring, and administering those dataflows. The UI can be broken down into several different segments,
|
||||||
|
each responsible for different functionality of the application. We will begin by looking at screenshots of the
|
||||||
|
application and labeling the different segments of the UI. We will provide a brief explanation of the purpose of each segment.
|
||||||
|
Then, in the following sections of this document, we will discuss each of those segments in greater detail.
|
||||||
|
|
||||||
|
When the application is started, by default, the user is able to navigate to the User Interface by going to
|
||||||
|
`http://<hostname>:8080/nifi` in a web browser. There are no permissions configured, by default, so anyone is
|
||||||
|
able to view and modify the dataflow. For information on securing the system, see the Systems Administrator guide.
|
||||||
|
|
||||||
|
When a DataFlow Manager navigates to the UI for the first time, a blank canvas is provided on which a dataflow can be built:
|
||||||
|
|
||||||
|
image::new-flow.png["Empty Flow"]
|
||||||
|
|
||||||
|
Along the top of the screen is a toolbar that contains several of these segments.
|
||||||
|
To the left is the Components Toolbar. This toolbar consists of the different components that can be dragged onto the canvas.
|
||||||
|
|
||||||
|
Next to the Components Toolbar is the Actions Toolbar. This toolbar consists of buttons to manipulate the existing
|
||||||
|
components on the graph. Following the Actions Toolbar is the Search Toolbar. This toolbar consists of a single
|
||||||
|
Search field that allows users to easily find components on the graph. Users are able to search by component name,
|
||||||
|
type, identifier, and configuration properties.
|
||||||
|
|
||||||
|
Finally, the Management Toolbar sits to the right-hand side of the screen. This toolbar consists of buttons that are
|
||||||
|
of use to DataFlow Managers to manage the flow as well as administrators who may use this section to manage user access
|
||||||
|
and configure system properties, such as how many system resources should be provided to the application.
|
||||||
|
|
||||||
|
image::nifi-toolbar-components.png["NiFi Components Toolbar"]
|
||||||
|
|
||||||
|
Next, we have segments that provide capabilities to easily navigate around the graph. On the left-hand side is a toolbar that
|
||||||
|
provides the ability to pan around the graph and zoom in and out. On the right-hand side is a “Birds-Eye View” of the dataflow.
|
||||||
|
This provides a high-level view of the dataflow and allows the user to quickly and easily pan across large portions of the dataflow.
|
||||||
|
Along the top of the screen is a trail of breadcrumbs. As users navigate into and out of Process Groups, the breadcrumbs show
|
||||||
|
the depth in the flow and each Process Group that was entered to reach this depth. Each of the Process Groups listed in the breadcrumbs
|
||||||
|
is a link that will take you back up to that level in the flow.
|
||||||
|
|
||||||
|
image::nifi-navigation.png["NiFi Navigation"]
|
||||||
|
|
||||||
|
[[status_bar]]
|
||||||
|
Below the breadcrumbs lives the Status bar. The Status bar provides information about how many Processors exist in the graph in
|
||||||
|
each state (Stopped, Running, Invalid, Disabled), how many Remote Process Groups exist on the graph in each state
|
||||||
|
(Transmitting, Not Transmitting), the number of threads that are currently active in the flow, the amount of data that currently
|
||||||
|
exists in the flow, and the timestamp at which all of this information was last refreshed. If there are any System-Level bulletins,
|
||||||
|
these are shown in the Status bar as well. Additionally, if the instance of NiFi is clustered, the Status bar shows how many nodes
|
||||||
|
are in the cluster and how many are currently connected.
|
||||||
|
|
||||||
|
image::status-bar.png["NiFi Status Bar"]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Building a DataFlow
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
A DataFlow Manager (DFM) is able to build an automated dataflow using the NiFi User Interface (UI). This is accomplished
|
||||||
|
by dragging components from the toolbar to the canvas, configuring the components to meet specific needs, and connecting
|
||||||
|
the components together.
|
||||||
|
|
||||||
|
|
||||||
|
=== Adding Components to the Canvas
|
||||||
|
|
||||||
|
In the User Interface section above, we outlined the different segments of the UI and pointed out a Components Toolbar.
|
||||||
|
Here, we will look at each of the Components in that toolbar:
|
||||||
|
|
||||||
|
image::components.png["Components"]
|
||||||
|
|
||||||
|
image:iconProcessor.png["Processor", width=32]
|
||||||
|
*Processor*: The Processor is the most commonly used component, as it is responsible for data ingress, egress, routing, and
|
||||||
|
manipulating. There are many different types of Processors. In fact, this is a very common Extension Point in NiFi,
|
||||||
|
meaning that many vendors may implement their own Processors to perform whatever functions are necessary for their use case.
|
||||||
|
When a Processor is dragged onto the graph, the user is presented with a dialog to choose which type of Processor to use:
|
||||||
|
|
||||||
|
image::add-processor.png["Add Processor Dialog"]
|
||||||
|
|
||||||
|
In the top-right corner, the user is able to filter the list based on the Processor Type or the Tags associated with a Processor.
|
||||||
|
Processor developers have the ability to add Tags to their Processors. These tags are used in this dialog for filtering and are
|
||||||
|
displayed on the left-hand side in a Tag Cloud. The more Processors that exist with a particular Tag, the larger the Tag appears
|
||||||
|
in the Tag Cloud. Clicking a Tag in the Cloud will filter the available Processors to only those that contain that Tag. If multiple
|
||||||
|
Tags are selected, only those Processors that contain all of those Tags are shown. For example, if we want to show only those
|
||||||
|
Processors that allow us to ingest data via HTTP, we can select both the `http` Tag and the `ingest` Tag:
|
||||||
|
|
||||||
|
image::add-processor-with-tag-cloud.png["Add Processor with Tag Cloud"]
|
||||||
|
|
||||||
|
Clicking the `Add` button or double-clicking on a Processor Type will add the selected Processor to the canvas at the
|
||||||
|
location that it was dropped.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
image:iconInputPort.png["Input Port", width=32]
|
||||||
|
*Input Port*: Input Ports provide a mechanism for transferring data into a Process Group. When an Input Port is dragged
|
||||||
|
onto the canvas, the DFM is prompted to name the Port. All Ports within a Process Group must have unique names.
|
||||||
|
|
||||||
|
All components exist only within a Process Group. When a user navigates to the NiFi page, the user is placed in the
|
||||||
|
Root Process Group. If the Input Port is dragged onto the Root Process Group, the Input Port provides a mechanism
|
||||||
|
to receive data from remote instances of NiFi. In this case, the Input Port can be configured to restrict access to
|
||||||
|
appropriate users.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
image:iconOutputPort.png["Output Port", width=32]
|
||||||
|
*Output Port*: Output Ports provide a mechanism for transferring data from a Process Group back to a destination outside
|
||||||
|
of the Process Group. When an Output Port is dragged onto the canvas, the DFM is prompted to name the Port. All Ports
|
||||||
|
within a Process Group must have unique names.
|
||||||
|
|
||||||
|
If the Output Port is dragged onto the Root Process Group, the Output Port provides a mechanism for sending data to
|
||||||
|
remote instances of NiFi. In this case, the Port acts as a queue. As remote instances of NiFi pull data from the port,
|
||||||
|
that data is removed from the queues of the incoming Connections.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
image:iconProcessGroup.png["Process Group", width=32]
|
||||||
|
*Process Group*: Process Groups can be used to logically group a set of components so that the dataflow is easier to understand
|
||||||
|
and maintain. When a Process Group is dragged onto the canvas, the DFM is prompted to name the Process Group. All Process
|
||||||
|
Groups within the same parent group must have unique names.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
image:iconRemoteProcessGroup.png["Remote Process Group", width=32]
|
||||||
|
*Remote Process Group*: Remote Process Groups appear and behave similar to Process Groups. However, the Remote Process Group (RPG)
|
||||||
|
references a remote instance of NiFi. When an RPG is dragged onto the canvas, rather than being prompted for a name, the DFM
|
||||||
|
is prompted for the URL of the remote NiFi instance. If the remote NiFi is a clustered instance, the URL that should be used
|
||||||
|
is the URL of the remote instance's NiFi Cluster Manager (NCM). When data is transferred to a clustered instance of NiFi
|
||||||
|
via an RPG, the RPG will first connect to the remote instance's NCM to determine which nodes are in the cluster and
|
||||||
|
how busy each node is. This information is then used to load balance the data that is pushed to each node. The remote NCM is
|
||||||
|
then interrogated periodically to ensure that any nodes that are dropped from the cluster are no longer sent to, any new nodes
|
||||||
|
will be added to the list of nodes, and to recalculate the load balancing based on each node's load.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
image:iconFunnel.png["Funnel", width=32]
|
||||||
|
*Funnel*: Funnels are used to combine the data from many Connections into a single Connection. This has two advantages.
|
||||||
|
First, if many Connections are created with the same destination, the canvas can become cluttered if those Connections
|
||||||
|
have to span a large space. By funneling these Connections into a single Connection, that single Connection can then be
|
||||||
|
drawn to span that large space instead. Secondly, Connections can be configured with FlowFile Prioritizers. Data from
|
||||||
|
several Connections can be funneled into a single Connection, providing the ability to Prioritize all of the data on that
|
||||||
|
one Connection, rather than prioritizing the data on each Connection independently.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
image:iconTemplate.png["Template", width=32]
|
||||||
|
*Template*: Templates can be created by DataFlow Managers from sections of the flow, or they can be imported from other
|
||||||
|
dataflows. These Templates provide larger building blocks for creating a complex flow quickly. When the Template is
|
||||||
|
dragged onto the canvas, the DFM is provided a dialog to choose which Template to add to the canvas:
|
||||||
|
|
||||||
|
image::instantiate-template.png["Instantiate Template Dialog"]
|
||||||
|
|
||||||
|
Clicking the drop-down box shows all available Templates. Any Template that was created with a description will show an
|
||||||
|
icon indicating that there is more information. Hovering over the icon with the mouse will show this description:
|
||||||
|
|
||||||
|
image::instantiate-template-description.png["Instantiate Template Dialog"]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
image:iconLabel.png["Label"]
|
||||||
|
*Label*: Labels are used to provide documentation to parts of a dataflow. When a Label is dropped onto the canvas,
|
||||||
|
it is created with a default size. The Label can then be resized by dragging the handle in the bottom-right corner.
|
||||||
|
The Label has no text when initially created. The text of the Label can be added by right-clicking on the Label and
|
||||||
|
choosing `Configure...`
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
=== Configuring a Processor
|
||||||
|
|
||||||
|
Once a Processor has been dragged onto the Canvas, it is ready to configure. This is done by right-clicking on the
|
||||||
|
Processor and clicking the `Configure...` option from the context menu. The configuration dialog is opened with four
|
||||||
|
different tabs, each of which is discussed below. Once you have finished configuring the Processor, you can apply
|
||||||
|
the changes by clicking the `Apply` button or cancel all changes by clicking the `Cancel` button.
|
||||||
|
|
||||||
|
Note that after a Processor has been started, the context menu shown for the Processor no longer has a `Configure...`
|
||||||
|
option but rather has a `View Configuration` option. Processor configuration cannot be changed while the Processor is
|
||||||
|
running. You must first stop the Processor and wait for all of its active tasks to complete before configuring
|
||||||
|
the Processor again.
|
||||||
|
|
||||||
|
|
||||||
|
==== Settings Tab
|
||||||
|
|
||||||
|
The first tab in the Processor Configuration dialog is the Settings tab:
|
||||||
|
|
||||||
|
image::settings-tab.png["Settings Tab"]
|
||||||
|
|
||||||
|
This tab contains several different configuration items. First, it allows the DFM to change the name of the Processor.
|
||||||
|
The name of a Processor by default is the same as the Processor type. Next to the Processor Name is a control for
|
||||||
|
determining whether or not the Processor is Enabled. When a Processor is added to the graph, it is enabled. If the
|
||||||
|
Processor is disabled, it cannot be started. This is used to indicate that even when a group of Processors are started,
|
||||||
|
such as when a DFM starts an entire Process Group, this Processor should be excluded.
|
||||||
|
|
||||||
|
Below the Name configuration, the Processor's unique identifier is displayed along with the Processor's type. These
|
||||||
|
values cannot be modified.
|
||||||
|
|
||||||
|
Next are two dialogues for configuring `Penalty duration' and `Yield duration'. During the normal course of processing a
|
||||||
|
piece of data (a FlowFile), an event may occur that indicates that the data cannot be processed at this time but the
|
||||||
|
data may be processable at a later time. When this occurs, the Processor may choose to Penalize the FlowFile. This will
|
||||||
|
prevent the FlowFile from being Processed for some period of time. For example, if the Processor is to push the data
|
||||||
|
to a remote service, but the remote service already has a file with the same name as the filename that the Processor
|
||||||
|
is specifying, the Processor may penalize the FlowFile. The `Penalty duration' allows the DFM to specify
|
||||||
|
how long the FlowFile should be penalized. The default value is 30 seconds.
|
||||||
|
|
||||||
|
Similarly, the Processor may determine that some situation exists such that the Processor can no longer make any progress,
|
||||||
|
regardless of the data that it is processing. For example, if a Processor is to push data to a remote service and that
|
||||||
|
service is not responding, the Processor cannot make any progress. As a result, the Processor should `yield,' which will
|
||||||
|
prevent the Processor from being scheduled to run for some period of time. That period of time is specified by setting
|
||||||
|
the `Yield duration.' The default value is 1 second.
|
||||||
|
|
||||||
|
The last configurable option on the left-hand side of the Settings tab is the Bulletin level. Whenever the Processor writes
|
||||||
|
to its log, the Processor also will generate a Bulletin. This setting indicates the lowest level of Bulletin that should be
|
||||||
|
shown in the User Interface. By default, the Bulletin level is set to WARN.
|
||||||
|
|
||||||
|
The right-hand side of the dialogue provides an `Auto-terminate relationships' section. Each of the Relationships that is
|
||||||
|
defined by the Processor is listed here, along with its description. In order for a Processor to be considered valid and
|
||||||
|
able to run, each Relationship defined by the Processor must be either connected to a downstream component or auto-terminated.
|
||||||
|
If a Relationship is auto-terminated, any FlowFile that is routed to that Relationship will be removed from the flow and
|
||||||
|
its processing considered complete. Any Relationship that is already connected to a downstream component cannot be auto-terminated.
|
||||||
|
The Relationship must first be removed from any Connection that uses it. Additionally, for any Relationship that is selected to be
|
||||||
|
auto-terminated, the auto-termination status will be cleared if the Relationship is added to a Connection.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
==== Scheduling Tab
|
||||||
|
|
||||||
|
The second tab in the Processor Configuration dialog is the Scheduling Tab:
|
||||||
|
|
||||||
|
image::scheduling-tab.png["Scheduling Tab"]
|
||||||
|
|
||||||
|
The first configuration option is the Scheduling Strategy. There are three options for scheduling components:
|
||||||
|
|
||||||
|
- *Timer driven*: This is the default mode. The Processor will be scheduled to run on a regular interval. The interval
|
||||||
|
at which the Processor is run is defined by the `Run schedule' option (see below).
|
||||||
|
- *Event driven*: When this mode is selected, the Processor will be triggered to run by FlowFiles entering the Connections
|
||||||
|
that have this Processor as their destination. This mode is not supported by all Processors. When this mode is
|
||||||
|
selected, the `Run schedule' option is not configurable, as the Processor is not triggered to run periodically but
|
||||||
|
rather is triggered to run as the result of an event. Additionally, this is the only mode for which the `Concurrent tasks'
|
||||||
|
option can be set to 0. In this case, the number of threads is limited only by the size of the Event-Driven Thread Pool that
|
||||||
|
the administrator has configured.
|
||||||
|
- *CRON driven*: When using the CRON driven scheduling mode, the Processor is scheduled to run periodically, similarly to the
|
||||||
|
Timer driven scheduling mode. However, the CRON driven mode provides significantly more flexibility at the expense of
|
||||||
|
increasing the complexity of the configuration. This value is made up of 6 fields, optionally followed by a Year field, each separated by a space. These
|
||||||
|
fields represent the following fields:
|
||||||
|
+
|
||||||
|
. Seconds
|
||||||
|
. Minutes
|
||||||
|
. Hours
|
||||||
|
. Day of Month
|
||||||
|
. Month
|
||||||
|
. Day of Week
|
||||||
|
. Year
|
||||||
|
+
|
||||||
|
The value for each of these fields should be a number, range, or increment.
|
||||||
|
Range here refers to a syntax of <number>-<number>.
|
||||||
|
For example, the Seconds field could be set to 0-30, meaning that the Processor should only be scheduled if the time is 0 to 30 seconds
|
||||||
|
after the minute. Additionally, a value of `*` indicates that all values are valid for this field. Multiple values can also
|
||||||
|
be entered using a `,` as a separator: `0,5,10,15,30`.
|
||||||
|
An increment is written as <start value>/<increment>. For example, setting a value of `0/10` for the Seconds field means that valid
|
||||||
|
values are 0, 10, 20, 30, 40, and 50. However, if we change this to `5/10`, valid values become 5, 15, 25, 35, 45, and 55.
|
||||||
|
+
|
||||||
|
For the Month field, valid values are 1 (January) through 12 (December).
|
||||||
|
+
|
||||||
|
For the Day of Week field, valid values are 1 (Sunday) through 7 (Saturday). Additionally, a value of `L` may be appended to one of these
|
||||||
|
values to indicate the last occurrence of this day in the month. For example, `1L` can be used to indicate the last Sunday of the month.
|
||||||
|
|
||||||
|
|
||||||
|
Next, the Scheduling Tab provides a configuration option named `Concurrent tasks.' This controls how many threads the Processor
|
||||||
|
will use. Said a different way, this controls how many FlowFiles should be processed by this Processor at the same time. Increasing
|
||||||
|
this value will typically allow the Processor to handle more data in the same amount of time. However, it does this by using system
|
||||||
|
resources that then are not usable by other Processors. This essentially provides a relative weighting of Processors -- it controls
|
||||||
|
how much of the system's resources should be allocated to this Processor instead of other Processors. This field is available for
|
||||||
|
most Processors. There are, however, some types of Processors that can only be scheduled with a single Concurrent task.
|
||||||
|
|
||||||
|
The ``Run schedule'' dictates how often this Processor should be scheduled to run. The valid values for this field depend on the selected
|
||||||
|
Scheduling Strategy (see above). If using the Event driven Scheduling Strategy, this field is not available. When using the Timer driven
|
||||||
|
Scheduling Strategy, this value is a time duration specified by a number followed by a time unit. For example, `1 second` or `5 mins`.
|
||||||
|
The default value of `0 sec` means that the Processor should run as often as possible as long as it has data to process. This is true
|
||||||
|
for any time duration of 0, regardless of the time unit (e.g., `0 sec`, `0 mins`, `0 days`). For an explanation of values that are
|
||||||
|
applicable for the CRON driven Scheduling Strategy, see the description of the CRON driven Scheduling Strategy itself.
|
||||||
|
|
||||||
|
The right-hand side of the tab contains a slider for choosing the `Run duration.' This controls how long the Processor should be scheduled
|
||||||
|
to run each time that it is triggered. On the left-hand side of the slider, it is marked `Lower latency' while the right-hand side
|
||||||
|
is marked `Higher throughput.' When a Processor finishes running, it must update the repository in order to transfer the FlowFiles to
|
||||||
|
the next Connection. Updating this repository is expensive, so the more work that can be done at once before updating the repository
|
||||||
|
the more work the Processor can handle (Higher throughput). However, this means that the next Processor cannot start processing
|
||||||
|
those FlowFiles until the previous Processor updates this repository. As a result, the latency will be longer (the time required to process
|
||||||
|
the FlowFile from beginning to end will be longer). As a result, the slider provides a spectrum from which the DFM can choose to favor
|
||||||
|
Lower Latency or Higher Throughput.
|
||||||
|
|
||||||
|
|
||||||
|
==== Properties Tab
|
||||||
|
|
||||||
|
The Properties Tab provides a mechanism to configure Processor-specific behavior. There are no default properties. Each type of Processor
|
||||||
|
must define which Properties make sense for its use case. Below, we see the Properties Tab for a RouteOnAttribute Processor:
|
||||||
|
|
||||||
|
image::properties-tab.png["Properties Tab"]
|
||||||
|
|
||||||
|
This Processor, by default, has only a single property: `Routing Strategy.' The default value is `Route on Property name.' Next to
|
||||||
|
the name of this property is a small question-mark symbol (
|
||||||
|
image:iconInfo.png["Question Mark"]
|
||||||
|
). This help symbol is seen in other places throughout the application, as well, and indicates that more information is available.
|
||||||
|
Hovering over this symbol with the mouse will provide additional details about the property and the default value, as well as
|
||||||
|
historical values that have been set for the Property.
|
||||||
|
|
||||||
|
Clicking on the value for the property will allow a DFM to change the value. Depending on the values that are allowed for the property,
|
||||||
|
the user is either provided a drop-down from which to choose a value or is given a text area to type a value:
|
||||||
|
|
||||||
|
image::edit-property-dropdown.png["Edit Property with Dropdown"]
|
||||||
|
|
||||||
|
In the top-right corner of the tab is a button for adding a New Property. Clicking this button will provide the DFM with a dialog to
|
||||||
|
enter the name and value of a new property. Not all Processors allow User-Defined properties. For those that do not, adding one would cause the Processor to become
|
||||||
|
invalid when the properties are applied. RouteOnAttribute, for example, does allow User-Defined properties. In fact, this Processor
|
||||||
|
will not be valid until the user has added a property.
|
||||||
|
|
||||||
|
image:edit-property-textarea.png["Edit Property with Text Area"]
|
||||||
|
|
||||||
|
Note that after a User-Defined property has been added, an icon will appear on the right-hand side of that row (
|
||||||
|
image:iconDelete.png["Delete Icon"]
|
||||||
|
). Clicking this button will remove the User-Defined property from the Processor.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
==== Comments Tab
|
||||||
|
|
||||||
|
The last tab in the Processor configuration dialog is the Comments tab. This tab simply provides an area for users to provide
|
||||||
|
whatever comments are appropriate for this component:
|
||||||
|
|
||||||
|
image::comments-tab.png["Comments Tab"]
|
||||||
|
|
||||||
|
|
||||||
|
=== Additional Help
|
||||||
|
|
||||||
|
Each Processor has the ability to provide additional documentation about its usage. This documentation can be found by right-clicking
|
||||||
|
on the Processor and then selecting the `Usage' item from the context menu. Alternatively, clicking the `Help' link in the top-right
|
||||||
|
corner of the application will provide a Help page with all of the Processors that are available. Clicking on the Processor in the list
|
||||||
|
will then show its usage.
|
||||||
|
|
||||||
|
|
||||||
|
=== Connecting Components
|
||||||
|
|
||||||
|
After the appropriate Processors have been added to the graph and configured to meet your needs, they will have to be connected
|
||||||
|
to one another so that NiFi knows what to do with each FlowFile after it has been processed. This is accomplished by creating a
|
||||||
|
Connection between two components. When the mouse hovers over a component, a new Connection icon (
|
||||||
|
image:addConnect.png["Connection Bubble"]
|
||||||
|
) will appear in the middle of the component:
|
||||||
|
|
||||||
|
image:processor-connection-bubble.png["Processor with Connection Bubble"]
|
||||||
|
|
||||||
|
This Connection bubble can then be dragged from this component to another component, which will provide to the user a
|
||||||
|
`Create Connection' dialog. This dialog consists of two tabs: `Details' and `Settings'.
|
||||||
|
|
||||||
|
==== Details Tab
|
||||||
|
|
||||||
|
The Details Tab provides information about the source and destination components, including the component name, the
|
||||||
|
component type, and the Process Group in which the component lives:
|
||||||
|
|
||||||
|
image::create-connection.png["Create Connection"]
|
||||||
|
|
||||||
|
Additionally, this tab provides the ability to choose which Relationships should be included in this Connection. At least one
|
||||||
|
Relationship must be selected. If only one Relationship is available, it is automatically selected.
|
||||||
|
|
||||||
|
*Note*: If multiple Connections are added with the same Relationship, any FlowFile that is routed to that Relationship will
|
||||||
|
automatically be `cloned', and a copy will be sent to each of those Connections.
|
||||||
|
|
||||||
|
==== Settings Tab
|
||||||
|
|
||||||
|
The Settings Tab provides the ability to configure the Connection's name, FlowFile expiration, back pressure thresholds, and
|
||||||
|
Prioritization:
|
||||||
|
|
||||||
|
image:connection-settings.png["Connection Settings"]
|
||||||
|
|
||||||
|
The Connection name is optional. If not specified, the name shown for the Connection will be the names of the Relationships
|
||||||
|
that are active for the Connection.
|
||||||
|
|
||||||
|
FlowFile expiration is a concept by which data that cannot be processed in a timely fashion can be automatically destroyed.
|
||||||
|
This is useful, for example, when the volume of data is expected to exceed the volume that can be sent to a remote site.
|
||||||
|
In this case, the expiration can be used in conjunction with Prioritizers to ensure that the highest priority data is
|
||||||
|
processed first and then anything that cannot be processed within one hour, for example, can be dropped. The default
|
||||||
|
value of `0 sec` indicates that the data will never expire.
|
||||||
|
|
||||||
|
|
||||||
|
NiFi provides two different configuration elements for back pressure. These thresholds indicate how much data should be
|
||||||
|
allowed to exist in the queue before the component that is the source of the Connection is no longer scheduled to run.
|
||||||
|
This allows the system to avoid being overrun with data. The first option provided is the ``Back pressure object threshold.''
|
||||||
|
This is the number of FlowFiles that can be in the queue before back pressure is applied. The second configuration option
|
||||||
|
is the ``Back pressure data size threshold.''
|
||||||
|
This specifies the maximum amount of data that should be queued up before
|
||||||
|
applying back pressure. This value is configured by entering a number followed by a data size (`B` for bytes, `KB` for
|
||||||
|
kilobytes, `MB` for megabytes, `GB` for gigabytes, or `TB` for terabytes).
|
||||||
|
|
||||||
|
The right-hand side of the tab provides the ability to prioritize the data in queue so that higher priority data is
|
||||||
|
processed first. Prioritizers can be dragged from the top (`Available prioritizers') to the bottom (`Selected prioritizers').
|
||||||
|
Multiple prioritizers can be selected. The prioritizer that is at the top of the `Selected prioritizers' list is the highest
|
||||||
|
priority. If two FlowFiles have the same value according to this prioritizer, the second prioritizer will determine which
|
||||||
|
FlowFile to process first, and so on. If a prioritizer is no longer desired, it can then be dragged from the `Selected
|
||||||
|
prioritizers' list to the `Available prioritizers' list.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
=== Processor Validation
|
||||||
|
|
||||||
|
Before trying to start a Processor, it's important to make sure that the Processor's configuration is valid.
|
||||||
|
A status indicator is shown in the top-left of the Processor. If the Processor is invalid, the indicator
|
||||||
|
will show a yellow Warning indicator with an exclamation mark indicating that there is a problem:
|
||||||
|
|
||||||
|
image::invalid-processor.png["Invalid Processor"]
|
||||||
|
|
||||||
|
In this case, hovering over the indicator icon with the mouse will provide a tooltip showing all of the validation
|
||||||
|
failures for the Processor. Once all of the validation errors have been addressed, the status indicator will change
|
||||||
|
to a Stop icon, indicating that the Processor is valid and ready to be started but is not currently running:
|
||||||
|
|
||||||
|
image::valid-processor.png["Valid Processor"]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
== Command and Control of DataFlow
|
||||||
|
|
||||||
|
When a component is added to the NiFi canvas, it is in the Stopped state. In order to cause the component to
|
||||||
|
be triggered, the component must be started. Once started, the component can be stopped at any time. From a
|
||||||
|
Stopped state, the component can then be configured, started, or disabled.
|
||||||
|
|
||||||
|
=== Starting a Component
|
||||||
|
|
||||||
|
In order to start a component, the following conditions must be met:
|
||||||
|
|
||||||
|
- The component's configuration must be valid.
|
||||||
|
|
||||||
|
- All defined Relationships for the component must be connected to another component or auto-terminated.
|
||||||
|
|
||||||
|
- The component must be stopped.
|
||||||
|
|
||||||
|
- The component must be enabled.
|
||||||
|
|
||||||
|
- The component must have no active tasks. For more information about active tasks, see the ``Anatomy of ...''
|
||||||
|
sections under <<monitoring>> (<<processor_anatomy>>, <<process_group_anatomy>>, <<remote_group_anatomy>>).
|
||||||
|
|
||||||
|
Components can be started by selecting all of the components to start and then clicking the Start icon (
|
||||||
|
image:iconRun.png["Start"]
|
||||||
|
) in the
|
||||||
|
Actions Toolbar or by right-clicking a single component and choosing Start from the context menu.
|
||||||
|
|
||||||
|
If starting a Process Group, all components within that Process Group (including child Process Groups) will
|
||||||
|
be started, with the exception of those components that are invalid or disabled.
|
||||||
|
|
||||||
|
Once started, the status indicator of a Processor will change to a Play symbol (
|
||||||
|
image:iconRun.png["Run"]
|
||||||
|
).
|
||||||
|
|
||||||
|
|
||||||
|
=== Stopping a Component
|
||||||
|
|
||||||
|
A component can be stopped any time that it is running. A component is stopped by right-clicking on the component
|
||||||
|
and clicking Stop from the context menu, or by clicking the Stop icon (
|
||||||
|
image:iconStop.png["Stop"]
|
||||||
|
) in the Actions Toolbar.
|
||||||
|
|
||||||
|
If a Process Group is stopped, all of the components within the Process Group (including child Process Groups)
|
||||||
|
will be stopped.
|
||||||
|
|
||||||
|
Once stopped, the status indicator of a Processor will change to the Stop symbol (
|
||||||
|
image:iconStop.png["Stop"]
|
||||||
|
).
|
||||||
|
|
||||||
|
Stopping a component does not interrupt its currently running tasks. Rather, it stops scheduling new tasks to
|
||||||
|
be performed. The number of active tasks is shown in the top-right corner of the Processor (see <<processor_anatomy>>
|
||||||
|
for more information).
|
||||||
|
|
||||||
|
=== Enabling/Disabling a Component
|
||||||
|
|
||||||
|
When a component is enabled, it is able to be started. Components may be disabled when part of a
|
||||||
|
dataflow is still being assembled, for example, and as a result should not be started. Typically,
|
||||||
|
if a component is not intended to be run, the component is disabled, rather than being left in the
|
||||||
|
Stopped state. This helps to distinguish between components that are intentionally not running and
|
||||||
|
those components that may have been stopped temporarily (for instance, to change the component's
|
||||||
|
configuration) and inadvertently were never restarted.
|
||||||
|
|
||||||
|
When it is desirable to re-enable a component, it can be enabled by selecting the component and
|
||||||
|
clicking the Enable icon (
|
||||||
|
image:iconEnable.png["Enable"]
|
||||||
|
) in the Actions Toolbar. This is available only when the selected component or components are disabled.
|
||||||
|
Alternatively, a component can be enabled by checking the checkbox next to the ``Enabled'' option in
|
||||||
|
the Settings tab of the Processor configuration dialog or the configuration dialog for a Port.
|
||||||
|
|
||||||
|
Once enabled, the component's status indicator will change to either Invalid (
|
||||||
|
image:iconAlert.png["Invalid"]
|
||||||
|
) or Stopped (
|
||||||
|
image:iconStop.png["Stopped"]
|
||||||
|
), depending on whether or not the component is valid.
|
||||||
|
|
||||||
|
A component is then disabled by selecting the component and clicking the Disable icon (
|
||||||
|
image:iconDisable.png["Disable"]
|
||||||
|
) in the Actions Toolbar, or by clearing the checkbox next to the ``Enabled'' option in the Settings tab
|
||||||
|
of the Processor configuration dialog or the configuration dialog for a Port.
|
||||||
|
|
||||||
|
Only Ports and Processors can be enabled and disabled.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
=== Remote Process Group Transmission
|
||||||
|
|
||||||
|
Remote Process Groups provide a mechanism for sending data to or retrieving data from a remote instance
|
||||||
|
of NiFi. When a Remote Process Group (RPG) is added to the canvas, it is added with Transmission Disabled,
|
||||||
|
as indicated by the icon (
|
||||||
|
image:iconTransmissionInactive.png["Transmission Disabled"]
|
||||||
|
) in the top-left corner. When Transmission is Disabled, it can be enabled by right-clicking on the
|
||||||
|
RPG and clicking the ``Enable Transmission'' menu item. This will cause all ports for which there is a Connection
|
||||||
|
to begin transmitting data. This will cause the status indicator to then change to the Transmission Enabled icon (
|
||||||
|
image:iconTransmissionActive.png["Transmission Enabled"]
|
||||||
|
).
|
||||||
|
|
||||||
|
If there are problems communicating with the Remote Process Group, a Warning indicator (
|
||||||
|
image:iconAlert.png["Warning"]
|
||||||
|
) may instead be present in the top-left corner. Hovering over this Warning indicator with the mouse will provide
|
||||||
|
more information about the problem.
|
||||||
|
|
||||||
|
|
||||||
|
==== Individual Port Transmission
|
||||||
|
|
||||||
|
There are times when the DFM may want to either enable or disable transmission for only a specific
|
||||||
|
Port within the Remote Process Group. This can be accomplished by right-clicking on the Remote Process Group
|
||||||
|
and choosing the ``Remote ports'' menu item. This provides a configuration dialog from which each Port can be
|
||||||
|
configured:
|
||||||
|
|
||||||
|
image::remote-group-ports-dialog.png["Remote Process Groups"]
|
||||||
|
|
||||||
|
The left-hand side lists all of the Input Ports that the remote instance of NiFi allows data to be sent to.
|
||||||
|
The right-hand side lists all of the Output Ports from which this instance is able to pull data.
|
||||||
|
If the remote instance is using secure communications (the URL of the NiFi instance begins with `https://`,
|
||||||
|
rather than `http://`), any Ports that the remote instance has not made available to this instance will not
|
||||||
|
be shown.
|
||||||
|
|
||||||
|
*Note*: If a Port that is expected to be shown is not shown in this dialog, ensure that the instance has proper
|
||||||
|
permissions and that the Remote Process Group's flow is current. This can be checked by closing the Port
|
||||||
|
Configuration Dialog and looking at the bottom-right corner of the Remote Process Group. The date at which
|
||||||
|
the flow was last refreshed is shown. If the flow appears to be outdated, it can be updated by right-clicking
|
||||||
|
on the Remote Process Group and selecting ``Refresh flow.'' (See <<remote_group_anatomy>> for more information).
|
||||||
|
|
||||||
|
Each Port is shown with the Port name, followed by its description, currently configured number of Concurrent
|
||||||
|
tasks, and whether or not data sent to this port will be compressed. To the left of this information is a switch
|
||||||
|
to turn the Port on or off. Those Ports that have no Connections attached to them are grayed out:
|
||||||
|
|
||||||
|
image::remote-port-connection-status.png["Remote Port Statuses"]
|
||||||
|
|
||||||
|
The on/off switch provides a mechanism to enable and disable transmission for each Port in the Remote
|
||||||
|
Process Group independently. Those Ports that are connected but are not currently transmitting can be
|
||||||
|
configured by clicking the pencil icon (
|
||||||
|
image:iconEdit.png["Edit"]
|
||||||
|
) below the on/off switch. Clicking this icon will allow the DFM to change the number of Concurrent tasks and whether
|
||||||
|
or not compression should be used when transmitting data to or from this Port.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
[[monitoring]]
|
||||||
|
== Monitoring of DataFlow
|
||||||
|
|
||||||
|
NiFi provides a great deal of information about the status of the DataFlow in order to monitor the
|
||||||
|
health and status. The Status bar provides information about the overall system health
|
||||||
|
(See <<status_bar>> above for more information). Processors, Process Groups, and Remote Process Groups
|
||||||
|
provide fine-grained details about their operations. Connections and Process Groups provide information
|
||||||
|
about the amount of data in their queues. The Summary Page provides information about all of the components
|
||||||
|
on the graph in a tabular format and also provides System Diagnostics information that includes disk usage,
|
||||||
|
CPU utilization, and Java Heap and Garbage Collection information. In a clustered environment, this
|
||||||
|
information is available per-node or as aggregates across the entire cluster. We will explore each of these
|
||||||
|
monitoring artifacts below.
|
||||||
|
|
||||||
|
|
||||||
|
[[processor_anatomy]]
|
||||||
|
=== Anatomy of a Processor
|
||||||
|
|
||||||
|
NiFi provides a significant amount of information about each Processor on the canvas. The following diagram
|
||||||
|
is the anatomy of a Processor:
|
||||||
|
|
||||||
|
image:processor-anatomy.png["Anatomy of a Processor"]
|
||||||
|
|
||||||
|
The image outlines the following elements:
|
||||||
|
|
||||||
|
- *Processor Type*: NiFi provides several different types of Processors in order to allow for a wide range
|
||||||
|
of tasks to be performed. Each type of Processor is designed to perform one specific task. The Processor
|
||||||
|
type (PutFile, in this example) describes the task that this Processor performs. In this case, the
|
||||||
|
Processor writes a FlowFile to disk - or ``Puts'' a FlowFile to a File.
|
||||||
|
|
||||||
|
- *Bulletin Indicator*: When a Processor logs that some event has occurred, it generates a Bulletin to notify
|
||||||
|
those who are monitoring NiFi via the User Interface. The DataFlow Manager is able to configure which
|
||||||
|
bulletins should be displayed in the User Interface by updating the ``Bulletin level'' field in the
|
||||||
|
``Settings'' tab of the Processor configuration dialog. The default value is `WARN`, which means that only
|
||||||
|
warnings and errors will be displayed in the UI. This icon is not present unless a Bulletin exists for this
|
||||||
|
Processor. When it is present, hovering over the icon with the mouse will provide a tooltip explaining the
|
||||||
|
message provided by the Processor as well as the Bulletin level. If the instance of NiFi is clustered,
|
||||||
|
it will also show the Node that emitted the Bulletin. Bulletins automatically expire after five minutes.
|
||||||
|
|
||||||
|
- *Status Indicator*: Shows the current Status of the Processor. The following indicators are possible:
|
||||||
|
** image:iconRun.png["Running"]
|
||||||
|
*Running*: The Processor is currently running.
|
||||||
|
** image:iconStop.png["Stopped"]
|
||||||
|
*Stopped*: The Processor is valid and enabled but is not running.
|
||||||
|
** image:iconAlert.png["Invalid"]
|
||||||
|
*Invalid*: The Processor is enabled but is not currently valid and cannot be started.
|
||||||
|
Hovering over this icon will provide a tooltip indicating why the Processor is not valid.
|
||||||
|
** image:iconDisable.png["Disabled"]
|
||||||
|
*Disabled*: The Processor is not running and cannot be started until it has been enabled.
|
||||||
|
This status does not indicate whether or not the Processor is valid.
|
||||||
|
|
||||||
|
- *Processor Name*: This is the user-defined name of the Processor. By default, the name of the Processor is
|
||||||
|
the same as the Processor Type. In the example, this value is "Copy to /review".
|
||||||
|
|
||||||
|
- *Active Tasks*: The number of tasks that this Processor is currently executing. This number is constrained
|
||||||
|
by the ``Concurrent tasks'' setting in the ``Scheduling'' tab of the Processor configuration dialog.
|
||||||
|
Here, we can see that the Processor is currently performing two tasks. If the NiFi instance is clustered,
|
||||||
|
this value represents the number of tasks that are currently executing across all nodes in the cluster.
|
||||||
|
|
||||||
|
- *5-Minute Statistics*: The Processor shows several different statistics in tabular form. Each of these
|
||||||
|
statistics represents the amount of work that has been performed in the past five minutes. If the NiFi
|
||||||
|
instance is clustered, these values indicate how much work has been done by all of the Nodes combined
|
||||||
|
in the past five minutes. These metrics are:
|
||||||
|
|
||||||
|
** *In*: The amount of data that the Processor has pulled from the queues of its incoming Connections.
|
||||||
|
This value is represented as <count> / <size> where <count> is the number of FlowFiles that have been
|
||||||
|
pulled from the queues and <size> is the total size of those FlowFiles' content. In this example,
|
||||||
|
the Processor has pulled 884 FlowFiles from the input queues, for a total of 8.85 megabytes (MB).
|
||||||
|
** *Read/Write*: The total size of the FlowFile content that the Processor has read from disk and written
|
||||||
|
to disk. This provides valuable information about the I/O performance that this Processor requires.
|
||||||
|
Some Processors may only read the data without writing anything while some will not read the data but
|
||||||
|
will only write data. Others will neither read nor write data, and some Processors will both read
|
||||||
|
and write data. In this example, we see that in the past five minutes, this Processor has read 4.7
|
||||||
|
MB of the FlowFile content and has written 4.7 MB as well. This is what we would expect,
|
||||||
|
since this Processor simply copies the contents of a FlowFile to disk. Note, however, that this is
|
||||||
|
not the same as the amount of data that it pulled from its input queues. This is because some of
|
||||||
|
the files that it pulled from the input queues already existed in the output directory, and the
|
||||||
|
Processor is configured to route FlowFiles to failure when this occurs. Therefore, for those files
|
||||||
|
which already existed in the output directory, no data was read or written to disk.
|
||||||
|
** *Out*: The amount of data that the Processor has transferred to its outbound Connections. This does
|
||||||
|
not include FlowFiles that the Processor removes itself, or FlowFiles that are routed to connections
|
||||||
|
that are auto-terminated. Like the ``In'' metric above, this value is represented as <count> / <size>
|
||||||
|
where <count> is the number of FlowFiles that have been transferred to outbound Connections and <size>
|
||||||
|
is the total size of those FlowFiles' content. In this example, all of the Relationships are configured to be
|
||||||
|
auto-terminated, so no FlowFiles are reported as having been transferred Out.
|
||||||
|
** *Tasks/Time*: The number of times that this Processor has been triggered to run in the past 5 minutes, and
|
||||||
|
the amount of time taken to perform those tasks. The format of the time is <hour>:<minute>:<second>. Note
|
||||||
|
that the amount of time taken can exceed five minutes, because many tasks can be executed in parallel. For
|
||||||
|
instance, if the Processor is scheduled to run with 60 Concurrent tasks, and each of those tasks takes one
|
||||||
|
second to complete, it is possible that all 60 tasks will be completed in a single second. However, in this
|
||||||
|
case we will see the Time metric showing that it took 60 seconds, instead of 1 second. This time can be
|
||||||
|
thought of as ``System Time,'' or said another way, this value is 60 seconds because that's the amount of
|
||||||
|
time it would have taken to perform the action if only a single concurrent task were used.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
[[process_group_anatomy]]
|
||||||
|
=== Anatomy of a Process Group
|
||||||
|
|
||||||
|
|
||||||
|
[[remote_group_anatomy]]
|
||||||
|
=== Anatomy of a Remote Process Group
|
||||||
|
|
||||||
|
|
||||||
|
=== Summary Page
|
||||||
|
|
||||||
|
|
||||||
|
=== Historical Statistics of a Component
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
== Templates
|
||||||
|
|
||||||
|
=== Creating a Template
|
||||||
|
|
||||||
|
=== Instantiating a Template
|
||||||
|
|
||||||
|
=== Managing Templates
|
||||||
|
==== Importing a Template
|
||||||
|
==== Exporting a Template
|
||||||
|
==== Removing a Template
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
== Data Provenance
|
||||||
|
|
||||||
|
=== Searching for Events
|
||||||
|
|
||||||
|
=== Details of an Event
|
||||||
|
|
||||||
|
=== Viewing FlowFile Content
|
||||||
|
|
||||||
|
=== Replaying a FlowFile
|
||||||
|
|
||||||
|
=== Viewing FlowFile Lineage
|
||||||
|
==== Find Parents
|
||||||
|
==== Expanding an Event
|
6
pom.xml
|
@ -12,8 +12,7 @@
|
||||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
See the License for the specific language governing permissions and
|
See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
-->
|
--><project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<parent>
|
<parent>
|
||||||
<groupId>org.apache</groupId>
|
<groupId>org.apache</groupId>
|
||||||
|
@ -69,7 +68,8 @@
|
||||||
<module>nifi-mock</module>
|
<module>nifi-mock</module>
|
||||||
<module>nar-bundles</module>
|
<module>nar-bundles</module>
|
||||||
<module>assembly</module>
|
<module>assembly</module>
|
||||||
</modules>
|
<module>nifi-docs</module>
|
||||||
|
</modules>
|
||||||
<scm>
|
<scm>
|
||||||
<connection>scm:git:git://git.apache.org/incubator-nifi.git</connection>
|
<connection>scm:git:git://git.apache.org/incubator-nifi.git</connection>
|
||||||
<developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/incubator-nifi.git</developerConnection>
|
<developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/incubator-nifi.git</developerConnection>
|
||||||
|
|