Mark Payne 96ed405d70 NIFI-3356: Initial implementation of writeahead provenance repository
- The idea behind NIFI-3356 was to improve the efficiency and throughput of the Provenance Repository, as it is often the bottleneck. While testing the newly designed repository,
  a handful of other, fairly minor, changes were made to improve efficiency as well, as these came to light when testing the new repository:

- Use a BufferedOutputStream within StandardProcessSession (via a ClaimCache abstraction) in order to avoid continually writing to FileOutputStream when writing many small FlowFiles
- Updated threading model of MinimalLockingWriteAheadLog - now performs serialization outside of lock and writes to a 'synchronized' OutputStream
- Change minimum scheduling period for components from 30 microseconds to 1 nanosecond. ScheduledExecutor is very inconsistent with timing of task scheduling. With the bored.yield.duration
  now present, this value doesn't need to be set to 30 microseconds. This was originally done to avoid processors that had no work from dominating the CPU. However, now that we will yield
  when processors have no work, this results in slowing down processors that are able to perform work.
- Allow nifi.properties to specify multiple directories for FlowFile Repository
- If backpressure is engaged while running a batch of sessions, then stop batch processing earlier. This helps FlowFiles to move through the system much more smoothly instead of the
  herky-jerky queuing that we previously saw at very high rates of FlowFiles.
- Added NiFi PID to log message when starting nifi. This was simply an update to the log message that provides helpful information.

NIFI-3356: Fixed bug in ContentClaimWriteCache that resulted in data corruption and fixed bug in RepositoryConfiguration that threw exception if cache warm duration was set to empty string

NIFI-3356: Fixed NPE

NIFI-3356: Added debug-level performance monitoring

NIFI-3356: Updates to unit tests that failed after rebasing against master

NIFI-3356: Incorporated PR review feedback

NIFI-3356: Fixed bug where we would delete index directories that are still in use; also added additional debug logging and a simple util class that can be used to textualize provenance event files - useful in debugging

This closes #1493
2017-02-22 12:40:06 -05:00

210 lines
13 KiB
XML

<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.nifi</groupId>
<artifactId>nifi-framework</artifactId>
<version>1.2.0-SNAPSHOT</version>
</parent>
<artifactId>nifi-resources</artifactId>
<packaging>pom</packaging>
<description>holds common resources used to build installers</description>
<properties>
<!--Wrapper Properties -->
<nifi.jvm.heap.mb>512</nifi.jvm.heap.mb>
<nifi.jvm.permgen.mb>128</nifi.jvm.permgen.mb>
<nifi.run.as />
<!-- nifi.properties: core properties -->
<nifi.version>${project.version}</nifi.version>
<nifi.flowcontroller.autoResumeState>true</nifi.flowcontroller.autoResumeState>
<nifi.flowcontroller.graceful.shutdown.period>10 sec</nifi.flowcontroller.graceful.shutdown.period>
<nifi.flowservice.writedelay.interval>500 ms</nifi.flowservice.writedelay.interval>
<nifi.administrative.yield.duration>30 sec</nifi.administrative.yield.duration>
<nifi.bored.yield.duration>10 millis</nifi.bored.yield.duration>
<nifi.flow.configuration.file>./conf/flow.xml.gz</nifi.flow.configuration.file>
<nifi.flow.configuration.archive.enabled>true</nifi.flow.configuration.archive.enabled>
<nifi.flow.configuration.archive.dir>./conf/archive/</nifi.flow.configuration.archive.dir>
<nifi.flow.configuration.archive.max.time>30 days</nifi.flow.configuration.archive.max.time>
<nifi.flow.configuration.archive.max.storage>500 MB</nifi.flow.configuration.archive.max.storage>
<nifi.login.identity.provider.configuration.file>./conf/login-identity-providers.xml</nifi.login.identity.provider.configuration.file>
<nifi.authorizer.configuration.file>./conf/authorizers.xml</nifi.authorizer.configuration.file>
<nifi.templates.directory>./conf/templates</nifi.templates.directory>
<nifi.database.directory>./database_repository</nifi.database.directory>
<nifi.state.management.configuration.file>./conf/state-management.xml</nifi.state.management.configuration.file>
<nifi.state.management.embedded.zookeeper.start>false</nifi.state.management.embedded.zookeeper.start>
<nifi.state.management.embedded.zookeeper.properties>./conf/zookeeper.properties</nifi.state.management.embedded.zookeeper.properties>
<nifi.state.management.provider.local>local-provider</nifi.state.management.provider.local>
<nifi.state.management.provider.cluster>zk-provider</nifi.state.management.provider.cluster>
<nifi.flowfile.repository.implementation>org.apache.nifi.controller.repository.WriteAheadFlowFileRepository</nifi.flowfile.repository.implementation>
<nifi.flowfile.repository.directory>./flowfile_repository</nifi.flowfile.repository.directory>
<nifi.flowfile.repository.partitions>256</nifi.flowfile.repository.partitions>
<nifi.flowfile.repository.checkpoint.interval>2 mins</nifi.flowfile.repository.checkpoint.interval>
<nifi.flowfile.repository.always.sync>false</nifi.flowfile.repository.always.sync>
<nifi.swap.manager.implementation>org.apache.nifi.controller.FileSystemSwapManager</nifi.swap.manager.implementation>
<nifi.queue.swap.threshold>20000</nifi.queue.swap.threshold>
<nifi.swap.in.period>5 sec</nifi.swap.in.period>
<nifi.swap.in.threads>1</nifi.swap.in.threads>
<nifi.swap.out.period>5 sec</nifi.swap.out.period>
<nifi.swap.out.threads>4</nifi.swap.out.threads>
<nifi.content.repository.implementation>org.apache.nifi.controller.repository.FileSystemRepository</nifi.content.repository.implementation>
<nifi.content.claim.max.appendable.size>10 MB</nifi.content.claim.max.appendable.size>
<nifi.content.claim.max.flow.files>100</nifi.content.claim.max.flow.files>
<nifi.content.repository.directory.default>./content_repository</nifi.content.repository.directory.default>
<nifi.content.repository.archive.max.retention.period>12 hours</nifi.content.repository.archive.max.retention.period>
<nifi.content.repository.archive.max.usage.percentage>50%</nifi.content.repository.archive.max.usage.percentage>
<nifi.content.repository.archive.enabled>true</nifi.content.repository.archive.enabled>
<nifi.content.repository.always.sync>false</nifi.content.repository.always.sync>
<nifi.content.viewer.url>/nifi-content-viewer/</nifi.content.viewer.url>
<nifi.restore.directory />
<nifi.ui.banner.text />
<nifi.ui.autorefresh.interval>30 sec</nifi.ui.autorefresh.interval>
<nifi.nar.library.directory>./lib</nifi.nar.library.directory>
<nifi.nar.working.directory>./work/nar/</nifi.nar.working.directory>
<nifi.documentation.working.directory>./work/docs/components</nifi.documentation.working.directory>
<nifi.sensitive.props.key.protected />
<nifi.sensitive.props.algorithm>PBEWITHMD5AND256BITAES-CBC-OPENSSL</nifi.sensitive.props.algorithm>
<nifi.sensitive.props.provider>BC</nifi.sensitive.props.provider>
<nifi.sensitive.props.additional.keys />
<nifi.h2.url.append>;LOCK_TIMEOUT=25000;WRITE_DELAY=0;AUTO_SERVER=FALSE</nifi.h2.url.append>
<nifi.remote.input.socket.port>9990</nifi.remote.input.socket.port>
<!-- persistent provenance repository properties -->
<nifi.provenance.repository.implementation>org.apache.nifi.provenance.PersistentProvenanceRepository</nifi.provenance.repository.implementation>
<nifi.provenance.repository.directory.default>./provenance_repository</nifi.provenance.repository.directory.default>
<nifi.provenance.repository.max.storage.time>24 hours</nifi.provenance.repository.max.storage.time>
<nifi.provenance.repository.max.storage.size>1 GB</nifi.provenance.repository.max.storage.size>
<nifi.provenance.repository.rollover.time>30 secs</nifi.provenance.repository.rollover.time>
<nifi.provenance.repository.rollover.size>100 MB</nifi.provenance.repository.rollover.size>
<nifi.provenance.repository.query.threads>2</nifi.provenance.repository.query.threads>
<nifi.provenance.repository.index.threads>2</nifi.provenance.repository.index.threads>
<nifi.provenance.repository.compress.on.rollover>true</nifi.provenance.repository.compress.on.rollover>
<nifi.provenance.repository.indexed.fields>EventType, FlowFileUUID, Filename, ProcessorID, Relationship</nifi.provenance.repository.indexed.fields>
<nifi.provenance.repository.indexed.attributes />
<nifi.provenance.repository.index.shard.size>500 MB</nifi.provenance.repository.index.shard.size>
<nifi.provenance.repository.always.sync>false</nifi.provenance.repository.always.sync>
<nifi.provenance.repository.journal.count>16</nifi.provenance.repository.journal.count>
<nifi.provenance.repository.max.attribute.length>65536</nifi.provenance.repository.max.attribute.length>
<nifi.provenance.repository.concurrent.merge.threads>2</nifi.provenance.repository.concurrent.merge.threads>
<nifi.provenance.repository.warm.cache.frequency>1 hour</nifi.provenance.repository.warm.cache.frequency>
<!-- volatile provenance repository properties -->
<nifi.provenance.repository.buffer.size>100000</nifi.provenance.repository.buffer.size>
<!-- Component status repository properties -->
<nifi.components.status.repository.implementation>org.apache.nifi.controller.status.history.VolatileComponentStatusRepository</nifi.components.status.repository.implementation>
<nifi.components.status.repository.buffer.size>1440</nifi.components.status.repository.buffer.size>
<nifi.components.status.snapshot.frequency>1 min</nifi.components.status.snapshot.frequency>
<!-- nifi.properties: web properties -->
<nifi.web.war.directory>./lib</nifi.web.war.directory>
<nifi.web.http.host />
<nifi.web.http.port>8080</nifi.web.http.port>
<nifi.web.http.network.interface.default />
<nifi.web.https.host />
<nifi.web.https.port />
<nifi.web.https.network.interface.default />
<nifi.jetty.work.dir>./work/jetty</nifi.jetty.work.dir>
<nifi.web.jetty.threads>200</nifi.web.jetty.threads>
<!-- nifi.properties: security properties -->
<nifi.security.keystore />
<nifi.security.keystoreType />
<nifi.security.keystorePasswd />
<nifi.security.keyPasswd />
<nifi.security.truststore />
<nifi.security.truststoreType />
<nifi.security.truststorePasswd />
<nifi.security.needClientAuth />
<nifi.security.user.authorizer>file-provider</nifi.security.user.authorizer>
<nifi.security.user.login.identity.provider />
<nifi.security.x509.principal.extractor />
<nifi.security.ocsp.responder.url />
<nifi.security.ocsp.responder.certificate />
<!-- nifi.properties: cluster common properties (cluster manager and nodes must have same values) -->
<nifi.cluster.protocol.heartbeat.interval>5 sec</nifi.cluster.protocol.heartbeat.interval>
<nifi.cluster.protocol.is.secure>false</nifi.cluster.protocol.is.secure>
<!-- nifi.properties: cluster node properties (only configure for cluster nodes) -->
<nifi.cluster.is.node>false</nifi.cluster.is.node>
<nifi.cluster.node.address />
<nifi.cluster.node.protocol.port />
<nifi.cluster.node.protocol.threads>10</nifi.cluster.node.protocol.threads>
<nifi.cluster.node.event.history.size>25</nifi.cluster.node.event.history.size>
<nifi.cluster.node.connection.timeout>5 sec</nifi.cluster.node.connection.timeout>
<nifi.cluster.node.read.timeout>5 sec</nifi.cluster.node.read.timeout>
<nifi.cluster.firewall.file />
<nifi.cluster.flow.election.max.wait.time>5 mins</nifi.cluster.flow.election.max.wait.time>
<nifi.cluster.flow.election.max.candidates />
<nifi.cluster.request.replication.claim.timeout>15 secs</nifi.cluster.request.replication.claim.timeout>
<!-- nifi.properties: zookeeper properties -->
<nifi.zookeeper.connect.string />
<nifi.zookeeper.connect.timeout>3 secs</nifi.zookeeper.connect.timeout>
<nifi.zookeeper.session.timeout>3 secs</nifi.zookeeper.session.timeout>
<nifi.zookeeper.root.node>/nifi</nifi.zookeeper.root.node>
<!-- nifi.properties: kerberos properties -->
<nifi.kerberos.krb5.file> </nifi.kerberos.krb5.file>
<nifi.kerberos.service.principal />
<nifi.kerberos.service.keytab.location />
<nifi.kerberos.spnego.principal />
<nifi.kerberos.spnego.keytab.location />
<nifi.kerberos.spnego.authentication.expiration>12 hours</nifi.kerberos.spnego.authentication.expiration>
<!-- nifi.properties: build info -->
<nifi.build.tag>${project.scm.tag}</nifi.build.tag>
<nifi.build.timestamp>${maven.build.timestamp}</nifi.build.timestamp>
<!-- buildRevision and buildBranch provided by buildnumber-maven-plugin or build-info-read-properties profile -->
<nifi.build.branch>${buildBranch}</nifi.build.branch>
<nifi.build.revision>${buildRevision}</nifi.build.revision>
</properties>
<build>
<plugins>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<attach>true</attach>
</configuration>
<executions>
<execution>
<id>make shared resource</id>
<goals>
<goal>single</goal>
</goals>
<phase>package</phase>
<configuration>
<descriptors>
<descriptor>src/main/assembly/dependencies.xml</descriptor>
</descriptors>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>