From 48befe22f6af6791361ed7037b8bda372d0ee5c9 Mon Sep 17 00:00:00 2001 From: Peter Turcsanyi Date: Tue, 1 Jun 2021 14:31:51 +0200 Subject: [PATCH] NIFI-8645: Disable LongRunningTaskMonitor by default This closes #5111 Signed-off-by: David Handermann --- .../org/apache/nifi/util/NiFiProperties.java | 4 --- .../main/asciidoc/administration-guide.adoc | 16 +++++++++ .../nifi/controller/FlowController.java | 35 ++++++++++++------- .../src/main/resources/conf/nifi.properties | 4 +-- 4 files changed, 41 insertions(+), 18 deletions(-) diff --git a/nifi-commons/nifi-properties/src/main/java/org/apache/nifi/util/NiFiProperties.java b/nifi-commons/nifi-properties/src/main/java/org/apache/nifi/util/NiFiProperties.java index 8c8e2fbc29..329f916fd9 100644 --- a/nifi-commons/nifi-properties/src/main/java/org/apache/nifi/util/NiFiProperties.java +++ b/nifi-commons/nifi-properties/src/main/java/org/apache/nifi/util/NiFiProperties.java @@ -384,10 +384,6 @@ public class NiFiProperties extends ApplicationProperties { public static final String DEFAULT_ANALYTICS_CONNECTION_SCORE_NAME = "rSquared"; public static final double DEFAULT_ANALYTICS_CONNECTION_SCORE_THRESHOLD = .90; - // runtime monitoring defaults - public static final String DEFAULT_MONITOR_LONG_RUNNING_TASK_SCHEDULE = "1 min"; - public static final String DEFAULT_MONITOR_LONG_RUNNING_TASK_THRESHOLD = "5 mins"; - // Status repository defaults public static final int DEFAULT_COMPONENT_STATUS_REPOSITORY_PERSIST_NODE_DAYS = 14; public static final int DEFAULT_COMPONENT_STATUS_REPOSITORY_PERSIST_COMPONENT_DAYS = 3; diff --git a/nifi-docs/src/main/asciidoc/administration-guide.adoc b/nifi-docs/src/main/asciidoc/administration-guide.adoc index 3dffe70a50..d2c945adcc 100644 --- a/nifi-docs/src/main/asciidoc/administration-guide.adoc +++ b/nifi-docs/src/main/asciidoc/administration-guide.adoc @@ -3764,6 +3764,22 @@ These properties determine the behavior of the internal NiFi predictive analytic |==== 
+[[runtime_monitoring_properties]] +=== Runtime Monitoring Properties + +The Long-Running Task Monitor periodically checks the NiFi processor executor threads and produces warning logs and bulletin messages for those that have been running longer than the configured threshold. +It can be used to detect possibly stuck / hanging processor tasks. +Please note the performance impact of the task monitor: it creates a thread dump on every run, which may affect normal flow execution. +The Long-Running Task Monitor is disabled by default, and it remains disabled as long as either of its properties is left without a value. +To enable it, both the `nifi.monitor.long.running.task.schedule` and `nifi.monitor.long.running.task.threshold` properties need to be configured with valid time periods. + +|==== +|*Property*|*Description* +|`nifi.monitor.long.running.task.schedule`|The time period between successive executions of the Long-Running Task Monitor (e.g. `1 min`). +|`nifi.monitor.long.running.task.threshold`|The time period beyond which a task is considered long-running, i.e. stuck / hanging (e.g. `5 mins`). 
+ +|==== + [[custom_properties]] === Custom Properties diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/FlowController.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/FlowController.java index 5154963ede..aea9604a34 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/FlowController.java +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/FlowController.java @@ -311,7 +311,7 @@ public class FlowController implements ReportingTaskProvider, Authorizable, Node private final StandardFlowManager flowManager; private final RepositoryContextFactory repositoryContextFactory; private final RingBufferGarbageCollectionLog gcLog; - private final FlowEngine longRunningTaskMonitorThreadPool; + private final Optional longRunningTaskMonitorThreadPool; /** * true if controller is configured to operate in a clustered environment @@ -781,7 +781,9 @@ public class FlowController implements ReportingTaskProvider, Authorizable, Node loadBalanceClientThreadPool = null; } - longRunningTaskMonitorThreadPool = new FlowEngine(1, "Long Running Task Monitor", true); + longRunningTaskMonitorThreadPool = isLongRunningTaskMonitorEnabled() + ? 
Optional.of(new FlowEngine(1, "Long Running Task Monitor", true)) + : Optional.empty(); } @Override @@ -1104,23 +1106,32 @@ public class FlowController implements ReportingTaskProvider, Authorizable, Node } private void scheduleLongRunningTaskMonitor() { - final long scheduleMillis = parseDurationPropertyToMillis(NiFiProperties.MONITOR_LONG_RUNNING_TASK_SCHEDULE, NiFiProperties.DEFAULT_MONITOR_LONG_RUNNING_TASK_SCHEDULE); - final long thresholdMillis = parseDurationPropertyToMillis(NiFiProperties.MONITOR_LONG_RUNNING_TASK_THRESHOLD, NiFiProperties.DEFAULT_MONITOR_LONG_RUNNING_TASK_THRESHOLD); + longRunningTaskMonitorThreadPool.ifPresent(flowEngine -> { + try { + final long scheduleMillis = parseDurationPropertyToMillis(NiFiProperties.MONITOR_LONG_RUNNING_TASK_SCHEDULE); + final long thresholdMillis = parseDurationPropertyToMillis(NiFiProperties.MONITOR_LONG_RUNNING_TASK_THRESHOLD); - LongRunningTaskMonitor longRunningTaskMonitor = new LongRunningTaskMonitor(getFlowManager(), createEventReporter(), thresholdMillis); - longRunningTaskMonitorThreadPool.scheduleWithFixedDelay(longRunningTaskMonitor, scheduleMillis, scheduleMillis, TimeUnit.MILLISECONDS); + LongRunningTaskMonitor longRunningTaskMonitor = new LongRunningTaskMonitor(getFlowManager(), createEventReporter(), thresholdMillis); + longRunningTaskMonitorThreadPool.get().scheduleWithFixedDelay(longRunningTaskMonitor, scheduleMillis, scheduleMillis, TimeUnit.MILLISECONDS); + } catch (Exception e) { + LOG.warn("Could not initialize LongRunningTaskMonitor.", e); + } + }); } - private long parseDurationPropertyToMillis(String propertyName, String defaultValue) { - long durationMillis; + private long parseDurationPropertyToMillis(String propertyName) { try { final String duration = nifiProperties.getProperty(propertyName); - durationMillis = (long) FormatUtils.getPreciseTimeDuration(duration, TimeUnit.MILLISECONDS); + return (long) FormatUtils.getPreciseTimeDuration(duration, TimeUnit.MILLISECONDS); } catch (final 
Exception e) { - LOG.warn("Could not retrieve value for {}. This property has been set to '{}'", propertyName, defaultValue); - durationMillis = (long) FormatUtils.getPreciseTimeDuration(defaultValue, TimeUnit.MILLISECONDS); + LOG.warn("Could not retrieve value for {}. Valid values e.g. 60 secs or 1 min.", propertyName); + throw e; } - return durationMillis; + } + + private boolean isLongRunningTaskMonitorEnabled() { + return StringUtils.isNotBlank(nifiProperties.getProperty(NiFiProperties.MONITOR_LONG_RUNNING_TASK_SCHEDULE)) + && StringUtils.isNotBlank(nifiProperties.getProperty(NiFiProperties.MONITOR_LONG_RUNNING_TASK_THRESHOLD)); } public boolean isStartAfterInitialization(final Connectable component) { diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-resources/src/main/resources/conf/nifi.properties b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-resources/src/main/resources/conf/nifi.properties index 12a31e5b7a..772fe1229e 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-resources/src/main/resources/conf/nifi.properties +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-resources/src/main/resources/conf/nifi.properties @@ -321,5 +321,5 @@ nifi.analytics.connection.model.score.name=${nifi.analytics.connection.model.sco nifi.analytics.connection.model.score.threshold=${nifi.analytics.connection.model.score.threshold} # runtime monitoring properties -nifi.monitor.long.running.task.schedule=${nifi.monitor.long.running.task.schedule} -nifi.monitor.long.running.task.threshold=${nifi.monitor.long.running.task.threshold} +nifi.monitor.long.running.task.schedule= +nifi.monitor.long.running.task.threshold=