NIFI-8645: Disable LongRunningTaskMonitor by default

This closes #5111

Signed-off-by: David Handermann <exceptionfactory@apache.org>
This commit is contained in:
Peter Turcsanyi 2021-06-01 14:31:51 +02:00 committed by exceptionfactory
parent ab851b9cde
commit 48befe22f6
No known key found for this signature in database
GPG Key ID: 29B6A52D2AAE8DBA
4 changed files with 41 additions and 18 deletions

View File

@ -384,10 +384,6 @@ public class NiFiProperties extends ApplicationProperties {
public static final String DEFAULT_ANALYTICS_CONNECTION_SCORE_NAME = "rSquared"; public static final String DEFAULT_ANALYTICS_CONNECTION_SCORE_NAME = "rSquared";
public static final double DEFAULT_ANALYTICS_CONNECTION_SCORE_THRESHOLD = .90; public static final double DEFAULT_ANALYTICS_CONNECTION_SCORE_THRESHOLD = .90;
// runtime monitoring defaults
public static final String DEFAULT_MONITOR_LONG_RUNNING_TASK_SCHEDULE = "1 min";
public static final String DEFAULT_MONITOR_LONG_RUNNING_TASK_THRESHOLD = "5 mins";
// Status repository defaults // Status repository defaults
public static final int DEFAULT_COMPONENT_STATUS_REPOSITORY_PERSIST_NODE_DAYS = 14; public static final int DEFAULT_COMPONENT_STATUS_REPOSITORY_PERSIST_NODE_DAYS = 14;
public static final int DEFAULT_COMPONENT_STATUS_REPOSITORY_PERSIST_COMPONENT_DAYS = 3; public static final int DEFAULT_COMPONENT_STATUS_REPOSITORY_PERSIST_COMPONENT_DAYS = 3;

View File

@ -3764,6 +3764,22 @@ These properties determine the behavior of the internal NiFi predictive analytic
|==== |====
[[runtime_monitoring_properties]]
=== Runtime Monitoring Properties
The Long-Running Task Monitor periodically checks the NiFi processor executor threads and produces warning logs and bulletin messages for tasks that have been running longer than the configured threshold.
It can be used to detect possibly stuck / hanging processor tasks.
Please note the performance impact of the task monitor: it creates a thread dump on every run, which may affect normal flow execution.
The Long-Running Task Monitor is disabled by default, and it can be kept disabled by leaving its properties without values.
To enable it, both `nifi.monitor.long.running.task.schedule` and `nifi.monitor.long.running.task.threshold` properties need to be configured with valid time periods.
|====
|*Property*|*Description*
|`nifi.monitor.long.running.task.schedule`|The time period between successive executions of the Long-Running Task Monitor (e.g. `1 min`).
|`nifi.monitor.long.running.task.threshold`|The time period beyond which a task is considered long-running, i.e. stuck / hanging (e.g. `5 mins`).
|====
[[custom_properties]] [[custom_properties]]
=== Custom Properties === Custom Properties

View File

@ -311,7 +311,7 @@ public class FlowController implements ReportingTaskProvider, Authorizable, Node
private final StandardFlowManager flowManager; private final StandardFlowManager flowManager;
private final RepositoryContextFactory repositoryContextFactory; private final RepositoryContextFactory repositoryContextFactory;
private final RingBufferGarbageCollectionLog gcLog; private final RingBufferGarbageCollectionLog gcLog;
private final FlowEngine longRunningTaskMonitorThreadPool; private final Optional<FlowEngine> longRunningTaskMonitorThreadPool;
/** /**
* true if controller is configured to operate in a clustered environment * true if controller is configured to operate in a clustered environment
@ -781,7 +781,9 @@ public class FlowController implements ReportingTaskProvider, Authorizable, Node
loadBalanceClientThreadPool = null; loadBalanceClientThreadPool = null;
} }
longRunningTaskMonitorThreadPool = new FlowEngine(1, "Long Running Task Monitor", true); longRunningTaskMonitorThreadPool = isLongRunningTaskMonitorEnabled()
? Optional.of(new FlowEngine(1, "Long Running Task Monitor", true))
: Optional.empty();
} }
@Override @Override
@ -1104,23 +1106,32 @@ public class FlowController implements ReportingTaskProvider, Authorizable, Node
} }
private void scheduleLongRunningTaskMonitor() { private void scheduleLongRunningTaskMonitor() {
final long scheduleMillis = parseDurationPropertyToMillis(NiFiProperties.MONITOR_LONG_RUNNING_TASK_SCHEDULE, NiFiProperties.DEFAULT_MONITOR_LONG_RUNNING_TASK_SCHEDULE); longRunningTaskMonitorThreadPool.ifPresent(flowEngine -> {
final long thresholdMillis = parseDurationPropertyToMillis(NiFiProperties.MONITOR_LONG_RUNNING_TASK_THRESHOLD, NiFiProperties.DEFAULT_MONITOR_LONG_RUNNING_TASK_THRESHOLD); try {
final long scheduleMillis = parseDurationPropertyToMillis(NiFiProperties.MONITOR_LONG_RUNNING_TASK_SCHEDULE);
final long thresholdMillis = parseDurationPropertyToMillis(NiFiProperties.MONITOR_LONG_RUNNING_TASK_THRESHOLD);
LongRunningTaskMonitor longRunningTaskMonitor = new LongRunningTaskMonitor(getFlowManager(), createEventReporter(), thresholdMillis); LongRunningTaskMonitor longRunningTaskMonitor = new LongRunningTaskMonitor(getFlowManager(), createEventReporter(), thresholdMillis);
longRunningTaskMonitorThreadPool.scheduleWithFixedDelay(longRunningTaskMonitor, scheduleMillis, scheduleMillis, TimeUnit.MILLISECONDS); longRunningTaskMonitorThreadPool.get().scheduleWithFixedDelay(longRunningTaskMonitor, scheduleMillis, scheduleMillis, TimeUnit.MILLISECONDS);
} catch (Exception e) {
LOG.warn("Could not initialize LongRunningTaskMonitor.", e);
}
});
} }
private long parseDurationPropertyToMillis(String propertyName, String defaultValue) { private long parseDurationPropertyToMillis(String propertyName) {
long durationMillis;
try { try {
final String duration = nifiProperties.getProperty(propertyName); final String duration = nifiProperties.getProperty(propertyName);
durationMillis = (long) FormatUtils.getPreciseTimeDuration(duration, TimeUnit.MILLISECONDS); return (long) FormatUtils.getPreciseTimeDuration(duration, TimeUnit.MILLISECONDS);
} catch (final Exception e) { } catch (final Exception e) {
LOG.warn("Could not retrieve value for {}. This property has been set to '{}'", propertyName, defaultValue); LOG.warn("Could not retrieve value for {}. Valid values e.g. 60 secs or 1 min.", propertyName);
durationMillis = (long) FormatUtils.getPreciseTimeDuration(defaultValue, TimeUnit.MILLISECONDS); throw e;
} }
return durationMillis; }
private boolean isLongRunningTaskMonitorEnabled() {
return StringUtils.isNotBlank(nifiProperties.getProperty(NiFiProperties.MONITOR_LONG_RUNNING_TASK_SCHEDULE))
&& StringUtils.isNotBlank(nifiProperties.getProperty(NiFiProperties.MONITOR_LONG_RUNNING_TASK_THRESHOLD));
} }
public boolean isStartAfterInitialization(final Connectable component) { public boolean isStartAfterInitialization(final Connectable component) {

View File

@ -321,5 +321,5 @@ nifi.analytics.connection.model.score.name=${nifi.analytics.connection.model.sco
nifi.analytics.connection.model.score.threshold=${nifi.analytics.connection.model.score.threshold} nifi.analytics.connection.model.score.threshold=${nifi.analytics.connection.model.score.threshold}
# runtime monitoring properties # runtime monitoring properties
nifi.monitor.long.running.task.schedule=${nifi.monitor.long.running.task.schedule} nifi.monitor.long.running.task.schedule=
nifi.monitor.long.running.task.threshold=${nifi.monitor.long.running.task.threshold} nifi.monitor.long.running.task.threshold=