NIFI-10756 Generate an error message when a processor and/or controller service is unable to transition to the started and/or enabled state

Signed-off-by: Mike Moser <mosermw@apache.org>
This commit is contained in:
Nissim Shiman 2023-03-17 19:32:20 +00:00 committed by Mike Moser
parent b23baac00e
commit fd2138b8cf
2 changed files with 20 additions and 4 deletions

View File

@ -1594,7 +1594,7 @@ public class StandardProcessorNode extends ProcessorNode implements Connectable
} }
if (starting) { // will ensure that the Processor represented by this node can only be started once if (starting) { // will ensure that the Processor represented by this node can only be started once
initiateStart(taskScheduler, administrativeYieldMillis, timeoutMillis, processContextFactory, schedulingAgentCallback); initiateStart(taskScheduler, administrativeYieldMillis, timeoutMillis, new AtomicLong(0), processContextFactory, schedulingAgentCallback);
} else { } else {
final String procName = processorRef.get().getProcessor().toString(); final String procName = processorRef.get().getProcessor().toString();
procLog.warn("Cannot start {} because it is not currently stopped. Current state is {}", procName, currentState); procLog.warn("Cannot start {} because it is not currently stopped. Current state is {}", procName, currentState);
@ -1711,7 +1711,7 @@ public class StandardProcessorNode extends ProcessorNode implements Connectable
private void initiateStart(final ScheduledExecutorService taskScheduler, final long administrativeYieldMillis, final long timeoutMilis, private void initiateStart(final ScheduledExecutorService taskScheduler, final long administrativeYieldMillis, final long timeoutMilis,
final Supplier<ProcessContext> processContextFactory, final SchedulingAgentCallback schedulingAgentCallback) { AtomicLong startupAttemptCount, final Supplier<ProcessContext> processContextFactory, final SchedulingAgentCallback schedulingAgentCallback) {
final Processor processor = getProcessor(); final Processor processor = getProcessor();
final ComponentLog procLog = new SimpleProcessLogger(StandardProcessorNode.this.getIdentifier(), processor); final ComponentLog procLog = new SimpleProcessLogger(StandardProcessorNode.this.getIdentifier(), processor);
@ -1733,8 +1733,15 @@ public class StandardProcessorNode extends ProcessorNode implements Connectable
if (validationStatus != ValidationStatus.VALID) { if (validationStatus != ValidationStatus.VALID) {
LOG.debug("Cannot start {} because Processor is currently not valid; will try again after 5 seconds", StandardProcessorNode.this); LOG.debug("Cannot start {} because Processor is currently not valid; will try again after 5 seconds", StandardProcessorNode.this);
startupAttemptCount.incrementAndGet();
if (startupAttemptCount.get() == 240 || startupAttemptCount.get() % 7200 == 0) {
final ValidationState validationState = getValidationState();
procLog.error("Encountering difficulty starting. (Validation State is {}: {}). Will continue trying to start.",
validationState, validationState.getValidationErrors());
}
// re-initiate the entire process // re-initiate the entire process
final Runnable initiateStartTask = () -> initiateStart(taskScheduler, administrativeYieldMillis, timeoutMilis, processContextFactory, schedulingAgentCallback); final Runnable initiateStartTask = () -> initiateStart(taskScheduler, administrativeYieldMillis, timeoutMilis, startupAttemptCount, processContextFactory, schedulingAgentCallback);
taskScheduler.schedule(initiateStartTask, 500, TimeUnit.MILLISECONDS); taskScheduler.schedule(initiateStartTask, 500, TimeUnit.MILLISECONDS);
schedulingAgentCallback.onTaskComplete(); schedulingAgentCallback.onTaskComplete();
@ -1811,7 +1818,7 @@ public class StandardProcessorNode extends ProcessorNode implements Connectable
// make sure we only continue retry loop if STOP action wasn't initiated // make sure we only continue retry loop if STOP action wasn't initiated
if (scheduledState.get() != ScheduledState.STOPPING && scheduledState.get() != ScheduledState.RUN_ONCE) { if (scheduledState.get() != ScheduledState.STOPPING && scheduledState.get() != ScheduledState.RUN_ONCE) {
// re-initiate the entire process // re-initiate the entire process
final Runnable initiateStartTask = () -> initiateStart(taskScheduler, administrativeYieldMillis, timeoutMilis, processContextFactory, schedulingAgentCallback); final Runnable initiateStartTask = () -> initiateStart(taskScheduler, administrativeYieldMillis, timeoutMilis, startupAttemptCount, processContextFactory, schedulingAgentCallback);
taskScheduler.schedule(initiateStartTask, administrativeYieldMillis, TimeUnit.MILLISECONDS); taskScheduler.schedule(initiateStartTask, administrativeYieldMillis, TimeUnit.MILLISECONDS);
} else { } else {
completeStopAction(); completeStopAction();

View File

@ -83,6 +83,7 @@ import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReadWriteLock;
@ -574,6 +575,7 @@ public class StandardControllerServiceNode extends AbstractComponentNode impleme
final ControllerServiceProvider controllerServiceProvider = this.serviceProvider; final ControllerServiceProvider controllerServiceProvider = this.serviceProvider;
final StandardControllerServiceNode service = this; final StandardControllerServiceNode service = this;
AtomicLong enablingAttemptCount = new AtomicLong(0);
scheduler.execute(new Runnable() { scheduler.execute(new Runnable() {
@Override @Override
public void run() { public void run() {
@ -592,6 +594,13 @@ public class StandardControllerServiceNode extends AbstractComponentNode impleme
LOG.debug("Cannot enable {} because it is not currently valid. (Validation State is {}: {}). Will try again in 1 second", LOG.debug("Cannot enable {} because it is not currently valid. (Validation State is {}: {}). Will try again in 1 second",
StandardControllerServiceNode.this, validationState, validationState.getValidationErrors()); StandardControllerServiceNode.this, validationState, validationState.getValidationErrors());
enablingAttemptCount.incrementAndGet();
if (enablingAttemptCount.get() == 120 || enablingAttemptCount.get() % 3600 == 0) {
final ComponentLog componentLog = new SimpleProcessLogger(getIdentifier(), StandardControllerServiceNode.this);
componentLog.error("Encountering difficulty enabling. (Validation State is {}: {}). Will continue trying to enable.",
validationState, validationState.getValidationErrors());
}
try { try {
scheduler.schedule(this, 1, TimeUnit.SECONDS); scheduler.schedule(this, 1, TimeUnit.SECONDS);
} catch (RejectedExecutionException rejectedExecutionException) { } catch (RejectedExecutionException rejectedExecutionException) {