Watcher: Ensure TriggerEngine start replaces existing watches (#33157)

This commit ensures that when `TriggerService.start()` is called, the
trigger engine implementations remove the current watches instead of
adding to the existing ones in `TickerScheduleTriggerEngine.start()`.
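
For illustration, a minimal sketch of the replace-instead-of-merge semantics (hypothetical class, field, and method names; simplified, not the actual engine code):

```java
import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Sketch only: SketchScheduleEngine and its members are made up for illustration.
class SketchScheduleEngine {
    // volatile so the ticker thread picks up the freshly swapped map
    private volatile Map<String, Long> schedules = new ConcurrentHashMap<>();

    void start(Collection<String> watchIds, long startTime) {
        Map<String, Long> newSchedules = new ConcurrentHashMap<>();
        for (String id : watchIds) {
            newSchedules.put(id, startTime);
        }
        // before: this.schedules.putAll(newSchedules) kept previously registered watches around
        // after:  assigning the new map drops all existing schedules, as intended on start()
        this.schedules = newSchedules;
    }
}
```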

Two additional minor fixes are included, where the result remains the same but less code gets executed:

1. If the node is not a data node, we forgot to set the status to
STARTING when Watcher is being started. This should not be a big issue,
because a non-data node does not spend a lot of time loading, as there
are no watches which need loading.
2. If a new cluster state came in during a reload, we had two checks in
place to abort loading the current one: the first before we load all
the watches of the local node, and the second before Watcher starts
with those new watches. It turned out that the first check was not
returning, which meant we always tried to load all the watches and then
failed on the second check. This has been fixed here (see the sketch
after this list).
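
A sketch of the check-then-abort flow from fix 2 (hypothetical names and stubbed helpers; the real checks live in `WatcherService`):

```java
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

// Sketch only: helpers are stubs, not the actual WatcherService code.
class ReloadSketch {
    private final AtomicLong processedClusterStateVersion = new AtomicLong(-1);

    boolean reload(long stateVersion) {
        if (processedClusterStateVersion.get() != stateVersion) {
            // a newer cluster state is already being processed; without this early
            // return, the watches below were loaded only to be discarded again
            return false;
        }
        List<String> watches = loadWatches(stateVersion); // the expensive part
        if (processedClusterStateVersion.get() == stateVersion) {
            startTriggerEngine(watches); // still the latest state, safe to start
            return true;
        }
        return false;
    }

    private List<String> loadWatches(long version) { return new ArrayList<>(); } // stub
    private void startTriggerEngine(List<String> watches) {} // stub
}
```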
Alexander Reelsen 2018-08-30 10:53:01 +02:00 committed by GitHub
parent 5cf6e0d4bc
commit b6f762d131
4 changed files with 51 additions and 6 deletions

WatcherLifeCycleService.java

@@ -112,6 +112,7 @@ public class WatcherLifeCycleService extends AbstractComponent implements Cluste
         // if this is not a data node, we need to start it ourselves possibly
         if (event.state().nodes().getLocalNode().isDataNode() == false &&
                 isWatcherStoppedManually == false && this.state.get() == WatcherState.STOPPED) {
+            this.state.set(WatcherState.STARTING);
             watcherService.start(event.state(), () -> this.state.set(WatcherState.STARTED));
             return;
         }

WatcherService.java

@@ -183,9 +183,6 @@ public class WatcherService extends AbstractComponent {
         // by checking the cluster state version before and after loading the watches we can potentially just exit without applying the
         // changes
         processedClusterStateVersion.set(state.getVersion());
-        triggerService.pauseExecution();
-        int cancelledTaskCount = executionService.clearExecutionsAndQueue();
-        logger.info("reloading watcher, reason [{}], cancelled [{}] queued tasks", reason, cancelledTaskCount);
         executor.execute(wrapWatcherService(() -> reloadInner(state, reason, false),
             e -> logger.error("error reloading watcher", e)));
@@ -221,6 +218,7 @@ public class WatcherService extends AbstractComponent {
         if (processedClusterStateVersion.get() != state.getVersion()) {
             logger.debug("watch service has not been reloaded for state [{}], another reload for state [{}] in progress",
                 state.getVersion(), processedClusterStateVersion.get());
+            return false;
         }

         Collection<Watch> watches = loadWatches(state);
@@ -231,7 +229,13 @@ public class WatcherService extends AbstractComponent {
         // if we had another state coming in the meantime, we will not start the trigger engines with these watches, but wait
         // until the others are loaded
+        // also this is the place where we pause the trigger service execution and clear the current execution service, so that we make sure
+        // that existing executions finish, but no new ones are executed
         if (processedClusterStateVersion.get() == state.getVersion()) {
+            triggerService.pauseExecution();
+            int cancelledTaskCount = executionService.clearExecutionsAndQueue();
+            logger.info("reloading watcher, reason [{}], cancelled [{}] queued tasks", reason, cancelledTaskCount);
             executionService.unPause();
             triggerService.start(watches);
             if (triggeredWatches.isEmpty() == false) {

TickerScheduleTriggerEngine.java

@@ -56,7 +56,7 @@ public class TickerScheduleTriggerEngine extends ScheduleTriggerEngine {
                 schedules.put(job.id(), new ActiveSchedule(job.id(), trigger.getSchedule(), startTime));
             }
         }
-        this.schedules.putAll(schedules);
+        this.schedules = schedules;
     }

     @Override

TickerScheduleEngineTests.java

@@ -35,7 +35,9 @@ import java.util.function.Consumer;
 import static org.elasticsearch.xpack.watcher.trigger.schedule.Schedules.daily;
 import static org.elasticsearch.xpack.watcher.trigger.schedule.Schedules.interval;
 import static org.elasticsearch.xpack.watcher.trigger.schedule.Schedules.weekly;
+import static org.hamcrest.Matchers.everyItem;
 import static org.hamcrest.Matchers.is;
+import static org.hamcrest.Matchers.startsWith;
 import static org.joda.time.DateTimeZone.UTC;
 import static org.mockito.Mockito.mock;
@@ -50,8 +52,12 @@ public class TickerScheduleEngineTests extends ESTestCase {
     }

     private TriggerEngine createEngine() {
-        return new TickerScheduleTriggerEngine(Settings.EMPTY,
-            mock(ScheduleRegistry.class), clock);
+        Settings settings = Settings.EMPTY;
+        // having a low value here speeds up the tests tremendously, we still want to run with the defaults every now and then
+        if (usually()) {
+            settings = Settings.builder().put(TickerScheduleTriggerEngine.TICKER_INTERVAL_SETTING.getKey(), "10ms").build();
+        }
+        return new TickerScheduleTriggerEngine(settings, mock(ScheduleRegistry.class), clock);
     }

     private void advanceClockIfNeeded(DateTime newCurrentDateTime) {
@@ -104,6 +110,40 @@ public class TickerScheduleEngineTests extends ESTestCase {
         assertThat(bits.cardinality(), is(count));
     }

+    public void testStartClearsExistingSchedules() throws Exception {
+        final CountDownLatch latch = new CountDownLatch(1);
+        List<String> firedWatchIds = new ArrayList<>();
+        engine.register(new Consumer<Iterable<TriggerEvent>>() {
+            @Override
+            public void accept(Iterable<TriggerEvent> events) {
+                for (TriggerEvent event : events) {
+                    firedWatchIds.add(event.jobName());
+                }
+                latch.countDown();
+            }
+        });
+
+        int count = randomIntBetween(2, 5);
+        List<Watch> watches = new ArrayList<>();
+        for (int i = 0; i < count; i++) {
+            watches.add(createWatch(String.valueOf(i), interval("1s")));
+        }
+        engine.start(watches);
+
+        watches.clear();
+        for (int i = 0; i < count; i++) {
+            watches.add(createWatch("another_id" + i, interval("1s")));
+        }
+        engine.start(watches);
+
+        advanceClockIfNeeded(new DateTime(clock.millis(), UTC).plusMillis(1100));
+        if (!latch.await(3 * count, TimeUnit.SECONDS)) {
+            fail("waiting too long for all watches to be triggered");
+        }
+
+        assertThat(firedWatchIds, everyItem(startsWith("another_id")));
+    }
+
     public void testAddHourly() throws Exception {
         final String name = "job_name";
         final CountDownLatch latch = new CountDownLatch(1);