From 9554b2fecb27b205d5e960e3fb12e0901b2badf9 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 15 Jan 2019 16:24:19 +0100 Subject: [PATCH] When removing an AutoFollower also mark it as removed. (#37402) Currently when there are no more auto follow patterns for a remote cluster then the AutoFollower instance for this remote cluster will be removed. If a new auto follow pattern for this remote cluster gets added quickly enough after the last delete then there may be two AutoFollower instance running for this remote cluster instead of one. Each AutoFollower instance stops automatically after it sees in the start() method that there are no more auto follow patterns for the remote cluster it is tracking. However when an auto follow pattern gets removed and then added back quickly enough then old AutoFollower may never detect that at some point there were no auto follow patterns for the remote cluster it is monitoring. The creation and removal of an AutoFollower instance happens independently in the `updateAutoFollowers()` as part of a cluster state update. By adding the `removed` field, an AutoFollower instance will not miss the fact there were no auto follow patterns at some point in time. The `updateAutoFollowers()` method now marks an AutoFollower instance as removed when it sees that there are no more patterns for a remote cluster. The updateAutoFollowers() method can then safely start a new AutoFollower instance. Relates to #36761 --- .../ccr/action/AutoFollowCoordinator.java | 33 +++++++++++++++++++ .../elasticsearch/xpack/ccr/AutoFollowIT.java | 7 +++- .../action/AutoFollowCoordinatorTests.java | 13 ++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/AutoFollowCoordinator.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/AutoFollowCoordinator.java index 55e24abc86c..e3b008efc56 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/AutoFollowCoordinator.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/AutoFollowCoordinator.java @@ -258,6 +258,7 @@ public class AutoFollowCoordinator implements ClusterStateListener { .anyMatch(pattern -> pattern.getRemoteCluster().equals(remoteCluster)); if (exist == false) { LOGGER.info("removing auto follower for remote cluster [{}]", remoteCluster); + autoFollower.removed = true; removedRemoteClusters.add(remoteCluster); } else if (autoFollower.remoteClusterConnectionMissing) { LOGGER.info("retrying auto follower [{}] after remote cluster connection was missing", remoteCluster); @@ -265,11 +266,25 @@ public class AutoFollowCoordinator implements ClusterStateListener { autoFollower.start(); } } + assert assertNoOtherActiveAutoFollower(newAutoFollowers); this.autoFollowers = autoFollowers .copyAndPutAll(newAutoFollowers) .copyAndRemoveAll(removedRemoteClusters); } + private boolean assertNoOtherActiveAutoFollower(Map newAutoFollowers) { + for (AutoFollower newAutoFollower : newAutoFollowers.values()) { + AutoFollower previousInstance = autoFollowers.get(newAutoFollower.remoteCluster); + assert previousInstance == null || previousInstance.removed; + } + return true; + } + + + Map getAutoFollowers() { + return autoFollowers; + } + @Override public void clusterChanged(ClusterChangedEvent event) { if (event.localNodeMaster()) { @@ -295,6 +310,7 @@ public class AutoFollowCoordinator implements ClusterStateListener { private volatile long lastAutoFollowTimeInMillis = -1; private volatile long metadataVersion = 0; private volatile boolean remoteClusterConnectionMissing = false; + volatile boolean removed = false; private volatile CountDown autoFollowPatternsCountDown; private volatile AtomicArray autoFollowResults; @@ -309,6 +325,17 @@ public class AutoFollowCoordinator implements ClusterStateListener { } void start() { + if (removed) { + // This check exists to avoid two AutoFollower instances a single remote cluster. + // (If an auto follow pattern is deleted and then added back quickly enough then + // the old AutoFollower instance still sees that there is an auto follow pattern + // for the remote cluster it is tracking and will continue to operate, while in + // the meantime in updateAutoFollowers() method another AutoFollower instance has been + // started for the same remote cluster.) + LOGGER.info("AutoFollower instance for cluster [{}] has been removed", remoteCluster); + return; + } + lastAutoFollowTimeInMillis = relativeTimeProvider.getAsLong(); final ClusterState clusterState = followerClusterStateSupplier.get(); final AutoFollowMetadata autoFollowMetadata = clusterState.metaData().custom(AutoFollowMetadata.TYPE); @@ -330,6 +357,12 @@ public class AutoFollowCoordinator implements ClusterStateListener { this.autoFollowResults = new AtomicArray<>(patterns.size()); getRemoteClusterState(remoteCluster, metadataVersion + 1, (remoteClusterStateResponse, remoteError) -> { + // Also check removed flag here, as it may take a while for this remote cluster state api call to return: + if (removed) { + LOGGER.info("AutoFollower instance for cluster [{}] has been removed", remoteCluster); + return; + } + if (remoteClusterStateResponse != null) { assert remoteError == null; if (remoteClusterStateResponse.isWaitForTimedOut()) { diff --git a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/AutoFollowIT.java b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/AutoFollowIT.java index 286e5badee1..70f62439236 100644 --- a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/AutoFollowIT.java +++ b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/AutoFollowIT.java @@ -157,15 +157,20 @@ public class AutoFollowIT extends CcrIntegTestCase { int expectedVal2 = numIndices; MetaData[] metaData = new MetaData[1]; + AutoFollowStats[] autoFollowStats = new AutoFollowStats[1]; try { assertBusy(() -> { metaData[0] = followerClient().admin().cluster().prepareState().get().getState().metaData(); + autoFollowStats[0] = getAutoFollowStats(); int count = (int) Arrays.stream(metaData[0].getConcreteAllIndices()).filter(s -> s.startsWith("copy-")).count(); assertThat(count, equalTo(expectedVal2)); + // Ensure that there are no auto follow errors: + // (added specifically to see that there are no leader indices auto followed multiple times) + assertThat(autoFollowStats[0].getRecentAutoFollowErrors().size(), equalTo(0)); }); } catch (AssertionError ae) { logger.warn("metadata={}", Strings.toString(metaData[0])); - logger.warn("auto follow stats={}", Strings.toString(getAutoFollowStats())); + logger.warn("auto follow stats={}", Strings.toString(autoFollowStats[0])); throw ae; } } diff --git a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/AutoFollowCoordinatorTests.java b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/AutoFollowCoordinatorTests.java index 1c6864088b5..2ac67a65fc1 100644 --- a/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/AutoFollowCoordinatorTests.java +++ b/x-pack/plugin/ccr/src/test/java/org/elasticsearch/xpack/ccr/action/AutoFollowCoordinatorTests.java @@ -620,6 +620,10 @@ public class AutoFollowCoordinatorTests extends ESTestCase { assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(2)); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote1"), notNullValue()); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue()); + // Get a reference to auto follower that will get removed, so that we can assert that it has been marked as removed, + // when pattern 1 and 3 are moved. (To avoid a edge case where multiple auto follow coordinators for the same remote cluster) + AutoFollowCoordinator.AutoFollower removedAutoFollower1 = autoFollowCoordinator.getAutoFollowers().get("remote1"); + assertThat(removedAutoFollower1.removed, is(false)); // Remove patterns 1 and 3: patterns.remove("pattern1"); patterns.remove("pattern3"); @@ -630,6 +634,7 @@ public class AutoFollowCoordinatorTests extends ESTestCase { autoFollowCoordinator.updateAutoFollowers(clusterState); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(1)); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue()); + assertThat(removedAutoFollower1.removed, is(true)); // Add pattern 4: patterns.put("pattern4", new AutoFollowPattern("remote1", Collections.singletonList("metrics-*"), null, null, null, null, null, null, null, null, null, null, null)); @@ -641,7 +646,13 @@ public class AutoFollowCoordinatorTests extends ESTestCase { assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(2)); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote1"), notNullValue()); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue()); + // Get references to auto followers that will get removed, so that we can assert that those have been marked as removed, + // when pattern 2 and 4 are moved. (To avoid a edge case where multiple auto follow coordinators for the same remote cluster) + removedAutoFollower1 = autoFollowCoordinator.getAutoFollowers().get("remote1"); + AutoFollower removedAutoFollower2 = autoFollowCoordinator.getAutoFollowers().get("remote2"); // Remove patterns 2 and 4: + assertThat(removedAutoFollower1.removed, is(false)); + assertThat(removedAutoFollower2.removed, is(false)); patterns.remove("pattern2"); patterns.remove("pattern4"); clusterState = ClusterState.builder(new ClusterName("remote")) @@ -650,6 +661,8 @@ public class AutoFollowCoordinatorTests extends ESTestCase { .build(); autoFollowCoordinator.updateAutoFollowers(clusterState); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(0)); + assertThat(removedAutoFollower1.removed, is(true)); + assertThat(removedAutoFollower2.removed, is(true)); } public void testUpdateAutoFollowersNoPatterns() {