When removing an AutoFollower also mark it as removed. (#37402)

Currently when there are no more auto follow patterns for a remote cluster then
the AutoFollower instance for this remote cluster will be removed. If
a new auto follow pattern for this remote cluster gets added quickly enough
after the last delete then there may be two AutoFollower instance running
for this remote cluster instead of one.

Each AutoFollower instance stops automatically after it sees in the
start() method that there are no more auto follow patterns for the
remote cluster it is tracking. However when an auto follow pattern
gets removed and then added back quickly enough then old AutoFollower
may never detect that at some point there were no auto follow patterns
for the remote cluster it is monitoring. The creation and removal of
an AutoFollower instance happens independently in the `updateAutoFollowers()`
as part of a cluster state update.

By adding the `removed` field, an AutoFollower instance will not miss the
fact there were no auto follow patterns at some point in time. The
`updateAutoFollowers()` method now marks an AutoFollower instance as
removed when it sees that there are no more patterns for a remote cluster.
The updateAutoFollowers() method can then safely start a new AutoFollower
instance.

Relates to #36761
This commit is contained in:
Martijn van Groningen 2019-01-15 16:24:19 +01:00 committed by GitHub
parent 3cc8f39532
commit 9554b2fecb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 52 additions and 1 deletions

View File

@ -258,6 +258,7 @@ public class AutoFollowCoordinator implements ClusterStateListener {
.anyMatch(pattern -> pattern.getRemoteCluster().equals(remoteCluster)); .anyMatch(pattern -> pattern.getRemoteCluster().equals(remoteCluster));
if (exist == false) { if (exist == false) {
LOGGER.info("removing auto follower for remote cluster [{}]", remoteCluster); LOGGER.info("removing auto follower for remote cluster [{}]", remoteCluster);
autoFollower.removed = true;
removedRemoteClusters.add(remoteCluster); removedRemoteClusters.add(remoteCluster);
} else if (autoFollower.remoteClusterConnectionMissing) { } else if (autoFollower.remoteClusterConnectionMissing) {
LOGGER.info("retrying auto follower [{}] after remote cluster connection was missing", remoteCluster); LOGGER.info("retrying auto follower [{}] after remote cluster connection was missing", remoteCluster);
@ -265,11 +266,25 @@ public class AutoFollowCoordinator implements ClusterStateListener {
autoFollower.start(); autoFollower.start();
} }
} }
assert assertNoOtherActiveAutoFollower(newAutoFollowers);
this.autoFollowers = autoFollowers this.autoFollowers = autoFollowers
.copyAndPutAll(newAutoFollowers) .copyAndPutAll(newAutoFollowers)
.copyAndRemoveAll(removedRemoteClusters); .copyAndRemoveAll(removedRemoteClusters);
} }
private boolean assertNoOtherActiveAutoFollower(Map<String, AutoFollower> newAutoFollowers) {
for (AutoFollower newAutoFollower : newAutoFollowers.values()) {
AutoFollower previousInstance = autoFollowers.get(newAutoFollower.remoteCluster);
assert previousInstance == null || previousInstance.removed;
}
return true;
}
Map<String, AutoFollower> getAutoFollowers() {
return autoFollowers;
}
@Override @Override
public void clusterChanged(ClusterChangedEvent event) { public void clusterChanged(ClusterChangedEvent event) {
if (event.localNodeMaster()) { if (event.localNodeMaster()) {
@ -295,6 +310,7 @@ public class AutoFollowCoordinator implements ClusterStateListener {
private volatile long lastAutoFollowTimeInMillis = -1; private volatile long lastAutoFollowTimeInMillis = -1;
private volatile long metadataVersion = 0; private volatile long metadataVersion = 0;
private volatile boolean remoteClusterConnectionMissing = false; private volatile boolean remoteClusterConnectionMissing = false;
volatile boolean removed = false;
private volatile CountDown autoFollowPatternsCountDown; private volatile CountDown autoFollowPatternsCountDown;
private volatile AtomicArray<AutoFollowResult> autoFollowResults; private volatile AtomicArray<AutoFollowResult> autoFollowResults;
@ -309,6 +325,17 @@ public class AutoFollowCoordinator implements ClusterStateListener {
} }
void start() { void start() {
if (removed) {
// This check exists to avoid two AutoFollower instances a single remote cluster.
// (If an auto follow pattern is deleted and then added back quickly enough then
// the old AutoFollower instance still sees that there is an auto follow pattern
// for the remote cluster it is tracking and will continue to operate, while in
// the meantime in updateAutoFollowers() method another AutoFollower instance has been
// started for the same remote cluster.)
LOGGER.info("AutoFollower instance for cluster [{}] has been removed", remoteCluster);
return;
}
lastAutoFollowTimeInMillis = relativeTimeProvider.getAsLong(); lastAutoFollowTimeInMillis = relativeTimeProvider.getAsLong();
final ClusterState clusterState = followerClusterStateSupplier.get(); final ClusterState clusterState = followerClusterStateSupplier.get();
final AutoFollowMetadata autoFollowMetadata = clusterState.metaData().custom(AutoFollowMetadata.TYPE); final AutoFollowMetadata autoFollowMetadata = clusterState.metaData().custom(AutoFollowMetadata.TYPE);
@ -330,6 +357,12 @@ public class AutoFollowCoordinator implements ClusterStateListener {
this.autoFollowResults = new AtomicArray<>(patterns.size()); this.autoFollowResults = new AtomicArray<>(patterns.size());
getRemoteClusterState(remoteCluster, metadataVersion + 1, (remoteClusterStateResponse, remoteError) -> { getRemoteClusterState(remoteCluster, metadataVersion + 1, (remoteClusterStateResponse, remoteError) -> {
// Also check removed flag here, as it may take a while for this remote cluster state api call to return:
if (removed) {
LOGGER.info("AutoFollower instance for cluster [{}] has been removed", remoteCluster);
return;
}
if (remoteClusterStateResponse != null) { if (remoteClusterStateResponse != null) {
assert remoteError == null; assert remoteError == null;
if (remoteClusterStateResponse.isWaitForTimedOut()) { if (remoteClusterStateResponse.isWaitForTimedOut()) {

View File

@ -157,15 +157,20 @@ public class AutoFollowIT extends CcrIntegTestCase {
int expectedVal2 = numIndices; int expectedVal2 = numIndices;
MetaData[] metaData = new MetaData[1]; MetaData[] metaData = new MetaData[1];
AutoFollowStats[] autoFollowStats = new AutoFollowStats[1];
try { try {
assertBusy(() -> { assertBusy(() -> {
metaData[0] = followerClient().admin().cluster().prepareState().get().getState().metaData(); metaData[0] = followerClient().admin().cluster().prepareState().get().getState().metaData();
autoFollowStats[0] = getAutoFollowStats();
int count = (int) Arrays.stream(metaData[0].getConcreteAllIndices()).filter(s -> s.startsWith("copy-")).count(); int count = (int) Arrays.stream(metaData[0].getConcreteAllIndices()).filter(s -> s.startsWith("copy-")).count();
assertThat(count, equalTo(expectedVal2)); assertThat(count, equalTo(expectedVal2));
// Ensure that there are no auto follow errors:
// (added specifically to see that there are no leader indices auto followed multiple times)
assertThat(autoFollowStats[0].getRecentAutoFollowErrors().size(), equalTo(0));
}); });
} catch (AssertionError ae) { } catch (AssertionError ae) {
logger.warn("metadata={}", Strings.toString(metaData[0])); logger.warn("metadata={}", Strings.toString(metaData[0]));
logger.warn("auto follow stats={}", Strings.toString(getAutoFollowStats())); logger.warn("auto follow stats={}", Strings.toString(autoFollowStats[0]));
throw ae; throw ae;
} }
} }

View File

@ -620,6 +620,10 @@ public class AutoFollowCoordinatorTests extends ESTestCase {
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(2)); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(2));
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote1"), notNullValue()); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote1"), notNullValue());
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue()); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue());
// Get a reference to auto follower that will get removed, so that we can assert that it has been marked as removed,
// when pattern 1 and 3 are moved. (To avoid a edge case where multiple auto follow coordinators for the same remote cluster)
AutoFollowCoordinator.AutoFollower removedAutoFollower1 = autoFollowCoordinator.getAutoFollowers().get("remote1");
assertThat(removedAutoFollower1.removed, is(false));
// Remove patterns 1 and 3: // Remove patterns 1 and 3:
patterns.remove("pattern1"); patterns.remove("pattern1");
patterns.remove("pattern3"); patterns.remove("pattern3");
@ -630,6 +634,7 @@ public class AutoFollowCoordinatorTests extends ESTestCase {
autoFollowCoordinator.updateAutoFollowers(clusterState); autoFollowCoordinator.updateAutoFollowers(clusterState);
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(1)); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(1));
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue()); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue());
assertThat(removedAutoFollower1.removed, is(true));
// Add pattern 4: // Add pattern 4:
patterns.put("pattern4", new AutoFollowPattern("remote1", Collections.singletonList("metrics-*"), null, null, null, patterns.put("pattern4", new AutoFollowPattern("remote1", Collections.singletonList("metrics-*"), null, null, null,
null, null, null, null, null, null, null, null)); null, null, null, null, null, null, null, null));
@ -641,7 +646,13 @@ public class AutoFollowCoordinatorTests extends ESTestCase {
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(2)); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(2));
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote1"), notNullValue()); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote1"), notNullValue());
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue()); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().get("remote2"), notNullValue());
// Get references to auto followers that will get removed, so that we can assert that those have been marked as removed,
// when pattern 2 and 4 are moved. (To avoid a edge case where multiple auto follow coordinators for the same remote cluster)
removedAutoFollower1 = autoFollowCoordinator.getAutoFollowers().get("remote1");
AutoFollower removedAutoFollower2 = autoFollowCoordinator.getAutoFollowers().get("remote2");
// Remove patterns 2 and 4: // Remove patterns 2 and 4:
assertThat(removedAutoFollower1.removed, is(false));
assertThat(removedAutoFollower2.removed, is(false));
patterns.remove("pattern2"); patterns.remove("pattern2");
patterns.remove("pattern4"); patterns.remove("pattern4");
clusterState = ClusterState.builder(new ClusterName("remote")) clusterState = ClusterState.builder(new ClusterName("remote"))
@ -650,6 +661,8 @@ public class AutoFollowCoordinatorTests extends ESTestCase {
.build(); .build();
autoFollowCoordinator.updateAutoFollowers(clusterState); autoFollowCoordinator.updateAutoFollowers(clusterState);
assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(0)); assertThat(autoFollowCoordinator.getStats().getAutoFollowedClusters().size(), equalTo(0));
assertThat(removedAutoFollower1.removed, is(true));
assertThat(removedAutoFollower2.removed, is(true));
} }
public void testUpdateAutoFollowersNoPatterns() { public void testUpdateAutoFollowersNoPatterns() {