Don't update nodes list when stepping down as master (#22049)
This commit simplifies the node update logic so that nodes are never removed from the cluster state when the cluster state is not published.
This commit is contained in:
parent
2592ff86ce
commit
a724f4eb61
|
@ -72,6 +72,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import static org.elasticsearch.common.unit.TimeValue.timeValueSeconds;
|
||||
|
@ -207,7 +208,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implements Discover
|
|||
joinThreadControl.start();
|
||||
zenPing.start(this);
|
||||
this.nodeJoinController = new NodeJoinController(clusterService, allocationService, electMaster, discoverySettings, settings);
|
||||
this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, electMaster, this::rejoin, logger);
|
||||
this.nodeRemovalExecutor = new NodeRemovalClusterStateTaskExecutor(allocationService, electMaster, this::submitRejoin, logger);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -306,18 +307,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implements Discover
|
|||
} catch (FailedToCommitClusterStateException t) {
|
||||
// cluster service logs a WARN message
|
||||
logger.debug("failed to publish cluster state version [{}] (not enough nodes acknowledged, min master nodes [{}])", clusterChangedEvent.state().version(), electMaster.minimumMasterNodes());
|
||||
clusterService.submitStateUpdateTask("zen-disco-failed-to-publish", new ClusterStateUpdateTask(Priority.IMMEDIATE) {
|
||||
@Override
|
||||
public ClusterState execute(ClusterState currentState) {
|
||||
return rejoin(currentState, "failed to publish to min_master_nodes");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onFailure(String source, Exception e) {
|
||||
logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
|
||||
}
|
||||
|
||||
});
|
||||
submitRejoin("zen-disco-failed-to-publish");
|
||||
throw t;
|
||||
}
|
||||
|
||||
|
@ -505,12 +495,27 @@ public class ZenDiscovery extends AbstractLifecycleComponent implements Discover
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Submits a cluster state update task, at {@code Priority.IMMEDIATE}, whose execution returns
 * the result of {@code rejoin(currentState, source)}.
 *
 * @param source passed as the first argument to {@code submitStateUpdateTask} and also as the
 *               second argument to {@code rejoin}
 */
private void submitRejoin(String source) {
|
||||
clusterService.submitStateUpdateTask(source, new ClusterStateUpdateTask(Priority.IMMEDIATE) {
|
||||
@Override
|
||||
public ClusterState execute(ClusterState currentState) {
|
||||
// "source" here is the enclosing method's parameter, captured by the anonymous class
return rejoin(currentState, source);
|
||||
}
|
||||
|
||||
@Override
|
||||
// NOTE: this "source" parameter shadows the enclosing method's parameter of the same name;
// the failure is only logged at error level here, nothing is rethrown from this callback
public void onFailure(String source, Exception e) {
|
||||
logger.error((Supplier<?>) () -> new ParameterizedMessage("unexpected failure during [{}]", source), e);
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
|
||||
// visible for testing
|
||||
static class NodeRemovalClusterStateTaskExecutor implements ClusterStateTaskExecutor<NodeRemovalClusterStateTaskExecutor.Task>, ClusterStateTaskListener {
|
||||
|
||||
private final AllocationService allocationService;
|
||||
private final ElectMasterService electMasterService;
|
||||
private final BiFunction<ClusterState, String, ClusterState> rejoin;
|
||||
private final Consumer<String> rejoin;
|
||||
private final Logger logger;
|
||||
|
||||
static class Task {
|
||||
|
@ -540,7 +545,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implements Discover
|
|||
NodeRemovalClusterStateTaskExecutor(
|
||||
final AllocationService allocationService,
|
||||
final ElectMasterService electMasterService,
|
||||
final BiFunction<ClusterState, String, ClusterState> rejoin,
|
||||
final Consumer<String> rejoin,
|
||||
final Logger logger) {
|
||||
this.allocationService = allocationService;
|
||||
this.electMasterService = electMasterService;
|
||||
|
@ -570,7 +575,8 @@ public class ZenDiscovery extends AbstractLifecycleComponent implements Discover
|
|||
|
||||
final BatchResult.Builder<Task> resultBuilder = BatchResult.<Task>builder().successes(tasks);
|
||||
if (!electMasterService.hasEnoughMasterNodes(remainingNodesClusterState.nodes())) {
|
||||
return resultBuilder.build(rejoin.apply(remainingNodesClusterState, "not enough master nodes"));
|
||||
rejoin.accept("not enough master nodes");
|
||||
return resultBuilder.build(currentState);
|
||||
} else {
|
||||
return resultBuilder.build(allocationService.deassociateDeadNodes(remainingNodesClusterState, true, describeTasks(tasks)));
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ import java.util.List;
|
|||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.function.BiFunction;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
|
@ -77,17 +78,12 @@ public class NodeRemovalClusterStateTaskExecutorTests extends ESTestCase {
|
|||
|
||||
final AllocationService allocationService = mock(AllocationService.class);
|
||||
|
||||
final AtomicBoolean rejoined = new AtomicBoolean();
|
||||
final AtomicReference<ClusterState> rejoinedClusterState = new AtomicReference<>();
|
||||
final BiFunction<ClusterState, String, ClusterState> rejoin = (cs, r) -> {
|
||||
rejoined.set(true);
|
||||
rejoinedClusterState.set(ClusterState.builder(cs).build());
|
||||
return rejoinedClusterState.get();
|
||||
};
|
||||
final AtomicBoolean rejoinCalled = new AtomicBoolean();
|
||||
final Consumer<String> submitRejoin = source -> rejoinCalled.set(true);
|
||||
|
||||
final AtomicReference<ClusterState> remainingNodesClusterState = new AtomicReference<>();
|
||||
final ZenDiscovery.NodeRemovalClusterStateTaskExecutor executor =
|
||||
new ZenDiscovery.NodeRemovalClusterStateTaskExecutor(allocationService, electMasterService, rejoin, logger) {
|
||||
new ZenDiscovery.NodeRemovalClusterStateTaskExecutor(allocationService, electMasterService, submitRejoin, logger) {
|
||||
@Override
|
||||
ClusterState remainingNodesClusterState(ClusterState currentState, DiscoveryNodes.Builder remainingNodesBuilder) {
|
||||
remainingNodesClusterState.set(super.remainingNodesClusterState(currentState, remainingNodesBuilder));
|
||||
|
@ -117,11 +113,11 @@ public class NodeRemovalClusterStateTaskExecutorTests extends ESTestCase {
|
|||
|
||||
// ensure that we did not reroute
|
||||
verifyNoMoreInteractions(allocationService);
|
||||
assertTrue(rejoined.get());
|
||||
assertThat(result.resultingState, equalTo(rejoinedClusterState.get()));
|
||||
assertTrue(rejoinCalled.get());
|
||||
assertThat(result.resultingState, equalTo(clusterState));
|
||||
|
||||
for (final ZenDiscovery.NodeRemovalClusterStateTaskExecutor.Task task : tasks) {
|
||||
assertNull(result.resultingState.nodes().get(task.node().getId()));
|
||||
assertNotNull(result.resultingState.nodes().get(task.node().getId()));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -133,14 +129,11 @@ public class NodeRemovalClusterStateTaskExecutorTests extends ESTestCase {
|
|||
when(allocationService.deassociateDeadNodes(any(ClusterState.class), eq(true), any(String.class)))
|
||||
.thenAnswer(im -> im.getArguments()[0]);
|
||||
|
||||
final BiFunction<ClusterState, String, ClusterState> rejoin = (cs, r) -> {
|
||||
fail("rejoin should not be invoked");
|
||||
return cs;
|
||||
};
|
||||
final Consumer<String> submitRejoin = source -> fail("rejoin should not be invoked");
|
||||
|
||||
final AtomicReference<ClusterState> remainingNodesClusterState = new AtomicReference<>();
|
||||
final ZenDiscovery.NodeRemovalClusterStateTaskExecutor executor =
|
||||
new ZenDiscovery.NodeRemovalClusterStateTaskExecutor(allocationService, electMasterService, rejoin, logger) {
|
||||
new ZenDiscovery.NodeRemovalClusterStateTaskExecutor(allocationService, electMasterService, submitRejoin, logger) {
|
||||
@Override
|
||||
ClusterState remainingNodesClusterState(ClusterState currentState, DiscoveryNodes.Builder remainingNodesBuilder) {
|
||||
remainingNodesClusterState.set(super.remainingNodesClusterState(currentState, remainingNodesBuilder));
|
||||
|
|
Loading…
Reference in New Issue