Stop responding to ping requests before master abdication (#27329)

When the current master node is shutting down, it sends a leave request to the other nodes so that they can eagerly start a fresh master election. Unfortunately, the master node that was shutting down could still respond to ping requests, and because it still appeared as an active master in those ping responses, it could influence the election decision. This commit ensures that UnicastZenPing does not respond to ping requests once it has been closed. ZenDiscovery.doStop() continues to ensure that the pinging component is closed before it triggers a master election.

Closes #27328
This commit is contained in:
Yannick Welsch 2017-11-13 15:18:59 +01:00 committed by GitHub
parent 91a23de55e
commit c83f112b1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 15 additions and 1 deletions

View File

@@ -575,7 +575,8 @@ public class UnicastZenPing extends AbstractComponent implements ZenPing {
@Override
public void handleException(TransportException exp) {
if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException) {
if (exp instanceof ConnectTransportException || exp.getCause() instanceof ConnectTransportException ||
exp.getCause() instanceof AlreadyClosedException) {
// ok, not connected...
logger.trace((Supplier<?>) () -> new ParameterizedMessage("failed to connect to {}", node), exp);
} else if (closed == false) {
@@ -608,6 +609,9 @@ public class UnicastZenPing extends AbstractComponent implements ZenPing {
@Override
public void messageReceived(UnicastPingRequest request, TransportChannel channel) throws Exception {
if (closed) {
throw new AlreadyClosedException("node is shutting down");
}
if (request.pingResponse.clusterName().equals(clusterName)) {
channel.sendResponse(handlePingRequest(request));
} else {

View File

@@ -258,6 +258,16 @@ public class UnicastZenPingTests extends ESTestCase {
assertPingCount(handleD, handleA, 0);
assertPingCount(handleD, handleB, 0);
assertPingCount(handleD, handleC, 3);
zenPingC.close();
handleD.counters.clear();
logger.info("ping from UZP_D after closing UZP_C");
pingResponses = zenPingD.pingAndWait().toList();
// check that node does not respond to pings anymore after the ping service has been closed
assertThat(pingResponses.size(), equalTo(0));
assertPingCount(handleD, handleA, 0);
assertPingCount(handleD, handleB, 0);
assertPingCount(handleD, handleC, 3);
}
public void testUnknownHostNotCached() throws ExecutionException, InterruptedException {