TransportMasterNodeOperationAction: retry operation if cluster state version changed while adding a ClusterStateListener

TransportMasterNodeOperationAction forwards incoming requests to the currently known master node. If that fails due to a connection error, a cluster state listener is added in order to try again when a new master is elected. After the listener is in place, a check was made to see if the master had changed *while* the listener was being added, so that the change would not be missed. That check was not enough: the same master may have been re-elected (for example, after a network hiccup), in which case the master-id comparison detects no change even though the re-election event was missed. In these cases, the request would time out unjustly. This commit makes the check stricter: the operation is retried if the cluster state version changed while the listener was being added.

Closes #5499
This commit is contained in:
Boaz Leskes 2014-03-23 22:45:09 +01:00
parent 034ce75250
commit c650ee47f3
1 changed files with 6 additions and 2 deletions

View File

@ -157,6 +157,8 @@ public abstract class TransportMasterNodeOperationAction<Request extends MasterN
if (retrying) { if (retrying) {
listener.onFailure(new MasterNotDiscoveredException()); listener.onFailure(new MasterNotDiscoveredException());
} else { } else {
logger.debug("no known master node, scheduling a retry");
clusterService.add(request.masterNodeTimeout(), new TimeoutClusterStateListener() { clusterService.add(request.masterNodeTimeout(), new TimeoutClusterStateListener() {
@Override @Override
public void postAdded() { public void postAdded() {
@ -212,12 +214,14 @@ public abstract class TransportMasterNodeOperationAction<Request extends MasterN
public void handleException(final TransportException exp) { public void handleException(final TransportException exp) {
if (exp.unwrapCause() instanceof ConnectTransportException) { if (exp.unwrapCause() instanceof ConnectTransportException) {
// we want to retry here a bit to see if a new master is elected // we want to retry here a bit to see if a new master is elected
logger.debug("connection exception while trying to forward request to master node [{}], scheduling a retry. Error: [{}]",
nodes.masterNode(), exp.getDetailedMessage());
clusterService.add(request.masterNodeTimeout(), new TimeoutClusterStateListener() { clusterService.add(request.masterNodeTimeout(), new TimeoutClusterStateListener() {
@Override @Override
public void postAdded() { public void postAdded() {
ClusterState clusterStateV2 = clusterService.state(); ClusterState clusterStateV2 = clusterService.state();
if (!clusterState.nodes().masterNodeId().equals(clusterStateV2.nodes().masterNodeId())) { if (clusterState.version() != clusterStateV2.version()) {
// master changes while adding the listener, try here // something changed while adding, try again
clusterService.remove(this); clusterService.remove(this);
innerExecute(request, listener, false); innerExecute(request, listener, false);
} }