Improve exception handling on TransportMasterNodeAction (#29314)
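We have seen exceptions bubble up to the uncaught exception handler. Checking the cluster blocks can, for example, throw an IndexNotFoundException when the indices are resolved. To make TransportMasterNodeAction more resilient against such expected exceptions, this change wraps the execution of doStart() in a try/catch and notifies the listener when a failure occurs. An exception thrown while re-checking the blocks inside the retry predicate is also caught: the new cluster state is accepted, so that doStart() re-runs the check and reports the failure to the listener.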
This commit is contained in:
parent
2dc546ccec
commit
d4538df893
|
@ -145,6 +145,7 @@ public abstract class TransportMasterNodeAction<Request extends MasterNodeReques
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void doStart(ClusterState clusterState) {
|
protected void doStart(ClusterState clusterState) {
|
||||||
|
try {
|
||||||
final Predicate<ClusterState> masterChangePredicate = MasterNodeChangePredicate.build(clusterState);
|
final Predicate<ClusterState> masterChangePredicate = MasterNodeChangePredicate.build(clusterState);
|
||||||
final DiscoveryNodes nodes = clusterState.nodes();
|
final DiscoveryNodes nodes = clusterState.nodes();
|
||||||
if (nodes.isLocalNodeElectedMaster() || localExecute(request)) {
|
if (nodes.isLocalNodeElectedMaster() || localExecute(request)) {
|
||||||
|
@ -156,8 +157,14 @@ public abstract class TransportMasterNodeAction<Request extends MasterNodeReques
|
||||||
} else {
|
} else {
|
||||||
logger.trace("can't execute due to a cluster block, retrying", blockException);
|
logger.trace("can't execute due to a cluster block, retrying", blockException);
|
||||||
retry(blockException, newState -> {
|
retry(blockException, newState -> {
|
||||||
|
try {
|
||||||
ClusterBlockException newException = checkBlock(request, newState);
|
ClusterBlockException newException = checkBlock(request, newState);
|
||||||
return (newException == null || !newException.retryable());
|
return (newException == null || !newException.retryable());
|
||||||
|
} catch (Exception e) {
|
||||||
|
// accept state as block will be rechecked by doStart() and listener.onFailure() then called
|
||||||
|
logger.trace("exception occurred during cluster block checking, accepting state", e);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -209,6 +216,9 @@ public abstract class TransportMasterNodeAction<Request extends MasterNodeReques
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
listener.onFailure(e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void retry(final Throwable failure, final Predicate<ClusterState> statePredicate) {
|
private void retry(final Throwable failure, final Predicate<ClusterState> statePredicate) {
|
||||||
|
|
|
@ -242,6 +242,39 @@ public class TransportMasterNodeActionTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCheckBlockThrowsException() throws InterruptedException {
|
||||||
|
boolean throwExceptionOnRetry = randomBoolean();
|
||||||
|
Request request = new Request().masterNodeTimeout(TimeValue.timeValueSeconds(60));
|
||||||
|
PlainActionFuture<Response> listener = new PlainActionFuture<>();
|
||||||
|
|
||||||
|
ClusterBlock block = new ClusterBlock(1, "", true, true,
|
||||||
|
false, randomFrom(RestStatus.values()), ClusterBlockLevel.ALL);
|
||||||
|
ClusterState stateWithBlock = ClusterState.builder(ClusterStateCreationUtils.state(localNode, localNode, allNodes))
|
||||||
|
.blocks(ClusterBlocks.builder().addGlobalBlock(block)).build();
|
||||||
|
setState(clusterService, stateWithBlock);
|
||||||
|
|
||||||
|
new Action(Settings.EMPTY, "testAction", transportService, clusterService, threadPool) {
|
||||||
|
@Override
|
||||||
|
protected ClusterBlockException checkBlock(Request request, ClusterState state) {
|
||||||
|
Set<ClusterBlock> blocks = state.blocks().global();
|
||||||
|
if (throwExceptionOnRetry == false || blocks.isEmpty()) {
|
||||||
|
throw new RuntimeException("checkBlock has thrown exception");
|
||||||
|
}
|
||||||
|
return new ClusterBlockException(blocks);
|
||||||
|
|
||||||
|
}
|
||||||
|
}.execute(request, listener);
|
||||||
|
|
||||||
|
if (throwExceptionOnRetry == false) {
|
||||||
|
assertListenerThrows("checkBlock has thrown exception", listener, RuntimeException.class);
|
||||||
|
} else {
|
||||||
|
assertFalse(listener.isDone());
|
||||||
|
setState(clusterService, ClusterState.builder(ClusterStateCreationUtils.state(localNode, localNode, allNodes))
|
||||||
|
.blocks(ClusterBlocks.EMPTY_CLUSTER_BLOCK).build());
|
||||||
|
assertListenerThrows("checkBlock has thrown exception", listener, RuntimeException.class);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testForceLocalOperation() throws ExecutionException, InterruptedException {
|
public void testForceLocalOperation() throws ExecutionException, InterruptedException {
|
||||||
Request request = new Request();
|
Request request = new Request();
|
||||||
PlainActionFuture<Response> listener = new PlainActionFuture<>();
|
PlainActionFuture<Response> listener = new PlainActionFuture<>();
|
||||||
|
|
Loading…
Reference in New Issue