* With very low likelihood, we might be cancelling a previous publication instead of the one triggered by the given request. * Fixed by waiting until no publication is in progress before executing the request. * Also added debug logging that would have identified this problem. * Closes #36813
This commit is contained in:
parent
c4f44024af
commit
a94d24ae5a
|
@ -1198,9 +1198,13 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
|
|||
*/
|
||||
boolean cancelCommittedPublication() {
|
||||
synchronized (mutex) {
|
||||
if (currentPublication.isPresent() && currentPublication.get().isCommitted()) {
|
||||
currentPublication.get().cancel("cancelCommittedPublication");
|
||||
return true;
|
||||
if (currentPublication.isPresent()) {
|
||||
final CoordinatorPublication publication = currentPublication.get();
|
||||
if (publication.isCommitted()) {
|
||||
publication.cancel("cancelCommittedPublication");
|
||||
logger.debug("Cancelled publication of [{}].", publication);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -141,6 +141,10 @@ public class RareClusterStateIT extends ESIntegTestCase {
|
|||
|
||||
private <Req extends ActionRequest, Res extends ActionResponse> ActionFuture<Res> executeAndCancelCommittedPublication(
|
||||
ActionRequestBuilder<Req, Res> req) throws Exception {
|
||||
// Wait for no publication in progress to not accidentally cancel a publication different from the one triggered by the given
|
||||
// request.
|
||||
assertBusy(
|
||||
() -> assertFalse(((Coordinator) internalCluster().getCurrentMasterNodeInstance(Discovery.class)).publicationInProgress()));
|
||||
ActionFuture<Res> future = req.execute();
|
||||
assertBusy(
|
||||
() -> assertTrue(((Coordinator)internalCluster().getCurrentMasterNodeInstance(Discovery.class)).cancelCommittedPublication()));
|
||||
|
|
Loading…
Reference in New Issue