From 9100a61bf61ba2b9b018e5ec8f7be178eff22243 Mon Sep 17 00:00:00 2001 From: Laksh Singla Date: Mon, 6 Feb 2023 19:13:39 +0530 Subject: [PATCH] Fix NPE in postCleanupStage if stage doesn't exist (#13742) With fault tolerance enabled in MSQ, not all the work orders might be populated if the worker is restarted. In case it gets the request for cleaning up the stage which is not present in the worker's map, it can throw an NPE. Added a check to ensure that the stage is present in the map before cleaning it up, or else logging it as a warning. --- .../main/java/org/apache/druid/msq/exec/WorkerImpl.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java index 3a8a0372348..01e832e109f 100644 --- a/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java +++ b/extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/WorkerImpl.java @@ -574,7 +574,12 @@ public class WorkerImpl implements Worker holder -> { cleanStageOutput(stageId, true); // Mark the stage as FINISHED - holder.getStageKernelMap().get(stageId).setStageFinished(); + WorkerStageKernel stageKernel = holder.getStageKernelMap().get(stageId); + if (stageKernel == null) { + log.warn("Stage id [%s] non existent. Unable to mark the stage kernel for it as FINISHED", stageId); + } else { + stageKernel.setStageFinished(); + } } ); }