From df01998213d31ba7231afd5d61395e82e2e4ac09 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Thu, 3 May 2018 09:59:01 -0700 Subject: [PATCH] SegmentLoadDropHandler: Fix deadlock when segments have errors loading on startup. (#5735) The "lock" object was used to synchronize start/stop as well as synchronize removals from segmentsToDelete (when a segment is done dropping). This could cause a deadlock if a segment-load throws an exception during loadLocalCache. loadLocalCache is run by start() while it holds the lock, but then it spawns loading threads, and those threads will try to acquire the "segmentsToDelete" lock if they want to drop a corrupt segments. I don't see any reason for these two locks to be the same lock, so I split them. --- .../coordination/SegmentLoadDropHandler.java | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java b/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java index d0985b7cfee..312ef20769a 100644 --- a/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java +++ b/server/src/main/java/io/druid/server/coordination/SegmentLoadDropHandler.java @@ -73,7 +73,11 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler { private static final EmittingLogger log = new EmittingLogger(SegmentLoadDropHandler.class); - private final Object lock = new Object(); + // Synchronizes removals from segmentsToDelete + private final Object segmentDeleteLock = new Object(); + + // Synchronizes start/stop of this object. + private final Object startStopLock = new Object(); private final ObjectMapper jsonMapper; private final SegmentLoaderConfig config; @@ -137,7 +141,7 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler @LifecycleStart public void start() throws IOException { - synchronized (lock) { + synchronized (startStopLock) { if (started) { return; } @@ -159,7 +163,7 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler @LifecycleStop public void stop() { - synchronized (lock) { + synchronized (startStopLock) { if (!started) { return; } @@ -296,7 +300,7 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler things slow. Given that in most cases segmentsToDelete.contains(segment) returns false, it will save a lot of cost of acquiring lock by doing the "contains" check outside the synchronized block. */ - synchronized (lock) { + synchronized (segmentDeleteLock) { segmentsToDelete.remove(segment); } } @@ -423,7 +427,7 @@ public class SegmentLoadDropHandler implements DataSegmentChangeHandler public void run() { try { - synchronized (lock) { + synchronized (segmentDeleteLock) { if (segmentsToDelete.remove(segment)) { segmentManager.dropSegment(segment);