From 1afedc608e3b299f705c73331e279a8a7065d31e Mon Sep 17 00:00:00 2001 From: Allan Yang Date: Thu, 18 Oct 2018 14:40:38 -0700 Subject: [PATCH] HBASE-21292 IdLock.getLockEntry() may hang if interrupted --- .../org/apache/hadoop/hbase/util/IdLock.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/IdLock.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/IdLock.java index 414cc66fd8f..c4adfbff154 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/IdLock.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/IdLock.java @@ -81,6 +81,17 @@ public class IdLock { existing.wait(); } catch (InterruptedException e) { --existing.numWaiters; // Remove ourselves from waiters. + // HBASE-21292 + // There is a rare case where a waiter is interrupted and the lock owner thread calls + // releaseLockEntry at the same time. Since the owner thread finds that there is + // still one waiter, it won't remove the entry from the map. If the interrupted + // thread is the last one waiting on the lock, and since an exception is thrown, + // the 'existing' entry will stay in the map forever. Later threads which try to + // get this lock will be stuck in an infinite loop because + // (existing = map.putIfAbsent(entry.id, entry)) != null and existing.locked=false. + if (!existing.locked && existing.numWaiters == 0) { + map.remove(existing.id); + } throw new InterruptedIOException( "Interrupted waiting to acquire sparse lock"); } @@ -135,6 +146,12 @@ public class IdLock { } } catch (InterruptedException e) { + // HBASE-21292 + // Please refer to the comments in getLockEntry(). + // The difference here is that we decrement numWaiters in the finally block. + if (!existing.locked && existing.numWaiters == 1) { + map.remove(existing.id); + } throw new InterruptedIOException( "Interrupted waiting to acquire sparse lock"); } finally {