diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/IdLock.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/IdLock.java index 414cc66fd8f..c4adfbff154 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/IdLock.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/IdLock.java @@ -81,6 +81,17 @@ public class IdLock { existing.wait(); } catch (InterruptedException e) { --existing.numWaiters; // Remove ourselves from waiters. + // HBASE-21292 + // There is a rare case where a waiting thread is interrupted while the lock owner + // thread calls releaseLockEntry at the same time. Since the owner thread sees that there + // is still one waiter, it won't remove the entry from the map. If the interrupted + // thread is the last one waiting on the lock, then, because an exception is thrown, + // the 'existing' entry will stay in the map forever. Later threads which try to + // get this lock will be stuck in an infinite loop because + // (existing = map.putIfAbsent(entry.id, entry)) != null and existing.locked is false. + if (!existing.locked && existing.numWaiters == 0) { + map.remove(existing.id); + } throw new InterruptedIOException( "Interrupted waiting to acquire sparse lock"); } @@ -135,6 +146,12 @@ public class IdLock { } } catch (InterruptedException e) { + // HBASE-21292 + // Please refer to the comments in getLockEntry(). + // The difference here is that we decrease numWaiters in the finally block. + if (!existing.locked && existing.numWaiters == 1) { + map.remove(existing.id); + } throw new InterruptedIOException( "Interrupted waiting to acquire sparse lock"); } finally {