From 4405698ee99fe26d0ac9317a2df96096f2731a7b Mon Sep 17 00:00:00 2001
From: Jonathan Hsieh <jmhsieh@apache.org>
Date: Wed, 13 Feb 2013 19:13:38 +0000
Subject: [PATCH] HBASE-7703 Eventually all online snapshots fail due to
 Timeout at same regionserver.

Online snapshot attempts would fail with a timeout because a row lock could not be obtained.  Before these failures, a
cancellation had occurred, which likely left the lock held without it being properly released. The fix here is to use a
non-interrupting cancel (cancel(false)) instead of an interrupting cancel (cancel(true)) on failures.



git-svn-id: https://svn.apache.org/repos/asf/hbase/branches/hbase-7290@1445866 13f79535-47bb-0310-9956-ffa450edef68
---
 .../regionserver/snapshot/RegionServerSnapshotManager.java  | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java
index 3e5238e7b28..1282585d52e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java
@@ -347,7 +347,11 @@ public class RegionServerSnapshotManager {
       Collection<Future<Void>> tasks = futures;
       LOG.debug("cancelling " + tasks.size() + " tasks for snapshot " + name);
       for (Future<Void> f: tasks) {
-        f.cancel(true);
+        // TODO Ideally we'd interrupt hbase threads when we cancel.  However it seems that there
+        // are places in the HBase code where row/region locks are taken and not released in a
+        // finally block.  Thus we cancel without interrupting.  Cancellations will be slower to
+        // complete but we won't suffer from unreleased locks due to poor code discipline.
+        f.cancel(false);
       }
 
       // evict remaining tasks and futures from taskPool.