From 3509f88c48bea3e669ba5f3e8cb7d3ab6ff44375 Mon Sep 17 00:00:00 2001 From: Jim Kellerman Date: Tue, 5 Jun 2007 15:11:57 +0000 Subject: [PATCH] HADOOP-1460 On shutdown IOException with complaint 'Cannot cancel lease that is not held' git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@544512 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + conf/hbase-site.xml | 50 ------------------- src/java/org/apache/hadoop/hbase/HMaster.java | 44 +++++++++------- src/java/org/apache/hadoop/hbase/Leases.java | 43 ++++++++++++---- 4 files changed, 59 insertions(+), 80 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index edbe5c48226..a17dbdd36aa 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -23,3 +23,5 @@ Trunk (unreleased changes) 12. HADOOP-1392. Part2: includes table compaction by merging adjacent regions that have shrunk in size. 13. HADOOP-1445 Support updates across region splits and compactions + 14. HADOOP-1460 On shutdown IOException with complaint 'Cannot cancel lease + that is not held' diff --git a/conf/hbase-site.xml b/conf/hbase-site.xml index 1d609ea72a8..c1e46d1481a 100644 --- a/conf/hbase-site.xml +++ b/conf/hbase-site.xml @@ -1,54 +1,4 @@ - - hbase.regiondir - hbase - The directory shared by region servers. - - - - hbase.regionserver.msginterval - 1000 - Interval between messages from the RegionServer to HMaster - in milliseconds. Default is 15. Set this value low if you want unit - tests to be responsive. - - - - diff --git a/src/java/org/apache/hadoop/hbase/HMaster.java b/src/java/org/apache/hadoop/hbase/HMaster.java index ed45cbba709..e5ec425c45f 100644 --- a/src/java/org/apache/hadoop/hbase/HMaster.java +++ b/src/java/org/apache/hadoop/hbase/HMaster.java @@ -738,17 +738,17 @@ public class HMaster implements HConstants, HMasterInterface, /** HRegionServers call this method upon startup. */ public void regionServerStartup(HServerInfo serverInfo) throws IOException { - String server = serverInfo.getServerAddress().toString().trim(); + String s = serverInfo.getServerAddress().toString().trim(); HServerInfo storedInfo = null; if(LOG.isDebugEnabled()) { - LOG.debug("received start message from: " + server); + LOG.debug("received start message from: " + s); } // If we get the startup message but there's an old server by that // name, then we can timeout the old one right away and register // the new one. - storedInfo = serversToServerInfo.remove(server); + storedInfo = serversToServerInfo.remove(s); if(storedInfo != null && !closed) { synchronized(msgQueue) { @@ -759,34 +759,40 @@ public class HMaster implements HConstants, HMasterInterface, // Either way, record the new server - serversToServerInfo.put(server, serverInfo); + serversToServerInfo.put(s, serverInfo); if(!closed) { - Text serverLabel = new Text(server); - serverLeases.createLease(serverLabel, serverLabel, new ServerExpirer(server)); + Text serverLabel = new Text(s); + LOG.debug("Created lease for " + serverLabel); + serverLeases.createLease(serverLabel, serverLabel, new ServerExpirer(s)); } } /** HRegionServers call this method repeatedly. */ - public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg msgs[]) throws IOException { - String server = serverInfo.getServerAddress().toString().trim(); - Text serverLabel = new Text(server); + public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg msgs[]) + throws IOException { + String s = serverInfo.getServerAddress().toString().trim(); + Text serverLabel = new Text(s); - if(closed - || (msgs.length == 1 && msgs[0].getMsg() == HMsg.MSG_REPORT_EXITING)) { - // We're shutting down. Or the HRegionServer is. - serversToServerInfo.remove(server); - serverLeases.cancelLease(serverLabel, serverLabel); + if (closed || + msgs.length == 1 && msgs[0].getMsg() == HMsg.MSG_REPORT_EXITING) { + // HRegionServer is shutting down. + if (serversToServerInfo.remove(s) != null) { + // Only cancel lease once (This block can run a couple of times during + // shutdown). + LOG.debug("Cancelling lease for " + serverLabel); + serverLeases.cancelLease(serverLabel, serverLabel); + } HMsg returnMsgs[] = {new HMsg(HMsg.MSG_REGIONSERVER_STOP)}; return returnMsgs; } - HServerInfo storedInfo = serversToServerInfo.get(server); + HServerInfo storedInfo = serversToServerInfo.get(s); if(storedInfo == null) { if(LOG.isDebugEnabled()) { - LOG.debug("received server report from unknown server: " + server); + LOG.debug("received server report from unknown server: " + s); } // The HBaseMaster may have been restarted. @@ -808,7 +814,7 @@ public class HMaster implements HConstants, HMasterInterface, // The answer is to ask A to shut down for good. if(LOG.isDebugEnabled()) { - LOG.debug("region server race condition detected: " + server); + LOG.debug("region server race condition detected: " + s); } HMsg returnMsgs[] = { @@ -821,11 +827,11 @@ public class HMaster implements HConstants, HMasterInterface, // All's well. Renew the server's lease. // This will always succeed; otherwise, the fetch of serversToServerInfo // would have failed above. - + serverLeases.renewLease(serverLabel, serverLabel); // Refresh the info object - serversToServerInfo.put(server, serverInfo); + serversToServerInfo.put(s, serverInfo); // Next, process messages for this server return processMsgs(serverInfo, msgs); diff --git a/src/java/org/apache/hadoop/hbase/Leases.java b/src/java/org/apache/hadoop/hbase/Leases.java index 2a5b222c0d1..dd549e39c16 100644 --- a/src/java/org/apache/hadoop/hbase/Leases.java +++ b/src/java/org/apache/hadoop/hbase/Leases.java @@ -22,7 +22,7 @@ import org.apache.hadoop.io.*; import java.io.*; import java.util.*; -/******************************************************************************* +/** * Leases * * There are several server classes in HBase that need to track external clients @@ -36,9 +36,9 @@ import java.util.*; * * An instance of the Leases class will create a thread to do its dirty work. * You should close() the instance if you want to clean up the thread properly. - ******************************************************************************/ + */ public class Leases { - private static final Log LOG = LogFactory.getLog(Leases.class); + static final Log LOG = LogFactory.getLog(Leases.class.getName()); long leasePeriod; long leaseCheckFrequency; @@ -83,22 +83,30 @@ public class Leases { LOG.debug("leases closed"); } } + + String getLeaseName(final Text holderId, final Text resourceId) { + return ""; + } /** A client obtains a lease... */ - public void createLease(Text holderId, Text resourceId, LeaseListener listener) throws IOException { + public void createLease(Text holderId, Text resourceId, + final LeaseListener listener) + throws IOException { synchronized(leases) { synchronized(sortedLeases) { Lease lease = new Lease(holderId, resourceId, listener); Text leaseId = lease.getLeaseId(); if(leases.get(leaseId) != null) { throw new IOException("Impossible state for createLease(): Lease " + - "for holderId " + holderId + " and resourceId " + resourceId + - " is still held."); + getLeaseName(holderId, resourceId) + " is still held."); } leases.put(leaseId, lease); sortedLeases.add(lease); } } + if (LOG.isDebugEnabled()) { + LOG.debug("Created lease " + getLeaseName(holderId, resourceId)); + } } /** A client renews a lease... */ @@ -110,8 +118,8 @@ public class Leases { if(lease == null) { // It's possible that someone tries to renew the lease, but // it just expired a moment ago. So fail. - throw new IOException("Cannot renew lease; not held (holderId=" + - holderId + ", resourceId=" + resourceId + ")"); + throw new IOException("Cannot renew lease that is not held: " + + getLeaseName(holderId, resourceId)); } sortedLeases.remove(lease); @@ -119,9 +127,14 @@ public class Leases { sortedLeases.add(lease); } } + if (LOG.isDebugEnabled()) { + LOG.debug("Renewed lease " + getLeaseName(holderId, resourceId)); + } } - /** A client explicitly cancels a lease. The lease-cleanup method is not called. */ + /** A client explicitly cancels a lease. + * The lease-cleanup method is not called. + */ public void cancelLease(Text holderId, Text resourceId) throws IOException { synchronized(leases) { synchronized(sortedLeases) { @@ -132,7 +145,8 @@ public class Leases { // It's possible that someone tries to renew the lease, but // it just expired a moment ago. So fail. - throw new IOException("Cannot cancel lease that is not held (holderId=" + holderId + ", resourceId=" + resourceId + ")"); + throw new IOException("Cannot cancel lease that is not held: " + + getLeaseName(holderId, resourceId)); } sortedLeases.remove(lease); @@ -140,7 +154,10 @@ public class Leases { lease.cancelled(); } - } + } + if (LOG.isDebugEnabled()) { + LOG.debug("Cancel lease " + getLeaseName(holderId, resourceId)); + } } /** LeaseMonitor is a thread that expires Leases that go on too long. */ @@ -211,6 +228,10 @@ public class Leases { } public void expired() { + if (LOG.isDebugEnabled()) { + LOG.debug("Lease expired " + getLeaseName(this.holderId, + this.resourceId)); + } listener.leaseExpired(); }