HADOOP-1460 On shutdown IOException with complaint 'Cannot cancel lease that is not held'

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@544512 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jim Kellerman 2007-06-05 15:11:57 +00:00
parent ac718209e5
commit 3509f88c48
4 changed files with 59 additions and 80 deletions

View File

@ -23,3 +23,5 @@ Trunk (unreleased changes)
12. HADOOP-1392. Part2: includes table compaction by merging adjacent regions
that have shrunk in size.
13. HADOOP-1445 Support updates across region splits and compactions
14. HADOOP-1460 On shutdown IOException with complaint 'Cannot cancel lease
that is not held'

View File

@ -1,54 +1,4 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hbase.regiondir</name>
<value>hbase</value>
<description>The directory shared by region servers.
</description>
</property>
<property>
<name>hbase.regionserver.msginterval</name>
<value>1000</value>
<description>Interval between messages from the RegionServer to HMaster
in milliseconds. Default is 15. Set this value low if you want unit
tests to be responsive.
</description>
</property>
<!--
<property>
<name>hbase.master.meta.thread.rescanfrequency</name>
<value>600000</value>
<description>How long the HMaster sleeps (in milliseconds) between scans of
the root and meta tables.
</description>
</property>
<property>
<name>hbase.master.lease.period</name>
<value>360000</value>
<description>HMaster server lease period in milliseconds. Default is
180 seconds.</description>
</property>
<property>
<name>hbase.regionserver.lease.period</name>
<value>360000</value>
<description>HMaster server lease period in milliseconds. Default is
180 seconds.</description>
</property>
-->
<!--
<property>
<name>hbase.hregion.max.filesize</name>
<value>3421223</value>
<description>
Maximum desired file size for an HRegion. If filesize exceeds
value + (value / 2), the HRegion is split in two. Default: 128M.
</description>
</property>
<property>
<name>hbase.client.timeout.length</name>
<value>10000</value>
<description>Client timeout in milliseconds</description>
</property>
-->
</configuration>

View File

@ -738,17 +738,17 @@ public class HMaster implements HConstants, HMasterInterface,
/** HRegionServers call this method upon startup. */
public void regionServerStartup(HServerInfo serverInfo) throws IOException {
String server = serverInfo.getServerAddress().toString().trim();
String s = serverInfo.getServerAddress().toString().trim();
HServerInfo storedInfo = null;
if(LOG.isDebugEnabled()) {
LOG.debug("received start message from: " + server);
LOG.debug("received start message from: " + s);
}
// If we get the startup message but there's an old server by that
// name, then we can timeout the old one right away and register
// the new one.
storedInfo = serversToServerInfo.remove(server);
storedInfo = serversToServerInfo.remove(s);
if(storedInfo != null && !closed) {
synchronized(msgQueue) {
@ -759,34 +759,40 @@ public class HMaster implements HConstants, HMasterInterface,
// Either way, record the new server
serversToServerInfo.put(server, serverInfo);
serversToServerInfo.put(s, serverInfo);
if(!closed) {
Text serverLabel = new Text(server);
serverLeases.createLease(serverLabel, serverLabel, new ServerExpirer(server));
Text serverLabel = new Text(s);
LOG.debug("Created lease for " + serverLabel);
serverLeases.createLease(serverLabel, serverLabel, new ServerExpirer(s));
}
}
/** HRegionServers call this method repeatedly. */
public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg msgs[]) throws IOException {
String server = serverInfo.getServerAddress().toString().trim();
Text serverLabel = new Text(server);
public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg msgs[])
throws IOException {
String s = serverInfo.getServerAddress().toString().trim();
Text serverLabel = new Text(s);
if(closed
|| (msgs.length == 1 && msgs[0].getMsg() == HMsg.MSG_REPORT_EXITING)) {
// We're shutting down. Or the HRegionServer is.
serversToServerInfo.remove(server);
serverLeases.cancelLease(serverLabel, serverLabel);
if (closed ||
msgs.length == 1 && msgs[0].getMsg() == HMsg.MSG_REPORT_EXITING) {
// HRegionServer is shutting down.
if (serversToServerInfo.remove(s) != null) {
// Only cancel lease once (This block can run a couple of times during
// shutdown).
LOG.debug("Cancelling lease for " + serverLabel);
serverLeases.cancelLease(serverLabel, serverLabel);
}
HMsg returnMsgs[] = {new HMsg(HMsg.MSG_REGIONSERVER_STOP)};
return returnMsgs;
}
HServerInfo storedInfo = serversToServerInfo.get(server);
HServerInfo storedInfo = serversToServerInfo.get(s);
if(storedInfo == null) {
if(LOG.isDebugEnabled()) {
LOG.debug("received server report from unknown server: " + server);
LOG.debug("received server report from unknown server: " + s);
}
// The HBaseMaster may have been restarted.
@ -808,7 +814,7 @@ public class HMaster implements HConstants, HMasterInterface,
// The answer is to ask A to shut down for good.
if(LOG.isDebugEnabled()) {
LOG.debug("region server race condition detected: " + server);
LOG.debug("region server race condition detected: " + s);
}
HMsg returnMsgs[] = {
@ -821,11 +827,11 @@ public class HMaster implements HConstants, HMasterInterface,
// All's well. Renew the server's lease.
// This will always succeed; otherwise, the fetch of serversToServerInfo
// would have failed above.
serverLeases.renewLease(serverLabel, serverLabel);
// Refresh the info object
serversToServerInfo.put(server, serverInfo);
serversToServerInfo.put(s, serverInfo);
// Next, process messages for this server
return processMsgs(serverInfo, msgs);

View File

@ -22,7 +22,7 @@ import org.apache.hadoop.io.*;
import java.io.*;
import java.util.*;
/*******************************************************************************
/**
* Leases
*
* There are several server classes in HBase that need to track external clients
@ -36,9 +36,9 @@ import java.util.*;
*
* An instance of the Leases class will create a thread to do its dirty work.
* You should close() the instance if you want to clean up the thread properly.
******************************************************************************/
*/
public class Leases {
private static final Log LOG = LogFactory.getLog(Leases.class);
static final Log LOG = LogFactory.getLog(Leases.class.getName());
long leasePeriod;
long leaseCheckFrequency;
@ -83,22 +83,30 @@ public class Leases {
LOG.debug("leases closed");
}
}
String getLeaseName(final Text holderId, final Text resourceId) {
return "<holderId=" + holderId + ", resourceId=" + resourceId + ">";
}
/** A client obtains a lease... */
public void createLease(Text holderId, Text resourceId, LeaseListener listener) throws IOException {
public void createLease(Text holderId, Text resourceId,
final LeaseListener listener)
throws IOException {
synchronized(leases) {
synchronized(sortedLeases) {
Lease lease = new Lease(holderId, resourceId, listener);
Text leaseId = lease.getLeaseId();
if(leases.get(leaseId) != null) {
throw new IOException("Impossible state for createLease(): Lease " +
"for holderId " + holderId + " and resourceId " + resourceId +
" is still held.");
getLeaseName(holderId, resourceId) + " is still held.");
}
leases.put(leaseId, lease);
sortedLeases.add(lease);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Created lease " + getLeaseName(holderId, resourceId));
}
}
/** A client renews a lease... */
@ -110,8 +118,8 @@ public class Leases {
if(lease == null) {
// It's possible that someone tries to renew the lease, but
// it just expired a moment ago. So fail.
throw new IOException("Cannot renew lease; not held (holderId=" +
holderId + ", resourceId=" + resourceId + ")");
throw new IOException("Cannot renew lease that is not held: " +
getLeaseName(holderId, resourceId));
}
sortedLeases.remove(lease);
@ -119,9 +127,14 @@ public class Leases {
sortedLeases.add(lease);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Renewed lease " + getLeaseName(holderId, resourceId));
}
}
/** A client explicitly cancels a lease. The lease-cleanup method is not called. */
/** A client explicitly cancels a lease.
* The lease-cleanup method is not called.
*/
public void cancelLease(Text holderId, Text resourceId) throws IOException {
synchronized(leases) {
synchronized(sortedLeases) {
@ -132,7 +145,8 @@ public class Leases {
// It's possible that someone tries to renew the lease, but
// it just expired a moment ago. So fail.
throw new IOException("Cannot cancel lease that is not held (holderId=" + holderId + ", resourceId=" + resourceId + ")");
throw new IOException("Cannot cancel lease that is not held: " +
getLeaseName(holderId, resourceId));
}
sortedLeases.remove(lease);
@ -140,7 +154,10 @@ public class Leases {
lease.cancelled();
}
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Cancel lease " + getLeaseName(holderId, resourceId));
}
}
/** LeaseMonitor is a thread that expires Leases that go on too long. */
@ -211,6 +228,10 @@ public class Leases {
}
public void expired() {
if (LOG.isDebugEnabled()) {
LOG.debug("Lease expired " + getLeaseName(this.holderId,
this.resourceId));
}
listener.leaseExpired();
}