HADOOP-1460 On shutdown IOException with complaint 'Cannot cancel lease that is not held'
git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@544512 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ac718209e5
commit
3509f88c48
|
@ -23,3 +23,5 @@ Trunk (unreleased changes)
|
||||||
12. HADOOP-1392. Part2: includes table compaction by merging adjacent regions
|
12. HADOOP-1392. Part2: includes table compaction by merging adjacent regions
|
||||||
that have shrunk in size.
|
that have shrunk in size.
|
||||||
13. HADOOP-1445 Support updates across region splits and compactions
|
13. HADOOP-1445 Support updates across region splits and compactions
|
||||||
|
14. HADOOP-1460 On shutdown IOException with complaint 'Cannot cancel lease
|
||||||
|
that is not held'
|
||||||
|
|
|
@ -1,54 +1,4 @@
|
||||||
<?xml version="1.0"?>
|
<?xml version="1.0"?>
|
||||||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
|
||||||
<configuration>
|
<configuration>
|
||||||
<property>
|
|
||||||
<name>hbase.regiondir</name>
|
|
||||||
<value>hbase</value>
|
|
||||||
<description>The directory shared by region servers.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>hbase.regionserver.msginterval</name>
|
|
||||||
<value>1000</value>
|
|
||||||
<description>Interval between messages from the RegionServer to HMaster
|
|
||||||
in milliseconds. Default is 15. Set this value low if you want unit
|
|
||||||
tests to be responsive.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
<!--
|
|
||||||
<property>
|
|
||||||
<name>hbase.master.meta.thread.rescanfrequency</name>
|
|
||||||
<value>600000</value>
|
|
||||||
<description>How long the HMaster sleeps (in milliseconds) between scans of
|
|
||||||
the root and meta tables.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>hbase.master.lease.period</name>
|
|
||||||
<value>360000</value>
|
|
||||||
<description>HMaster server lease period in milliseconds. Default is
|
|
||||||
180 seconds.</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>hbase.regionserver.lease.period</name>
|
|
||||||
<value>360000</value>
|
|
||||||
<description>HMaster server lease period in milliseconds. Default is
|
|
||||||
180 seconds.</description>
|
|
||||||
</property>
|
|
||||||
-->
|
|
||||||
<!--
|
|
||||||
<property>
|
|
||||||
<name>hbase.hregion.max.filesize</name>
|
|
||||||
<value>3421223</value>
|
|
||||||
<description>
|
|
||||||
Maximum desired file size for an HRegion. If filesize exceeds
|
|
||||||
value + (value / 2), the HRegion is split in two. Default: 128M.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
<property>
|
|
||||||
<name>hbase.client.timeout.length</name>
|
|
||||||
<value>10000</value>
|
|
||||||
<description>Client timeout in milliseconds</description>
|
|
||||||
</property>
|
|
||||||
-->
|
|
||||||
</configuration>
|
</configuration>
|
||||||
|
|
|
@ -738,17 +738,17 @@ public class HMaster implements HConstants, HMasterInterface,
|
||||||
|
|
||||||
/** HRegionServers call this method upon startup. */
|
/** HRegionServers call this method upon startup. */
|
||||||
public void regionServerStartup(HServerInfo serverInfo) throws IOException {
|
public void regionServerStartup(HServerInfo serverInfo) throws IOException {
|
||||||
String server = serverInfo.getServerAddress().toString().trim();
|
String s = serverInfo.getServerAddress().toString().trim();
|
||||||
HServerInfo storedInfo = null;
|
HServerInfo storedInfo = null;
|
||||||
|
|
||||||
if(LOG.isDebugEnabled()) {
|
if(LOG.isDebugEnabled()) {
|
||||||
LOG.debug("received start message from: " + server);
|
LOG.debug("received start message from: " + s);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we get the startup message but there's an old server by that
|
// If we get the startup message but there's an old server by that
|
||||||
// name, then we can timeout the old one right away and register
|
// name, then we can timeout the old one right away and register
|
||||||
// the new one.
|
// the new one.
|
||||||
storedInfo = serversToServerInfo.remove(server);
|
storedInfo = serversToServerInfo.remove(s);
|
||||||
|
|
||||||
if(storedInfo != null && !closed) {
|
if(storedInfo != null && !closed) {
|
||||||
synchronized(msgQueue) {
|
synchronized(msgQueue) {
|
||||||
|
@ -759,34 +759,40 @@ public class HMaster implements HConstants, HMasterInterface,
|
||||||
|
|
||||||
// Either way, record the new server
|
// Either way, record the new server
|
||||||
|
|
||||||
serversToServerInfo.put(server, serverInfo);
|
serversToServerInfo.put(s, serverInfo);
|
||||||
|
|
||||||
if(!closed) {
|
if(!closed) {
|
||||||
Text serverLabel = new Text(server);
|
Text serverLabel = new Text(s);
|
||||||
serverLeases.createLease(serverLabel, serverLabel, new ServerExpirer(server));
|
LOG.debug("Created lease for " + serverLabel);
|
||||||
|
serverLeases.createLease(serverLabel, serverLabel, new ServerExpirer(s));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** HRegionServers call this method repeatedly. */
|
/** HRegionServers call this method repeatedly. */
|
||||||
public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg msgs[]) throws IOException {
|
public HMsg[] regionServerReport(HServerInfo serverInfo, HMsg msgs[])
|
||||||
String server = serverInfo.getServerAddress().toString().trim();
|
throws IOException {
|
||||||
Text serverLabel = new Text(server);
|
String s = serverInfo.getServerAddress().toString().trim();
|
||||||
|
Text serverLabel = new Text(s);
|
||||||
|
|
||||||
if(closed
|
if (closed ||
|
||||||
|| (msgs.length == 1 && msgs[0].getMsg() == HMsg.MSG_REPORT_EXITING)) {
|
msgs.length == 1 && msgs[0].getMsg() == HMsg.MSG_REPORT_EXITING) {
|
||||||
// We're shutting down. Or the HRegionServer is.
|
// HRegionServer is shutting down.
|
||||||
serversToServerInfo.remove(server);
|
if (serversToServerInfo.remove(s) != null) {
|
||||||
serverLeases.cancelLease(serverLabel, serverLabel);
|
// Only cancel lease once (This block can run a couple of times during
|
||||||
|
// shutdown).
|
||||||
|
LOG.debug("Cancelling lease for " + serverLabel);
|
||||||
|
serverLeases.cancelLease(serverLabel, serverLabel);
|
||||||
|
}
|
||||||
HMsg returnMsgs[] = {new HMsg(HMsg.MSG_REGIONSERVER_STOP)};
|
HMsg returnMsgs[] = {new HMsg(HMsg.MSG_REGIONSERVER_STOP)};
|
||||||
return returnMsgs;
|
return returnMsgs;
|
||||||
}
|
}
|
||||||
|
|
||||||
HServerInfo storedInfo = serversToServerInfo.get(server);
|
HServerInfo storedInfo = serversToServerInfo.get(s);
|
||||||
|
|
||||||
if(storedInfo == null) {
|
if(storedInfo == null) {
|
||||||
|
|
||||||
if(LOG.isDebugEnabled()) {
|
if(LOG.isDebugEnabled()) {
|
||||||
LOG.debug("received server report from unknown server: " + server);
|
LOG.debug("received server report from unknown server: " + s);
|
||||||
}
|
}
|
||||||
|
|
||||||
// The HBaseMaster may have been restarted.
|
// The HBaseMaster may have been restarted.
|
||||||
|
@ -808,7 +814,7 @@ public class HMaster implements HConstants, HMasterInterface,
|
||||||
// The answer is to ask A to shut down for good.
|
// The answer is to ask A to shut down for good.
|
||||||
|
|
||||||
if(LOG.isDebugEnabled()) {
|
if(LOG.isDebugEnabled()) {
|
||||||
LOG.debug("region server race condition detected: " + server);
|
LOG.debug("region server race condition detected: " + s);
|
||||||
}
|
}
|
||||||
|
|
||||||
HMsg returnMsgs[] = {
|
HMsg returnMsgs[] = {
|
||||||
|
@ -825,7 +831,7 @@ public class HMaster implements HConstants, HMasterInterface,
|
||||||
serverLeases.renewLease(serverLabel, serverLabel);
|
serverLeases.renewLease(serverLabel, serverLabel);
|
||||||
|
|
||||||
// Refresh the info object
|
// Refresh the info object
|
||||||
serversToServerInfo.put(server, serverInfo);
|
serversToServerInfo.put(s, serverInfo);
|
||||||
|
|
||||||
// Next, process messages for this server
|
// Next, process messages for this server
|
||||||
return processMsgs(serverInfo, msgs);
|
return processMsgs(serverInfo, msgs);
|
||||||
|
|
|
@ -22,7 +22,7 @@ import org.apache.hadoop.io.*;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/*******************************************************************************
|
/**
|
||||||
* Leases
|
* Leases
|
||||||
*
|
*
|
||||||
* There are several server classes in HBase that need to track external clients
|
* There are several server classes in HBase that need to track external clients
|
||||||
|
@ -36,9 +36,9 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
* An instance of the Leases class will create a thread to do its dirty work.
|
* An instance of the Leases class will create a thread to do its dirty work.
|
||||||
* You should close() the instance if you want to clean up the thread properly.
|
* You should close() the instance if you want to clean up the thread properly.
|
||||||
******************************************************************************/
|
*/
|
||||||
public class Leases {
|
public class Leases {
|
||||||
private static final Log LOG = LogFactory.getLog(Leases.class);
|
static final Log LOG = LogFactory.getLog(Leases.class.getName());
|
||||||
|
|
||||||
long leasePeriod;
|
long leasePeriod;
|
||||||
long leaseCheckFrequency;
|
long leaseCheckFrequency;
|
||||||
|
@ -84,21 +84,29 @@ public class Leases {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String getLeaseName(final Text holderId, final Text resourceId) {
|
||||||
|
return "<holderId=" + holderId + ", resourceId=" + resourceId + ">";
|
||||||
|
}
|
||||||
|
|
||||||
/** A client obtains a lease... */
|
/** A client obtains a lease... */
|
||||||
public void createLease(Text holderId, Text resourceId, LeaseListener listener) throws IOException {
|
public void createLease(Text holderId, Text resourceId,
|
||||||
|
final LeaseListener listener)
|
||||||
|
throws IOException {
|
||||||
synchronized(leases) {
|
synchronized(leases) {
|
||||||
synchronized(sortedLeases) {
|
synchronized(sortedLeases) {
|
||||||
Lease lease = new Lease(holderId, resourceId, listener);
|
Lease lease = new Lease(holderId, resourceId, listener);
|
||||||
Text leaseId = lease.getLeaseId();
|
Text leaseId = lease.getLeaseId();
|
||||||
if(leases.get(leaseId) != null) {
|
if(leases.get(leaseId) != null) {
|
||||||
throw new IOException("Impossible state for createLease(): Lease " +
|
throw new IOException("Impossible state for createLease(): Lease " +
|
||||||
"for holderId " + holderId + " and resourceId " + resourceId +
|
getLeaseName(holderId, resourceId) + " is still held.");
|
||||||
" is still held.");
|
|
||||||
}
|
}
|
||||||
leases.put(leaseId, lease);
|
leases.put(leaseId, lease);
|
||||||
sortedLeases.add(lease);
|
sortedLeases.add(lease);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Created lease " + getLeaseName(holderId, resourceId));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** A client renews a lease... */
|
/** A client renews a lease... */
|
||||||
|
@ -110,8 +118,8 @@ public class Leases {
|
||||||
if(lease == null) {
|
if(lease == null) {
|
||||||
// It's possible that someone tries to renew the lease, but
|
// It's possible that someone tries to renew the lease, but
|
||||||
// it just expired a moment ago. So fail.
|
// it just expired a moment ago. So fail.
|
||||||
throw new IOException("Cannot renew lease; not held (holderId=" +
|
throw new IOException("Cannot renew lease that is not held: " +
|
||||||
holderId + ", resourceId=" + resourceId + ")");
|
getLeaseName(holderId, resourceId));
|
||||||
}
|
}
|
||||||
|
|
||||||
sortedLeases.remove(lease);
|
sortedLeases.remove(lease);
|
||||||
|
@ -119,9 +127,14 @@ public class Leases {
|
||||||
sortedLeases.add(lease);
|
sortedLeases.add(lease);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Renewed lease " + getLeaseName(holderId, resourceId));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** A client explicitly cancels a lease. The lease-cleanup method is not called. */
|
/** A client explicitly cancels a lease.
|
||||||
|
* The lease-cleanup method is not called.
|
||||||
|
*/
|
||||||
public void cancelLease(Text holderId, Text resourceId) throws IOException {
|
public void cancelLease(Text holderId, Text resourceId) throws IOException {
|
||||||
synchronized(leases) {
|
synchronized(leases) {
|
||||||
synchronized(sortedLeases) {
|
synchronized(sortedLeases) {
|
||||||
|
@ -132,7 +145,8 @@ public class Leases {
|
||||||
// It's possible that someone tries to renew the lease, but
|
// It's possible that someone tries to renew the lease, but
|
||||||
// it just expired a moment ago. So fail.
|
// it just expired a moment ago. So fail.
|
||||||
|
|
||||||
throw new IOException("Cannot cancel lease that is not held (holderId=" + holderId + ", resourceId=" + resourceId + ")");
|
throw new IOException("Cannot cancel lease that is not held: " +
|
||||||
|
getLeaseName(holderId, resourceId));
|
||||||
}
|
}
|
||||||
|
|
||||||
sortedLeases.remove(lease);
|
sortedLeases.remove(lease);
|
||||||
|
@ -141,6 +155,9 @@ public class Leases {
|
||||||
lease.cancelled();
|
lease.cancelled();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Cancel lease " + getLeaseName(holderId, resourceId));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** LeaseMonitor is a thread that expires Leases that go on too long. */
|
/** LeaseMonitor is a thread that expires Leases that go on too long. */
|
||||||
|
@ -211,6 +228,10 @@ public class Leases {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void expired() {
|
public void expired() {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Lease expired " + getLeaseName(this.holderId,
|
||||||
|
this.resourceId));
|
||||||
|
}
|
||||||
listener.leaseExpired();
|
listener.leaseExpired();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue