HBASE-3621 The timeout handler in AssignmentManager does an RPC while holding lock on RIT; a big no-no
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1084059 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
53a031f00c
commit
99f9ca542f
|
@ -44,6 +44,8 @@ Release 0.91.0 - Unreleased
|
||||||
HBASE-3641 LruBlockCache.CacheStats.getHitCount() is not using the
|
HBASE-3641 LruBlockCache.CacheStats.getHitCount() is not using the
|
||||||
correct variable
|
correct variable
|
||||||
HBASE-3532 HRegion#equals is broken (Ted Yu via Stack)
|
HBASE-3532 HRegion#equals is broken (Ted Yu via Stack)
|
||||||
|
HBASE-3621 The timeout handler in AssignmentManager does an RPC while
|
||||||
|
holding lock on RIT; a big no-no (Ted Yu via Stack)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
HBASE-3290 Max Compaction Size (Nicolas Spiegelberg via Stack)
|
HBASE-3290 Max Compaction Size (Nicolas Spiegelberg via Stack)
|
||||||
|
|
|
@ -1730,11 +1730,14 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
protected void chore() {
|
protected void chore() {
|
||||||
// If bulkAssign in progress, suspend checks
|
// If bulkAssign in progress, suspend checks
|
||||||
if (this.bulkAssign) return;
|
if (this.bulkAssign) return;
|
||||||
|
List<HRegionInfo> unassigns = new ArrayList<HRegionInfo>();
|
||||||
|
Map<HRegionInfo, Boolean> assigns =
|
||||||
|
new HashMap<HRegionInfo, Boolean>();
|
||||||
synchronized (regionsInTransition) {
|
synchronized (regionsInTransition) {
|
||||||
// Iterate all regions in transition checking for time outs
|
// Iterate all regions in transition checking for time outs
|
||||||
long now = System.currentTimeMillis();
|
long now = System.currentTimeMillis();
|
||||||
for (RegionState regionState : regionsInTransition.values()) {
|
for (RegionState regionState : regionsInTransition.values()) {
|
||||||
if(regionState.getStamp() + timeout <= now) {
|
if (regionState.getStamp() + timeout <= now) {
|
||||||
HRegionInfo regionInfo = regionState.getRegion();
|
HRegionInfo regionInfo = regionState.getRegion();
|
||||||
LOG.info("Regions in transition timed out: " + regionState);
|
LOG.info("Regions in transition timed out: " + regionState);
|
||||||
// Expired! Do a retry.
|
// Expired! Do a retry.
|
||||||
|
@ -1752,13 +1755,13 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
LOG.info("Region has been OFFLINE for too long, " +
|
LOG.info("Region has been OFFLINE for too long, " +
|
||||||
"reassigning " + regionInfo.getRegionNameAsString() +
|
"reassigning " + regionInfo.getRegionNameAsString() +
|
||||||
" to a random server");
|
" to a random server");
|
||||||
assign(regionState.getRegion(), false);
|
assigns.put(regionState.getRegion(), Boolean.FALSE);
|
||||||
break;
|
break;
|
||||||
case PENDING_OPEN:
|
case PENDING_OPEN:
|
||||||
LOG.info("Region has been PENDING_OPEN for too " +
|
LOG.info("Region has been PENDING_OPEN for too " +
|
||||||
"long, reassigning region=" +
|
"long, reassigning region=" +
|
||||||
regionInfo.getRegionNameAsString());
|
regionInfo.getRegionNameAsString());
|
||||||
assign(regionState.getRegion(), false, true);
|
assigns.put(regionState.getRegion(), Boolean.TRUE);
|
||||||
break;
|
break;
|
||||||
case OPENING:
|
case OPENING:
|
||||||
LOG.info("Region has been OPENING for too " +
|
LOG.info("Region has been OPENING for too " +
|
||||||
|
@ -1794,7 +1797,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
LOG.info("Successfully transitioned region=" +
|
LOG.info("Successfully transitioned region=" +
|
||||||
regionInfo.getRegionNameAsString() + " into OFFLINE" +
|
regionInfo.getRegionNameAsString() + " into OFFLINE" +
|
||||||
" and forcing a new assignment");
|
" and forcing a new assignment");
|
||||||
assign(regionState, false, true);
|
assigns.put(regionState.getRegion(), Boolean.TRUE);
|
||||||
}
|
}
|
||||||
} catch (KeeperException.NoNodeException nne) {
|
} catch (KeeperException.NoNodeException nne) {
|
||||||
// Node did not exist, can't time this out
|
// Node did not exist, can't time this out
|
||||||
|
@ -1817,16 +1820,17 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
// If the server got the RPC, it will transition the node
|
// If the server got the RPC, it will transition the node
|
||||||
// to CLOSING, so only do something here if no node exists
|
// to CLOSING, so only do something here if no node exists
|
||||||
if (!ZKUtil.watchAndCheckExists(watcher,
|
if (!ZKUtil.watchAndCheckExists(watcher,
|
||||||
ZKAssign.getNodeName(watcher,
|
ZKAssign.getNodeName(watcher, regionInfo.getEncodedName()))) {
|
||||||
regionInfo.getEncodedName()))) {
|
// Queue running of an unassign -- do actual unassign
|
||||||
unassign(regionInfo, true);
|
// outside of the regionsInTransition lock.
|
||||||
|
unassigns.add(regionInfo);
|
||||||
}
|
}
|
||||||
} catch (NoNodeException e) {
|
} catch (NoNodeException e) {
|
||||||
LOG.debug("Node no longer existed so not forcing another " +
|
LOG.debug("Node no longer existed so not forcing another " +
|
||||||
"unassignment");
|
"unassignment");
|
||||||
} catch (KeeperException e) {
|
} catch (KeeperException e) {
|
||||||
LOG.warn("Unexpected ZK exception timing out a region " +
|
LOG.warn("Unexpected ZK exception timing out a region " +
|
||||||
"close", e);
|
"close", e);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CLOSING:
|
case CLOSING:
|
||||||
|
@ -1838,6 +1842,13 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// Finish the work for regions in PENDING_CLOSE state
|
||||||
|
for (HRegionInfo hri: unassigns) {
|
||||||
|
unassign(hri, true);
|
||||||
|
}
|
||||||
|
for (Map.Entry<HRegionInfo, Boolean> e: assigns.entrySet()){
|
||||||
|
assign(e.getKey(), false, e.getValue());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue