HBASE-9387 Region could get lost during assignment (Ted Yu)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1520058 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Zhihong Yu 2013-09-04 16:04:04 +00:00
parent 7c21efdb04
commit c925dd3d76
4 changed files with 78 additions and 10 deletions

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.regionserver.RegionServerAccounting;
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.zookeeper.KeeperException;
/**
@ -359,7 +360,7 @@ public class OpenRegionHandler extends EventHandler {
* @return whether znode is successfully transitioned to OPENED state.
* @throws IOException
*/
private boolean transitionToOpened(final HRegion r) throws IOException {
boolean transitionToOpened(final HRegion r) throws IOException {
boolean result = false;
HRegionInfo hri = r.getRegionInfo();
final String name = hri.getRegionNameAsString();
@ -367,11 +368,20 @@ public class OpenRegionHandler extends EventHandler {
try {
if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), hri,
this.server.getServerName(), this.version) == -1) {
LOG.warn("Completed the OPEN of region " + name +
" but when transitioning from " +
" OPENING to OPENED got a version mismatch, someone else clashed " +
"so now unassigning -- closing region on server: " +
this.server.getServerName());
String warnMsg = "Completed the OPEN of region " + name +
" but when transitioning from " + " OPENING to OPENED ";
try {
String node = ZKAssign.getNodeName(this.server.getZooKeeper(), hri.getEncodedName());
if (ZKUtil.checkExists(this.server.getZooKeeper(), node) < 0) {
// if the znode no longer exists, abort the region server instead of just unassigning
rsServices.abort(warnMsg + "the znode disappeared", null);
} else {
LOG.warn(warnMsg + "got a version mismatch, someone else clashed; " +
"so now unassigning -- closing region on server: " + this.server.getServerName());
}
} catch (KeeperException ke) {
rsServices.abort(warnMsg, ke);
}
} else {
LOG.debug("Transitioned " + r.getRegionInfo().getEncodedName() +
" to OPENED in zk on " + this.server.getServerName());

View File

@ -55,6 +55,7 @@ class MockRegionServerServices implements RegionServerServices {
private ZooKeeperWatcher zkw = null;
private ServerName serverName = null;
private RpcServerInterface rpcServer = null;
private volatile boolean abortRequested;
MockRegionServerServices(ZooKeeperWatcher zkw) {
this.zkw = zkw;
@ -154,7 +155,8 @@ class MockRegionServerServices implements RegionServerServices {
@Override
public void abort(String why, Throwable e) {
//no-op
this.abortRequested = true;
stop(why);
}
@Override
@ -169,7 +171,7 @@ class MockRegionServerServices implements RegionServerServices {
@Override
public boolean isAborted() {
return false;
return this.abortRequested;
}
@Override

View File

@ -55,7 +55,7 @@ public class TestFullLogReconstruction {
c.setInt("ipc.client.connect.max.retries", 1);
c.setInt("dfs.client.block.recovery.retries", 1);
c.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000);
TEST_UTIL.startMiniCluster(2);
TEST_UTIL.startMiniCluster(3);
}
/**

View File

@ -129,6 +129,62 @@ public class TestOpenRegionHandler {
}
}
/**
 * Verifies that the region server is asked to abort when the region's znode disappears
 * right before {@link OpenRegionHandler} tries to move it from OPENING to OPENED, which
 * simulates an intermittent ZooKeeper malfunction.
 * <p>
 * The handler under test overrides {@code transitionToOpened} so that it deletes the znode
 * and then delegates to the real implementation.
 * @see <a href="https://issues.apache.org/jira/browse/HBASE-9387">HBASE-9387</a>
 * @throws IOException declared for the region create/close calls made outside the
 *           {@code try} body; an IOException raised inside it is swallowed
 * @throws NodeExistsException declared for the ZooKeeper calls made by this test
 * @throws KeeperException declared for the ZooKeeper calls made by this test
 */
@Test
public void testRegionServerAbortionDueToFailureTransitioningToOpened()
    throws IOException, NodeExistsException, KeeperException {
  final Server server = new MockServer(HTU);
  final RegionServerServices rss = HTU.createMockRegionServerService();
  HTableDescriptor htd = TEST_HTD;
  final HRegionInfo hri = TEST_HRI;
  HRegion region =
      HRegion.createHRegion(hri, HTU.getDataTestDir(), HTU.getConfiguration(), htd);
  assertNotNull(region);
  try {
    OpenRegionHandler handler = new OpenRegionHandler(server, rss, hri, htd) {
      @Override
      boolean transitionToOpened(final HRegion r) throws IOException {
        // Remove the znode to simulate an intermittent zookeeper connection issue.
        ZooKeeperWatcher zkw = this.server.getZooKeeper();
        String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
        try {
          ZKUtil.deleteNodeFailSilent(zkw, node);
        } catch (KeeperException e) {
          throw new RuntimeException("Ugh failed delete of " + node, e);
        }
        // Only then attempt the real transition to OPENED.
        return super.transitionToOpened(r);
      }
    };
    // First pass: mark the region as in transition but do not create its OFFLINE
    // znode, then run the handler.
    rss.getRegionsInTransitionInRS().put(
        hri.getEncodedNameAsBytes(), Boolean.TRUE);
    handler.process();
    // Second pass: this time the OFFLINE znode exists, so the overridden
    // transitionToOpened() above yanks the znode out from under the handler just
    // before the transition to OPENED is attempted.
    rss.getRegionsInTransitionInRS().put(
        hri.getEncodedNameAsBytes(), Boolean.TRUE);
    ZKAssign.createNodeOffline(server.getZooKeeper(), hri, server.getServerName());
    handler.process();
  } catch (IOException ignored) {
    // Deliberately ignored: an IOException from process() is not what this test
    // judges; the outcome is decided solely by the abort assertion below.
  } finally {
    HRegion.closeHRegion(region);
  }
  // The handler is expected to perceive the transition to OPENED as failed during the
  // second process() call and therefore request an abort of the region server.
  assertTrue("region server should have aborted", rss.isAborted());
}
@Test
public void testFailedOpenRegion() throws Exception {
Server server = new MockServer(HTU);