HBASE-9387 Region could get lost during assignment (Ted Yu)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1520058 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7c21efdb04
commit
c925dd3d76
|
@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.regionserver.RegionServerAccounting;
|
|||
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
|
||||
import org.apache.hadoop.hbase.util.CancelableProgressable;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
|
||||
/**
|
||||
|
@ -359,7 +360,7 @@ public class OpenRegionHandler extends EventHandler {
|
|||
* @return whether znode is successfully transitioned to OPENED state.
|
||||
* @throws IOException
|
||||
*/
|
||||
private boolean transitionToOpened(final HRegion r) throws IOException {
|
||||
boolean transitionToOpened(final HRegion r) throws IOException {
|
||||
boolean result = false;
|
||||
HRegionInfo hri = r.getRegionInfo();
|
||||
final String name = hri.getRegionNameAsString();
|
||||
|
@ -367,11 +368,20 @@ public class OpenRegionHandler extends EventHandler {
|
|||
try {
|
||||
if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), hri,
|
||||
this.server.getServerName(), this.version) == -1) {
|
||||
LOG.warn("Completed the OPEN of region " + name +
|
||||
" but when transitioning from " +
|
||||
" OPENING to OPENED got a version mismatch, someone else clashed " +
|
||||
"so now unassigning -- closing region on server: " +
|
||||
this.server.getServerName());
|
||||
String warnMsg = "Completed the OPEN of region " + name +
|
||||
" but when transitioning from " + " OPENING to OPENED ";
|
||||
try {
|
||||
String node = ZKAssign.getNodeName(this.server.getZooKeeper(), hri.getEncodedName());
|
||||
if (ZKUtil.checkExists(this.server.getZooKeeper(), node) < 0) {
|
||||
// the znode no longer exists, so abort the region server
|
||||
rsServices.abort(warnMsg + "the znode disappeared", null);
|
||||
} else {
|
||||
LOG.warn(warnMsg + "got a version mismatch, someone else clashed; " +
|
||||
"so now unassigning -- closing region on server: " + this.server.getServerName());
|
||||
}
|
||||
} catch (KeeperException ke) {
|
||||
rsServices.abort(warnMsg, ke);
|
||||
}
|
||||
} else {
|
||||
LOG.debug("Transitioned " + r.getRegionInfo().getEncodedName() +
|
||||
" to OPENED in zk on " + this.server.getServerName());
|
||||
|
|
|
@ -55,6 +55,7 @@ class MockRegionServerServices implements RegionServerServices {
|
|||
private ZooKeeperWatcher zkw = null;
|
||||
private ServerName serverName = null;
|
||||
private RpcServerInterface rpcServer = null;
|
||||
private volatile boolean abortRequested;
|
||||
|
||||
MockRegionServerServices(ZooKeeperWatcher zkw) {
|
||||
this.zkw = zkw;
|
||||
|
@ -154,7 +155,8 @@ class MockRegionServerServices implements RegionServerServices {
|
|||
|
||||
@Override
|
||||
public void abort(String why, Throwable e) {
|
||||
//no-op
|
||||
this.abortRequested = true;
|
||||
stop(why);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -169,7 +171,7 @@ class MockRegionServerServices implements RegionServerServices {
|
|||
|
||||
@Override
|
||||
public boolean isAborted() {
|
||||
return false;
|
||||
return this.abortRequested;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -55,7 +55,7 @@ public class TestFullLogReconstruction {
|
|||
c.setInt("ipc.client.connect.max.retries", 1);
|
||||
c.setInt("dfs.client.block.recovery.retries", 1);
|
||||
c.setInt(HConstants.ZK_SESSION_TIMEOUT, 1000);
|
||||
TEST_UTIL.startMiniCluster(2);
|
||||
TEST_UTIL.startMiniCluster(3);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -109,4 +109,4 @@ public class TestFullLogReconstruction {
|
|||
assertEquals(count, newCount);
|
||||
table.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -129,6 +129,62 @@ public class TestOpenRegionHandler {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that the OpenRegionHandler can deal with a perceived failure of transitioning to OPENED state
|
||||
* due to intermittent zookeeper malfunctioning.
|
||||
* @see <a href="https://issues.apache.org/jira/browse/HBASE-9387">HBASE-9387</a>
|
||||
* @throws IOException
|
||||
* @throws NodeExistsException
|
||||
* @throws KeeperException
|
||||
*/
|
||||
@Test
|
||||
public void testRegionServerAbortionDueToFailureTransitioningToOpened()
|
||||
throws IOException, NodeExistsException, KeeperException {
|
||||
final Server server = new MockServer(HTU);
|
||||
final RegionServerServices rss = HTU.createMockRegionServerService();
|
||||
|
||||
HTableDescriptor htd = TEST_HTD;
|
||||
final HRegionInfo hri = TEST_HRI;
|
||||
HRegion region =
|
||||
HRegion.createHRegion(hri, HTU.getDataTestDir(), HTU
|
||||
.getConfiguration(), htd);
|
||||
assertNotNull(region);
|
||||
try {
|
||||
OpenRegionHandler handler = new OpenRegionHandler(server, rss, hri, htd) {
|
||||
boolean transitionToOpened(final HRegion r) throws IOException {
|
||||
// remove znode simulating intermittent zookeeper connection issue
|
||||
ZooKeeperWatcher zkw = this.server.getZooKeeper();
|
||||
String node = ZKAssign.getNodeName(zkw, hri.getEncodedName());
|
||||
try {
|
||||
ZKUtil.deleteNodeFailSilent(zkw, node);
|
||||
} catch (KeeperException e) {
|
||||
throw new RuntimeException("Ugh failed delete of " + node, e);
|
||||
}
|
||||
// then try to transition to OPENED
|
||||
return super.transitionToOpened(r);
|
||||
}
|
||||
};
|
||||
rss.getRegionsInTransitionInRS().put(
|
||||
hri.getEncodedNameAsBytes(), Boolean.TRUE);
|
||||
// Call process without first creating OFFLINE region in zk, see if
|
||||
// exception or just quiet return (expected).
|
||||
handler.process();
|
||||
rss.getRegionsInTransitionInRS().put(
|
||||
hri.getEncodedNameAsBytes(), Boolean.TRUE);
|
||||
ZKAssign.createNodeOffline(server.getZooKeeper(), hri, server.getServerName());
|
||||
// Call process again but this time yank the zk znode out from under it
|
||||
// post OPENING; again will expect it to come back w/o NPE or exception.
|
||||
handler.process();
|
||||
} catch (IOException ioe) {
|
||||
} finally {
|
||||
HRegion.closeHRegion(region);
|
||||
}
|
||||
// Region server is expected to abort due to OpenRegionHandler perceiving transitioning
|
||||
// to OPENED as failed
|
||||
// This corresponds to the second handler.process() call above.
|
||||
assertTrue("region server should have aborted", rss.isAborted());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testFailedOpenRegion() throws Exception {
|
||||
Server server = new MockServer(HTU);
|
||||
|
|
Loading…
Reference in New Issue