HBASE-4357 Region stayed in transition - in closing state (Ming Ma)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1228489 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Zhihong Yu 2012-01-06 22:55:36 +00:00
parent ff3cf1aeda
commit 30a0a2c6d6
12 changed files with 230 additions and 59 deletions

View File

@ -471,6 +471,7 @@ Release 0.92.0 - Unreleased
HBASE-5094 The META can hold an entry for a region with a different server name from the one
actually in the AssignmentManager thus making the region inaccessible. (Ram)
HBASE-5081 Distributed log splitting deleteNode races against splitLog retry (Prakash)
HBASE-4357 Region stayed in transition - in closing state (Ming Ma)
TESTS
HBASE-4450 test for number of blocks read: to serve as baseline for expected

View File

@ -435,6 +435,19 @@ public interface HRegionInterface extends VersionedProtocol, Stoppable, Abortabl
public boolean closeRegion(final HRegionInfo region)
throws IOException;
/**
* Closes the specified region.
* @param region region to close
* @param versionOfClosingNode
* the version of znode to compare when RS transitions the znode
* from CLOSING state.
* @return true if closing region, false if not
* @throws IOException
*/
public boolean closeRegion(final HRegionInfo region,
final int versionOfClosingNode)
throws IOException;
/**
* Closes the specified region and will use or not use ZK during the close
* according to the specified flag.

View File

@ -1753,13 +1753,20 @@ public class AssignmentManager extends ZooKeeperListener {
String encodedName = region.getEncodedName();
// Grab the state of this region and synchronize on it
RegionState state;
int versionOfClosingNode = -1;
synchronized (regionsInTransition) {
state = regionsInTransition.get(encodedName);
if (state == null) {
// Create the znode in CLOSING state
try {
ZKAssign.createNodeClosing(
versionOfClosingNode = ZKAssign.createNodeClosing(
master.getZooKeeper(), region, master.getServerName());
if (versionOfClosingNode == -1) {
LOG.debug("Attempting to unassign region " +
region.getRegionNameAsString() + " but ZK closing node "
+ "can't be created.");
return;
}
} catch (KeeperException e) {
if (e instanceof NodeExistsException) {
// Handle race between master initiated close and regionserver
@ -1810,17 +1817,18 @@ public class AssignmentManager extends ZooKeeperListener {
try {
// TODO: We should consider making this look more like it does for the
// region open where we catch all throwables and never abort
if (serverManager.sendRegionClose(server, state.getRegion())) {
if (serverManager.sendRegionClose(server, state.getRegion(),
versionOfClosingNode)) {
LOG.debug("Sent CLOSE to " + server + " for region " +
region.getRegionNameAsString());
return;
}
// This never happens. Currently regionserver close always return true.
LOG.warn("Server " + server + " region CLOSE RPC returned false for " +
region.getEncodedName());
region.getRegionNameAsString());
} catch (NotServingRegionException nsre) {
LOG.info("Server " + server + " returned " + nsre + " for " +
region.getEncodedName());
region.getRegionNameAsString());
// Presume that master has stale data. Presume remote side just split.
// Presume that the split message when it comes in will fix up the master's
// in memory cluster state.

View File

@ -481,11 +481,14 @@ public class ServerManager {
* have the specified region or the region is being split.
* @param server server to open a region
* @param region region to open
* @param versionOfClosingNode
* the version of znode to compare when RS transitions the znode from
* CLOSING state.
* @return true if server acknowledged close, false if not
* @throws IOException
*/
public boolean sendRegionClose(ServerName server, HRegionInfo region)
throws IOException {
public boolean sendRegionClose(ServerName server, HRegionInfo region,
int versionOfClosingNode) throws IOException {
if (server == null) throw new NullPointerException("Passed server is null");
HRegionInterface hri = getServerConnection(server);
if (hri == null) {
@ -494,7 +497,7 @@ public class ServerManager {
region.getRegionNameAsString() +
" failed because no RPC connection found to this server");
}
return hri.closeRegion(region);
return hri.closeRegion(region, versionOfClosingNode);
}
/**

View File

@ -2696,15 +2696,31 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
@QosPriority(priority=HIGH_QOS)
public boolean closeRegion(HRegionInfo region)
throws IOException {
return closeRegion(region, true);
return closeRegion(region, true, -1);
}
@Override
@QosPriority(priority=HIGH_QOS)
public boolean closeRegion(final HRegionInfo region,
final int versionOfClosingNode)
throws IOException {
return closeRegion(region, true, versionOfClosingNode);
}
@Override
@QosPriority(priority=HIGH_QOS)
public boolean closeRegion(HRegionInfo region, final boolean zk)
throws IOException {
return closeRegion(region, zk, -1);
}
@QosPriority(priority=HIGH_QOS)
protected boolean closeRegion(HRegionInfo region, final boolean zk,
final int versionOfClosingNode)
throws IOException {
checkOpen();
LOG.info("Received close region: " + region.getRegionNameAsString());
LOG.info("Received close region: " + region.getRegionNameAsString() +
". Version of ZK closing node:" + versionOfClosingNode);
boolean hasit = this.onlineRegions.containsKey(region.getEncodedName());
if (!hasit) {
LOG.warn("Received close for region we are not serving; " +
@ -2713,12 +2729,13 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
+ region.getRegionNameAsString() + " but we are not serving it");
}
checkIfRegionInTransition(region, CLOSE);
return closeRegion(region, false, zk);
return closeRegion(region, false, zk, versionOfClosingNode);
}
@Override
@QosPriority(priority=HIGH_QOS)
public boolean closeRegion(byte[] encodedRegionName, boolean zk) throws IOException {
public boolean closeRegion(byte[] encodedRegionName, boolean zk)
throws IOException {
return closeRegion(encodedRegionName, false, zk);
}
@ -2732,6 +2749,23 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
*/
protected boolean closeRegion(HRegionInfo region, final boolean abort,
final boolean zk) {
return closeRegion(region, abort, zk, -1);
}
/**
* @param region Region to close
* @param abort True if we are aborting
* @param zk True if we are to update zk about the region close; if the close
* was orchestrated by master, then update zk. If the close is being run by
* the regionserver because its going down, don't update zk.
* @param versionOfClosingNode
* the version of znode to compare when RS transitions the znode from
* CLOSING state.
* @return True if closed a region.
*/
protected boolean closeRegion(HRegionInfo region, final boolean abort,
final boolean zk, final int versionOfClosingNode) {
if (this.regionsInTransitionInRS.containsKey(region.getEncodedNameAsBytes())) {
LOG.warn("Received close for region we are already opening or closing; " +
region.getEncodedName());
@ -2740,11 +2774,14 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler,
this.regionsInTransitionInRS.putIfAbsent(region.getEncodedNameAsBytes(), false);
CloseRegionHandler crh = null;
if (region.isRootRegion()) {
crh = new CloseRootHandler(this, this, region, abort, zk);
crh = new CloseRootHandler(this, this, region, abort, zk,
versionOfClosingNode);
} else if (region.isMetaRegion()) {
crh = new CloseMetaHandler(this, this, region, abort, zk);
crh = new CloseMetaHandler(this, this, region, abort, zk,
versionOfClosingNode);
} else {
crh = new CloseRegionHandler(this, this, region, abort, zk);
crh = new CloseRegionHandler(this, this, region, abort, zk,
versionOfClosingNode);
}
this.service.submit(crh);
return true;

View File

@ -30,14 +30,15 @@ public class CloseMetaHandler extends CloseRegionHandler {
// Called when master tells us shutdown a region via close rpc
public CloseMetaHandler(final Server server,
final RegionServerServices rsServices, final HRegionInfo regionInfo) {
this(server, rsServices, regionInfo, false, true);
this(server, rsServices, regionInfo, false, true, -1);
}
// Called when regionserver determines its to go down; not master orchestrated
public CloseMetaHandler(final Server server,
final RegionServerServices rsServices,
final HRegionInfo regionInfo,
final boolean abort, final boolean zk) {
super(server, rsServices, regionInfo, abort, zk, EventType.M_RS_CLOSE_META);
final boolean abort, final boolean zk, final int versionOfClosingNode) {
super(server, rsServices, regionInfo, abort, zk, versionOfClosingNode,
EventType.M_RS_CLOSE_META);
}
}
}

View File

@ -43,6 +43,7 @@ public class CloseRegionHandler extends EventHandler {
private static final Log LOG = LogFactory.getLog(CloseRegionHandler.class);
private final int FAILED = -1;
int expectedVersion = FAILED;
private final RegionServerServices rsServices;
@ -61,7 +62,7 @@ public class CloseRegionHandler extends EventHandler {
// This is executed after receiving an CLOSE RPC from the master.
public CloseRegionHandler(final Server server,
final RegionServerServices rsServices, HRegionInfo regionInfo) {
this(server, rsServices, regionInfo, false, true);
this(server, rsServices, regionInfo, false, true, -1);
}
/**
@ -74,19 +75,23 @@ public class CloseRegionHandler extends EventHandler {
*/
public CloseRegionHandler(final Server server,
final RegionServerServices rsServices,
final HRegionInfo regionInfo, final boolean abort, final boolean zk) {
this(server, rsServices, regionInfo, abort, zk, EventType.M_RS_CLOSE_REGION);
final HRegionInfo regionInfo, final boolean abort, final boolean zk,
final int versionOfClosingNode) {
this(server, rsServices, regionInfo, abort, zk, versionOfClosingNode,
EventType.M_RS_CLOSE_REGION);
}
protected CloseRegionHandler(final Server server,
final RegionServerServices rsServices, HRegionInfo regionInfo,
boolean abort, final boolean zk, EventType eventType) {
boolean abort, final boolean zk, final int versionOfClosingNode,
EventType eventType) {
super(server, eventType);
this.server = server;
this.rsServices = rsServices;
this.regionInfo = regionInfo;
this.abort = abort;
this.zk = zk;
this.expectedVersion = versionOfClosingNode;
}
public HRegionInfo getRegionInfo() {
@ -107,12 +112,6 @@ public class CloseRegionHandler extends EventHandler {
return;
}
int expectedVersion = FAILED;
if (this.zk) {
expectedVersion = getCurrentVersion();
if (expectedVersion == FAILED) return;
}
// Close the region
try {
// TODO: If we need to keep updating CLOSING stamp to prevent against
@ -137,7 +136,7 @@ public class CloseRegionHandler extends EventHandler {
this.rsServices.removeFromOnlineRegions(regionInfo.getEncodedName());
if (this.zk) {
if (setClosedState(expectedVersion, region)) {
if (setClosedState(this.expectedVersion, region)) {
LOG.debug("set region closed state in zk successfully for region " +
name + " sn name: " + this.server.getServerName());
} else {
@ -183,22 +182,4 @@ public class CloseRegionHandler extends EventHandler {
return true;
}
/**
* Get the node's current version
* @return The expectedVersion. If -1, we failed getting the node
*/
private int getCurrentVersion() {
int expectedVersion = FAILED;
try {
if ((expectedVersion = ZKAssign.getVersion(
server.getZooKeeper(), regionInfo)) == FAILED) {
LOG.warn("Error getting node's version in CLOSING state," +
" aborting close of " + regionInfo.getRegionNameAsString());
}
} catch (KeeperException e) {
LOG.warn("Error creating node in CLOSING state, aborting close of " +
regionInfo.getRegionNameAsString(), e);
}
return expectedVersion;
}
}

View File

@ -30,14 +30,15 @@ public class CloseRootHandler extends CloseRegionHandler {
// This is executed after receiving an CLOSE RPC from the master for root.
public CloseRootHandler(final Server server,
final RegionServerServices rsServices, HRegionInfo regionInfo) {
this(server, rsServices, regionInfo, false, true);
this(server, rsServices, regionInfo, false, true, -1);
}
// This is called directly by the regionserver when its determined its
// shutting down.
public CloseRootHandler(final Server server,
final RegionServerServices rsServices, HRegionInfo regionInfo,
final boolean abort, final boolean zk) {
super(server, rsServices, regionInfo, abort, zk, EventType.M_RS_CLOSE_ROOT);
final boolean abort, final boolean zk, final int versionOfClosingNode) {
super(server, rsServices, regionInfo, abort, zk, versionOfClosingNode,
EventType.M_RS_CLOSE_ROOT);
}
}
}

View File

@ -263,8 +263,8 @@ public class OpenRegionHandler extends EventHandler {
/**
* @param r Region we're working on.
* @return Transition znode to OPENED state.
* @throws IOException
* @return whether znode is successfully transitioned to OPENED state.
* @throws IOException
*/
private boolean transitionToOpened(final HRegion r) throws IOException {
boolean result = false;
@ -290,11 +290,11 @@ public class OpenRegionHandler extends EventHandler {
}
return result;
}
/**
* @param Region we're working on.
* This is not guaranteed to succeed, we just do our best.
* @return Transition znode to CLOSED state.
* @return whether znode is successfully transitioned to FAILED_OPEN state.
*/
private boolean tryTransitionToFailedOpen(final HRegionInfo hri) {
boolean result = false;

View File

@ -101,7 +101,8 @@ public class TestAssignmentManager {
// First amend the servermanager mock so that when we do send close of the
// first meta region on RANDOM_SERVERNAME, it will return true rather than
// default null.
Mockito.when(this.serverManager.sendRegionClose(RANDOM_SERVERNAME, hri)).thenReturn(true);
Mockito.when(this.serverManager.sendRegionClose(RANDOM_SERVERNAME, hri, -1))
.thenReturn(true);
// Create an AM.
AssignmentManager am =
new AssignmentManager(this.server, this.serverManager, this.ct, this.executor);

View File

@ -25,13 +25,25 @@ import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
import org.apache.hadoop.hbase.executor.RegionTransitionData;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.RegionServerServices;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.MockRegionServerServices;
import org.apache.hadoop.hbase.util.MockServer;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
@ -43,6 +55,31 @@ import org.mockito.Mockito;
public class TestCloseRegionHandler {
static final Log LOG = LogFactory.getLog(TestCloseRegionHandler.class);
private final static HBaseTestingUtility HTU = new HBaseTestingUtility();
private static final HTableDescriptor TEST_HTD =
new HTableDescriptor("TestCloseRegionHandler");
private HRegionInfo TEST_HRI;
private int testIndex = 0;
@BeforeClass public static void before() throws Exception {
HTU.startMiniZKCluster();
}
@AfterClass public static void after() throws IOException {
HTU.shutdownMiniZKCluster();
}
/**
* Before each test, use a different HRI, so the different tests
* don't interfere with each other. This allows us to use just
* a single ZK cluster for the whole suite.
*/
@Before
public void setupHRI() {
TEST_HRI = new HRegionInfo(TEST_HTD.getName(),
Bytes.toBytes(testIndex),
Bytes.toBytes(testIndex + 1));
testIndex++;
}
/**
* Test that if we fail a flush, abort gets set on close.
@ -55,7 +92,7 @@ public class TestCloseRegionHandler {
throws IOException, NodeExistsException, KeeperException {
final Server server = new MockServer(HTU, false);
final RegionServerServices rss = new MockRegionServerServices();
HTableDescriptor htd = new HTableDescriptor("testFailedFlushAborts");
HTableDescriptor htd = TEST_HTD;
final HRegionInfo hri =
new HRegionInfo(htd.getName(), HConstants.EMPTY_END_ROW,
HConstants.EMPTY_END_ROW);
@ -74,7 +111,7 @@ public class TestCloseRegionHandler {
// Assert the Server is NOT stopped before we call close region.
assertFalse(server.isStopped());
CloseRegionHandler handler =
new CloseRegionHandler(server, rss, hri, false, false);
new CloseRegionHandler(server, rss, hri, false, false, -1);
boolean throwable = false;
try {
handler.process();
@ -86,6 +123,93 @@ public class TestCloseRegionHandler {
assertTrue(server.isStopped());
}
}
/**
* Test if close region can handle ZK closing node version mismatch
* @throws IOException
* @throws NodeExistsException
* @throws KeeperException
*/
@Test public void testZKClosingNodeVersionMismatch()
throws IOException, NodeExistsException, KeeperException {
final Server server = new MockServer(HTU);
final RegionServerServices rss = new MockRegionServerServices();
HTableDescriptor htd = TEST_HTD;
final HRegionInfo hri = TEST_HRI;
// open a region first so that it can be closed later
OpenRegion(server, rss, htd, hri);
// close the region
// Create it CLOSING, which is what Master set before sending CLOSE RPC
int versionOfClosingNode = ZKAssign.createNodeClosing(server.getZooKeeper(),
hri, server.getServerName());
// The CloseRegionHandler will validate the expected version
// Given it is set to invalid versionOfClosingNode+1,
// CloseRegionHandler should be M_ZK_REGION_CLOSING
CloseRegionHandler handler =
new CloseRegionHandler(server, rss, hri, false, true,
versionOfClosingNode+1);
handler.process();
// Handler should remain in M_ZK_REGION_CLOSING
RegionTransitionData data =
ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName());
assertTrue(EventType.M_ZK_REGION_CLOSING == data.getEventType());
}
/**
* Test if the region can be closed properly
* @throws IOException
* @throws NodeExistsException
* @throws KeeperException
*/
@Test public void testCloseRegion()
throws IOException, NodeExistsException, KeeperException {
final Server server = new MockServer(HTU);
final RegionServerServices rss = new MockRegionServerServices();
HTableDescriptor htd = TEST_HTD;
HRegionInfo hri = TEST_HRI;
// open a region first so that it can be closed later
OpenRegion(server, rss, htd, hri);
// close the region
// Create it CLOSING, which is what Master set before sending CLOSE RPC
int versionOfClosingNode = ZKAssign.createNodeClosing(server.getZooKeeper(),
hri, server.getServerName());
// The CloseRegionHandler will validate the expected version
// Given it is set to correct versionOfClosingNode,
// CloseRegionHandlerit should be RS_ZK_REGION_CLOSED
CloseRegionHandler handler =
new CloseRegionHandler(server, rss, hri, false, true,
versionOfClosingNode);
handler.process();
// Handler should have transitioned it to RS_ZK_REGION_CLOSED
RegionTransitionData data =
ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName());
assertTrue(EventType.RS_ZK_REGION_CLOSED == data.getEventType());
}
private void OpenRegion(Server server, RegionServerServices rss,
HTableDescriptor htd, HRegionInfo hri)
throws IOException, NodeExistsException, KeeperException {
// Create it OFFLINE node, which is what Master set before sending OPEN RPC
ZKAssign.createNodeOffline(server.getZooKeeper(), hri,
server.getServerName());
OpenRegionHandler openHandler = new OpenRegionHandler(server, rss, hri,
htd);
openHandler.process();
RegionTransitionData data =
ZKAssign.getData(server.getZooKeeper(), hri.getEncodedName());
// delete the node, which is what Master do after the region is opened
ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
EventType.RS_ZK_REGION_OPENED);
}
@org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =

View File

@ -71,6 +71,7 @@ public class MockRegionServerServices implements RegionServerServices {
@Override
public void postOpenDeployTasks(HRegion r, CatalogTracker ct, boolean daughter)
throws KeeperException, IOException {
addToOnlineRegions(r);
}
@Override