HBASE-5806 Handle split region related failures on master restart and RS restart (Chinna Rao)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1338325 13f79535-47bb-0310-9956-ffa450edef68
ramkrishna 2012-05-14 18:14:37 +00:00
parent 31776afa74
commit e8560bf973
5 changed files with 219 additions and 11 deletions

LocalHBaseCluster.java

@@ -71,7 +71,6 @@ public class LocalHBaseCluster {
   /** 'local:' */
   public static final String LOCAL_COLON = LOCAL + ":";
   private final Configuration conf;
-  private final Class<? extends HMaster> masterClass;
   private final Class<? extends HRegionServer> regionServerClass;
 
   /**
@@ -145,9 +144,6 @@
     conf.set(HConstants.MASTER_PORT, "0");
     conf.set(HConstants.REGIONSERVER_PORT, "0");
     // Start the HMasters.
-    this.masterClass =
-      (Class<? extends HMaster>)conf.getClass(HConstants.MASTER_IMPL,
-      masterClass);
     for (int i = 0; i < noMasters; i++) {
       addMaster(new Configuration(conf), i);
     }
@@ -199,9 +195,8 @@
     // Create each master with its own Configuration instance so each has
     // its HConnection instance rather than share (see HBASE_INSTANCES down in
     // the guts of HConnectionManager.
-    JVMClusterUtil.MasterThread mt =
-      JVMClusterUtil.createMasterThread(c,
-        this.masterClass, index);
+    JVMClusterUtil.MasterThread mt = JVMClusterUtil.createMasterThread(c,
+        (Class<? extends HMaster>) c.getClass(HConstants.MASTER_IMPL, HMaster.class), index);
     this.masterThreads.add(mt);
     return mt;
   }
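
With this change, the master class is resolved from each master's own Configuration at the time its thread is created, instead of from a value cached when LocalHBaseCluster was constructed. A minimal sketch (not part of this commit) of why that matters for the tests below, assuming a running MiniHBaseCluster; the helper name restartWithCustomMaster and the CustomMaster parameter are hypothetical:

    // Swap in a custom HMaster implementation between restarts; addMaster()
    // now reads HConstants.MASTER_IMPL per call, so the next master started
    // honours whatever the Configuration says at that moment.
    void restartWithCustomMaster(MiniHBaseCluster cluster,
        Class<? extends HMaster> customMaster) throws Exception {
      cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
          customMaster, HMaster.class);
      cluster.startMaster();                 // comes up as customMaster
      cluster.waitForActiveAndReadyMaster(); // block until it is active
    }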

AssignmentManager.java

@@ -609,9 +609,14 @@ public class AssignmentManager extends ZooKeeperListener {
         }
         failoverProcessedRegions.put(encodedRegionName, regionInfo);
         break;
+      case RS_ZK_REGION_SPLITTING:
+        LOG.debug("Processed region in state : " + et);
+        break;
+      case RS_ZK_REGION_SPLIT:
+        LOG.debug("Processed region in state : " + et);
+        break;
       default:
-        throw new IllegalStateException("Received event is not valid.");
+        throw new IllegalStateException("Received region in state :" + et + " is not valid");
     }
   }
 }
@@ -2547,6 +2552,19 @@
       enableTableIfNotDisabledOrDisablingOrEnabling(disabled,
         disablingOrEnabling, tableName);
     } else {
+      // If region is in offline and split state check the ZKNode
+      if (regionInfo.isOffline() && regionInfo.isSplit()) {
+        String node = ZKAssign.getNodeName(this.watcher, regionInfo
+            .getEncodedName());
+        Stat stat = new Stat();
+        byte[] data = ZKUtil.getDataNoWatch(this.watcher, node, stat);
+        // If znode does not exist dont consider this region
+        if (data == null) {
+          LOG.debug("Region " + regionInfo.getRegionNameAsString()
+              + " split is completed. Hence need not add to regions list");
+          continue;
+        }
+      }
       // Region is being served and on an active server
       // add only if region not in disabled and enabling table
       if (false == checkIfRegionBelongsToDisabled(regionInfo)
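
The added guard means a parent region that is already offline and split is dropped from the rebuild of the assignment state only once its unassigned znode is gone, i.e. once the split has been fully processed. A sketch (not part of this commit) of the same check factored into a helper, assuming AssignmentManager's watcher field; the method name isSplitCompletelyProcessed is hypothetical:

    // Returns true when the region is a split parent whose unassigned znode
    // has already been deleted, meaning the split completed and the region
    // no longer needs to be tracked for assignment.
    private boolean isSplitCompletelyProcessed(HRegionInfo regionInfo)
        throws KeeperException {
      if (!regionInfo.isOffline() || !regionInfo.isSplit()) {
        return false; // not a split parent at all
      }
      String node = ZKAssign.getNodeName(this.watcher, regionInfo.getEncodedName());
      byte[] data = ZKUtil.getDataNoWatch(this.watcher, node, new Stat());
      return data == null; // znode gone: split completed
    }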

HMaster.java

@@ -611,7 +611,7 @@ Server {
     status.setStatus("Starting balancer and catalog janitor");
     this.balancerChore = getAndStartBalancerChore(this);
     this.catalogJanitorChore = new CatalogJanitor(this, this);
-    Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());
+    startCatalogJanitorChore();
 
     registerMBean();
@@ -629,6 +629,14 @@
     }
   }
 
+  /**
+   * Useful for testing purpose also where we have
+   * master restart scenarios.
+   */
+  protected void startCatalogJanitorChore() {
+    Threads.setDaemonThreadRunning(catalogJanitorChore.getThread());
+  }
+
   /**
    * Override to change master's splitLogAfterStartup. Used testing
    * @param mfs
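
Extracting the chore start into a protected method gives tests a seam: a subclass can override it so the CatalogJanitor never runs, letting a split parent's META entry survive a master restart. A minimal sketch (not part of this commit), mirroring the MockMasterWithoutCatalogJanitor added to the test below; the class name NoJanitorMaster is hypothetical:

    public class NoJanitorMaster extends HMaster {
      public NoJanitorMaster(Configuration conf)
          throws IOException, KeeperException, InterruptedException {
        super(conf);
      }

      @Override
      protected void startCatalogJanitorChore() {
        // Deliberately empty: never start the CatalogJanitor.
      }
    }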

ServerShutdownHandler.java

@@ -269,7 +269,7 @@ public class ServerShutdownHandler extends EventHandler {
     // Skip regions that were in transition unless CLOSING or PENDING_CLOSE
     for (RegionState rit : regionsInTransition) {
-      if (!rit.isClosing() && !rit.isPendingClose()) {
+      if (!rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting()) {
         LOG.debug("Removed " + rit.getRegion().getRegionNameAsString() +
           " from list of regions to assign because in RIT; region state: " +
           rit.getState());
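
The extra !rit.isSplitting() clause changes which in-transition regions the shutdown handler re-queues for assignment after a region server dies. A condensed sketch (not part of this commit) of the resulting rule; shouldSkipAssignment is a hypothetical name:

    // A region in transition on the dead server is dropped from the
    // reassignment list unless it was CLOSING, PENDING_CLOSE, or (with this
    // fix) SPLITTING; a region caught mid-split must still be reassigned
    // because its split never completed.
    private static boolean shouldSkipAssignment(RegionState rit) {
      return !rit.isClosing() && !rit.isPendingClose() && !rit.isSplitting();
    }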

TestSplitTransactionOnCluster.java

@@ -29,17 +29,20 @@ import java.util.List;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.*;
 import org.apache.hadoop.hbase.client.Delete;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.executor.EventHandler.EventType;
+import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.master.handler.SplitRegionHandler;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
+import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.KeeperException.NodeExistsException;
 import org.apache.zookeeper.data.Stat;
@@ -355,6 +358,178 @@ public class TestSplitTransactionOnCluster {
     }
   }
 
+  /**
+   * Verifies HBASE-5806. When splitting is partially done and the master goes down
+   * when the SPLIT node is in either SPLIT or SPLITTING state.
+   *
+   * @throws IOException
+   * @throws InterruptedException
+   * @throws NodeExistsException
+   * @throws KeeperException
+   * @throws DeserializationException
+   */
+  @Test(timeout = 300000)
+  public void testMasterRestartWhenSplittingIsPartial()
+      throws IOException, InterruptedException, NodeExistsException,
+      KeeperException, DeserializationException {
+    final byte[] tableName = Bytes.toBytes("testMasterRestartWhenSplittingIsPartial");
+    // Create table then get the single region for our new table.
+    HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY);
+    List<HRegion> regions = cluster.getRegions(tableName);
+    HRegionInfo hri = getAndCheckSingleTableRegion(regions);
+    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
+    // Turn off balancer so it doesn't cut in and mess up our placements.
+    this.admin.balanceSwitch(false);
+    // Turn off the meta scanner so it don't remove parent on us.
+    cluster.getMaster().setCatalogJanitorEnabled(false);
+    try {
+      // Add a bit of load up into the table so splittable.
+      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
+      // Get region pre-split.
+      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
+      printOutRegions(server, "Initial regions: ");
+      // Now, before we split, set special flag in master, a flag that has
+      // it FAIL the processing of split.
+      SplitRegionHandler.TEST_SKIP = true;
+      // Now try splitting and it should work.
+      this.admin.split(hri.getRegionNameAsString());
+      while (!(cluster.getRegions(tableName).size() >= 2)) {
+        LOG.debug("Waiting on region to split");
+        Thread.sleep(100);
+      }
+      // Get daughters
+      List<HRegion> daughters = cluster.getRegions(tableName);
+      assertTrue(daughters.size() >= 2);
+      // Assert the ephemeral node is up in zk.
+      String path = ZKAssign.getNodeName(t.getConnection()
+          .getZooKeeperWatcher(), hri.getEncodedName());
+      Stat stats = t.getConnection().getZooKeeperWatcher()
+          .getRecoverableZooKeeper().exists(path, false);
+      LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
+          + stats);
+      byte[] bytes = ZKAssign.getData(t.getConnection()
+          .getZooKeeperWatcher(), hri.getEncodedName());
+      RegionTransition rtd = RegionTransition.parseFrom(bytes);
+      // State could be SPLIT or SPLITTING.
+      assertTrue(rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLIT)
+          || rtd.getEventType().equals(EventType.RS_ZK_REGION_SPLITTING));
+      // abort and wait for new master.
+      MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
+      this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
+      // update the hri to be offlined and splitted.
+      hri.setOffline(true);
+      hri.setSplit(true);
+      ServerName regionServerOfRegion = master.getAssignmentManager()
+          .getRegionServerOfRegion(hri);
+      assertTrue(regionServerOfRegion != null);
+    } finally {
+      // Set this flag back.
+      SplitRegionHandler.TEST_SKIP = false;
+      admin.balanceSwitch(true);
+      cluster.getMaster().setCatalogJanitorEnabled(true);
+    }
+  }
+
+  /**
+   * Verifies HBASE-5806. Here the case is that splitting is completed but before the
+   * CJ could remove the parent region the master is killed and restarted.
+   * @throws IOException
+   * @throws InterruptedException
+   * @throws NodeExistsException
+   * @throws KeeperException
+   */
+  @Test (timeout = 300000)
+  public void testMasterRestartAtRegionSplitPendingCatalogJanitor()
+      throws IOException, InterruptedException, NodeExistsException,
+      KeeperException {
+    final byte[] tableName = Bytes.toBytes("testMasterRestartAtRegionSplitPendingCatalogJanitor");
+    // Create table then get the single region for our new table.
+    HTable t = TESTING_UTIL.createTable(tableName, HConstants.CATALOG_FAMILY);
+    List<HRegion> regions = cluster.getRegions(tableName);
+    HRegionInfo hri = getAndCheckSingleTableRegion(regions);
+    int tableRegionIndex = ensureTableRegionNotOnSameServerAsMeta(admin, hri);
+    // Turn off balancer so it doesn't cut in and mess up our placements.
+    this.admin.balanceSwitch(false);
+    // Turn off the meta scanner so it don't remove parent on us.
+    cluster.getMaster().setCatalogJanitorEnabled(false);
+    try {
+      // Add a bit of load up into the table so splittable.
+      TESTING_UTIL.loadTable(t, HConstants.CATALOG_FAMILY);
+      // Get region pre-split.
+      HRegionServer server = cluster.getRegionServer(tableRegionIndex);
+      printOutRegions(server, "Initial regions: ");
+      this.admin.split(hri.getRegionNameAsString());
+      while (!(cluster.getRegions(tableName).size() >= 2)) {
+        LOG.debug("Waiting on region to split");
+        Thread.sleep(100);
+      }
+      // Get daughters
+      List<HRegion> daughters = cluster.getRegions(tableName);
+      assertTrue(daughters.size() >= 2);
+      // Assert the ephemeral node is up in zk.
+      String path = ZKAssign.getNodeName(t.getConnection()
+          .getZooKeeperWatcher(), hri.getEncodedName());
+      Stat stats = t.getConnection().getZooKeeperWatcher()
+          .getRecoverableZooKeeper().exists(path, false);
+      LOG.info("EPHEMERAL NODE BEFORE SERVER ABORT, path=" + path + ", stats="
+          + stats);
+      String node = ZKAssign.getNodeName(t.getConnection()
+          .getZooKeeperWatcher(), hri.getEncodedName());
+      Stat stat = new Stat();
+      byte[] data = ZKUtil.getDataNoWatch(t.getConnection()
+          .getZooKeeperWatcher(), node, stat);
+      // ZKUtil.create
+      while (data != null) {
+        Thread.sleep(1000);
+        data = ZKUtil.getDataNoWatch(t.getConnection().getZooKeeperWatcher(),
+            node, stat);
+      }
+      MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
+      this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
+      hri.setOffline(true);
+      hri.setSplit(true);
+      ServerName regionServerOfRegion = master.getAssignmentManager()
+          .getRegionServerOfRegion(hri);
+      assertTrue(regionServerOfRegion == null);
+    } finally {
+      // Set this flag back.
+      SplitRegionHandler.TEST_SKIP = false;
+      this.admin.balanceSwitch(true);
+      cluster.getMaster().setCatalogJanitorEnabled(true);
+    }
+  }
+
+  private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
+      throws IOException, InterruptedException {
+    cluster.abortMaster(0);
+    cluster.waitOnMaster(0);
+    cluster.getConfiguration().setClass(HConstants.MASTER_IMPL,
+        MockMasterWithoutCatalogJanitor.class, HMaster.class);
+    MockMasterWithoutCatalogJanitor master = null;
+    master = (MockMasterWithoutCatalogJanitor) cluster.startMaster().getMaster();
+    cluster.waitForActiveAndReadyMaster();
+    return master;
+  }
+
   private void split(final HRegionInfo hri, final HRegionServer server,
       final int regionCount)
       throws IOException, InterruptedException {
@@ -460,6 +635,18 @@ public class TestSplitTransactionOnCluster {
     }
   }
 
+  public static class MockMasterWithoutCatalogJanitor extends HMaster {
+
+    public MockMasterWithoutCatalogJanitor(Configuration conf) throws IOException, KeeperException,
+        InterruptedException {
+      super(conf);
+    }
+
+    protected void startCatalogJanitorChore() {
+      LOG.debug("Customised master executed.");
+    }
+  }
+
   @org.junit.Rule
   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
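
To run just these new tests locally, something like the following should work with the project's Maven build (module layout and profiles may vary by checkout):

    mvn test -Dtest=TestSplitTransactionOnCluster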