HBASE-6088 Region splitting not happened for long time due to ZK exception while creating RS_ZK_SPLITTING node (Rajesh)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1343817 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fa54aef855
commit
ccb9c74400
|
@ -108,6 +108,10 @@ public class SplitTransaction {
|
|||
* we need to rollback.
|
||||
*/
|
||||
enum JournalEntry {
|
||||
/**
|
||||
* Before creating node in splitting state.
|
||||
*/
|
||||
STARTED_SPLITTING,
|
||||
/**
|
||||
* Set region as in transition, set it into SPLITTING state.
|
||||
*/
|
||||
|
@ -232,7 +236,8 @@ public class SplitTransaction {
|
|||
this.fileSplitTimeout = testing ? this.fileSplitTimeout :
|
||||
server.getConfiguration().getLong("hbase.regionserver.fileSplitTimeout",
|
||||
this.fileSplitTimeout);
|
||||
|
||||
|
||||
this.journal.add(JournalEntry.STARTED_SPLITTING);
|
||||
// Set ephemeral SPLITTING znode up in zk. Mocked servers sometimes don't
|
||||
// have zookeeper so don't do zk stuff if server or zookeeper is null
|
||||
if (server != null && server.getZooKeeper() != null) {
|
||||
|
@ -733,9 +738,16 @@ public class SplitTransaction {
|
|||
while (iterator.hasPrevious()) {
|
||||
JournalEntry je = iterator.previous();
|
||||
switch(je) {
|
||||
|
||||
case STARTED_SPLITTING:
|
||||
if (server != null && server.getZooKeeper() != null) {
|
||||
cleanZK(server, this.parent.getRegionInfo(), false);
|
||||
}
|
||||
break;
|
||||
|
||||
case SET_SPLITTING_IN_ZK:
|
||||
if (server != null && server.getZooKeeper() != null) {
|
||||
cleanZK(server, this.parent.getRegionInfo());
|
||||
cleanZK(server, this.parent.getRegionInfo(), true);
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -828,11 +840,15 @@ public class SplitTransaction {
|
|||
LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
|
||||
}
|
||||
|
||||
private static void cleanZK(final Server server, final HRegionInfo hri) {
|
||||
private static void cleanZK(final Server server, final HRegionInfo hri, boolean abort) {
|
||||
try {
|
||||
// Only delete if its in expected state; could have been hijacked.
|
||||
ZKAssign.deleteNode(server.getZooKeeper(), hri.getEncodedName(),
|
||||
EventType.RS_ZK_REGION_SPLITTING);
|
||||
} catch (KeeperException.NoNodeException nn) {
|
||||
if (abort) {
|
||||
server.abort("Failed cleanup of " + hri.getRegionNameAsString(), nn);
|
||||
}
|
||||
} catch (KeeperException e) {
|
||||
server.abort("Failed cleanup of " + hri.getRegionNameAsString(), e);
|
||||
}
|
||||
|
@ -852,9 +868,8 @@ public class SplitTransaction {
|
|||
* @throws KeeperException
|
||||
* @throws IOException
|
||||
*/
|
||||
private static int createNodeSplitting(final ZooKeeperWatcher zkw,
|
||||
final HRegionInfo region, final ServerName serverName)
|
||||
throws KeeperException, IOException {
|
||||
int createNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo region,
|
||||
final ServerName serverName) throws KeeperException, IOException {
|
||||
LOG.debug(zkw.prefix("Creating ephemeral node for " +
|
||||
region.getEncodedName() + " in SPLITTING state"));
|
||||
RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING,
|
||||
|
@ -911,10 +926,18 @@ public class SplitTransaction {
|
|||
znodeVersion, payload);
|
||||
}
|
||||
|
||||
private static int transitionNodeSplitting(final ZooKeeperWatcher zkw,
|
||||
final HRegionInfo parent,
|
||||
final ServerName serverName, final int version)
|
||||
throws KeeperException, IOException {
|
||||
/**
|
||||
*
|
||||
* @param zkw zk reference
|
||||
* @param parent region to be transitioned to splitting
|
||||
* @param serverName server event originates from
|
||||
* @param version znode version
|
||||
* @return version of node after transition, -1 if unsuccessful transition
|
||||
* @throws KeeperException
|
||||
* @throws IOException
|
||||
*/
|
||||
int transitionNodeSplitting(final ZooKeeperWatcher zkw, final HRegionInfo parent,
|
||||
final ServerName serverName, final int version) throws KeeperException, IOException {
|
||||
return ZKAssign.transitionNode(zkw, parent, serverName,
|
||||
EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version);
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import static org.junit.Assert.assertEquals;
|
|||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertNotSame;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
@ -43,6 +44,7 @@ import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
|
|||
import org.apache.hadoop.hbase.util.Threads;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.KeeperException.NodeExistsException;
|
||||
import org.apache.zookeeper.data.Stat;
|
||||
|
@ -520,6 +522,84 @@ public class TestSplitTransactionOnCluster {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* While transitioning node from RS_ZK_REGION_SPLITTING to
|
||||
* RS_ZK_REGION_SPLITTING during region split,if zookeper went down split always
|
||||
* fails for the region. HBASE-6088 fixes this scenario.
|
||||
* This test case is to test the znode is deleted(if created) or not in roll back.
|
||||
*
|
||||
* @throws IOException
|
||||
* @throws InterruptedException
|
||||
* @throws KeeperException
|
||||
*/
|
||||
@Test
|
||||
public void testSplitBeforeSettingSplittingInZK() throws IOException,
|
||||
InterruptedException, KeeperException {
|
||||
testSplitBeforeSettingSplittingInZK(true);
|
||||
testSplitBeforeSettingSplittingInZK(false);
|
||||
}
|
||||
|
||||
private void testSplitBeforeSettingSplittingInZK(boolean nodeCreated) throws IOException,
|
||||
KeeperException {
|
||||
final byte[] tableName = Bytes.toBytes("testSplitBeforeSettingSplittingInZK");
|
||||
HBaseAdmin admin = TESTING_UTIL.getHBaseAdmin();
|
||||
try {
|
||||
// Create table then get the single region for our new table.
|
||||
HTableDescriptor htd = new HTableDescriptor(tableName);
|
||||
htd.addFamily(new HColumnDescriptor("cf"));
|
||||
admin.createTable(htd);
|
||||
|
||||
List<HRegion> regions = cluster.getRegions(tableName);
|
||||
int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
|
||||
HRegionServer regionServer = cluster.getRegionServer(regionServerIndex);
|
||||
SplitTransaction st = null;
|
||||
if (nodeCreated) {
|
||||
st = new MockedSplitTransaction(regions.get(0), null) {
|
||||
@Override
|
||||
int transitionNodeSplitting(ZooKeeperWatcher zkw, HRegionInfo parent,
|
||||
ServerName serverName, int version) throws KeeperException, IOException {
|
||||
throw new IOException();
|
||||
}
|
||||
};
|
||||
} else {
|
||||
st = new MockedSplitTransaction(regions.get(0), null) {
|
||||
@Override
|
||||
int createNodeSplitting(ZooKeeperWatcher zkw, HRegionInfo region, ServerName serverName)
|
||||
throws KeeperException, IOException {
|
||||
throw new IOException();
|
||||
}
|
||||
};
|
||||
}
|
||||
try {
|
||||
st.execute(regionServer, regionServer);
|
||||
} catch (IOException e) {
|
||||
String node = ZKAssign.getNodeName(regionServer.getZooKeeper(), regions.get(0)
|
||||
.getRegionInfo().getEncodedName());
|
||||
if (nodeCreated) {
|
||||
assertFalse(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
|
||||
} else {
|
||||
assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
|
||||
}
|
||||
assertTrue(st.rollback(regionServer, regionServer));
|
||||
assertTrue(ZKUtil.checkExists(regionServer.getZooKeeper(), node) == -1);
|
||||
}
|
||||
} finally {
|
||||
if (admin.isTableAvailable(tableName) && admin.isTableEnabled(tableName)) {
|
||||
admin.disableTable(tableName);
|
||||
admin.deleteTable(tableName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class MockedSplitTransaction extends SplitTransaction {
|
||||
|
||||
public MockedSplitTransaction(HRegion r, byte[] splitrow) {
|
||||
super(r, splitrow);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
|
||||
throws IOException, InterruptedException {
|
||||
cluster.abortMaster(0);
|
||||
|
|
Loading…
Reference in New Issue