diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java index a9e7c58bc08..fcfb7647829 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java @@ -158,8 +158,7 @@ public class TestSplitTransactionOnCluster { // Now try splitting and it should work. split(hri, server, regionCount); // Get daughters - List daughters = cluster.getRegions(tableName); - assertTrue(daughters.size() >= 2); + List daughters = checkAndGetDaughters(tableName); // Assert the ephemeral node is up in zk. String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(), hri.getEncodedName()); @@ -187,7 +186,12 @@ public class TestSplitTransactionOnCluster { assertTrue(daughters.contains(r)); } // Finally assert that the ephemeral SPLIT znode was cleaned up. - stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false); + for (int i=0; i<100; i++) { + // wait a bit (10s max) for the node to disappear + stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false); + if (stats == null) break; + Thread.sleep(100); + } LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats); assertTrue(stats == null); } finally { @@ -241,8 +245,7 @@ public class TestSplitTransactionOnCluster { // Now try splitting and it should work. split(hri, server, regionCount); // Get daughters - List daughters = cluster.getRegions(tableName); - assertTrue(daughters.size() >= 2); + checkAndGetDaughters(tableName); // OK, so split happened after we cleared the blocking node. } finally { admin.setBalancerRunning(true, false); @@ -284,8 +287,7 @@ public class TestSplitTransactionOnCluster { // Now split. split(hri, server, regionCount); // Get daughters - List daughters = cluster.getRegions(tableName); - assertTrue(daughters.size() >= 2); + List daughters = checkAndGetDaughters(tableName); // Remove one of the daughters from .META. to simulate failed insert of // daughter region up into .META. removeDaughterFromMeta(daughters.get(0).getRegionName()); @@ -341,11 +343,7 @@ public class TestSplitTransactionOnCluster { // Now split. split(hri, server, regionCount); // Get daughters - List daughters; - do { - daughters = cluster.getRegions(tableName); - } while (daughters.size() < 2); - assertTrue(daughters.size() >= 2); + List daughters = checkAndGetDaughters(tableName); // Now split one of the daughters. regionCount = ProtobufUtil.getOnlineRegions(server).size(); HRegionInfo daughter = daughters.get(0).getRegionInfo(); @@ -426,14 +424,7 @@ public class TestSplitTransactionOnCluster { // Now try splitting and it should work. this.admin.split(hri.getRegionNameAsString()); - while (!(cluster.getRegions(tableName).size() >= 2)) { - LOG.debug("Waiting on region to split"); - Thread.sleep(100); - } - - // Get daughters - List daughters = cluster.getRegions(tableName); - assertTrue(daughters.size() >= 2); + checkAndGetDaughters(tableName); // Assert the ephemeral node is up in zk. String path = ZKAssign.getNodeName(t.getConnection() .getZooKeeperWatcher(), hri.getEncodedName()); @@ -502,14 +493,7 @@ public class TestSplitTransactionOnCluster { printOutRegions(server, "Initial regions: "); this.admin.split(hri.getRegionNameAsString()); - while (!(cluster.getRegions(tableName).size() >= 2)) { - LOG.debug("Waiting on region to split"); - Thread.sleep(100); - } - - // Get daughters - List daughters = cluster.getRegions(tableName); - assertTrue(daughters.size() >= 2); + checkAndGetDaughters(tableName); // Assert the ephemeral node is up in zk. String path = ZKAssign.getNodeName(t.getConnection() .getZooKeeperWatcher(), hri.getEncodedName()); @@ -786,6 +770,19 @@ public class TestSplitTransactionOnCluster { } + private List checkAndGetDaughters(byte[] tableName) + throws InterruptedException { + List daughters = null; + // try up to 10s + for (int i=0; i<100; i++) { + daughters = cluster.getRegions(tableName); + if (daughters.size() >= 2) break; + Thread.sleep(100); + } + assertTrue(daughters.size() >= 2); + return daughters; + } + private MockMasterWithoutCatalogJanitor abortAndWaitForMaster() throws IOException, InterruptedException { cluster.abortMaster(0);