HBASE-7166 TestSplitTransactionOnCluster tests are flaky

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1410060 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
larsh 2012-11-15 22:20:07 +00:00
parent 3e393dd4f8
commit 3e0385ec1d
1 changed files with 25 additions and 28 deletions

View File

@ -158,8 +158,7 @@ public class TestSplitTransactionOnCluster {
// Now try splitting and it should work.
split(hri, server, regionCount);
// Get daughters
List<HRegion> daughters = cluster.getRegions(tableName);
assertTrue(daughters.size() >= 2);
List<HRegion> daughters = checkAndGetDaughters(tableName);
// Assert the ephemeral node is up in zk.
String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
hri.getEncodedName());
@ -187,7 +186,12 @@ public class TestSplitTransactionOnCluster {
assertTrue(daughters.contains(r));
}
// Finally assert that the ephemeral SPLIT znode was cleaned up.
for (int i=0; i<100; i++) {
// wait a bit (10s max) for the node to disappear
stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
if (stats == null) break;
Thread.sleep(100);
}
LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
assertTrue(stats == null);
} finally {
@ -241,8 +245,7 @@ public class TestSplitTransactionOnCluster {
// Now try splitting and it should work.
split(hri, server, regionCount);
// Get daughters
List<HRegion> daughters = cluster.getRegions(tableName);
assertTrue(daughters.size() >= 2);
checkAndGetDaughters(tableName);
// OK, so split happened after we cleared the blocking node.
} finally {
admin.setBalancerRunning(true, false);
@ -284,8 +287,7 @@ public class TestSplitTransactionOnCluster {
// Now split.
split(hri, server, regionCount);
// Get daughters
List<HRegion> daughters = cluster.getRegions(tableName);
assertTrue(daughters.size() >= 2);
List<HRegion> daughters = checkAndGetDaughters(tableName);
// Remove one of the daughters from .META. to simulate failed insert of
// daughter region up into .META.
removeDaughterFromMeta(daughters.get(0).getRegionName());
@ -341,11 +343,7 @@ public class TestSplitTransactionOnCluster {
// Now split.
split(hri, server, regionCount);
// Get daughters
List<HRegion> daughters;
do {
daughters = cluster.getRegions(tableName);
} while (daughters.size() < 2);
assertTrue(daughters.size() >= 2);
List<HRegion> daughters = checkAndGetDaughters(tableName);
// Now split one of the daughters.
regionCount = ProtobufUtil.getOnlineRegions(server).size();
HRegionInfo daughter = daughters.get(0).getRegionInfo();
@ -426,14 +424,7 @@ public class TestSplitTransactionOnCluster {
// Now try splitting and it should work.
this.admin.split(hri.getRegionNameAsString());
while (!(cluster.getRegions(tableName).size() >= 2)) {
LOG.debug("Waiting on region to split");
Thread.sleep(100);
}
// Get daughters
List<HRegion> daughters = cluster.getRegions(tableName);
assertTrue(daughters.size() >= 2);
checkAndGetDaughters(tableName);
// Assert the ephemeral node is up in zk.
String path = ZKAssign.getNodeName(t.getConnection()
.getZooKeeperWatcher(), hri.getEncodedName());
@ -502,14 +493,7 @@ public class TestSplitTransactionOnCluster {
printOutRegions(server, "Initial regions: ");
this.admin.split(hri.getRegionNameAsString());
while (!(cluster.getRegions(tableName).size() >= 2)) {
LOG.debug("Waiting on region to split");
Thread.sleep(100);
}
// Get daughters
List<HRegion> daughters = cluster.getRegions(tableName);
assertTrue(daughters.size() >= 2);
checkAndGetDaughters(tableName);
// Assert the ephemeral node is up in zk.
String path = ZKAssign.getNodeName(t.getConnection()
.getZooKeeperWatcher(), hri.getEncodedName());
@ -786,6 +770,19 @@ public class TestSplitTransactionOnCluster {
}
private List<HRegion> checkAndGetDaughters(byte[] tableName)
throws InterruptedException {
List<HRegion> daughters = null;
// try up to 10s
for (int i=0; i<100; i++) {
daughters = cluster.getRegions(tableName);
if (daughters.size() >= 2) break;
Thread.sleep(100);
}
assertTrue(daughters.size() >= 2);
return daughters;
}
private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
throws IOException, InterruptedException {
cluster.abortMaster(0);