HBASE-7166 TestSplitTransactionOnCluster tests are flaky

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1410060 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
larsh 2012-11-15 22:20:07 +00:00
parent 3e393dd4f8
commit 3e0385ec1d

View File

@ -158,8 +158,7 @@ public class TestSplitTransactionOnCluster {
// Now try splitting and it should work. // Now try splitting and it should work.
split(hri, server, regionCount); split(hri, server, regionCount);
// Get daughters // Get daughters
List<HRegion> daughters = cluster.getRegions(tableName); List<HRegion> daughters = checkAndGetDaughters(tableName);
assertTrue(daughters.size() >= 2);
// Assert the ephemeral node is up in zk. // Assert the ephemeral node is up in zk.
String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(), String path = ZKAssign.getNodeName(TESTING_UTIL.getZooKeeperWatcher(),
hri.getEncodedName()); hri.getEncodedName());
@ -187,7 +186,12 @@ public class TestSplitTransactionOnCluster {
assertTrue(daughters.contains(r)); assertTrue(daughters.contains(r));
} }
// Finally assert that the ephemeral SPLIT znode was cleaned up. // Finally assert that the ephemeral SPLIT znode was cleaned up.
for (int i=0; i<100; i++) {
// wait a bit (10s max) for the node to disappear
stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false); stats = TESTING_UTIL.getZooKeeperWatcher().getRecoverableZooKeeper().exists(path, false);
if (stats == null) break;
Thread.sleep(100);
}
LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats); LOG.info("EPHEMERAL NODE AFTER SERVER ABORT, path=" + path + ", stats=" + stats);
assertTrue(stats == null); assertTrue(stats == null);
} finally { } finally {
@ -241,8 +245,7 @@ public class TestSplitTransactionOnCluster {
// Now try splitting and it should work. // Now try splitting and it should work.
split(hri, server, regionCount); split(hri, server, regionCount);
// Get daughters // Get daughters
List<HRegion> daughters = cluster.getRegions(tableName); checkAndGetDaughters(tableName);
assertTrue(daughters.size() >= 2);
// OK, so split happened after we cleared the blocking node. // OK, so split happened after we cleared the blocking node.
} finally { } finally {
admin.setBalancerRunning(true, false); admin.setBalancerRunning(true, false);
@ -284,8 +287,7 @@ public class TestSplitTransactionOnCluster {
// Now split. // Now split.
split(hri, server, regionCount); split(hri, server, regionCount);
// Get daughters // Get daughters
List<HRegion> daughters = cluster.getRegions(tableName); List<HRegion> daughters = checkAndGetDaughters(tableName);
assertTrue(daughters.size() >= 2);
// Remove one of the daughters from .META. to simulate failed insert of // Remove one of the daughters from .META. to simulate failed insert of
// daughter region up into .META. // daughter region up into .META.
removeDaughterFromMeta(daughters.get(0).getRegionName()); removeDaughterFromMeta(daughters.get(0).getRegionName());
@ -341,11 +343,7 @@ public class TestSplitTransactionOnCluster {
// Now split. // Now split.
split(hri, server, regionCount); split(hri, server, regionCount);
// Get daughters // Get daughters
List<HRegion> daughters; List<HRegion> daughters = checkAndGetDaughters(tableName);
do {
daughters = cluster.getRegions(tableName);
} while (daughters.size() < 2);
assertTrue(daughters.size() >= 2);
// Now split one of the daughters. // Now split one of the daughters.
regionCount = ProtobufUtil.getOnlineRegions(server).size(); regionCount = ProtobufUtil.getOnlineRegions(server).size();
HRegionInfo daughter = daughters.get(0).getRegionInfo(); HRegionInfo daughter = daughters.get(0).getRegionInfo();
@ -426,14 +424,7 @@ public class TestSplitTransactionOnCluster {
// Now try splitting and it should work. // Now try splitting and it should work.
this.admin.split(hri.getRegionNameAsString()); this.admin.split(hri.getRegionNameAsString());
while (!(cluster.getRegions(tableName).size() >= 2)) { checkAndGetDaughters(tableName);
LOG.debug("Waiting on region to split");
Thread.sleep(100);
}
// Get daughters
List<HRegion> daughters = cluster.getRegions(tableName);
assertTrue(daughters.size() >= 2);
// Assert the ephemeral node is up in zk. // Assert the ephemeral node is up in zk.
String path = ZKAssign.getNodeName(t.getConnection() String path = ZKAssign.getNodeName(t.getConnection()
.getZooKeeperWatcher(), hri.getEncodedName()); .getZooKeeperWatcher(), hri.getEncodedName());
@ -502,14 +493,7 @@ public class TestSplitTransactionOnCluster {
printOutRegions(server, "Initial regions: "); printOutRegions(server, "Initial regions: ");
this.admin.split(hri.getRegionNameAsString()); this.admin.split(hri.getRegionNameAsString());
while (!(cluster.getRegions(tableName).size() >= 2)) { checkAndGetDaughters(tableName);
LOG.debug("Waiting on region to split");
Thread.sleep(100);
}
// Get daughters
List<HRegion> daughters = cluster.getRegions(tableName);
assertTrue(daughters.size() >= 2);
// Assert the ephemeral node is up in zk. // Assert the ephemeral node is up in zk.
String path = ZKAssign.getNodeName(t.getConnection() String path = ZKAssign.getNodeName(t.getConnection()
.getZooKeeperWatcher(), hri.getEncodedName()); .getZooKeeperWatcher(), hri.getEncodedName());
@ -786,6 +770,19 @@ public class TestSplitTransactionOnCluster {
} }
/**
 * Polls {@code cluster.getRegions(tableName)} until it reports at least two
 * regions, then returns that list. Callers in this test use it right after
 * requesting a split, so two or more regions is taken as the sign that the
 * daughters have shown up.
 *
 * <p>The region list is fetched at most {@code maxAttempts} times with a
 * {@code sleepMillis} pause between fetches (roughly 10 seconds in total,
 * not counting the time spent in {@code getRegions} itself). If fewer than
 * two regions are still reported after the last fetch, the test fails via
 * {@code assertTrue} with a message stating how many regions were seen.
 *
 * @param tableName name of the table whose regions are polled
 * @return the most recently fetched region list, containing at least two entries
 * @throws InterruptedException if the thread is interrupted while sleeping
 *         between polls
 */
private List<HRegion> checkAndGetDaughters(byte[] tableName)
    throws InterruptedException {
  final int maxAttempts = 100;
  final long sleepMillis = 100;
  // Fetch once up front so the list is never null and no sleep is paid
  // when the daughters are already there.
  List<HRegion> daughters = cluster.getRegions(tableName);
  for (int attempt = 1; attempt < maxAttempts && daughters.size() < 2; attempt++) {
    Thread.sleep(sleepMillis);
    daughters = cluster.getRegions(tableName);
  }
  // Include the observed count so a timeout is diagnosable from the test report.
  assertTrue("Expected at least 2 regions after waiting up to "
      + (maxAttempts * sleepMillis) + "ms, but found " + daughters.size(),
      daughters.size() >= 2);
  return daughters;
}
private MockMasterWithoutCatalogJanitor abortAndWaitForMaster() private MockMasterWithoutCatalogJanitor abortAndWaitForMaster()
throws IOException, InterruptedException { throws IOException, InterruptedException {
cluster.abortMaster(0); cluster.abortMaster(0);