HBASE-7438 TestSplitTransactionOnCluster has too many infinite loops
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1426066 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1ae751a725
commit
58aa9d60c0
|
@ -21,8 +21,10 @@ package org.apache.hadoop.hbase.regionserver;
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertFalse;
|
import static org.junit.Assert.assertFalse;
|
||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
|
import static org.junit.Assert.assertNull;
|
||||||
import static org.junit.Assert.assertNotSame;
|
import static org.junit.Assert.assertNotSame;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -175,12 +177,8 @@ public class TestSplitTransactionOnCluster {
|
||||||
// Now crash the server
|
// Now crash the server
|
||||||
cluster.abortRegionServer(tableRegionIndex);
|
cluster.abortRegionServer(tableRegionIndex);
|
||||||
waitUntilRegionServerDead();
|
waitUntilRegionServerDead();
|
||||||
|
awaitDaughters(tableName, daughters.size());
|
||||||
|
|
||||||
// Wait till regions are back on line again.
|
|
||||||
while(cluster.getRegions(tableName).size() < daughters.size()) {
|
|
||||||
LOG.info("Waiting for repair to happen");
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
// Assert daughters are online.
|
// Assert daughters are online.
|
||||||
regions = cluster.getRegions(tableName);
|
regions = cluster.getRegions(tableName);
|
||||||
for (HRegion r: regions) {
|
for (HRegion r: regions) {
|
||||||
|
@ -295,11 +293,7 @@ public class TestSplitTransactionOnCluster {
|
||||||
// Now crash the server
|
// Now crash the server
|
||||||
cluster.abortRegionServer(tableRegionIndex);
|
cluster.abortRegionServer(tableRegionIndex);
|
||||||
waitUntilRegionServerDead();
|
waitUntilRegionServerDead();
|
||||||
// Wait till regions are back on line again.
|
awaitDaughters(tableName, daughters.size());
|
||||||
while(cluster.getRegions(tableName).size() < daughters.size()) {
|
|
||||||
LOG.info("Waiting for repair to happen");
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
// Assert daughters are online.
|
// Assert daughters are online.
|
||||||
regions = cluster.getRegions(tableName);
|
regions = cluster.getRegions(tableName);
|
||||||
for (HRegion r: regions) {
|
for (HRegion r: regions) {
|
||||||
|
@ -357,21 +351,18 @@ public class TestSplitTransactionOnCluster {
|
||||||
if (r.getRegionInfo().equals(daughter)) daughterRegion = r;
|
if (r.getRegionInfo().equals(daughter)) daughterRegion = r;
|
||||||
}
|
}
|
||||||
assertTrue(daughterRegion != null);
|
assertTrue(daughterRegion != null);
|
||||||
while (true) {
|
for (int i=0; i<100; i++) {
|
||||||
if (!daughterRegion.hasReferences()) break;
|
if (!daughterRegion.hasReferences()) break;
|
||||||
Threads.sleep(100);
|
Threads.sleep(100);
|
||||||
}
|
}
|
||||||
|
assertFalse("Waiting for refereces to be compacted", daughterRegion.hasReferences());
|
||||||
split(daughter, server, regionCount);
|
split(daughter, server, regionCount);
|
||||||
// Get list of daughters
|
// Get list of daughters
|
||||||
daughters = cluster.getRegions(tableName);
|
daughters = cluster.getRegions(tableName);
|
||||||
// Now crash the server
|
// Now crash the server
|
||||||
cluster.abortRegionServer(tableRegionIndex);
|
cluster.abortRegionServer(tableRegionIndex);
|
||||||
waitUntilRegionServerDead();
|
waitUntilRegionServerDead();
|
||||||
// Wait till regions are back on line again.
|
awaitDaughters(tableName, daughters.size());
|
||||||
while(cluster.getRegions(tableName).size() < daughters.size()) {
|
|
||||||
LOG.info("Waiting for repair to happen");
|
|
||||||
Thread.sleep(1000);
|
|
||||||
}
|
|
||||||
// Assert daughters are online and ONLY the original daughters -- that
|
// Assert daughters are online and ONLY the original daughters -- that
|
||||||
// fixup didn't insert one during server shutdown recover.
|
// fixup didn't insert one during server shutdown recover.
|
||||||
regions = cluster.getRegions(tableName);
|
regions = cluster.getRegions(tableName);
|
||||||
|
@ -508,12 +499,14 @@ public class TestSplitTransactionOnCluster {
|
||||||
byte[] data = ZKUtil.getDataNoWatch(t.getConnection()
|
byte[] data = ZKUtil.getDataNoWatch(t.getConnection()
|
||||||
.getZooKeeperWatcher(), node, stat);
|
.getZooKeeperWatcher(), node, stat);
|
||||||
// ZKUtil.create
|
// ZKUtil.create
|
||||||
while (data != null) {
|
for (int i=0; data != null && i<60; i++) {
|
||||||
Thread.sleep(1000);
|
Thread.sleep(1000);
|
||||||
data = ZKUtil.getDataNoWatch(t.getConnection().getZooKeeperWatcher(),
|
data = ZKUtil.getDataNoWatch(t.getConnection().getZooKeeperWatcher(),
|
||||||
node, stat);
|
node, stat);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
assertNull("Waited too long for ZK node to be removed: "+node, data);
|
||||||
|
|
||||||
MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
|
MockMasterWithoutCatalogJanitor master = abortAndWaitForMaster();
|
||||||
|
|
||||||
this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
|
this.admin = new HBaseAdmin(TESTING_UTIL.getConfiguration());
|
||||||
|
@ -559,9 +552,12 @@ public class TestSplitTransactionOnCluster {
|
||||||
htd.addFamily(new HColumnDescriptor("cf"));
|
htd.addFamily(new HColumnDescriptor("cf"));
|
||||||
admin.createTable(htd);
|
admin.createTable(htd);
|
||||||
HTable t = new HTable(cluster.getConfiguration(), tableName);
|
HTable t = new HTable(cluster.getConfiguration(), tableName);
|
||||||
while (!(cluster.getRegions(tableName).size() == 1)) {
|
// wait for up to 10s
|
||||||
|
for (int i=0; cluster.getRegions(tableName).size() != 1 && i<100; i++) {
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
}
|
}
|
||||||
|
assertTrue("waited too long for table to get online",
|
||||||
|
cluster.getRegions(tableName).size() == 1);
|
||||||
final List<HRegion> regions = cluster.getRegions(tableName);
|
final List<HRegion> regions = cluster.getRegions(tableName);
|
||||||
HRegionInfo hri = getAndCheckSingleTableRegion(regions);
|
HRegionInfo hri = getAndCheckSingleTableRegion(regions);
|
||||||
int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
|
int regionServerIndex = cluster.getServerWith(regions.get(0).getRegionName());
|
||||||
|
@ -584,9 +580,10 @@ public class TestSplitTransactionOnCluster {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}.start();
|
}.start();
|
||||||
while (!callRollBack) {
|
for (int i=0; !callRollBack && i<100; i++) {
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
}
|
}
|
||||||
|
assertTrue("Waited too long for rollback", callRollBack);
|
||||||
SplitTransaction st = null;
|
SplitTransaction st = null;
|
||||||
st = new MockedSplitTransaction(regions.get(0), Bytes.toBytes("row2"));
|
st = new MockedSplitTransaction(regions.get(0), Bytes.toBytes("row2"));
|
||||||
try {
|
try {
|
||||||
|
@ -597,15 +594,19 @@ public class TestSplitTransactionOnCluster {
|
||||||
LOG.debug("Rollback started :"+ e.getMessage());
|
LOG.debug("Rollback started :"+ e.getMessage());
|
||||||
st.rollback(regionServer, regionServer);
|
st.rollback(regionServer, regionServer);
|
||||||
}
|
}
|
||||||
while (!firstSplitCompleted) {
|
for (int i=0; !firstSplitCompleted && i<100; i++) {
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
}
|
}
|
||||||
|
assertTrue("fist split did not complete", firstSplitCompleted);
|
||||||
|
|
||||||
RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
|
RegionStates regionStates = cluster.getMaster().getAssignmentManager().getRegionStates();
|
||||||
Map<String, RegionState> rit = regionStates.getRegionsInTransition();
|
Map<String, RegionState> rit = regionStates.getRegionsInTransition();
|
||||||
|
|
||||||
while (rit.containsKey(hri.getTableNameAsString())) {
|
for (int i=0; rit.containsKey(hri.getTableNameAsString()) && i<100; i++) {
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
}
|
}
|
||||||
|
// Re-check the same condition the bounded wait loop polls, so a timeout fails the test.
assertFalse("region still in transition", rit.containsKey(hri.getTableNameAsString()));
|
||||||
|
|
||||||
List<HRegion> onlineRegions = regionServer.getOnlineRegions(tableName);
|
List<HRegion> onlineRegions = regionServer.getOnlineRegions(tableName);
|
||||||
// Region server side split is successful.
|
// Region server side split is successful.
|
||||||
assertEquals("The parent region should be splitted", 2, onlineRegions.size());
|
assertEquals("The parent region should be splitted", 2, onlineRegions.size());
|
||||||
|
@ -845,10 +846,12 @@ public class TestSplitTransactionOnCluster {
|
||||||
final int regionCount)
|
final int regionCount)
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
this.admin.split(hri.getRegionNameAsString());
|
this.admin.split(hri.getRegionNameAsString());
|
||||||
while (ProtobufUtil.getOnlineRegions(server).size() <= regionCount) {
|
for (int i = 0; ProtobufUtil.getOnlineRegions(server).size() <= regionCount && i < 100; i++) {
|
||||||
LOG.debug("Waiting on region to split");
|
LOG.debug("Waiting on region to split");
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
}
|
}
|
||||||
|
assertFalse("Waited too long for split",
|
||||||
|
ProtobufUtil.getOnlineRegions(server).size() <= regionCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void removeDaughterFromMeta(final byte [] regionName) throws IOException {
|
private void removeDaughterFromMeta(final byte [] regionName) throws IOException {
|
||||||
|
@ -895,13 +898,15 @@ public class TestSplitTransactionOnCluster {
|
||||||
Bytes.toBytes(hrs.getServerName().toString()));
|
Bytes.toBytes(hrs.getServerName().toString()));
|
||||||
}
|
}
|
||||||
// Wait till table region is up on the server that is NOT carrying .META..
|
// Wait till table region is up on the server that is NOT carrying .META..
|
||||||
while (true) {
|
for (int i=0; i<100; i++) {
|
||||||
tableRegionIndex = cluster.getServerWith(hri.getRegionName());
|
tableRegionIndex = cluster.getServerWith(hri.getRegionName());
|
||||||
if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
|
if (tableRegionIndex != -1 && tableRegionIndex != metaServerIndex) break;
|
||||||
LOG.debug("Waiting on region move off the .META. server; current index " +
|
LOG.debug("Waiting on region move off the .META. server; current index " +
|
||||||
tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
|
tableRegionIndex + " and metaServerIndex=" + metaServerIndex);
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
}
|
}
|
||||||
|
assertTrue("Region not moved off .META. server", tableRegionIndex != -1
|
||||||
|
&& tableRegionIndex != metaServerIndex);
|
||||||
// Verify for sure table region is not on same server as .META.
|
// Verify for sure table region is not on same server as .META.
|
||||||
tableRegionIndex = cluster.getServerWith(hri.getRegionName());
|
tableRegionIndex = cluster.getServerWith(hri.getRegionName());
|
||||||
assertTrue(tableRegionIndex != -1);
|
assertTrue(tableRegionIndex != -1);
|
||||||
|
@ -939,11 +944,24 @@ public class TestSplitTransactionOnCluster {
|
||||||
|
|
||||||
private void waitUntilRegionServerDead() throws InterruptedException {
|
private void waitUntilRegionServerDead() throws InterruptedException {
|
||||||
// Wait until the master processes the RS shutdown
|
// Wait until the master processes the RS shutdown
|
||||||
while (cluster.getMaster().getClusterStatus().
|
for (int i=0; cluster.getMaster().getClusterStatus().
|
||||||
getServers().size() == NB_SERVERS) {
|
getServers().size() == NB_SERVERS && i<100; i++) {
|
||||||
LOG.info("Waiting on server to go down");
|
LOG.info("Waiting on server to go down");
|
||||||
Thread.sleep(100);
|
Thread.sleep(100);
|
||||||
}
|
}
|
||||||
|
assertFalse("Waited too long for RS to die", cluster.getMaster().getClusterStatus().
|
||||||
|
getServers().size() == NB_SERVERS);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void awaitDaughters(byte[] tableName, int numDaughters) throws InterruptedException {
|
||||||
|
// Wait till regions are back on line again.
|
||||||
|
for (int i=0; cluster.getRegions(tableName).size() < numDaughters && i<60; i++) {
|
||||||
|
LOG.info("Waiting for repair to happen");
|
||||||
|
Thread.sleep(1000);
|
||||||
|
}
|
||||||
|
if (cluster.getRegions(tableName).size() < numDaughters) {
|
||||||
|
fail("Waiting too long for daughter regions");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class MockMasterWithoutCatalogJanitor extends HMaster {
|
public static class MockMasterWithoutCatalogJanitor extends HMaster {
|
||||||
|
|
Loading…
Reference in New Issue