HBASE-13308 Fix flaky TestEndToEndSplitTransaction

Conflicts:
	hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestEndToEndSplitTransaction.java
This commit is contained in:
zhangduo 2015-03-21 18:15:23 +08:00
parent e90ac5f812
commit bbf9a90ff2
1 changed file with 100 additions and 112 deletions

View File

@ -25,10 +25,10 @@ import static org.junit.Assert.assertTrue;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.NavigableMap; import java.util.NavigableMap;
import java.util.Random; import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.TreeSet;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
@ -38,6 +38,8 @@ import org.apache.hadoop.hbase.ChoreService;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.ScheduledChore; import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
@ -47,8 +49,6 @@ import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.MetaScanner; import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Put;
@ -64,7 +64,6 @@ import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos; import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos;
import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ConfigUtil;
import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType; import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.util.StoppableImplementation; import org.apache.hadoop.hbase.util.StoppableImplementation;
@ -82,7 +81,7 @@ import com.google.protobuf.ServiceException;
public class TestEndToEndSplitTransaction { public class TestEndToEndSplitTransaction {
private static final Log LOG = LogFactory.getLog(TestEndToEndSplitTransaction.class); private static final Log LOG = LogFactory.getLog(TestEndToEndSplitTransaction.class);
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static final Configuration conf = TEST_UTIL.getConfiguration(); private static final Configuration CONF = TEST_UTIL.getConfiguration();
@BeforeClass @BeforeClass
public static void beforeAllTests() throws Exception { public static void beforeAllTests() throws Exception {
@ -97,86 +96,81 @@ public class TestEndToEndSplitTransaction {
@Test @Test
public void testMasterOpsWhileSplitting() throws Exception { public void testMasterOpsWhileSplitting() throws Exception {
TableName tableName = TableName tableName = TableName.valueOf("TestSplit");
TableName.valueOf("TestSplit");
byte[] familyName = Bytes.toBytes("fam"); byte[] familyName = Bytes.toBytes("fam");
try (HTable ht = TEST_UTIL.createTable(tableName, familyName)) { try (HTable ht = TEST_UTIL.createTable(tableName, familyName)) {
TEST_UTIL.loadTable(ht, familyName, false); TEST_UTIL.loadTable(ht, familyName, false);
} }
HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(0); HRegionServer server = TEST_UTIL.getHBaseCluster().getRegionServer(0);
byte []firstRow = Bytes.toBytes("aaa"); byte[] firstRow = Bytes.toBytes("aaa");
byte []splitRow = Bytes.toBytes("lll"); byte[] splitRow = Bytes.toBytes("lll");
byte []lastRow = Bytes.toBytes("zzz"); byte[] lastRow = Bytes.toBytes("zzz");
HConnection con = HConnectionManager try (Connection conn = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration())) {
.getConnection(TEST_UTIL.getConfiguration()); // this will also cache the region
// this will also cache the region byte[] regionName = conn.getRegionLocator(tableName).getRegionLocation(splitRow)
byte[] regionName = con.locateRegion(tableName, splitRow).getRegionInfo() .getRegionInfo().getRegionName();
.getRegionName(); HRegion region = server.getRegion(regionName);
HRegion region = server.getRegion(regionName); SplitTransaction split = new SplitTransaction(region, splitRow);
SplitTransaction split = new SplitTransaction(region, splitRow); split.prepare();
split.useZKForAssignment = ConfigUtil.useZKForAssignment(conf);
split.prepare();
// 1. phase I // 1. phase I
PairOfSameType<HRegion> regions = split.createDaughters(server, server); PairOfSameType<HRegion> regions = split.createDaughters(server, server);
assertFalse(test(con, tableName, firstRow, server)); assertFalse(test(conn, tableName, firstRow, server));
assertFalse(test(con, tableName, lastRow, server)); assertFalse(test(conn, tableName, lastRow, server));
// passing null as services prevents final step // passing null as services prevents final step
// 2, most of phase II // 2, most of phase II
split.openDaughters(server, null, regions.getFirst(), regions.getSecond()); split.openDaughters(server, null, regions.getFirst(), regions.getSecond());
assertFalse(test(con, tableName, firstRow, server)); assertFalse(test(conn, tableName, firstRow, server));
assertFalse(test(con, tableName, lastRow, server)); assertFalse(test(conn, tableName, lastRow, server));
// 3. finish phase II // 3. finish phase II
// note that this replicates some code from SplitTransaction // note that this replicates some code from SplitTransaction
// 2nd daughter first // 2nd daughter first
if (split.useZKForAssignment) { if (split.useZKForAssignment) {
server.postOpenDeployTasks(regions.getSecond()); server.postOpenDeployTasks(regions.getSecond());
} else { } else {
server.reportRegionStateTransition( server.reportRegionStateTransition(
RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT, RegionServerStatusProtos.RegionStateTransition.TransitionCode.SPLIT,
region.getRegionInfo(), regions.getFirst().getRegionInfo(), region.getRegionInfo(), regions.getFirst().getRegionInfo(),
regions.getSecond().getRegionInfo()); regions.getSecond().getRegionInfo());
}
// Add to online regions
server.addToOnlineRegions(regions.getSecond());
// THIS is the crucial point:
// the 2nd daughter was added, so querying before the split key should fail.
assertFalse(test(conn, tableName, firstRow, server));
// past splitkey is ok.
assertTrue(test(conn, tableName, lastRow, server));
// Add to online regions
server.addToOnlineRegions(regions.getFirst());
assertTrue(test(conn, tableName, firstRow, server));
assertTrue(test(conn, tableName, lastRow, server));
if (split.useZKForAssignment) {
// 4. phase III
((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(),
regions.getSecond(), split.std, region);
}
assertTrue(test(conn, tableName, firstRow, server));
assertTrue(test(conn, tableName, lastRow, server));
} }
// Add to online regions
server.addToOnlineRegions(regions.getSecond());
// THIS is the crucial point:
// the 2nd daughter was added, so querying before the split key should fail.
assertFalse(test(con, tableName, firstRow, server));
// past splitkey is ok.
assertTrue(test(con, tableName, lastRow, server));
// first daughter second
if (split.useZKForAssignment) {
server.postOpenDeployTasks(regions.getFirst());
}
// Add to online regions
server.addToOnlineRegions(regions.getFirst());
assertTrue(test(con, tableName, firstRow, server));
assertTrue(test(con, tableName, lastRow, server));
if (split.useZKForAssignment) {
// 4. phase III
((BaseCoordinatedStateManager) server.getCoordinatedStateManager())
.getSplitTransactionCoordination().completeSplitTransaction(server, regions.getFirst(),
regions.getSecond(), split.std, region);
}
assertTrue(test(con, tableName, firstRow, server));
assertTrue(test(con, tableName, lastRow, server));
} }
/** /**
* attempt to locate the region and perform a get and scan * attempt to locate the region and perform a get and scan
* @return True if successful, False otherwise. * @return True if successful, False otherwise.
*/ */
private boolean test(HConnection con, TableName tableName, byte[] row, private boolean test(Connection conn, TableName tableName, byte[] row,
HRegionServer server) { HRegionServer server) {
// not using HTable to avoid timeouts and retries // not using HTable to avoid timeouts and retries
try { try {
byte[] regionName = con.relocateRegion(tableName, row).getRegionInfo() byte[] regionName = conn.getRegionLocator(tableName).getRegionLocation(row, true)
.getRegionName(); .getRegionInfo().getRegionName();
// get and scan should now succeed without exception // get and scan should now succeed without exception
ClientProtos.GetRequest request = ClientProtos.GetRequest request =
RequestConverter.buildGetRequest(regionName, new Get(row)); RequestConverter.buildGetRequest(regionName, new Get(row));
@ -189,7 +183,7 @@ public class TestEndToEndSplitTransaction {
} catch (ServiceException se) { } catch (ServiceException se) {
throw ProtobufUtil.getRemoteException(se); throw ProtobufUtil.getRemoteException(se);
} }
} catch (IOException x) { } catch (IOException e) {
return false; return false;
} catch (ServiceException e) { } catch (ServiceException e) {
return false; return false;
@ -213,7 +207,7 @@ public class TestEndToEndSplitTransaction {
Stoppable stopper = new StoppableImplementation(); Stoppable stopper = new StoppableImplementation();
RegionSplitter regionSplitter = new RegionSplitter(table); RegionSplitter regionSplitter = new RegionSplitter(table);
RegionChecker regionChecker = new RegionChecker(conf, stopper, TABLENAME); RegionChecker regionChecker = new RegionChecker(CONF, stopper, TABLENAME);
final ChoreService choreService = new ChoreService("TEST_SERVER"); final ChoreService choreService = new ChoreService("TEST_SERVER");
choreService.scheduleChore(regionChecker); choreService.scheduleChore(regionChecker);
@ -290,7 +284,7 @@ public class TestEndToEndSplitTransaction {
try { try {
admin.splitRegion(region.getRegionName(), splitPoint); admin.splitRegion(region.getRegionName(), splitPoint);
//wait until the split is complete //wait until the split is complete
blockUntilRegionSplit(conf, 50000, region.getRegionName(), true); blockUntilRegionSplit(CONF, 50000, region.getRegionName(), true);
} catch (NotServingRegionException ex) { } catch (NotServingRegionException ex) {
//ignore //ignore
@ -305,7 +299,7 @@ public class TestEndToEndSplitTransaction {
List<Put> puts = new ArrayList<>(); List<Put> puts = new ArrayList<>();
for (int i=start; i< start + 100; i++) { for (int i=start; i< start + 100; i++) {
Put put = new Put(Bytes.toBytes(i)); Put put = new Put(Bytes.toBytes(i));
put.add(family, family, Bytes.toBytes(i)); put.addColumn(family, family, Bytes.toBytes(i));
puts.add(put); puts.add(put);
} }
table.put(puts); table.put(puts);
@ -346,14 +340,16 @@ public class TestEndToEndSplitTransaction {
void verifyRegionsUsingHTable() throws IOException { void verifyRegionsUsingHTable() throws IOException {
HTable table = null; HTable table = null;
try { try {
//HTable.getStartEndKeys() table = (HTable) connection.getTable(tableName);
table = new HTable(conf, tableName); Pair<byte[][], byte[][]> keys = table.getRegionLocator().getStartEndKeys();
Pair<byte[][], byte[][]> keys = table.getStartEndKeys();
verifyStartEndKeys(keys); verifyStartEndKeys(keys);
//HTable.getRegionsInfo() //HTable.getRegionsInfo()
Map<HRegionInfo, ServerName> regions = table.getRegionLocations(); Set<HRegionInfo> regions = new TreeSet<HRegionInfo>();
verifyTableRegions(regions.keySet()); for (HRegionLocation loc : table.getRegionLocator().getAllRegionLocations()) {
regions.add(loc.getRegionInfo());
}
verifyTableRegions(regions);
} finally { } finally {
IOUtils.closeQuietly(table); IOUtils.closeQuietly(table);
} }
@ -426,7 +422,7 @@ public class TestEndToEndSplitTransaction {
admin.flushRegion(regionName); admin.flushRegion(regionName);
log("blocking until flush is complete: " + Bytes.toStringBinary(regionName)); log("blocking until flush is complete: " + Bytes.toStringBinary(regionName));
Threads.sleepWithoutInterrupt(500); Threads.sleepWithoutInterrupt(500);
while (rs.cacheFlusher.getFlushQueueSize() > 0) { while (rs.getOnlineRegion(regionName).getMemstoreSize().get() > 0) {
Threads.sleep(50); Threads.sleep(50);
} }
} }
@ -437,8 +433,14 @@ public class TestEndToEndSplitTransaction {
admin.majorCompactRegion(regionName); admin.majorCompactRegion(regionName);
log("blocking until compaction is complete: " + Bytes.toStringBinary(regionName)); log("blocking until compaction is complete: " + Bytes.toStringBinary(regionName));
Threads.sleepWithoutInterrupt(500); Threads.sleepWithoutInterrupt(500);
while (rs.compactSplitThread.getCompactionQueueSize() > 0) { outer: for (;;) {
Threads.sleep(50); for (Store store : rs.getOnlineRegion(regionName).getStores().values()) {
if (store.getStorefilesCount() > 1) {
Threads.sleep(50);
continue outer;
}
}
break;
} }
} }
@ -449,21 +451,20 @@ public class TestEndToEndSplitTransaction {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
log("blocking until region is split:" + Bytes.toStringBinary(regionName)); log("blocking until region is split:" + Bytes.toStringBinary(regionName));
HRegionInfo daughterA = null, daughterB = null; HRegionInfo daughterA = null, daughterB = null;
Table metaTable = new HTable(conf, TableName.META_TABLE_NAME); try (Connection conn = ConnectionFactory.createConnection(conf);
Table metaTable = conn.getTable(TableName.META_TABLE_NAME)) {
try {
Result result = null; Result result = null;
HRegionInfo region = null; HRegionInfo region = null;
while ((System.currentTimeMillis() - start) < timeout) { while ((System.currentTimeMillis() - start) < timeout) {
result = getRegionRow(metaTable, regionName); result = metaTable.get(new Get(regionName));
if (result == null) { if (result == null) {
break; break;
} }
region = HRegionInfo.getHRegionInfo(result); region = MetaTableAccessor.getHRegionInfo(result);
if (region.isSplitParent()) { if (region.isSplitParent()) {
log("found parent region: " + region.toString()); log("found parent region: " + region.toString());
PairOfSameType<HRegionInfo> pair = HRegionInfo.getDaughterRegions(result); PairOfSameType<HRegionInfo> pair = MetaTableAccessor.getDaughterRegions(result);
daughterA = pair.getFirst(); daughterA = pair.getFirst();
daughterB = pair.getSecond(); daughterB = pair.getSecond();
break; break;
@ -479,10 +480,10 @@ public class TestEndToEndSplitTransaction {
//if we are here, this means the region split is complete or timed out //if we are here, this means the region split is complete or timed out
if (waitForDaughters) { if (waitForDaughters) {
long rem = timeout - (System.currentTimeMillis() - start); long rem = timeout - (System.currentTimeMillis() - start);
blockUntilRegionIsInMeta(metaTable, rem, daughterA); blockUntilRegionIsInMeta(conn, rem, daughterA);
rem = timeout - (System.currentTimeMillis() - start); rem = timeout - (System.currentTimeMillis() - start);
blockUntilRegionIsInMeta(metaTable, rem, daughterB); blockUntilRegionIsInMeta(conn, rem, daughterB);
rem = timeout - (System.currentTimeMillis() - start); rem = timeout - (System.currentTimeMillis() - start);
blockUntilRegionIsOpened(conf, rem, daughterA); blockUntilRegionIsOpened(conf, rem, daughterA);
@ -490,28 +491,18 @@ public class TestEndToEndSplitTransaction {
rem = timeout - (System.currentTimeMillis() - start); rem = timeout - (System.currentTimeMillis() - start);
blockUntilRegionIsOpened(conf, rem, daughterB); blockUntilRegionIsOpened(conf, rem, daughterB);
} }
} finally {
IOUtils.closeQuietly(metaTable);
} }
} }
public static Result getRegionRow(Table metaTable, byte[] regionName) throws IOException { public static void blockUntilRegionIsInMeta(Connection conn, long timeout, HRegionInfo hri)
Get get = new Get(regionName);
return metaTable.get(get);
}
public static void blockUntilRegionIsInMeta(Table metaTable, long timeout, HRegionInfo hri)
throws IOException, InterruptedException { throws IOException, InterruptedException {
log("blocking until region is in META: " + hri.getRegionNameAsString()); log("blocking until region is in META: " + hri.getRegionNameAsString());
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
while (System.currentTimeMillis() - start < timeout) { while (System.currentTimeMillis() - start < timeout) {
Result result = getRegionRow(metaTable, hri.getRegionName()); HRegionLocation loc = MetaTableAccessor.getRegionLocation(conn, hri);
if (result != null) { if (loc != null && !loc.getRegionInfo().isOffline()) {
HRegionInfo info = HRegionInfo.getHRegionInfo(result); log("found region in META: " + hri.getRegionNameAsString());
if (info != null && !info.isOffline()) { break;
log("found region in META: " + hri.getRegionNameAsString());
break;
}
} }
Threads.sleep(10); Threads.sleep(10);
} }
@ -521,24 +512,21 @@ public class TestEndToEndSplitTransaction {
throws IOException, InterruptedException { throws IOException, InterruptedException {
log("blocking until region is opened for reading:" + hri.getRegionNameAsString()); log("blocking until region is opened for reading:" + hri.getRegionNameAsString());
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
Table table = new HTable(conf, hri.getTable()); try (Connection conn = ConnectionFactory.createConnection(conf);
Table table = conn.getTable(hri.getTable())) {
try { byte[] row = hri.getStartKey();
byte [] row = hri.getStartKey(); // Check for null/empty row. If we find one, use a key that is likely to be in first region.
// Check for null/empty row. If we find one, use a key that is likely to be in first region. if (row == null || row.length <= 0) row = new byte[] { '0' };
if (row == null || row.length <= 0) row = new byte [] {'0'};
Get get = new Get(row); Get get = new Get(row);
while (System.currentTimeMillis() - start < timeout) { while (System.currentTimeMillis() - start < timeout) {
try { try {
table.get(get); table.get(get);
break; break;
} catch(IOException ex) { } catch (IOException ex) {
//wait some more // wait some more
} }
Threads.sleep(10); Threads.sleep(10);
} }
} finally {
IOUtils.closeQuietly(table);
} }
} }
} }