HBASE-19454 Debugging TestDistributedLogSplitting#testThreeRSAbort
- Changed testThreeRSAbort to kill the RSs intead of aborting. Simple aborting will close the regions, we want extreme failure testing here. - Adds some logging for easier debugging. - Refactors TestDistributedLogSplitting to use standard junit rules.
This commit is contained in:
parent
b0d68d6814
commit
a70b9b5e94
|
@ -3224,8 +3224,7 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
|
|||
* @throws IOException
|
||||
*/
|
||||
public void waitUntilAllRegionsAssigned(final TableName tableName) throws IOException {
|
||||
waitUntilAllRegionsAssigned(
|
||||
tableName,
|
||||
waitUntilAllRegionsAssigned( tableName,
|
||||
this.conf.getLong("hbase.client.sync.wait.timeout.msec", 60000));
|
||||
}
|
||||
|
||||
|
@ -3251,6 +3250,8 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
|
|||
throws IOException {
|
||||
final Table meta = getConnection().getTable(TableName.META_TABLE_NAME);
|
||||
try {
|
||||
LOG.debug("Waiting until all regions of table " + tableName + " get assigned. Timeout = " +
|
||||
timeout + "ms");
|
||||
waitFor(timeout, 200, true, new ExplainingPredicate<IOException>() {
|
||||
@Override
|
||||
public String explainFailure() throws IOException {
|
||||
|
@ -3259,7 +3260,6 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
|
|||
|
||||
@Override
|
||||
public boolean evaluate() throws IOException {
|
||||
boolean allRegionsAssigned = true;
|
||||
Scan scan = new Scan();
|
||||
scan.addFamily(HConstants.CATALOG_FAMILY);
|
||||
ResultScanner s = meta.getScanner(scan);
|
||||
|
@ -3295,17 +3295,17 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
|
|||
} finally {
|
||||
s.close();
|
||||
}
|
||||
return allRegionsAssigned;
|
||||
return true;
|
||||
}
|
||||
});
|
||||
} finally {
|
||||
meta.close();
|
||||
}
|
||||
|
||||
LOG.info("All regions for table " + tableName + " assigned to meta. Checking AM states.");
|
||||
// check from the master state if we are using a mini cluster
|
||||
if (!getHBaseClusterInterface().isDistributedCluster()) {
|
||||
// So, all regions are in the meta table but make sure master knows of the assignments before
|
||||
// returing -- sometimes this can lag.
|
||||
// returning -- sometimes this can lag.
|
||||
HMaster master = getHBaseCluster().getMaster();
|
||||
final RegionStates states = master.getAssignmentManager().getRegionStates();
|
||||
waitFor(timeout, 200, new ExplainingPredicate<IOException>() {
|
||||
|
@ -3321,6 +3321,7 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
|
|||
}
|
||||
});
|
||||
}
|
||||
LOG.info("All regions for table " + tableName + " assigned.");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -119,9 +119,14 @@ public class TestDistributedLogSplitting {
|
|||
|
||||
}
|
||||
|
||||
@Rule
|
||||
public TestName testName = new TestName();
|
||||
TableName tableName;
|
||||
|
||||
// Start a cluster with 2 masters and 6 regionservers
|
||||
static final int NUM_MASTERS = 2;
|
||||
static final int NUM_RS = 5;
|
||||
static byte[] COLUMN_FAMILY = Bytes.toBytes("family");
|
||||
|
||||
MiniHBaseCluster cluster;
|
||||
HMaster master;
|
||||
|
@ -174,6 +179,7 @@ public class TestDistributedLogSplitting {
|
|||
public void before() throws Exception {
|
||||
// refresh configuration
|
||||
conf = HBaseConfiguration.create(originalConf);
|
||||
tableName = TableName.valueOf(testName.getMethodName());
|
||||
}
|
||||
|
||||
@After
|
||||
|
@ -208,8 +214,7 @@ public class TestDistributedLogSplitting {
|
|||
Path rootdir = FSUtils.getRootDir(conf);
|
||||
|
||||
int numRegions = 50;
|
||||
Table t = installTable(new ZKWatcher(conf, "table-creation", null),
|
||||
"table", "family", numRegions);
|
||||
Table t = installTable(new ZKWatcher(conf, "table-creation", null), numRegions);
|
||||
try {
|
||||
TableName table = t.getName();
|
||||
List<RegionInfo> regions = null;
|
||||
|
@ -233,7 +238,7 @@ public class TestDistributedLogSplitting {
|
|||
}
|
||||
}
|
||||
|
||||
makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
|
||||
makeWAL(hrs, regions, NUM_LOG_LINES, 100);
|
||||
|
||||
slm.splitLogDistributed(logDir);
|
||||
|
||||
|
@ -282,11 +287,11 @@ public class TestDistributedLogSplitting {
|
|||
master.balanceSwitch(false);
|
||||
|
||||
final ZKWatcher zkw = new ZKWatcher(conf, "table-creation", null);
|
||||
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
|
||||
Table ht = installTable(zkw, NUM_REGIONS_TO_CREATE);
|
||||
try {
|
||||
HRegionServer hrs = findRSToKill(false, "table");
|
||||
HRegionServer hrs = findRSToKill(false);
|
||||
List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
|
||||
makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100);
|
||||
makeWAL(hrs, regions, NUM_LOG_LINES, 100);
|
||||
|
||||
// abort master
|
||||
abortMaster(cluster);
|
||||
|
@ -345,16 +350,14 @@ public class TestDistributedLogSplitting {
|
|||
FileSystem fs = master.getMasterFileSystem().getFileSystem();
|
||||
|
||||
final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
|
||||
HRegionServer hrs = findRSToKill(false, "table");
|
||||
HRegionServer hrs = findRSToKill(false);
|
||||
Path rootdir = FSUtils.getRootDir(conf);
|
||||
final Path logDir = new Path(rootdir,
|
||||
AbstractFSWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
|
||||
|
||||
Table t = installTable(new ZKWatcher(conf, "table-creation", null),
|
||||
"table", "family", 40);
|
||||
Table t = installTable(new ZKWatcher(conf, "table-creation", null), 40);
|
||||
try {
|
||||
makeWAL(hrs, ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()),
|
||||
"table", "family", NUM_LOG_LINES, 100);
|
||||
makeWAL(hrs, ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()), NUM_LOG_LINES, 100);
|
||||
|
||||
new Thread() {
|
||||
@Override
|
||||
|
@ -405,47 +408,33 @@ public class TestDistributedLogSplitting {
|
|||
|
||||
startCluster(NUM_RS); // NUM_RS=6.
|
||||
|
||||
final ZKWatcher zkw = new ZKWatcher(conf,
|
||||
"distributed log splitting test", null);
|
||||
final ZKWatcher zkw = new ZKWatcher(conf, "distributed log splitting test", null);
|
||||
|
||||
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE);
|
||||
Table table = installTable(zkw, NUM_REGIONS_TO_CREATE);
|
||||
try {
|
||||
populateDataInTable(NUM_ROWS_PER_REGION, "family");
|
||||
|
||||
populateDataInTable(NUM_ROWS_PER_REGION);
|
||||
|
||||
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
|
||||
assertEquals(NUM_RS, rsts.size());
|
||||
rsts.get(0).getRegionServer().abort("testing");
|
||||
rsts.get(1).getRegionServer().abort("testing");
|
||||
rsts.get(2).getRegionServer().abort("testing");
|
||||
cluster.killRegionServer(rsts.get(0).getRegionServer().getServerName());
|
||||
cluster.killRegionServer(rsts.get(1).getRegionServer().getServerName());
|
||||
cluster.killRegionServer(rsts.get(2).getRegionServer().getServerName());
|
||||
|
||||
long start = EnvironmentEdgeManager.currentTime();
|
||||
while (cluster.getLiveRegionServerThreads().size() > (NUM_RS - 3)) {
|
||||
if (EnvironmentEdgeManager.currentTime() - start > 60000) {
|
||||
assertTrue(false);
|
||||
fail("Timed out waiting for server aborts.");
|
||||
}
|
||||
Thread.sleep(200);
|
||||
}
|
||||
|
||||
start = EnvironmentEdgeManager.currentTime();
|
||||
while (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
|
||||
< (NUM_REGIONS_TO_CREATE + 1)) {
|
||||
if (EnvironmentEdgeManager.currentTime() - start > 60000) {
|
||||
assertTrue("Timedout", false);
|
||||
}
|
||||
Thread.sleep(200);
|
||||
}
|
||||
|
||||
assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION,
|
||||
TEST_UTIL.countRows(ht));
|
||||
TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
|
||||
assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION, TEST_UTIL.countRows(table));
|
||||
} finally {
|
||||
if (ht != null) ht.close();
|
||||
if (table != null) table.close();
|
||||
if (zkw != null) zkw.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test(timeout=30000)
|
||||
public void testDelayedDeleteOnFailure() throws Exception {
|
||||
LOG.info("testDelayedDeleteOnFailure");
|
||||
|
@ -519,7 +508,7 @@ public class TestDistributedLogSplitting {
|
|||
LOG.info("testReadWriteSeqIdFiles");
|
||||
startCluster(2);
|
||||
final ZKWatcher zkw = new ZKWatcher(conf, "table-creation", null);
|
||||
Table ht = installTable(zkw, name.getMethodName(), "family", 10);
|
||||
Table ht = installTable(zkw, 10);
|
||||
try {
|
||||
FileSystem fs = master.getMasterFileSystem().getFileSystem();
|
||||
Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), TableName.valueOf(name.getMethodName()));
|
||||
|
@ -549,19 +538,17 @@ public class TestDistributedLogSplitting {
|
|||
}
|
||||
}
|
||||
|
||||
Table installTable(ZKWatcher zkw, String tname, String fname, int nrs) throws Exception {
|
||||
return installTable(zkw, tname, fname, nrs, 0);
|
||||
Table installTable(ZKWatcher zkw, int nrs) throws Exception {
|
||||
return installTable(zkw, nrs, 0);
|
||||
}
|
||||
|
||||
Table installTable(ZKWatcher zkw, String tname, String fname, int nrs,
|
||||
int existingRegions) throws Exception {
|
||||
Table installTable(ZKWatcher zkw, int nrs, int existingRegions) throws Exception {
|
||||
// Create a table with regions
|
||||
TableName table = TableName.valueOf(tname);
|
||||
byte [] family = Bytes.toBytes(fname);
|
||||
byte [] family = Bytes.toBytes("family");
|
||||
LOG.info("Creating table with " + nrs + " regions");
|
||||
Table ht = TEST_UTIL.createMultiRegionTable(table, family, nrs);
|
||||
Table table = TEST_UTIL.createMultiRegionTable(tableName, family, nrs);
|
||||
int numRegions = -1;
|
||||
try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(table)) {
|
||||
try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
|
||||
numRegions = r.getStartKeys().length;
|
||||
}
|
||||
assertEquals(nrs, numRegions);
|
||||
|
@ -570,7 +557,7 @@ public class TestDistributedLogSplitting {
|
|||
// disable-enable cycle to get rid of table's dead regions left behind
|
||||
// by createMultiRegions
|
||||
LOG.debug("Disabling table\n");
|
||||
TEST_UTIL.getAdmin().disableTable(table);
|
||||
TEST_UTIL.getAdmin().disableTable(tableName);
|
||||
LOG.debug("Waiting for no more RIT\n");
|
||||
blockUntilNoRIT(zkw, master);
|
||||
NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
|
||||
|
@ -581,18 +568,16 @@ public class TestDistributedLogSplitting {
|
|||
}
|
||||
assertEquals(2 + existingRegions, regions.size());
|
||||
LOG.debug("Enabling table\n");
|
||||
TEST_UTIL.getAdmin().enableTable(table);
|
||||
TEST_UTIL.getAdmin().enableTable(tableName);
|
||||
LOG.debug("Waiting for no more RIT\n");
|
||||
blockUntilNoRIT(zkw, master);
|
||||
LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n");
|
||||
regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
|
||||
assertEquals(numRegions + 2 + existingRegions, regions.size());
|
||||
return ht;
|
||||
return table;
|
||||
}
|
||||
|
||||
void populateDataInTable(int nrows, String fname) throws Exception {
|
||||
byte [] family = Bytes.toBytes(fname);
|
||||
|
||||
void populateDataInTable(int nrows) throws Exception {
|
||||
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
|
||||
assertEquals(NUM_RS, rsts.size());
|
||||
|
||||
|
@ -607,7 +592,7 @@ public class TestDistributedLogSplitting {
|
|||
" region = "+ hri.getRegionNameAsString());
|
||||
Region region = hrs.getOnlineRegion(hri.getRegionName());
|
||||
assertTrue(region != null);
|
||||
putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
|
||||
putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), COLUMN_FAMILY);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -628,37 +613,34 @@ public class TestDistributedLogSplitting {
|
|||
" region = "+ hri.getRegionNameAsString());
|
||||
Region region = hrs.getOnlineRegion(hri.getRegionName());
|
||||
assertTrue(region != null);
|
||||
putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family);
|
||||
putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), COLUMN_FAMILY);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void makeWAL(HRegionServer hrs, List<RegionInfo> regions, String tname, String fname,
|
||||
int num_edits, int edit_size) throws IOException {
|
||||
makeWAL(hrs, regions, tname, fname, num_edits, edit_size, true);
|
||||
public void makeWAL(HRegionServer hrs, List<RegionInfo> regions, int num_edits, int edit_size)
|
||||
throws IOException {
|
||||
makeWAL(hrs, regions, num_edits, edit_size, true);
|
||||
}
|
||||
|
||||
public void makeWAL(HRegionServer hrs, List<RegionInfo> regions, String tname, String fname,
|
||||
public void makeWAL(HRegionServer hrs, List<RegionInfo> regions,
|
||||
int num_edits, int edit_size, boolean cleanShutdown) throws IOException {
|
||||
TableName fullTName = TableName.valueOf(tname);
|
||||
// remove root and meta region
|
||||
regions.remove(RegionInfoBuilder.FIRST_META_REGIONINFO);
|
||||
|
||||
|
||||
for(Iterator<RegionInfo> iter = regions.iterator(); iter.hasNext(); ) {
|
||||
RegionInfo regionInfo = iter.next();
|
||||
if(regionInfo.getTable().isSystemTable()) {
|
||||
iter.remove();
|
||||
}
|
||||
}
|
||||
HTableDescriptor htd = new HTableDescriptor(fullTName);
|
||||
byte[] family = Bytes.toBytes(fname);
|
||||
htd.addFamily(new HColumnDescriptor(family));
|
||||
HTableDescriptor htd = new HTableDescriptor(tableName);
|
||||
htd.addFamily(new HColumnDescriptor(COLUMN_FAMILY));
|
||||
byte[] value = new byte[edit_size];
|
||||
|
||||
List<RegionInfo> hris = new ArrayList<>();
|
||||
for (RegionInfo region : regions) {
|
||||
if (!region.getTable().getNameAsString().equalsIgnoreCase(tname)) {
|
||||
if (region.getTable() != tableName) {
|
||||
continue;
|
||||
}
|
||||
hris.add(region);
|
||||
|
@ -685,9 +667,9 @@ public class TestDistributedLogSplitting {
|
|||
row = Arrays.copyOfRange(row, 3, 8); // use last 5 bytes because
|
||||
// HBaseTestingUtility.createMultiRegions use 5 bytes key
|
||||
byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i));
|
||||
e.add(new KeyValue(row, family, qualifier, System.currentTimeMillis(), value));
|
||||
e.add(new KeyValue(row, COLUMN_FAMILY, qualifier, System.currentTimeMillis(), value));
|
||||
log.append(curRegionInfo,
|
||||
new WALKey(curRegionInfo.getEncodedNameAsBytes(), fullTName,
|
||||
new WALKey(curRegionInfo.getEncodedNameAsBytes(), tableName,
|
||||
System.currentTimeMillis(), mvcc), e, true);
|
||||
if (0 == i % syncEvery) {
|
||||
log.sync();
|
||||
|
@ -781,11 +763,8 @@ public class TestDistributedLogSplitting {
|
|||
/**
|
||||
* Find a RS that has regions of a table.
|
||||
* @param hasMetaRegion when true, the returned RS has hbase:meta region as well
|
||||
* @param tableName
|
||||
* @return
|
||||
* @throws Exception
|
||||
*/
|
||||
private HRegionServer findRSToKill(boolean hasMetaRegion, String tableName) throws Exception {
|
||||
private HRegionServer findRSToKill(boolean hasMetaRegion) throws Exception {
|
||||
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
|
||||
List<RegionInfo> regions = null;
|
||||
HRegionServer hrs = null;
|
||||
|
@ -805,7 +784,7 @@ public class TestDistributedLogSplitting {
|
|||
if (region.isMetaRegion()) {
|
||||
isCarryingMeta = true;
|
||||
}
|
||||
if (tableName == null || region.getTable().getNameAsString().equals(tableName)) {
|
||||
if (region.getTable() == tableName) {
|
||||
foundTableRegion = true;
|
||||
}
|
||||
if (foundTableRegion && (isCarryingMeta || !hasMetaRegion)) {
|
||||
|
@ -817,8 +796,7 @@ public class TestDistributedLogSplitting {
|
|||
if (!foundTableRegion) {
|
||||
final HRegionServer destRS = hrs;
|
||||
// the RS doesn't have regions of the specified table so we need move one to this RS
|
||||
List<RegionInfo> tableRegions =
|
||||
TEST_UTIL.getAdmin().getRegions(TableName.valueOf(tableName));
|
||||
List<RegionInfo> tableRegions = TEST_UTIL.getAdmin().getRegions(tableName);
|
||||
final RegionInfo hri = tableRegions.get(0);
|
||||
TEST_UTIL.getAdmin().move(hri.getEncodedNameAsBytes(),
|
||||
Bytes.toBytes(destRS.getServerName().getServerName()));
|
||||
|
|
Loading…
Reference in New Issue