HBASE-19454 Debugging TestDistributedLogSplitting#testThreeRSAbort

- Changed testThreeRSAbort to kill the RSs instead of aborting them. Simply aborting will close the regions; we want extreme failure testing here.
- Adds some logging for easier debugging.
- Refactors TestDistributedLogSplitting to use standard junit rules.
This commit is contained in:
Apekshit Sharma 2017-12-07 13:32:10 -08:00
parent b0d68d6814
commit a70b9b5e94
2 changed files with 56 additions and 77 deletions

View File

@ -3224,8 +3224,7 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
* @throws IOException * @throws IOException
*/ */
public void waitUntilAllRegionsAssigned(final TableName tableName) throws IOException { public void waitUntilAllRegionsAssigned(final TableName tableName) throws IOException {
waitUntilAllRegionsAssigned( waitUntilAllRegionsAssigned( tableName,
tableName,
this.conf.getLong("hbase.client.sync.wait.timeout.msec", 60000)); this.conf.getLong("hbase.client.sync.wait.timeout.msec", 60000));
} }
@ -3251,6 +3250,8 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
throws IOException { throws IOException {
final Table meta = getConnection().getTable(TableName.META_TABLE_NAME); final Table meta = getConnection().getTable(TableName.META_TABLE_NAME);
try { try {
LOG.debug("Waiting until all regions of table " + tableName + " get assigned. Timeout = " +
timeout + "ms");
waitFor(timeout, 200, true, new ExplainingPredicate<IOException>() { waitFor(timeout, 200, true, new ExplainingPredicate<IOException>() {
@Override @Override
public String explainFailure() throws IOException { public String explainFailure() throws IOException {
@ -3259,7 +3260,6 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
@Override @Override
public boolean evaluate() throws IOException { public boolean evaluate() throws IOException {
boolean allRegionsAssigned = true;
Scan scan = new Scan(); Scan scan = new Scan();
scan.addFamily(HConstants.CATALOG_FAMILY); scan.addFamily(HConstants.CATALOG_FAMILY);
ResultScanner s = meta.getScanner(scan); ResultScanner s = meta.getScanner(scan);
@ -3295,17 +3295,17 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
} finally { } finally {
s.close(); s.close();
} }
return allRegionsAssigned; return true;
} }
}); });
} finally { } finally {
meta.close(); meta.close();
} }
LOG.info("All regions for table " + tableName + " assigned to meta. Checking AM states.");
// check from the master state if we are using a mini cluster // check from the master state if we are using a mini cluster
if (!getHBaseClusterInterface().isDistributedCluster()) { if (!getHBaseClusterInterface().isDistributedCluster()) {
// So, all regions are in the meta table but make sure master knows of the assignments before // So, all regions are in the meta table but make sure master knows of the assignments before
// returing -- sometimes this can lag. // returning -- sometimes this can lag.
HMaster master = getHBaseCluster().getMaster(); HMaster master = getHBaseCluster().getMaster();
final RegionStates states = master.getAssignmentManager().getRegionStates(); final RegionStates states = master.getAssignmentManager().getRegionStates();
waitFor(timeout, 200, new ExplainingPredicate<IOException>() { waitFor(timeout, 200, new ExplainingPredicate<IOException>() {
@ -3321,6 +3321,7 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
} }
}); });
} }
LOG.info("All regions for table " + tableName + " assigned.");
} }
/** /**

View File

@ -119,9 +119,14 @@ public class TestDistributedLogSplitting {
} }
@Rule
public TestName testName = new TestName();
TableName tableName;
// Start a cluster with 2 masters and 6 regionservers // Start a cluster with 2 masters and 6 regionservers
static final int NUM_MASTERS = 2; static final int NUM_MASTERS = 2;
static final int NUM_RS = 5; static final int NUM_RS = 5;
static byte[] COLUMN_FAMILY = Bytes.toBytes("family");
MiniHBaseCluster cluster; MiniHBaseCluster cluster;
HMaster master; HMaster master;
@ -174,6 +179,7 @@ public class TestDistributedLogSplitting {
public void before() throws Exception { public void before() throws Exception {
// refresh configuration // refresh configuration
conf = HBaseConfiguration.create(originalConf); conf = HBaseConfiguration.create(originalConf);
tableName = TableName.valueOf(testName.getMethodName());
} }
@After @After
@ -208,8 +214,7 @@ public class TestDistributedLogSplitting {
Path rootdir = FSUtils.getRootDir(conf); Path rootdir = FSUtils.getRootDir(conf);
int numRegions = 50; int numRegions = 50;
Table t = installTable(new ZKWatcher(conf, "table-creation", null), Table t = installTable(new ZKWatcher(conf, "table-creation", null), numRegions);
"table", "family", numRegions);
try { try {
TableName table = t.getName(); TableName table = t.getName();
List<RegionInfo> regions = null; List<RegionInfo> regions = null;
@ -233,7 +238,7 @@ public class TestDistributedLogSplitting {
} }
} }
makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100); makeWAL(hrs, regions, NUM_LOG_LINES, 100);
slm.splitLogDistributed(logDir); slm.splitLogDistributed(logDir);
@ -282,11 +287,11 @@ public class TestDistributedLogSplitting {
master.balanceSwitch(false); master.balanceSwitch(false);
final ZKWatcher zkw = new ZKWatcher(conf, "table-creation", null); final ZKWatcher zkw = new ZKWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE); Table ht = installTable(zkw, NUM_REGIONS_TO_CREATE);
try { try {
HRegionServer hrs = findRSToKill(false, "table"); HRegionServer hrs = findRSToKill(false);
List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()); List<RegionInfo> regions = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
makeWAL(hrs, regions, "table", "family", NUM_LOG_LINES, 100); makeWAL(hrs, regions, NUM_LOG_LINES, 100);
// abort master // abort master
abortMaster(cluster); abortMaster(cluster);
@ -345,16 +350,14 @@ public class TestDistributedLogSplitting {
FileSystem fs = master.getMasterFileSystem().getFileSystem(); FileSystem fs = master.getMasterFileSystem().getFileSystem();
final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads(); final List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
HRegionServer hrs = findRSToKill(false, "table"); HRegionServer hrs = findRSToKill(false);
Path rootdir = FSUtils.getRootDir(conf); Path rootdir = FSUtils.getRootDir(conf);
final Path logDir = new Path(rootdir, final Path logDir = new Path(rootdir,
AbstractFSWALProvider.getWALDirectoryName(hrs.getServerName().toString())); AbstractFSWALProvider.getWALDirectoryName(hrs.getServerName().toString()));
Table t = installTable(new ZKWatcher(conf, "table-creation", null), Table t = installTable(new ZKWatcher(conf, "table-creation", null), 40);
"table", "family", 40);
try { try {
makeWAL(hrs, ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()), makeWAL(hrs, ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()), NUM_LOG_LINES, 100);
"table", "family", NUM_LOG_LINES, 100);
new Thread() { new Thread() {
@Override @Override
@ -405,47 +408,33 @@ public class TestDistributedLogSplitting {
startCluster(NUM_RS); // NUM_RS=6. startCluster(NUM_RS); // NUM_RS=6.
final ZKWatcher zkw = new ZKWatcher(conf, final ZKWatcher zkw = new ZKWatcher(conf, "distributed log splitting test", null);
"distributed log splitting test", null);
Table ht = installTable(zkw, "table", "family", NUM_REGIONS_TO_CREATE); Table table = installTable(zkw, NUM_REGIONS_TO_CREATE);
try { try {
populateDataInTable(NUM_ROWS_PER_REGION, "family"); populateDataInTable(NUM_ROWS_PER_REGION);
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads(); List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
assertEquals(NUM_RS, rsts.size()); assertEquals(NUM_RS, rsts.size());
rsts.get(0).getRegionServer().abort("testing"); cluster.killRegionServer(rsts.get(0).getRegionServer().getServerName());
rsts.get(1).getRegionServer().abort("testing"); cluster.killRegionServer(rsts.get(1).getRegionServer().getServerName());
rsts.get(2).getRegionServer().abort("testing"); cluster.killRegionServer(rsts.get(2).getRegionServer().getServerName());
long start = EnvironmentEdgeManager.currentTime(); long start = EnvironmentEdgeManager.currentTime();
while (cluster.getLiveRegionServerThreads().size() > (NUM_RS - 3)) { while (cluster.getLiveRegionServerThreads().size() > (NUM_RS - 3)) {
if (EnvironmentEdgeManager.currentTime() - start > 60000) { if (EnvironmentEdgeManager.currentTime() - start > 60000) {
assertTrue(false); fail("Timed out waiting for server aborts.");
} }
Thread.sleep(200); Thread.sleep(200);
} }
TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
start = EnvironmentEdgeManager.currentTime(); assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION, TEST_UTIL.countRows(table));
while (HBaseTestingUtility.getAllOnlineRegions(cluster).size()
< (NUM_REGIONS_TO_CREATE + 1)) {
if (EnvironmentEdgeManager.currentTime() - start > 60000) {
assertTrue("Timedout", false);
}
Thread.sleep(200);
}
assertEquals(NUM_REGIONS_TO_CREATE * NUM_ROWS_PER_REGION,
TEST_UTIL.countRows(ht));
} finally { } finally {
if (ht != null) ht.close(); if (table != null) table.close();
if (zkw != null) zkw.close(); if (zkw != null) zkw.close();
} }
} }
@Test(timeout=30000) @Test(timeout=30000)
public void testDelayedDeleteOnFailure() throws Exception { public void testDelayedDeleteOnFailure() throws Exception {
LOG.info("testDelayedDeleteOnFailure"); LOG.info("testDelayedDeleteOnFailure");
@ -519,7 +508,7 @@ public class TestDistributedLogSplitting {
LOG.info("testReadWriteSeqIdFiles"); LOG.info("testReadWriteSeqIdFiles");
startCluster(2); startCluster(2);
final ZKWatcher zkw = new ZKWatcher(conf, "table-creation", null); final ZKWatcher zkw = new ZKWatcher(conf, "table-creation", null);
Table ht = installTable(zkw, name.getMethodName(), "family", 10); Table ht = installTable(zkw, 10);
try { try {
FileSystem fs = master.getMasterFileSystem().getFileSystem(); FileSystem fs = master.getMasterFileSystem().getFileSystem();
Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), TableName.valueOf(name.getMethodName())); Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), TableName.valueOf(name.getMethodName()));
@ -549,19 +538,17 @@ public class TestDistributedLogSplitting {
} }
} }
Table installTable(ZKWatcher zkw, String tname, String fname, int nrs) throws Exception { Table installTable(ZKWatcher zkw, int nrs) throws Exception {
return installTable(zkw, tname, fname, nrs, 0); return installTable(zkw, nrs, 0);
} }
Table installTable(ZKWatcher zkw, String tname, String fname, int nrs, Table installTable(ZKWatcher zkw, int nrs, int existingRegions) throws Exception {
int existingRegions) throws Exception {
// Create a table with regions // Create a table with regions
TableName table = TableName.valueOf(tname); byte [] family = Bytes.toBytes("family");
byte [] family = Bytes.toBytes(fname);
LOG.info("Creating table with " + nrs + " regions"); LOG.info("Creating table with " + nrs + " regions");
Table ht = TEST_UTIL.createMultiRegionTable(table, family, nrs); Table table = TEST_UTIL.createMultiRegionTable(tableName, family, nrs);
int numRegions = -1; int numRegions = -1;
try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(table)) { try (RegionLocator r = TEST_UTIL.getConnection().getRegionLocator(tableName)) {
numRegions = r.getStartKeys().length; numRegions = r.getStartKeys().length;
} }
assertEquals(nrs, numRegions); assertEquals(nrs, numRegions);
@ -570,7 +557,7 @@ public class TestDistributedLogSplitting {
// disable-enable cycle to get rid of table's dead regions left behind // disable-enable cycle to get rid of table's dead regions left behind
// by createMultiRegions // by createMultiRegions
LOG.debug("Disabling table\n"); LOG.debug("Disabling table\n");
TEST_UTIL.getAdmin().disableTable(table); TEST_UTIL.getAdmin().disableTable(tableName);
LOG.debug("Waiting for no more RIT\n"); LOG.debug("Waiting for no more RIT\n");
blockUntilNoRIT(zkw, master); blockUntilNoRIT(zkw, master);
NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster); NavigableSet<String> regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
@ -581,18 +568,16 @@ public class TestDistributedLogSplitting {
} }
assertEquals(2 + existingRegions, regions.size()); assertEquals(2 + existingRegions, regions.size());
LOG.debug("Enabling table\n"); LOG.debug("Enabling table\n");
TEST_UTIL.getAdmin().enableTable(table); TEST_UTIL.getAdmin().enableTable(tableName);
LOG.debug("Waiting for no more RIT\n"); LOG.debug("Waiting for no more RIT\n");
blockUntilNoRIT(zkw, master); blockUntilNoRIT(zkw, master);
LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n"); LOG.debug("Verifying there are " + numRegions + " assigned on cluster\n");
regions = HBaseTestingUtility.getAllOnlineRegions(cluster); regions = HBaseTestingUtility.getAllOnlineRegions(cluster);
assertEquals(numRegions + 2 + existingRegions, regions.size()); assertEquals(numRegions + 2 + existingRegions, regions.size());
return ht; return table;
} }
void populateDataInTable(int nrows, String fname) throws Exception { void populateDataInTable(int nrows) throws Exception {
byte [] family = Bytes.toBytes(fname);
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads(); List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
assertEquals(NUM_RS, rsts.size()); assertEquals(NUM_RS, rsts.size());
@ -607,7 +592,7 @@ public class TestDistributedLogSplitting {
" region = "+ hri.getRegionNameAsString()); " region = "+ hri.getRegionNameAsString());
Region region = hrs.getOnlineRegion(hri.getRegionName()); Region region = hrs.getOnlineRegion(hri.getRegionName());
assertTrue(region != null); assertTrue(region != null);
putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family); putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), COLUMN_FAMILY);
} }
} }
@ -628,37 +613,34 @@ public class TestDistributedLogSplitting {
" region = "+ hri.getRegionNameAsString()); " region = "+ hri.getRegionNameAsString());
Region region = hrs.getOnlineRegion(hri.getRegionName()); Region region = hrs.getOnlineRegion(hri.getRegionName());
assertTrue(region != null); assertTrue(region != null);
putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), family); putData(region, hri.getStartKey(), nrows, Bytes.toBytes("q"), COLUMN_FAMILY);
} }
} }
} }
public void makeWAL(HRegionServer hrs, List<RegionInfo> regions, String tname, String fname, public void makeWAL(HRegionServer hrs, List<RegionInfo> regions, int num_edits, int edit_size)
int num_edits, int edit_size) throws IOException { throws IOException {
makeWAL(hrs, regions, tname, fname, num_edits, edit_size, true); makeWAL(hrs, regions, num_edits, edit_size, true);
} }
public void makeWAL(HRegionServer hrs, List<RegionInfo> regions, String tname, String fname, public void makeWAL(HRegionServer hrs, List<RegionInfo> regions,
int num_edits, int edit_size, boolean cleanShutdown) throws IOException { int num_edits, int edit_size, boolean cleanShutdown) throws IOException {
TableName fullTName = TableName.valueOf(tname);
// remove root and meta region // remove root and meta region
regions.remove(RegionInfoBuilder.FIRST_META_REGIONINFO); regions.remove(RegionInfoBuilder.FIRST_META_REGIONINFO);
for(Iterator<RegionInfo> iter = regions.iterator(); iter.hasNext(); ) { for(Iterator<RegionInfo> iter = regions.iterator(); iter.hasNext(); ) {
RegionInfo regionInfo = iter.next(); RegionInfo regionInfo = iter.next();
if(regionInfo.getTable().isSystemTable()) { if(regionInfo.getTable().isSystemTable()) {
iter.remove(); iter.remove();
} }
} }
HTableDescriptor htd = new HTableDescriptor(fullTName); HTableDescriptor htd = new HTableDescriptor(tableName);
byte[] family = Bytes.toBytes(fname); htd.addFamily(new HColumnDescriptor(COLUMN_FAMILY));
htd.addFamily(new HColumnDescriptor(family));
byte[] value = new byte[edit_size]; byte[] value = new byte[edit_size];
List<RegionInfo> hris = new ArrayList<>(); List<RegionInfo> hris = new ArrayList<>();
for (RegionInfo region : regions) { for (RegionInfo region : regions) {
if (!region.getTable().getNameAsString().equalsIgnoreCase(tname)) { if (region.getTable() != tableName) {
continue; continue;
} }
hris.add(region); hris.add(region);
@ -685,9 +667,9 @@ public class TestDistributedLogSplitting {
row = Arrays.copyOfRange(row, 3, 8); // use last 5 bytes because row = Arrays.copyOfRange(row, 3, 8); // use last 5 bytes because
// HBaseTestingUtility.createMultiRegions use 5 bytes key // HBaseTestingUtility.createMultiRegions use 5 bytes key
byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i)); byte[] qualifier = Bytes.toBytes("c" + Integer.toString(i));
e.add(new KeyValue(row, family, qualifier, System.currentTimeMillis(), value)); e.add(new KeyValue(row, COLUMN_FAMILY, qualifier, System.currentTimeMillis(), value));
log.append(curRegionInfo, log.append(curRegionInfo,
new WALKey(curRegionInfo.getEncodedNameAsBytes(), fullTName, new WALKey(curRegionInfo.getEncodedNameAsBytes(), tableName,
System.currentTimeMillis(), mvcc), e, true); System.currentTimeMillis(), mvcc), e, true);
if (0 == i % syncEvery) { if (0 == i % syncEvery) {
log.sync(); log.sync();
@ -781,11 +763,8 @@ public class TestDistributedLogSplitting {
/** /**
* Find a RS that has regions of a table. * Find a RS that has regions of a table.
* @param hasMetaRegion when true, the returned RS has hbase:meta region as well * @param hasMetaRegion when true, the returned RS has hbase:meta region as well
* @param tableName
* @return
* @throws Exception
*/ */
private HRegionServer findRSToKill(boolean hasMetaRegion, String tableName) throws Exception { private HRegionServer findRSToKill(boolean hasMetaRegion) throws Exception {
List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads(); List<RegionServerThread> rsts = cluster.getLiveRegionServerThreads();
List<RegionInfo> regions = null; List<RegionInfo> regions = null;
HRegionServer hrs = null; HRegionServer hrs = null;
@ -805,7 +784,7 @@ public class TestDistributedLogSplitting {
if (region.isMetaRegion()) { if (region.isMetaRegion()) {
isCarryingMeta = true; isCarryingMeta = true;
} }
if (tableName == null || region.getTable().getNameAsString().equals(tableName)) { if (region.getTable() == tableName) {
foundTableRegion = true; foundTableRegion = true;
} }
if (foundTableRegion && (isCarryingMeta || !hasMetaRegion)) { if (foundTableRegion && (isCarryingMeta || !hasMetaRegion)) {
@ -817,8 +796,7 @@ public class TestDistributedLogSplitting {
if (!foundTableRegion) { if (!foundTableRegion) {
final HRegionServer destRS = hrs; final HRegionServer destRS = hrs;
// the RS doesn't have regions of the specified table so we need move one to this RS // the RS doesn't have regions of the specified table so we need move one to this RS
List<RegionInfo> tableRegions = List<RegionInfo> tableRegions = TEST_UTIL.getAdmin().getRegions(tableName);
TEST_UTIL.getAdmin().getRegions(TableName.valueOf(tableName));
final RegionInfo hri = tableRegions.get(0); final RegionInfo hri = tableRegions.get(0);
TEST_UTIL.getAdmin().move(hri.getEncodedNameAsBytes(), TEST_UTIL.getAdmin().move(hri.getEncodedNameAsBytes(),
Bytes.toBytes(destRS.getServerName().getServerName())); Bytes.toBytes(destRS.getServerName().getServerName()));