HBASE-19457 Debugging flaky TestTruncateTableProcedure

- Adds debug logging for future ease
- Removes 60s timeout since testRecoveryAndDoubleExecutionPreserveSplits is only halfway after a minute.
- Adds some comments
- Logging change: Some places report "regionState=" while others just "state=".
  State machine procs also have "state=" in their logs. Let me change all region related logging to "regionState=" so that
  1) it's consistent everywhere, 2) more filtered results when searching through logs.
This commit is contained in:
Apekshit Sharma 2017-12-08 17:16:06 -08:00
parent a5a77ae3d5
commit 7092b814bd
6 changed files with 29 additions and 16 deletions

View File

@ -733,7 +733,7 @@ public abstract class CommonFSUtils {
*/
public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG)
throws IOException {
LOG.debug("Current file system:");
LOG.debug("File system contents for path " + root);
logFSTree(LOG, fs, root, "|-");
}

View File

@ -176,6 +176,9 @@ public abstract class StateMachineProcedure<TEnvironment, TState>
this.cycles++;
}
if (LOG.isDebugEnabled()) {
LOG.debug(toString());
}
stateFlow = executeFromState(env, state);
if (!hasMoreState()) setNextState(EOF_STATE);
if (subProcList != null && !subProcList.isEmpty()) {

View File

@ -805,7 +805,7 @@ public class AssignmentManager implements ServerListener {
}
if (LOG.isTraceEnabled()) {
LOG.trace(String.format("Update region transition serverName=%s region=%s state=%s",
LOG.trace(String.format("Update region transition serverName=%s region=%s regionState=%s",
serverName, regionNode, state));
}
@ -838,7 +838,7 @@ public class AssignmentManager implements ServerListener {
checkFailoverCleanupCompleted(parent);
if (state != TransitionCode.READY_TO_SPLIT) {
throw new UnexpectedStateException("unsupported split state=" + state +
throw new UnexpectedStateException("unsupported split regionState=" + state +
" for parent region " + parent +
" maybe an old RS (< 2.0) had the operation in progress");
}
@ -870,7 +870,7 @@ public class AssignmentManager implements ServerListener {
checkFailoverCleanupCompleted(merged);
if (state != TransitionCode.READY_TO_MERGE) {
throw new UnexpectedStateException("Unsupported merge state=" + state +
throw new UnexpectedStateException("Unsupported merge regionState=" + state +
" for regionA=" + hriA + " regionB=" + hriB + " merged=" + merged +
" maybe an old RS (< 2.0) had the operation in progress");
}
@ -884,7 +884,8 @@ public class AssignmentManager implements ServerListener {
// If the RS is < 2.0 throw an exception to abort the operation, we are handling the merge
if (regionStates.getOrCreateServer(serverName).getVersionNumber() < 0x0200000) {
throw new UnsupportedOperationException(String.format(
"Merge not handled yet: state=%s merged=%s hriA=%s hriB=%s", state, merged, hriA, hriB));
"Merge not handled yet: regionState=%s merged=%s hriA=%s hriB=%s", state, merged, hriA,
hriB));
}
}
@ -1162,7 +1163,7 @@ public class AssignmentManager implements ServerListener {
LOG.info("waiting for RS to join");
Threads.sleep(250);
}
LOG.info("RS joined " + master.getServerManager().countOfRegionServers());
LOG.info("RS joined. Num RS = " + master.getServerManager().countOfRegionServers());
// This method will assign all user regions if a clean server startup or
// it will reconstruct master state and cleanup any leftovers from previous master process.
@ -1188,7 +1189,7 @@ public class AssignmentManager implements ServerListener {
// hbase1 to hbase2? Am I restoring a SNAPSHOT or otherwise adding a region to hbase:meta?
// In any of these cases, state is empty. For now, presume OFFLINE but there are probably
// cases where we need to probe more to be sure this correct; TODO informed by experience.
LOG.info(regionInfo.getEncodedName() + " state=null; presuming " + State.OFFLINE);
LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE);
localState = State.OFFLINE;
}
synchronized (regionNode) {

View File

@ -104,6 +104,12 @@ public class TruncateTableProcedure
break;
case TRUNCATE_TABLE_CLEAR_FS_LAYOUT:
DeleteTableProcedure.deleteFromFs(env, getTableName(), regions, true);
// NOTE: It's very important that we create new HRegions before next state, so that
// they get persisted in procedure state before we start using them for anything.
// Otherwise, if we create them in next step and master crashes after creating fs
// layout but before saving state, region re-created after recovery will have different
// regionId(s) and encoded names. That will lead to unwanted regions in FS layout
// (which were created before the crash).
if (!preserveSplits) {
// if we are not preserving splits, generate a new single region
regions = Arrays.asList(ModifyRegionUtils.createRegionInfos(tableDescriptor, null));

View File

@ -170,11 +170,11 @@ public class MasterProcedureTestingUtility {
final Path tableDir = FSUtils.getTableDir(master.getMasterFileSystem().getRootDir(), tableName);
assertTrue(fs.exists(tableDir));
FSUtils.logFileSystemState(fs, tableDir, LOG);
List<Path> allRegionDirs = FSUtils.getRegionDirs(fs, tableDir);
List<Path> unwantedRegionDirs = FSUtils.getRegionDirs(fs, tableDir);
for (int i = 0; i < regions.length; ++i) {
Path regionDir = new Path(tableDir, regions[i].getEncodedName());
assertTrue(regions[i] + " region dir does not exist", fs.exists(regionDir));
assertTrue(allRegionDirs.remove(regionDir));
assertTrue(unwantedRegionDirs.remove(regionDir));
List<Path> allFamilyDirs = FSUtils.getFamilyDirs(fs, regionDir);
for (int j = 0; j < family.length; ++j) {
final Path familyDir = new Path(regionDir, family[j]);
@ -191,7 +191,8 @@ public class MasterProcedureTestingUtility {
}
assertTrue("found extraneous families: " + allFamilyDirs, allFamilyDirs.isEmpty());
}
assertTrue("found extraneous regions: " + allRegionDirs, allRegionDirs.isEmpty());
assertTrue("found extraneous regions: " + unwantedRegionDirs, unwantedRegionDirs.isEmpty());
LOG.debug("Table directory layout is as expected.");
// check meta
assertTrue(MetaTableAccessor.tableExists(master.getConnection(), tableName));

View File

@ -49,7 +49,7 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
@Rule
public TestName name = new TestName();
@Test(timeout=60000)
@Test
public void testTruncateNotExistentTable() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
@ -64,7 +64,7 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
assertTrue(ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotFoundException);
}
@Test(timeout=60000)
@Test
public void testTruncateNotDisabledTable() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
@ -82,13 +82,13 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotDisabledException);
}
@Test(timeout=60000)
@Test
public void testSimpleTruncatePreserveSplits() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
testSimpleTruncate(tableName, true);
}
@Test(timeout=60000)
@Test
public void testSimpleTruncateNoPreserveSplits() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
testSimpleTruncate(tableName, false);
@ -116,6 +116,8 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits));
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
// If truncate procedure completed successfully, it means all regions were assigned correctly
// and table is enabled now.
UTIL.waitUntilAllRegionsAssigned(tableName);
// validate the table regions and layout
@ -137,13 +139,13 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
assertEquals(50, UTIL.countRows(tableName));
}
@Test(timeout=60000)
@Test
public void testRecoveryAndDoubleExecutionPreserveSplits() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
testRecoveryAndDoubleExecution(tableName, true);
}
@Test(timeout=60000)
@Test
public void testRecoveryAndDoubleExecutionNoPreserveSplits() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
testRecoveryAndDoubleExecution(tableName, false);