HBASE-19457 Debugging flaky TestTruncateTableProcedure
- Adds debug logging for future ease - Removes 60s timeout since testRecoveryAndDoubleExecutionPreserveSplits is only halfway after a minute. - Adds some comments - Logging change: Some places report "regionState=" while others just "state=". State machine procs also have "state=" in their logs. Let me change all region related logging to "regionState=" so that 1) it's consistent everywhere, 2) more filtered results when searching through logs.
This commit is contained in:
parent
a5a77ae3d5
commit
7092b814bd
|
@ -733,7 +733,7 @@ public abstract class CommonFSUtils {
|
|||
*/
|
||||
public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG)
|
||||
throws IOException {
|
||||
LOG.debug("Current file system:");
|
||||
LOG.debug("File system contents for path " + root);
|
||||
logFSTree(LOG, fs, root, "|-");
|
||||
}
|
||||
|
||||
|
|
|
@ -176,6 +176,9 @@ public abstract class StateMachineProcedure<TEnvironment, TState>
|
|||
this.cycles++;
|
||||
}
|
||||
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug(toString());
|
||||
}
|
||||
stateFlow = executeFromState(env, state);
|
||||
if (!hasMoreState()) setNextState(EOF_STATE);
|
||||
if (subProcList != null && !subProcList.isEmpty()) {
|
||||
|
|
|
@ -805,7 +805,7 @@ public class AssignmentManager implements ServerListener {
|
|||
}
|
||||
|
||||
if (LOG.isTraceEnabled()) {
|
||||
LOG.trace(String.format("Update region transition serverName=%s region=%s state=%s",
|
||||
LOG.trace(String.format("Update region transition serverName=%s region=%s regionState=%s",
|
||||
serverName, regionNode, state));
|
||||
}
|
||||
|
||||
|
@ -838,7 +838,7 @@ public class AssignmentManager implements ServerListener {
|
|||
checkFailoverCleanupCompleted(parent);
|
||||
|
||||
if (state != TransitionCode.READY_TO_SPLIT) {
|
||||
throw new UnexpectedStateException("unsupported split state=" + state +
|
||||
throw new UnexpectedStateException("unsupported split regionState=" + state +
|
||||
" for parent region " + parent +
|
||||
" maybe an old RS (< 2.0) had the operation in progress");
|
||||
}
|
||||
|
@ -870,7 +870,7 @@ public class AssignmentManager implements ServerListener {
|
|||
checkFailoverCleanupCompleted(merged);
|
||||
|
||||
if (state != TransitionCode.READY_TO_MERGE) {
|
||||
throw new UnexpectedStateException("Unsupported merge state=" + state +
|
||||
throw new UnexpectedStateException("Unsupported merge regionState=" + state +
|
||||
" for regionA=" + hriA + " regionB=" + hriB + " merged=" + merged +
|
||||
" maybe an old RS (< 2.0) had the operation in progress");
|
||||
}
|
||||
|
@ -884,7 +884,8 @@ public class AssignmentManager implements ServerListener {
|
|||
// If the RS is < 2.0 throw an exception to abort the operation, we are handling the merge
|
||||
if (regionStates.getOrCreateServer(serverName).getVersionNumber() < 0x0200000) {
|
||||
throw new UnsupportedOperationException(String.format(
|
||||
"Merge not handled yet: state=%s merged=%s hriA=%s hriB=%s", state, merged, hriA, hriB));
|
||||
"Merge not handled yet: regionState=%s merged=%s hriA=%s hriB=%s", state, merged, hriA,
|
||||
hriB));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1162,7 +1163,7 @@ public class AssignmentManager implements ServerListener {
|
|||
LOG.info("waiting for RS to join");
|
||||
Threads.sleep(250);
|
||||
}
|
||||
LOG.info("RS joined " + master.getServerManager().countOfRegionServers());
|
||||
LOG.info("RS joined. Num RS = " + master.getServerManager().countOfRegionServers());
|
||||
|
||||
// This method will assign all user regions if a clean server startup or
|
||||
// it will reconstruct master state and cleanup any leftovers from previous master process.
|
||||
|
@ -1188,7 +1189,7 @@ public class AssignmentManager implements ServerListener {
|
|||
// hbase1 to hbase2? Am I restoring a SNAPSHOT or otherwise adding a region to hbase:meta?
|
||||
// In any of these cases, state is empty. For now, presume OFFLINE but there are probably
|
||||
// cases where we need to probe more to be sure this correct; TODO informed by experience.
|
||||
LOG.info(regionInfo.getEncodedName() + " state=null; presuming " + State.OFFLINE);
|
||||
LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE);
|
||||
localState = State.OFFLINE;
|
||||
}
|
||||
synchronized (regionNode) {
|
||||
|
|
|
@ -104,6 +104,12 @@ public class TruncateTableProcedure
|
|||
break;
|
||||
case TRUNCATE_TABLE_CLEAR_FS_LAYOUT:
|
||||
DeleteTableProcedure.deleteFromFs(env, getTableName(), regions, true);
|
||||
// NOTE: It's very important that we create new HRegions before next state, so that
|
||||
// they get persisted in procedure state before we start using them for anything.
|
||||
// Otherwise, if we create them in next step and master crashes after creating fs
|
||||
// layout but before saving state, region re-created after recovery will have different
|
||||
// regionId(s) and encoded names. That will lead to unwanted regions in FS layout
|
||||
// (which were created before the crash).
|
||||
if (!preserveSplits) {
|
||||
// if we are not preserving splits, generate a new single region
|
||||
regions = Arrays.asList(ModifyRegionUtils.createRegionInfos(tableDescriptor, null));
|
||||
|
|
|
@ -170,11 +170,11 @@ public class MasterProcedureTestingUtility {
|
|||
final Path tableDir = FSUtils.getTableDir(master.getMasterFileSystem().getRootDir(), tableName);
|
||||
assertTrue(fs.exists(tableDir));
|
||||
FSUtils.logFileSystemState(fs, tableDir, LOG);
|
||||
List<Path> allRegionDirs = FSUtils.getRegionDirs(fs, tableDir);
|
||||
List<Path> unwantedRegionDirs = FSUtils.getRegionDirs(fs, tableDir);
|
||||
for (int i = 0; i < regions.length; ++i) {
|
||||
Path regionDir = new Path(tableDir, regions[i].getEncodedName());
|
||||
assertTrue(regions[i] + " region dir does not exist", fs.exists(regionDir));
|
||||
assertTrue(allRegionDirs.remove(regionDir));
|
||||
assertTrue(unwantedRegionDirs.remove(regionDir));
|
||||
List<Path> allFamilyDirs = FSUtils.getFamilyDirs(fs, regionDir);
|
||||
for (int j = 0; j < family.length; ++j) {
|
||||
final Path familyDir = new Path(regionDir, family[j]);
|
||||
|
@ -191,7 +191,8 @@ public class MasterProcedureTestingUtility {
|
|||
}
|
||||
assertTrue("found extraneous families: " + allFamilyDirs, allFamilyDirs.isEmpty());
|
||||
}
|
||||
assertTrue("found extraneous regions: " + allRegionDirs, allRegionDirs.isEmpty());
|
||||
assertTrue("found extraneous regions: " + unwantedRegionDirs, unwantedRegionDirs.isEmpty());
|
||||
LOG.debug("Table directory layout is as expected.");
|
||||
|
||||
// check meta
|
||||
assertTrue(MetaTableAccessor.tableExists(master.getConnection(), tableName));
|
||||
|
|
|
@ -49,7 +49,7 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
|
|||
@Rule
|
||||
public TestName name = new TestName();
|
||||
|
||||
@Test(timeout=60000)
|
||||
@Test
|
||||
public void testTruncateNotExistentTable() throws Exception {
|
||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||
|
||||
|
@ -64,7 +64,7 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
|
|||
assertTrue(ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotFoundException);
|
||||
}
|
||||
|
||||
@Test(timeout=60000)
|
||||
@Test
|
||||
public void testTruncateNotDisabledTable() throws Exception {
|
||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||
|
||||
|
@ -82,13 +82,13 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
|
|||
ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotDisabledException);
|
||||
}
|
||||
|
||||
@Test(timeout=60000)
|
||||
@Test
|
||||
public void testSimpleTruncatePreserveSplits() throws Exception {
|
||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||
testSimpleTruncate(tableName, true);
|
||||
}
|
||||
|
||||
@Test(timeout=60000)
|
||||
@Test
|
||||
public void testSimpleTruncateNoPreserveSplits() throws Exception {
|
||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||
testSimpleTruncate(tableName, false);
|
||||
|
@ -116,6 +116,8 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
|
|||
new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits));
|
||||
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
|
||||
|
||||
// If truncate procedure completed successfully, it means all regions were assigned correctly
|
||||
// and table is enabled now.
|
||||
UTIL.waitUntilAllRegionsAssigned(tableName);
|
||||
|
||||
// validate the table regions and layout
|
||||
|
@ -137,13 +139,13 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
|
|||
assertEquals(50, UTIL.countRows(tableName));
|
||||
}
|
||||
|
||||
@Test(timeout=60000)
|
||||
@Test
|
||||
public void testRecoveryAndDoubleExecutionPreserveSplits() throws Exception {
|
||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||
testRecoveryAndDoubleExecution(tableName, true);
|
||||
}
|
||||
|
||||
@Test(timeout=60000)
|
||||
@Test
|
||||
public void testRecoveryAndDoubleExecutionNoPreserveSplits() throws Exception {
|
||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||
testRecoveryAndDoubleExecution(tableName, false);
|
||||
|
|
Loading…
Reference in New Issue