HBASE-19457 Debugging flaky TestTruncateTableProcedure
- Adds debug logging to make future debugging easier.
- Removes the 60s timeout, since testRecoveryAndDoubleExecutionPreserveSplits is only halfway done after a minute.
- Adds some comments.
- Logging change: some places report "regionState=" while others report just "state=", and state machine procs also have "state=" in their logs. This changes all region-related logging to "regionState=" so that 1) it is consistent everywhere, and 2) searching through logs gives more filtered results.
This commit is contained in:
parent
a33a9de643
commit
e8ba7b2320
|
@ -733,7 +733,7 @@ public abstract class CommonFSUtils {
|
||||||
*/
|
*/
|
||||||
public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG)
|
public static void logFileSystemState(final FileSystem fs, final Path root, Log LOG)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
LOG.debug("Current file system:");
|
LOG.debug("File system contents for path " + root);
|
||||||
logFSTree(LOG, fs, root, "|-");
|
logFSTree(LOG, fs, root, "|-");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -176,6 +176,9 @@ public abstract class StateMachineProcedure<TEnvironment, TState>
|
||||||
this.cycles++;
|
this.cycles++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug(toString());
|
||||||
|
}
|
||||||
stateFlow = executeFromState(env, state);
|
stateFlow = executeFromState(env, state);
|
||||||
if (!hasMoreState()) setNextState(EOF_STATE);
|
if (!hasMoreState()) setNextState(EOF_STATE);
|
||||||
if (subProcList != null && !subProcList.isEmpty()) {
|
if (subProcList != null && !subProcList.isEmpty()) {
|
||||||
|
|
|
@ -805,7 +805,7 @@ public class AssignmentManager implements ServerListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (LOG.isTraceEnabled()) {
|
if (LOG.isTraceEnabled()) {
|
||||||
LOG.trace(String.format("Update region transition serverName=%s region=%s state=%s",
|
LOG.trace(String.format("Update region transition serverName=%s region=%s regionState=%s",
|
||||||
serverName, regionNode, state));
|
serverName, regionNode, state));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -838,7 +838,7 @@ public class AssignmentManager implements ServerListener {
|
||||||
checkFailoverCleanupCompleted(parent);
|
checkFailoverCleanupCompleted(parent);
|
||||||
|
|
||||||
if (state != TransitionCode.READY_TO_SPLIT) {
|
if (state != TransitionCode.READY_TO_SPLIT) {
|
||||||
throw new UnexpectedStateException("unsupported split state=" + state +
|
throw new UnexpectedStateException("unsupported split regionState=" + state +
|
||||||
" for parent region " + parent +
|
" for parent region " + parent +
|
||||||
" maybe an old RS (< 2.0) had the operation in progress");
|
" maybe an old RS (< 2.0) had the operation in progress");
|
||||||
}
|
}
|
||||||
|
@ -870,7 +870,7 @@ public class AssignmentManager implements ServerListener {
|
||||||
checkFailoverCleanupCompleted(merged);
|
checkFailoverCleanupCompleted(merged);
|
||||||
|
|
||||||
if (state != TransitionCode.READY_TO_MERGE) {
|
if (state != TransitionCode.READY_TO_MERGE) {
|
||||||
throw new UnexpectedStateException("Unsupported merge state=" + state +
|
throw new UnexpectedStateException("Unsupported merge regionState=" + state +
|
||||||
" for regionA=" + hriA + " regionB=" + hriB + " merged=" + merged +
|
" for regionA=" + hriA + " regionB=" + hriB + " merged=" + merged +
|
||||||
" maybe an old RS (< 2.0) had the operation in progress");
|
" maybe an old RS (< 2.0) had the operation in progress");
|
||||||
}
|
}
|
||||||
|
@ -884,7 +884,8 @@ public class AssignmentManager implements ServerListener {
|
||||||
// If the RS is < 2.0 throw an exception to abort the operation, we are handling the merge
|
// If the RS is < 2.0 throw an exception to abort the operation, we are handling the merge
|
||||||
if (regionStates.getOrCreateServer(serverName).getVersionNumber() < 0x0200000) {
|
if (regionStates.getOrCreateServer(serverName).getVersionNumber() < 0x0200000) {
|
||||||
throw new UnsupportedOperationException(String.format(
|
throw new UnsupportedOperationException(String.format(
|
||||||
"Merge not handled yet: state=%s merged=%s hriA=%s hriB=%s", state, merged, hriA, hriB));
|
"Merge not handled yet: regionState=%s merged=%s hriA=%s hriB=%s", state, merged, hriA,
|
||||||
|
hriB));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1162,7 +1163,7 @@ public class AssignmentManager implements ServerListener {
|
||||||
LOG.info("waiting for RS to join");
|
LOG.info("waiting for RS to join");
|
||||||
Threads.sleep(250);
|
Threads.sleep(250);
|
||||||
}
|
}
|
||||||
LOG.info("RS joined " + master.getServerManager().countOfRegionServers());
|
LOG.info("RS joined. Num RS = " + master.getServerManager().countOfRegionServers());
|
||||||
|
|
||||||
// This method will assign all user regions if a clean server startup or
|
// This method will assign all user regions if a clean server startup or
|
||||||
// it will reconstruct master state and cleanup any leftovers from previous master process.
|
// it will reconstruct master state and cleanup any leftovers from previous master process.
|
||||||
|
@ -1188,7 +1189,7 @@ public class AssignmentManager implements ServerListener {
|
||||||
// hbase1 to hbase2? Am I restoring a SNAPSHOT or otherwise adding a region to hbase:meta?
|
// hbase1 to hbase2? Am I restoring a SNAPSHOT or otherwise adding a region to hbase:meta?
|
||||||
// In any of these cases, state is empty. For now, presume OFFLINE but there are probably
|
// In any of these cases, state is empty. For now, presume OFFLINE but there are probably
|
||||||
// cases where we need to probe more to be sure this correct; TODO informed by experience.
|
// cases where we need to probe more to be sure this correct; TODO informed by experience.
|
||||||
LOG.info(regionInfo.getEncodedName() + " state=null; presuming " + State.OFFLINE);
|
LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE);
|
||||||
localState = State.OFFLINE;
|
localState = State.OFFLINE;
|
||||||
}
|
}
|
||||||
synchronized (regionNode) {
|
synchronized (regionNode) {
|
||||||
|
|
|
@ -104,6 +104,12 @@ public class TruncateTableProcedure
|
||||||
break;
|
break;
|
||||||
case TRUNCATE_TABLE_CLEAR_FS_LAYOUT:
|
case TRUNCATE_TABLE_CLEAR_FS_LAYOUT:
|
||||||
DeleteTableProcedure.deleteFromFs(env, getTableName(), regions, true);
|
DeleteTableProcedure.deleteFromFs(env, getTableName(), regions, true);
|
||||||
|
// NOTE: It's very important that we create new HRegions before next state, so that
|
||||||
|
// they get persisted in procedure state before we start using them for anything.
|
||||||
|
// Otherwise, if we create them in next step and master crashes after creating fs
|
||||||
|
// layout but before saving state, region re-created after recovery will have different
|
||||||
|
// regionId(s) and encoded names. That will lead to unwanted regions in FS layout
|
||||||
|
// (which were created before the crash).
|
||||||
if (!preserveSplits) {
|
if (!preserveSplits) {
|
||||||
// if we are not preserving splits, generate a new single region
|
// if we are not preserving splits, generate a new single region
|
||||||
regions = Arrays.asList(ModifyRegionUtils.createRegionInfos(tableDescriptor, null));
|
regions = Arrays.asList(ModifyRegionUtils.createRegionInfos(tableDescriptor, null));
|
||||||
|
|
|
@ -170,11 +170,11 @@ public class MasterProcedureTestingUtility {
|
||||||
final Path tableDir = FSUtils.getTableDir(master.getMasterFileSystem().getRootDir(), tableName);
|
final Path tableDir = FSUtils.getTableDir(master.getMasterFileSystem().getRootDir(), tableName);
|
||||||
assertTrue(fs.exists(tableDir));
|
assertTrue(fs.exists(tableDir));
|
||||||
FSUtils.logFileSystemState(fs, tableDir, LOG);
|
FSUtils.logFileSystemState(fs, tableDir, LOG);
|
||||||
List<Path> allRegionDirs = FSUtils.getRegionDirs(fs, tableDir);
|
List<Path> unwantedRegionDirs = FSUtils.getRegionDirs(fs, tableDir);
|
||||||
for (int i = 0; i < regions.length; ++i) {
|
for (int i = 0; i < regions.length; ++i) {
|
||||||
Path regionDir = new Path(tableDir, regions[i].getEncodedName());
|
Path regionDir = new Path(tableDir, regions[i].getEncodedName());
|
||||||
assertTrue(regions[i] + " region dir does not exist", fs.exists(regionDir));
|
assertTrue(regions[i] + " region dir does not exist", fs.exists(regionDir));
|
||||||
assertTrue(allRegionDirs.remove(regionDir));
|
assertTrue(unwantedRegionDirs.remove(regionDir));
|
||||||
List<Path> allFamilyDirs = FSUtils.getFamilyDirs(fs, regionDir);
|
List<Path> allFamilyDirs = FSUtils.getFamilyDirs(fs, regionDir);
|
||||||
for (int j = 0; j < family.length; ++j) {
|
for (int j = 0; j < family.length; ++j) {
|
||||||
final Path familyDir = new Path(regionDir, family[j]);
|
final Path familyDir = new Path(regionDir, family[j]);
|
||||||
|
@ -191,7 +191,8 @@ public class MasterProcedureTestingUtility {
|
||||||
}
|
}
|
||||||
assertTrue("found extraneous families: " + allFamilyDirs, allFamilyDirs.isEmpty());
|
assertTrue("found extraneous families: " + allFamilyDirs, allFamilyDirs.isEmpty());
|
||||||
}
|
}
|
||||||
assertTrue("found extraneous regions: " + allRegionDirs, allRegionDirs.isEmpty());
|
assertTrue("found extraneous regions: " + unwantedRegionDirs, unwantedRegionDirs.isEmpty());
|
||||||
|
LOG.debug("Table directory layout is as expected.");
|
||||||
|
|
||||||
// check meta
|
// check meta
|
||||||
assertTrue(MetaTableAccessor.tableExists(master.getConnection(), tableName));
|
assertTrue(MetaTableAccessor.tableExists(master.getConnection(), tableName));
|
||||||
|
|
|
@ -49,7 +49,7 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
|
||||||
@Rule
|
@Rule
|
||||||
public TestName name = new TestName();
|
public TestName name = new TestName();
|
||||||
|
|
||||||
@Test(timeout=60000)
|
@Test
|
||||||
public void testTruncateNotExistentTable() throws Exception {
|
public void testTruncateNotExistentTable() throws Exception {
|
||||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
|
|
||||||
|
@ -64,7 +64,7 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
|
||||||
assertTrue(ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotFoundException);
|
assertTrue(ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotFoundException);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout=60000)
|
@Test
|
||||||
public void testTruncateNotDisabledTable() throws Exception {
|
public void testTruncateNotDisabledTable() throws Exception {
|
||||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
|
|
||||||
|
@ -82,13 +82,13 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
|
||||||
ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotDisabledException);
|
ProcedureTestingUtility.getExceptionCause(result) instanceof TableNotDisabledException);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout=60000)
|
@Test
|
||||||
public void testSimpleTruncatePreserveSplits() throws Exception {
|
public void testSimpleTruncatePreserveSplits() throws Exception {
|
||||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
testSimpleTruncate(tableName, true);
|
testSimpleTruncate(tableName, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout=60000)
|
@Test
|
||||||
public void testSimpleTruncateNoPreserveSplits() throws Exception {
|
public void testSimpleTruncateNoPreserveSplits() throws Exception {
|
||||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
testSimpleTruncate(tableName, false);
|
testSimpleTruncate(tableName, false);
|
||||||
|
@ -116,6 +116,8 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
|
||||||
new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits));
|
new TruncateTableProcedure(procExec.getEnvironment(), tableName, preserveSplits));
|
||||||
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
|
ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
|
||||||
|
|
||||||
|
// If truncate procedure completed successfully, it means all regions were assigned correctly
|
||||||
|
// and table is enabled now.
|
||||||
UTIL.waitUntilAllRegionsAssigned(tableName);
|
UTIL.waitUntilAllRegionsAssigned(tableName);
|
||||||
|
|
||||||
// validate the table regions and layout
|
// validate the table regions and layout
|
||||||
|
@ -137,13 +139,13 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
|
||||||
assertEquals(50, UTIL.countRows(tableName));
|
assertEquals(50, UTIL.countRows(tableName));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout=60000)
|
@Test
|
||||||
public void testRecoveryAndDoubleExecutionPreserveSplits() throws Exception {
|
public void testRecoveryAndDoubleExecutionPreserveSplits() throws Exception {
|
||||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
testRecoveryAndDoubleExecution(tableName, true);
|
testRecoveryAndDoubleExecution(tableName, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout=60000)
|
@Test
|
||||||
public void testRecoveryAndDoubleExecutionNoPreserveSplits() throws Exception {
|
public void testRecoveryAndDoubleExecutionNoPreserveSplits() throws Exception {
|
||||||
final TableName tableName = TableName.valueOf(name.getMethodName());
|
final TableName tableName = TableName.valueOf(name.getMethodName());
|
||||||
testRecoveryAndDoubleExecution(tableName, false);
|
testRecoveryAndDoubleExecution(tableName, false);
|
||||||
|
|
Loading…
Reference in New Issue