HBASE-20616 TruncateTableProcedure is stuck in retry loop in TRUNCATE_TABLE_CREATE_FS_LAYOUT state
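
Before this change, the TRUNCATE_TABLE_CREATE_FS_LAYOUT state re-ran
CreateTableProcedure.createFsLayout() on every retry without cleaning up
first, so a layout left behind by a failed attempt made each subsequent
attempt fail as well and the procedure never progressed. Make the state
idempotent by deleting any partially created layout via
DeleteTableProcedure.deleteFromFs() before re-creating it, expose a
getFirstRegionInfo() hook for tests, and add tests that inject a one-time
failure in that state.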

Signed-off-by: tedyu <yuzhihong@gmail.com>
Toshihiro Suzuki 2018-05-24 00:26:01 +09:00 committed by tedyu
parent 1792f541c6
commit 554d513f50
2 changed files with 102 additions and 1 deletion

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java

@@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.util.ModifyRegionUtils;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
 import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
@@ -121,6 +122,7 @@ public class TruncateTableProcedure
           setNextState(TruncateTableState.TRUNCATE_TABLE_CREATE_FS_LAYOUT);
           break;
         case TRUNCATE_TABLE_CREATE_FS_LAYOUT:
+          DeleteTableProcedure.deleteFromFs(env, getTableName(), regions, true);
           regions = CreateTableProcedure.createFsLayout(env, tableDescriptor, regions);
           CreateTableProcedure.updateTableDescCache(env, getTableName());
           setNextState(TruncateTableState.TRUNCATE_TABLE_ADD_TO_META);
@@ -148,7 +150,8 @@ public class TruncateTableProcedure
       if (isRollbackSupported(state)) {
         setFailure("master-truncate-table", e);
       } else {
-        LOG.warn("Retriable error trying to truncate table=" + getTableName() + " state=" + state, e);
+        LOG.warn("Retriable error trying to truncate table=" + getTableName()
+          + " state=" + state, e);
       }
     }
     return Flow.HAS_MORE_STATE;
@@ -303,4 +306,12 @@ public class TruncateTableProcedure
       cpHost.postCompletedTruncateTableAction(tableName, getUser());
     }
   }
+
+  @VisibleForTesting
+  RegionInfo getFirstRegionInfo() {
+    if (regions == null || regions.isEmpty()) {
+      return null;
+    }
+    return regions.get(0);
+  }
 }
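
Why the one-line deleteFromFs call above fixes the retry loop: after a
retriable failure the procedure framework re-invokes executeFromState with
the same state, so every state handler must tolerate the leftovers of a
partially completed earlier attempt. A minimal, self-contained sketch of
that delete-then-create idempotency pattern, using plain java.nio and
hypothetical names (not HBase's API):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;

public class IdempotentStateSketch {

  static void createLayout(Path tableDir, int regions) throws IOException {
    // Throws FileAlreadyExistsException if a previous attempt left this behind
    Files.createDirectory(tableDir);
    for (int i = 0; i < regions; i++) {
      Files.createDirectory(tableDir.resolve("region-" + i));
    }
  }

  static void deleteLayout(Path tableDir) throws IOException {
    if (Files.exists(tableDir)) {
      // Delete children before parents
      try (java.util.stream.Stream<Path> walk = Files.walk(tableDir)) {
        walk.sorted(Comparator.reverseOrder()).forEach(p -> p.toFile().delete());
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Path tableDir = Files.createTempDirectory("truncate-sketch").resolve("table");
    // Emulate a partially completed previous attempt: table dir plus one region dir
    Files.createDirectory(tableDir);
    Files.createDirectory(tableDir.resolve("region-0"));

    // Retrying without cleanup would fail on every attempt:
    //   createLayout(tableDir, 3);  -> FileAlreadyExistsException, forever
    // The fix's pattern: wipe any partial layout, then re-create it.
    deleteLayout(tableDir);
    createLayout(tableDir, 3);
    System.out.println("retry succeeded");
  }
}

The same reasoning explains why the fix deletes unconditionally rather than
probing for what already exists: re-creating from a clean slate is much
easier to reason about than patching up an arbitrary partial layout.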

hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java

@@ -19,18 +19,26 @@ package org.apache.hadoop.hbase.master.procedure;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseIOException;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotDisabledException;
 import org.apache.hadoop.hbase.TableNotFoundException;
 import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.procedure2.Procedure;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
 import org.junit.ClassRule;
 import org.junit.Rule;
 import org.junit.Test;
@@ -38,6 +46,7 @@ import org.junit.experimental.categories.Category;
 import org.junit.rules.TestName;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
 
 @Category({MasterTests.class, MediumTests.class})
 public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
@@ -217,4 +226,85 @@ public class TestTruncateTableProcedure extends TestTableDDLProcedureBase {
         UTIL.getConnection(), tableName, 50, splitKeys, families);
     assertEquals(50, UTIL.countRows(tableName));
   }
+
+  @Test
+  public void testOnHDFSFailurePreserveSplits() throws Exception {
+    final TableName tableName = TableName.valueOf(name.getMethodName());
+    testOnHDFSFailure(tableName, true);
+  }
+
+  @Test
+  public void testOnHDFSFailureNoPreserveSplits() throws Exception {
+    final TableName tableName = TableName.valueOf(name.getMethodName());
+    testOnHDFSFailure(tableName, false);
+  }
+
+  public static class TruncateTableProcedureOnHDFSFailure extends TruncateTableProcedure {
+
+    private boolean failOnce = false;
+
+    public TruncateTableProcedureOnHDFSFailure() {
+      // Required by the Procedure framework to create the procedure on replay
+      super();
+    }
+
+    public TruncateTableProcedureOnHDFSFailure(final MasterProcedureEnv env, TableName tableName,
+        boolean preserveSplits) throws HBaseIOException {
+      super(env, tableName, preserveSplits);
+    }
+
+    @Override
+    protected Flow executeFromState(MasterProcedureEnv env,
+        MasterProcedureProtos.TruncateTableState state) throws InterruptedException {
+      if (!failOnce &&
+          state == MasterProcedureProtos.TruncateTableState.TRUNCATE_TABLE_CREATE_FS_LAYOUT) {
+        try {
+          // To emulate an HDFS failure, create only the first region directory
+          RegionInfo regionInfo = getFirstRegionInfo();
+          Configuration conf = env.getMasterConfiguration();
+          MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
+          Path tempdir = mfs.getTempDir();
+          Path tableDir = FSUtils.getTableDir(tempdir, regionInfo.getTable());
+          Path regionDir = FSUtils.getRegionDir(tableDir, regionInfo);
+          FileSystem fs = FileSystem.get(conf);
+          fs.mkdirs(regionDir);
+
+          failOnce = true;
+          return Flow.HAS_MORE_STATE;
+        } catch (IOException e) {
+          fail("failed to create a region directory: " + e);
+        }
+      }
+      return super.executeFromState(env, state);
+    }
+  }
+
+  private void testOnHDFSFailure(TableName tableName, boolean preserveSplits) throws Exception {
+    String[] families = new String[] { "f1", "f2" };
+    byte[][] splitKeys = new byte[][] {
+      Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
+    };
+
+    // create a table
+    MasterProcedureTestingUtility.createTable(
+      getMasterProcedureExecutor(), tableName, splitKeys, families);
+
+    // load and verify that there are rows in the table
+    MasterProcedureTestingUtility.loadData(
+      UTIL.getConnection(), tableName, 100, splitKeys, families);
+    assertEquals(100, UTIL.countRows(tableName));
+
+    // disable the table
+    UTIL.getAdmin().disableTable(tableName);
+
+    // truncate the table
+    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
+    long procId = ProcedureTestingUtility.submitAndWait(procExec,
+      new TruncateTableProcedureOnHDFSFailure(procExec.getEnvironment(), tableName,
+        preserveSplits));
+    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
+  }
 }
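
The test drives exactly the stuck-retry scenario end to end:
TruncateTableProcedureOnHDFSFailure fails once in
TRUNCATE_TABLE_CREATE_FS_LAYOUT by pre-creating just the first region
directory under the master's temp dir and returning HAS_MORE_STATE without
advancing, so the executor re-runs the same state. With the new
deleteFromFs cleanup the retry succeeds and assertProcNotFailed passes;
before the fix, the leftover directory made every retry fail, which is the
loop described in the issue title.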