HBASE-20647 Backport HBASE-20616 "TruncateTableProcedure is stuck in retry loop in TRUNCATE_TABLE_CREATE_FS_LAYOUT state" to branch-1

Signed-off-by: tedyu <yuzhihong@gmail.com>
Toshihiro Suzuki 2018-05-25 09:37:19 +09:00 committed by tedyu
parent 87d4e4e4e2
commit 289d588af3
2 changed files with 117 additions and 19 deletions

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java

@@ -18,29 +18,29 @@
 package org.apache.hadoop.hbase.master.procedure;
-import java.io.InputStream;
+import com.google.common.annotations.VisibleForTesting;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.OutputStream;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotDisabledException;
 import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.exceptions.HBaseException;
 import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
+import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.TruncateTableState;
-import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
 import org.apache.hadoop.hbase.util.ModifyRegionUtils;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -103,16 +103,15 @@ public class TruncateTableProcedure
       case TRUNCATE_TABLE_CLEAR_FS_LAYOUT:
         DeleteTableProcedure.deleteFromFs(env, getTableName(), regions, true);
         setNextState(TruncateTableState.TRUNCATE_TABLE_CREATE_FS_LAYOUT);
+        break;
+      case TRUNCATE_TABLE_CREATE_FS_LAYOUT:
         if (!preserveSplits) {
           // if we are not preserving splits, generate a new single region
+          //recreateRegionInfo in TRUNCATE_TABLE_CREATE_FS_LAYOUT phase, since if create fs layout fails
+          //we need to refresh the region encoded name to prevent dir name conflict
           regions = Arrays.asList(ModifyRegionUtils.createHRegionInfos(hTableDescriptor, null));
         } else {
           regions = recreateRegionInfo(regions);
         }
-        break;
-      case TRUNCATE_TABLE_CREATE_FS_LAYOUT:
+        DeleteTableProcedure.deleteFromFs(env, getTableName(), regions, true);
         regions = CreateTableProcedure.createFsLayout(env, hTableDescriptor, regions);
         CreateTableProcedure.updateTableDescCache(env, getTableName());
         setNextState(TruncateTableState.TRUNCATE_TABLE_ADD_TO_META);
@@ -185,7 +184,9 @@ public class TruncateTableProcedure
   @Override
   protected boolean acquireLock(final MasterProcedureEnv env) {
-    if (env.waitInitialized(this)) return false;
+    if (env.waitInitialized(this)) {
+      return false;
+    }
     return env.getProcedureQueue().tryAcquireTableExclusiveLock(this, getTableName());
   }
@@ -297,4 +298,12 @@ public class TruncateTableProcedure
       });
     }
   }
 }
+
+  @VisibleForTesting
+  HRegionInfo getFirstRegionInfo() {
+    if (regions == null || regions.isEmpty()) {
+      return null;
+    }
+    return regions.get(0);
+  }
 }
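For readers outside the HBase codebase, the shape of the bug and of the fix can be shown with a small standalone sketch, using plain java.nio rather than the HBase types above. If a state that creates directories is retried under the same fixed name, the partially created directory left by the failed attempt makes every retry fail the same way; cleaning up leftovers and regenerating the name inside the retried state, which is what the extra deleteFromFs call and the relocated region-info recreation do, lets the retry make progress. All names below (RetryLayoutSketch, createLayout) are invented for the illustration:

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.UUID;

public class RetryLayoutSketch {

  // Stand-in for the "create fs layout" step: fails midway on demand,
  // leaving a partially created directory behind.
  static void createLayout(Path regionDir, boolean failMidway) throws IOException {
    Files.createDirectory(regionDir); // throws FileAlreadyExistsException on a dirty retry
    if (failMidway) {
      throw new IOException("simulated HDFS failure after partial create");
    }
    Files.createFile(regionDir.resolve(".regioninfo"));
  }

  public static void main(String[] args) throws IOException {
    Path root = Files.createTempDirectory("truncate-sketch");

    // Before the fix: the retried state reuses the same encoded name, so the
    // leftover directory makes every attempt fail identically -> retry loop.
    Path sameName = root.resolve("d1f1e2bdad8bf35c");
    try { createLayout(sameName, true); } catch (IOException expected) { }
    try {
      createLayout(sameName, false);
    } catch (FileAlreadyExistsException e) {
      System.out.println("retry with the same name keeps failing: " + e);
    }

    // After the fix: the retried state first cleans up leftovers (the second
    // deleteFromFs call) and regenerates the region name (recreateRegionInfo /
    // createHRegionInfos), so the retry succeeds.
    deleteRecursively(sameName);
    Path freshName = root.resolve(UUID.randomUUID().toString().replace("-", ""));
    createLayout(freshName, false);
    System.out.println("retry with a fresh name succeeds: " + freshName);
  }

  static void deleteRecursively(Path p) throws IOException {
    if (Files.isDirectory(p)) {
      try (DirectoryStream<Path> children = Files.newDirectoryStream(p)) {
        for (Path child : children) {
          deleteRecursively(child);
        }
      }
    }
    Files.deleteIfExists(p);
  }
}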

hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java

@@ -18,21 +18,33 @@
 package org.apache.hadoop.hbase.master.procedure;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import java.io.IOException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseIOException;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.ProcedureInfo;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotDisabledException;
 import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.TruncateTableState;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -40,11 +52,6 @@ import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import java.io.IOException;
 @Category(MediumTests.class)
 public class TestTruncateTableProcedure {
   private static final Log LOG = LogFactory.getLog(TestTruncateTableProcedure.class);
@@ -289,4 +296,86 @@ public class TestTruncateTableProcedure {
     UTIL.waitUntilAllRegionsAssigned(tableName);
   }
+
+  @Test
+  public void testOnHDFSFailurePreserveSplits() throws Exception {
+    final TableName tableName = TableName.valueOf("testOnHDFSFailurePreserveSplits");
+    testOnHDFSFailure(tableName, true);
+  }
+
+  @Test
+  public void testOnHDFSFailureNoPreserveSplits() throws Exception {
+    final TableName tableName = TableName.valueOf("testOnHDFSFailureNoPreserveSplits");
+    testOnHDFSFailure(tableName, false);
+  }
+
+  public static class TruncateTableProcedureOnHDFSFailure extends TruncateTableProcedure {
+
+    private boolean failOnce = false;
+
+    public TruncateTableProcedureOnHDFSFailure() {
+      // Required by the Procedure framework to create the procedure on replay
+      super();
+    }
+
+    public TruncateTableProcedureOnHDFSFailure(final MasterProcedureEnv env, TableName tableName,
+      boolean preserveSplits) throws HBaseIOException {
+      super(env, tableName, preserveSplits);
+    }
+
+    @Override
+    protected Flow executeFromState(MasterProcedureEnv env,
+      MasterProcedureProtos.TruncateTableState state) throws InterruptedException {
+
+      if (!failOnce &&
+        state == MasterProcedureProtos.TruncateTableState.TRUNCATE_TABLE_CREATE_FS_LAYOUT) {
+        try {
+          // To emulate an HDFS failure, create only the first region directory
+          HRegionInfo regionInfo = getFirstRegionInfo();
+          Configuration conf = env.getMasterConfiguration();
+          MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
+          Path tempdir = mfs.getTempDir();
+          Path tableDir = FSUtils.getTableDir(tempdir, regionInfo.getTable());
+          Path regionDir = new Path(tableDir,
+            ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo).getEncodedName());
+          FileSystem fs = FileSystem.get(conf);
+          fs.mkdirs(regionDir);
+
+          failOnce = true;
+          return Flow.HAS_MORE_STATE;
+        } catch (IOException e) {
+          fail("failed to create a region directory: " + e);
+        }
+      }
+
+      return super.executeFromState(env, state);
+    }
+  }
+
+  private void testOnHDFSFailure(TableName tableName, boolean preserveSplits) throws Exception {
+    String[] families = new String[] { "f1", "f2" };
+    byte[][] splitKeys = new byte[][] {
+      Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
+    };
+
+    // create a table
+    MasterProcedureTestingUtility.createTable(
+      getMasterProcedureExecutor(), tableName, splitKeys, families);
+
+    // load and verify that there are rows in the table
+    MasterProcedureTestingUtility.loadData(
+      UTIL.getConnection(), tableName, 100, splitKeys, families);
+    assertEquals(100, UTIL.countRows(tableName));
+
+    // disable the table
+    UTIL.getHBaseAdmin().disableTable(tableName);
+
+    // truncate the table
+    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
+    long procId = ProcedureTestingUtility.submitAndWait(procExec,
+      new TruncateTableProcedureOnHDFSFailure(procExec.getEnvironment(), tableName,
+        preserveSplits));
+    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
+  }
 }
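The fault-injection pattern the test uses — subclass the unit under test, override the state handler to fail exactly once in the targeted state, then fall through to the real implementation — is reusable beyond HBase. A minimal self-contained rendering of the idiom, with all class names invented for this sketch:

public class FaultInjectionSketch {

  // The unit under test: a state handler that normally just succeeds.
  static class Machine {
    String executeFromState(String state) {
      return "done:" + state;
    }
  }

  // Test double: fails exactly once in the targeted state, then defers to the
  // real implementation, mirroring TruncateTableProcedureOnHDFSFailure above.
  static class FlakyMachine extends Machine {
    private boolean failedOnce = false;

    @Override
    String executeFromState(String state) {
      if (!failedOnce && "CREATE_FS_LAYOUT".equals(state)) {
        failedOnce = true; // arm the failure for the first attempt only
        throw new IllegalStateException("injected failure");
      }
      return super.executeFromState(state);
    }
  }

  public static void main(String[] args) {
    Machine m = new FlakyMachine();
    try {
      m.executeFromState("CREATE_FS_LAYOUT");
    } catch (IllegalStateException expected) {
      System.out.println("first attempt fails: " + expected.getMessage());
    }
    // The executor's retry (simulated here by a second call) now succeeds;
    // with the patch applied, the real procedure likewise completes instead
    // of looping in TRUNCATE_TABLE_CREATE_FS_LAYOUT.
    System.out.println("retry: " + m.executeFromState("CREATE_FS_LAYOUT"));
  }
}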