From 289d588af32c8aa94758d2e4b299eb40a2ecc6ac Mon Sep 17 00:00:00 2001
From: Toshihiro Suzuki
Date: Fri, 25 May 2018 09:37:19 +0900
Subject: [PATCH] HBASE-20647 Backport HBASE-20616 "TruncateTableProcedure is
 stuck in retry loop in TRUNCATE_TABLE_CREATE_FS_LAYOUT state" to branch-1

Signed-off-by: tedyu
---
 .../procedure/TruncateTableProcedure.java     |  35 +++---
 .../procedure/TestTruncateTableProcedure.java | 101 ++++++++++++++++--
 2 files changed, 117 insertions(+), 19 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
index 120ae4fde55..46894266a54 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java
@@ -18,29 +18,29 @@
 
 package org.apache.hadoop.hbase.master.procedure;
 
-import java.io.InputStream;
+import com.google.common.annotations.VisibleForTesting;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.OutputStream;
 import java.security.PrivilegedExceptionAction;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotDisabledException;
 import org.apache.hadoop.hbase.TableNotFoundException;
-import org.apache.hadoop.hbase.HRegionInfo;
-import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.hbase.exceptions.HBaseException;
 import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
+import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
+import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.TruncateTableState;
-import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
-import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
 import org.apache.hadoop.hbase.util.ModifyRegionUtils;
 import org.apache.hadoop.security.UserGroupInformation;
 
@@ -103,16 +103,15 @@ public class TruncateTableProcedure
         case TRUNCATE_TABLE_CLEAR_FS_LAYOUT:
           DeleteTableProcedure.deleteFromFs(env, getTableName(), regions, true);
           setNextState(TruncateTableState.TRUNCATE_TABLE_CREATE_FS_LAYOUT);
-          break;
-        case TRUNCATE_TABLE_CREATE_FS_LAYOUT:
           if (!preserveSplits) {
             // if we are not preserving splits, generate a new single region
-            //recreateRegionInfo in TRUNCATE_TABLE_CREATE_FS_LAYOUT phase, since if create fs layout fails
-            //we need to refresh the region encoded name to prevent dir name conflict
            regions = Arrays.asList(ModifyRegionUtils.createHRegionInfos(hTableDescriptor, null));
           } else {
             regions = recreateRegionInfo(regions);
           }
+          break;
+        case TRUNCATE_TABLE_CREATE_FS_LAYOUT:
+          DeleteTableProcedure.deleteFromFs(env, getTableName(), regions, true);
           regions = CreateTableProcedure.createFsLayout(env, hTableDescriptor, regions);
           CreateTableProcedure.updateTableDescCache(env, getTableName());
           setNextState(TruncateTableState.TRUNCATE_TABLE_ADD_TO_META);
@@ -185,7 +184,9 @@ public class TruncateTableProcedure
 
   @Override
   protected boolean acquireLock(final MasterProcedureEnv env) {
-    if (env.waitInitialized(this)) return false;
+    if (env.waitInitialized(this)) {
+      return false;
+    }
     return env.getProcedureQueue().tryAcquireTableExclusiveLock(this, getTableName());
   }
 
@@ -297,4 +298,12 @@ public class TruncateTableProcedure
       });
     }
   }
-}
\ No newline at end of file
+
+  @VisibleForTesting
+  HRegionInfo getFirstRegionInfo() {
+    if (regions == null || regions.isEmpty()) {
+      return null;
+    }
+    return regions.get(0);
+  }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java
index 47460afa453..6bc0256ad54 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestTruncateTableProcedure.java
@@ -18,21 +18,33 @@
 
 package org.apache.hadoop.hbase.master.procedure;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.io.IOException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseIOException;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
-import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.ProcedureInfo;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotDisabledException;
 import org.apache.hadoop.hbase.TableNotFoundException;
+import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
 import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
+import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
 import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.TruncateTableState;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
@@ -40,11 +52,6 @@ import org.junit.BeforeClass;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.IOException;
-
 @Category(MediumTests.class)
 public class TestTruncateTableProcedure {
   private static final Log LOG = LogFactory.getLog(TestTruncateTableProcedure.class);
@@ -289,4 +296,86 @@ public class TestTruncateTableProcedure {
 
     UTIL.waitUntilAllRegionsAssigned(tableName);
   }
+
+  @Test
+  public void testOnHDFSFailurePreserveSplits() throws Exception {
+    final TableName tableName = TableName.valueOf("testOnHDFSFailurePreserveSplits");
+    testOnHDFSFailure(tableName, true);
+  }
+
+  @Test
+  public void testOnHDFSFailureNoPreserveSplits() throws Exception {
+    final TableName tableName = TableName.valueOf("testOnHDFSFailureNoPreserveSplits");
+    testOnHDFSFailure(tableName, false);
+  }
+
+  public static class TruncateTableProcedureOnHDFSFailure extends TruncateTableProcedure {
+
+    private boolean failOnce = false;
+
+    public TruncateTableProcedureOnHDFSFailure() {
+      // Required by the Procedure framework to create the procedure on replay
+      super();
+    }
+
+    public TruncateTableProcedureOnHDFSFailure(final MasterProcedureEnv env, TableName tableName,
+        boolean preserveSplits)
+        throws HBaseIOException {
+      super(env, tableName, preserveSplits);
+    }
+
+    @Override
+    protected Flow executeFromState(MasterProcedureEnv env,
+        MasterProcedureProtos.TruncateTableState state) throws InterruptedException {
+
+      if (!failOnce &&
+          state == MasterProcedureProtos.TruncateTableState.TRUNCATE_TABLE_CREATE_FS_LAYOUT) {
+        try {
+          // To emulate an HDFS failure, create only the first region directory
+          HRegionInfo regionInfo = getFirstRegionInfo();
+          Configuration conf = env.getMasterConfiguration();
+          MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
+          Path tempdir = mfs.getTempDir();
+          Path tableDir = FSUtils.getTableDir(tempdir, regionInfo.getTable());
+          Path regionDir = new Path(tableDir,
+            ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo).getEncodedName());
+          FileSystem fs = FileSystem.get(conf);
+          fs.mkdirs(regionDir);
+
+          failOnce = true;
+          return Flow.HAS_MORE_STATE;
+        } catch (IOException e) {
+          fail("failed to create a region directory: " + e);
+        }
+      }
+
+      return super.executeFromState(env, state);
+    }
+  }
+
+  private void testOnHDFSFailure(TableName tableName, boolean preserveSplits) throws Exception {
+    String[] families = new String[] { "f1", "f2" };
+    byte[][] splitKeys = new byte[][] {
+      Bytes.toBytes("a"), Bytes.toBytes("b"), Bytes.toBytes("c")
+    };
+
+    // create a table
+    MasterProcedureTestingUtility.createTable(
+      getMasterProcedureExecutor(), tableName, splitKeys, families);
+
+    // load and verify that there are rows in the table
+    MasterProcedureTestingUtility.loadData(
+      UTIL.getConnection(), tableName, 100, splitKeys, families);
+    assertEquals(100, UTIL.countRows(tableName));
+
+    // disable the table
+    UTIL.getHBaseAdmin().disableTable(tableName);
+
+    // truncate the table
+    final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
+    long procId = ProcedureTestingUtility.submitAndWait(procExec,
+      new TruncateTableProcedureOnHDFSFailure(procExec.getEnvironment(), tableName,
+        preserveSplits));
+    ProcedureTestingUtility.assertProcNotFailed(procExec, procId);
+  }
 }