From 3ce90521880feefa65d67c4a2010314bf52c0ecf Mon Sep 17 00:00:00 2001 From: "Tak Lon (Stephen) Wu" Date: Tue, 12 Oct 2021 10:56:00 -0700 Subject: [PATCH] =?UTF-8?q?HBASE-24833:=20Bootstrap=20should=20not=20delet?= =?UTF-8?q?e=20the=20META=20table=20directory=20if=20=E2=80=A6=20(#2237)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Duo Zhang Signed-off-by: Zach York --- .../apache/hadoop/hbase/master/HMaster.java | 3 + .../master/procedure/InitMetaProcedure.java | 39 ++- .../hbase/master/TestRecreateCluster.java | 245 ++++++++++++++++++ 3 files changed, 285 insertions(+), 2 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecreateCluster.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index cbce761eca1..5c8a194e217 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -981,6 +981,9 @@ public class HMaster extends HRegionServer implements MasterServices { // wait meta to be initialized after we start procedure executor if (initMetaProc != null) { initMetaProc.await(); + if (initMetaProc.isFailed() && initMetaProc.hasException()) { + throw new IOException("Failed to initialize meta table", initMetaProc.getException()); + } } // Wake up this server to check in sleeper.skipSleepCycle(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java index fe33556909f..0cc3b78c7df 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -22,16 +22,20 @@ import java.util.Arrays; import java.util.concurrent.CountDownLatch; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure; import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; import org.apache.hadoop.hbase.procedure2.ProcedureUtil; import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.StoreFileInfo; import org.apache.hadoop.hbase.util.CommonFSUtils; import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.RetryCounter; @@ -70,7 +74,7 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure 0) { + RemoteIterator iterator = rootDirectoryFs.listFiles(metaTableDir, true); + while (iterator.hasNext()) { + LocatedFileStatus status = iterator.next(); + if (StoreFileInfo.isHFile(status.getPath()) && HFile + .isHFileFormat(rootDirectoryFs, status.getPath())) { + shouldDelete = false; + break; + } + } + } + } finally { + if (!shouldDelete) { + throw new IOException("Meta table is not partial, please sideline this meta directory " + + "or run HBCK to fix this meta table, e.g. rebuild the server hostname in ZNode for the " + + "meta region"); + } + return rootDirectoryFs.delete(metaTableDir, true); + } + + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecreateCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecreateCluster.java new file mode 100644 index 00000000000..d5ae3860602 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRecreateCluster.java @@ -0,0 +1,245 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.time.Duration; +import java.util.List; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.MiniHBaseCluster; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.master.region.MasterRegionFactory; +import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.apache.hadoop.hbase.zookeeper.ZKUtil; + +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; + +/** + * Test reuse storefiles within data directory when cluster failover with a set of new region + * servers with different hostnames with or without WALs and Zookeeper ZNodes, the master and + * cluster should fail respectively if there is any situation considered as not supported. + */ +@Category({ LargeTests.class }) +public class TestRecreateCluster { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRecreateCluster.class); + + @Rule + public TestName name = new TestName(); + + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final int NUM_RS = 3; + private static final long TIMEOUT_MS = Duration.ofMinutes(1).toMillis(); + private static final long MASTER_INIT_TIMEOUT_MS = Duration.ofSeconds(45).toMillis(); + + @Before + public void setup() { + TEST_UTIL.getConfiguration() + .setLong("hbase.master.init.timeout.localHBaseCluster", MASTER_INIT_TIMEOUT_MS); + } + + @Test + public void testRecreateCluster_UserTableDisabled_ReuseWALsAndZNodes() + throws Exception { + validateRecreateClusterWithUserDisabled(false, false); + } + + @Test + public void testRecreateCluster_UserTableEnabled_ReuseWALsAndZNodes() throws Exception { + validateRecreateClusterWithUserTableEnabled(false, false); + } + + @Test + public void testRecreateCluster_UserTableEnabled_CleanupZNodes() throws Exception { + // new InitMetaProcedure are not submitted and reused the existing SUCCESS InitMetaProcedure + // initMetaProc.await() hangs forever. + validateRecreateClusterWithUserTableEnabled(false, true); + } + + @Test(expected = IOException.class) + public void testRecreateCluster_UserTableEnabled_CleanupWALAndZNodes() throws Exception { + // master fails with InitMetaProcedure because it cannot delete existing meta table directory, + // region server cannot join and time-out the cluster starts. + validateRecreateClusterWithUserTableEnabled(true, true); + } + + private void validateRecreateClusterWithUserDisabled(boolean cleanupWALs, + boolean cleanUpZNodes) throws Exception { + TEST_UTIL.startMiniCluster(NUM_RS); + try { + TableName tableName = TableName.valueOf("t1"); + prepareDataBeforeRecreate(TEST_UTIL, tableName); + TEST_UTIL.getAdmin().disableTable(tableName); + TEST_UTIL.waitTableDisabled(tableName.getName()); + restartHBaseCluster(cleanupWALs, cleanUpZNodes); + TEST_UTIL.getAdmin().enableTable(tableName); + validateDataAfterRecreate(TEST_UTIL, tableName); + } finally { + TEST_UTIL.shutdownMiniCluster(); + } + } + + private void validateRecreateClusterWithUserTableEnabled(boolean cleanupWALs, + boolean cleanUpZNodes) throws Exception { + TEST_UTIL.startMiniCluster(NUM_RS); + try { + TableName tableName = TableName.valueOf("t1"); + prepareDataBeforeRecreate(TEST_UTIL, tableName); + restartHBaseCluster(cleanupWALs, cleanUpZNodes); + validateDataAfterRecreate(TEST_UTIL, tableName); + } finally { + TEST_UTIL.shutdownMiniCluster(); + } + } + + private void restartHBaseCluster(boolean cleanUpWALs, boolean cleanUpZnodes) throws Exception { + // flush cache so that everything is on disk + TEST_UTIL.getMiniHBaseCluster().flushcache(TableName.META_TABLE_NAME); + TEST_UTIL.getMiniHBaseCluster().flushcache(); + + List oldServers = + TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList(); + + // make sure there is no procedures pending + TEST_UTIL.waitFor(TIMEOUT_MS, () -> TEST_UTIL.getHBaseCluster().getMaster() + .getProcedures().stream().filter(p -> p.isFinished()).findAny().isPresent()); + + // shutdown and delete data if needed + Path walRootDirPath = TEST_UTIL.getMiniHBaseCluster().getMaster().getWALRootDir(); + Path rootDirPath = CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration()); + TEST_UTIL.shutdownMiniHBaseCluster(); + + if (cleanUpWALs) { + TEST_UTIL.getDFSCluster().getFileSystem() + .delete(new Path(rootDirPath, MasterRegionFactory.MASTER_STORE_DIR), true); + TEST_UTIL.getDFSCluster().getFileSystem() + .delete(new Path(walRootDirPath, MasterRegionFactory.MASTER_STORE_DIR), true); + TEST_UTIL.getDFSCluster().getFileSystem() + .delete(new Path(walRootDirPath, WALProcedureStore.MASTER_PROCEDURE_LOGDIR), true); + + TEST_UTIL.getDFSCluster().getFileSystem() + .delete(new Path(walRootDirPath, HConstants.HREGION_LOGDIR_NAME), true); + TEST_UTIL.getDFSCluster().getFileSystem() + .delete(new Path(walRootDirPath, HConstants.HREGION_OLDLOGDIR_NAME), true); + } + + if (cleanUpZnodes) { + // delete all zk data + // we cannot keep ZK data because it will hold the meta region states as open and + // didn't submit a InitMetaProcedure + ZKUtil.deleteChildrenRecursively(TEST_UTIL.getZooKeeperWatcher(), + TEST_UTIL.getZooKeeperWatcher().getZNodePaths().baseZNode); + TEST_UTIL.shutdownMiniZKCluster(); + TEST_UTIL.startMiniZKCluster(); + } + + TEST_UTIL.restartHBaseCluster(NUM_RS); + TEST_UTIL.waitFor(TIMEOUT_MS, + () -> TEST_UTIL.getMiniHBaseCluster().getNumLiveRegionServers() == NUM_RS); + + // make sure we have a new set of region servers with different hostnames and ports + List newServers = + TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList(); + assertFalse(newServers.stream().filter(newServer -> oldServers.contains(newServer)).findAny() + .isPresent()); + } + + private void prepareDataBeforeRecreate( + HBaseTestingUtility testUtil, TableName tableName) throws Exception { + Table table = testUtil.createTable(tableName, "f"); + Put put = new Put(Bytes.toBytes("r1")); + put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c"), Bytes.toBytes("v")); + table.put(put); + + ensureTableNotColocatedWithSystemTable(tableName, TableName.NAMESPACE_TABLE_NAME); + } + + private void ensureTableNotColocatedWithSystemTable(TableName userTable, TableName systemTable) + throws IOException, InterruptedException { + MiniHBaseCluster hbaseCluster = TEST_UTIL.getHBaseCluster(); + assertTrue("Please start more than 1 regionserver", + hbaseCluster.getRegionServerThreads().size() > 1); + + int userTableServerNum = getServerNumForTableWithOnlyOneRegion(userTable); + int systemTableServerNum = getServerNumForTableWithOnlyOneRegion(systemTable); + + if (userTableServerNum != systemTableServerNum) { + // no-ops if user table and system are already on a different host + return; + } + + int destServerNum = (systemTableServerNum + 1) % NUM_RS; + assertTrue(systemTableServerNum != destServerNum); + + HRegionServer systemTableServer = hbaseCluster.getRegionServer(systemTableServerNum); + HRegionServer destServer = hbaseCluster.getRegionServer(destServerNum); + assertTrue(!systemTableServer.equals(destServer)); + // make sure the dest server is live before moving region + hbaseCluster.waitForRegionServerToStart(destServer.getServerName().getHostname(), + destServer.getServerName().getPort(), TIMEOUT_MS); + // move region of userTable to a different regionserver not co-located with system table + TEST_UTIL.moveRegionAndWait(TEST_UTIL.getAdmin().getRegions(userTable).get(0), + destServer.getServerName()); + } + + private int getServerNumForTableWithOnlyOneRegion(TableName tableName) throws IOException { + List tableRegionInfos = TEST_UTIL.getAdmin().getRegions(tableName); + assertEquals(1, tableRegionInfos.size()); + return TEST_UTIL.getHBaseCluster() + .getServerWith(tableRegionInfos.get(0).getRegionName()); + } + + private void validateDataAfterRecreate( + HBaseTestingUtility testUtil, TableName tableName) throws Exception { + Table t1 = testUtil.getConnection().getTable(tableName); + Get get = new Get(Bytes.toBytes("r1")); + get.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c")); + Result result = t1.get(get); + assertTrue(result.advance()); + Cell cell = result.current(); + assertEquals("v", Bytes.toString(cell.getValueArray(), + cell.getValueOffset(), cell.getValueLength())); + assertFalse(result.advance()); + } + +}