HBASE-24833: Bootstrap should not delete the META table directory if … (#2237)
Signed-off-by: Duo Zhang <zhangduo@apache.org> Signed-off-by: Zach York <zyork@apache.org>
This commit is contained in:
parent
10584d70d2
commit
3ce9052188
|
@ -981,6 +981,9 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
// wait meta to be initialized after we start procedure executor
|
// wait meta to be initialized after we start procedure executor
|
||||||
if (initMetaProc != null) {
|
if (initMetaProc != null) {
|
||||||
initMetaProc.await();
|
initMetaProc.await();
|
||||||
|
if (initMetaProc.isFailed() && initMetaProc.hasException()) {
|
||||||
|
throw new IOException("Failed to initialize meta table", initMetaProc.getException());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Wake up this server to check in
|
// Wake up this server to check in
|
||||||
sleeper.skipSleepCycle();
|
sleeper.skipSleepCycle();
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -22,16 +22,20 @@ import java.util.Arrays;
|
||||||
import java.util.concurrent.CountDownLatch;
|
import java.util.concurrent.CountDownLatch;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.RemoteIterator;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
||||||
import org.apache.hadoop.hbase.client.TableDescriptor;
|
import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||||
|
import org.apache.hadoop.hbase.io.hfile.HFile;
|
||||||
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
|
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
|
||||||
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
|
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
|
||||||
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
|
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
|
||||||
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
|
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
|
||||||
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
|
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
|
||||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
|
||||||
import org.apache.hadoop.hbase.util.CommonFSUtils;
|
import org.apache.hadoop.hbase.util.CommonFSUtils;
|
||||||
import org.apache.hadoop.hbase.util.FSTableDescriptors;
|
import org.apache.hadoop.hbase.util.FSTableDescriptors;
|
||||||
import org.apache.hadoop.hbase.util.RetryCounter;
|
import org.apache.hadoop.hbase.util.RetryCounter;
|
||||||
|
@ -70,7 +74,7 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMe
|
||||||
LOG.info("BOOTSTRAP: creating hbase:meta region");
|
LOG.info("BOOTSTRAP: creating hbase:meta region");
|
||||||
FileSystem fs = rootDir.getFileSystem(conf);
|
FileSystem fs = rootDir.getFileSystem(conf);
|
||||||
Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.META_TABLE_NAME);
|
Path tableDir = CommonFSUtils.getTableDir(rootDir, TableName.META_TABLE_NAME);
|
||||||
if (fs.exists(tableDir) && !fs.delete(tableDir, true)) {
|
if (fs.exists(tableDir) && !deleteMetaTableDirectoryIfPartial(fs, tableDir)) {
|
||||||
LOG.warn("Can not delete partial created meta table, continue...");
|
LOG.warn("Can not delete partial created meta table, continue...");
|
||||||
}
|
}
|
||||||
// Bootstrapping, make sure blockcache is off. Else, one will be
|
// Bootstrapping, make sure blockcache is off. Else, one will be
|
||||||
|
@ -166,4 +170,35 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure<InitMe
|
||||||
public void await() throws InterruptedException {
|
public void await() throws InterruptedException {
|
||||||
latch.await();
|
latch.await();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean deleteMetaTableDirectoryIfPartial(FileSystem rootDirectoryFs,
|
||||||
|
Path metaTableDir) throws IOException {
|
||||||
|
boolean shouldDelete = true;
|
||||||
|
try {
|
||||||
|
TableDescriptor metaDescriptor =
|
||||||
|
FSTableDescriptors.getTableDescriptorFromFs(rootDirectoryFs, metaTableDir);
|
||||||
|
// when entering the state of INIT_META_WRITE_FS_LAYOUT, if a meta table directory is found,
|
||||||
|
// the meta table should not have any useful data and considers as partial.
|
||||||
|
// if we find any valid HFiles, operator should fix the meta e.g. via HBCK.
|
||||||
|
if (metaDescriptor != null && metaDescriptor.getColumnFamilyCount() > 0) {
|
||||||
|
RemoteIterator<LocatedFileStatus> iterator = rootDirectoryFs.listFiles(metaTableDir, true);
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
LocatedFileStatus status = iterator.next();
|
||||||
|
if (StoreFileInfo.isHFile(status.getPath()) && HFile
|
||||||
|
.isHFileFormat(rootDirectoryFs, status.getPath())) {
|
||||||
|
shouldDelete = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
if (!shouldDelete) {
|
||||||
|
throw new IOException("Meta table is not partial, please sideline this meta directory "
|
||||||
|
+ "or run HBCK to fix this meta table, e.g. rebuild the server hostname in ZNode for the "
|
||||||
|
+ "meta region");
|
||||||
|
}
|
||||||
|
return rootDirectoryFs.delete(metaTableDir, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,245 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.master;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.hbase.Cell;
|
||||||
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
import org.apache.hadoop.hbase.MiniHBaseCluster;
|
||||||
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
|
import org.apache.hadoop.hbase.TableName;
|
||||||
|
import org.apache.hadoop.hbase.client.Get;
|
||||||
|
import org.apache.hadoop.hbase.client.Put;
|
||||||
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.client.Result;
|
||||||
|
import org.apache.hadoop.hbase.client.Table;
|
||||||
|
import org.apache.hadoop.hbase.master.region.MasterRegionFactory;
|
||||||
|
import org.apache.hadoop.hbase.procedure2.store.wal.WALProcedureStore;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.LargeTests;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.hadoop.hbase.util.CommonFSUtils;
|
||||||
|
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
|
||||||
|
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.ClassRule;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
import org.junit.rules.TestName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test reuse storefiles within data directory when cluster failover with a set of new region
|
||||||
|
* servers with different hostnames with or without WALs and Zookeeper ZNodes, the master and
|
||||||
|
* cluster should fail respectively if there is any situation considered as not supported.
|
||||||
|
*/
|
||||||
|
@Category({ LargeTests.class })
|
||||||
|
public class TestRecreateCluster {
|
||||||
|
@ClassRule
|
||||||
|
public static final HBaseClassTestRule CLASS_RULE =
|
||||||
|
HBaseClassTestRule.forClass(TestRecreateCluster.class);
|
||||||
|
|
||||||
|
@Rule
|
||||||
|
public TestName name = new TestName();
|
||||||
|
|
||||||
|
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
||||||
|
private static final int NUM_RS = 3;
|
||||||
|
private static final long TIMEOUT_MS = Duration.ofMinutes(1).toMillis();
|
||||||
|
private static final long MASTER_INIT_TIMEOUT_MS = Duration.ofSeconds(45).toMillis();
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setup() {
|
||||||
|
TEST_UTIL.getConfiguration()
|
||||||
|
.setLong("hbase.master.init.timeout.localHBaseCluster", MASTER_INIT_TIMEOUT_MS);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRecreateCluster_UserTableDisabled_ReuseWALsAndZNodes()
|
||||||
|
throws Exception {
|
||||||
|
validateRecreateClusterWithUserDisabled(false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRecreateCluster_UserTableEnabled_ReuseWALsAndZNodes() throws Exception {
|
||||||
|
validateRecreateClusterWithUserTableEnabled(false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRecreateCluster_UserTableEnabled_CleanupZNodes() throws Exception {
|
||||||
|
// new InitMetaProcedure are not submitted and reused the existing SUCCESS InitMetaProcedure
|
||||||
|
// initMetaProc.await() hangs forever.
|
||||||
|
validateRecreateClusterWithUserTableEnabled(false, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(expected = IOException.class)
|
||||||
|
public void testRecreateCluster_UserTableEnabled_CleanupWALAndZNodes() throws Exception {
|
||||||
|
// master fails with InitMetaProcedure because it cannot delete existing meta table directory,
|
||||||
|
// region server cannot join and time-out the cluster starts.
|
||||||
|
validateRecreateClusterWithUserTableEnabled(true, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void validateRecreateClusterWithUserDisabled(boolean cleanupWALs,
|
||||||
|
boolean cleanUpZNodes) throws Exception {
|
||||||
|
TEST_UTIL.startMiniCluster(NUM_RS);
|
||||||
|
try {
|
||||||
|
TableName tableName = TableName.valueOf("t1");
|
||||||
|
prepareDataBeforeRecreate(TEST_UTIL, tableName);
|
||||||
|
TEST_UTIL.getAdmin().disableTable(tableName);
|
||||||
|
TEST_UTIL.waitTableDisabled(tableName.getName());
|
||||||
|
restartHBaseCluster(cleanupWALs, cleanUpZNodes);
|
||||||
|
TEST_UTIL.getAdmin().enableTable(tableName);
|
||||||
|
validateDataAfterRecreate(TEST_UTIL, tableName);
|
||||||
|
} finally {
|
||||||
|
TEST_UTIL.shutdownMiniCluster();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void validateRecreateClusterWithUserTableEnabled(boolean cleanupWALs,
|
||||||
|
boolean cleanUpZNodes) throws Exception {
|
||||||
|
TEST_UTIL.startMiniCluster(NUM_RS);
|
||||||
|
try {
|
||||||
|
TableName tableName = TableName.valueOf("t1");
|
||||||
|
prepareDataBeforeRecreate(TEST_UTIL, tableName);
|
||||||
|
restartHBaseCluster(cleanupWALs, cleanUpZNodes);
|
||||||
|
validateDataAfterRecreate(TEST_UTIL, tableName);
|
||||||
|
} finally {
|
||||||
|
TEST_UTIL.shutdownMiniCluster();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void restartHBaseCluster(boolean cleanUpWALs, boolean cleanUpZnodes) throws Exception {
|
||||||
|
// flush cache so that everything is on disk
|
||||||
|
TEST_UTIL.getMiniHBaseCluster().flushcache(TableName.META_TABLE_NAME);
|
||||||
|
TEST_UTIL.getMiniHBaseCluster().flushcache();
|
||||||
|
|
||||||
|
List<ServerName> oldServers =
|
||||||
|
TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList();
|
||||||
|
|
||||||
|
// make sure there is no procedures pending
|
||||||
|
TEST_UTIL.waitFor(TIMEOUT_MS, () -> TEST_UTIL.getHBaseCluster().getMaster()
|
||||||
|
.getProcedures().stream().filter(p -> p.isFinished()).findAny().isPresent());
|
||||||
|
|
||||||
|
// shutdown and delete data if needed
|
||||||
|
Path walRootDirPath = TEST_UTIL.getMiniHBaseCluster().getMaster().getWALRootDir();
|
||||||
|
Path rootDirPath = CommonFSUtils.getRootDir(TEST_UTIL.getConfiguration());
|
||||||
|
TEST_UTIL.shutdownMiniHBaseCluster();
|
||||||
|
|
||||||
|
if (cleanUpWALs) {
|
||||||
|
TEST_UTIL.getDFSCluster().getFileSystem()
|
||||||
|
.delete(new Path(rootDirPath, MasterRegionFactory.MASTER_STORE_DIR), true);
|
||||||
|
TEST_UTIL.getDFSCluster().getFileSystem()
|
||||||
|
.delete(new Path(walRootDirPath, MasterRegionFactory.MASTER_STORE_DIR), true);
|
||||||
|
TEST_UTIL.getDFSCluster().getFileSystem()
|
||||||
|
.delete(new Path(walRootDirPath, WALProcedureStore.MASTER_PROCEDURE_LOGDIR), true);
|
||||||
|
|
||||||
|
TEST_UTIL.getDFSCluster().getFileSystem()
|
||||||
|
.delete(new Path(walRootDirPath, HConstants.HREGION_LOGDIR_NAME), true);
|
||||||
|
TEST_UTIL.getDFSCluster().getFileSystem()
|
||||||
|
.delete(new Path(walRootDirPath, HConstants.HREGION_OLDLOGDIR_NAME), true);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cleanUpZnodes) {
|
||||||
|
// delete all zk data
|
||||||
|
// we cannot keep ZK data because it will hold the meta region states as open and
|
||||||
|
// didn't submit a InitMetaProcedure
|
||||||
|
ZKUtil.deleteChildrenRecursively(TEST_UTIL.getZooKeeperWatcher(),
|
||||||
|
TEST_UTIL.getZooKeeperWatcher().getZNodePaths().baseZNode);
|
||||||
|
TEST_UTIL.shutdownMiniZKCluster();
|
||||||
|
TEST_UTIL.startMiniZKCluster();
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_UTIL.restartHBaseCluster(NUM_RS);
|
||||||
|
TEST_UTIL.waitFor(TIMEOUT_MS,
|
||||||
|
() -> TEST_UTIL.getMiniHBaseCluster().getNumLiveRegionServers() == NUM_RS);
|
||||||
|
|
||||||
|
// make sure we have a new set of region servers with different hostnames and ports
|
||||||
|
List<ServerName> newServers =
|
||||||
|
TEST_UTIL.getHBaseCluster().getMaster().getServerManager().getOnlineServersList();
|
||||||
|
assertFalse(newServers.stream().filter(newServer -> oldServers.contains(newServer)).findAny()
|
||||||
|
.isPresent());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void prepareDataBeforeRecreate(
|
||||||
|
HBaseTestingUtility testUtil, TableName tableName) throws Exception {
|
||||||
|
Table table = testUtil.createTable(tableName, "f");
|
||||||
|
Put put = new Put(Bytes.toBytes("r1"));
|
||||||
|
put.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c"), Bytes.toBytes("v"));
|
||||||
|
table.put(put);
|
||||||
|
|
||||||
|
ensureTableNotColocatedWithSystemTable(tableName, TableName.NAMESPACE_TABLE_NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void ensureTableNotColocatedWithSystemTable(TableName userTable, TableName systemTable)
|
||||||
|
throws IOException, InterruptedException {
|
||||||
|
MiniHBaseCluster hbaseCluster = TEST_UTIL.getHBaseCluster();
|
||||||
|
assertTrue("Please start more than 1 regionserver",
|
||||||
|
hbaseCluster.getRegionServerThreads().size() > 1);
|
||||||
|
|
||||||
|
int userTableServerNum = getServerNumForTableWithOnlyOneRegion(userTable);
|
||||||
|
int systemTableServerNum = getServerNumForTableWithOnlyOneRegion(systemTable);
|
||||||
|
|
||||||
|
if (userTableServerNum != systemTableServerNum) {
|
||||||
|
// no-ops if user table and system are already on a different host
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int destServerNum = (systemTableServerNum + 1) % NUM_RS;
|
||||||
|
assertTrue(systemTableServerNum != destServerNum);
|
||||||
|
|
||||||
|
HRegionServer systemTableServer = hbaseCluster.getRegionServer(systemTableServerNum);
|
||||||
|
HRegionServer destServer = hbaseCluster.getRegionServer(destServerNum);
|
||||||
|
assertTrue(!systemTableServer.equals(destServer));
|
||||||
|
// make sure the dest server is live before moving region
|
||||||
|
hbaseCluster.waitForRegionServerToStart(destServer.getServerName().getHostname(),
|
||||||
|
destServer.getServerName().getPort(), TIMEOUT_MS);
|
||||||
|
// move region of userTable to a different regionserver not co-located with system table
|
||||||
|
TEST_UTIL.moveRegionAndWait(TEST_UTIL.getAdmin().getRegions(userTable).get(0),
|
||||||
|
destServer.getServerName());
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getServerNumForTableWithOnlyOneRegion(TableName tableName) throws IOException {
|
||||||
|
List<RegionInfo> tableRegionInfos = TEST_UTIL.getAdmin().getRegions(tableName);
|
||||||
|
assertEquals(1, tableRegionInfos.size());
|
||||||
|
return TEST_UTIL.getHBaseCluster()
|
||||||
|
.getServerWith(tableRegionInfos.get(0).getRegionName());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void validateDataAfterRecreate(
|
||||||
|
HBaseTestingUtility testUtil, TableName tableName) throws Exception {
|
||||||
|
Table t1 = testUtil.getConnection().getTable(tableName);
|
||||||
|
Get get = new Get(Bytes.toBytes("r1"));
|
||||||
|
get.addColumn(Bytes.toBytes("f"), Bytes.toBytes("c"));
|
||||||
|
Result result = t1.get(get);
|
||||||
|
assertTrue(result.advance());
|
||||||
|
Cell cell = result.current();
|
||||||
|
assertEquals("v", Bytes.toString(cell.getValueArray(),
|
||||||
|
cell.getValueOffset(), cell.getValueLength()));
|
||||||
|
assertFalse(result.advance());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue