From 36a6ef9cf94a8cfbcfe89c7c73e158ee14b813b2 Mon Sep 17 00:00:00 2001 From: Duo Zhang Date: Sat, 30 May 2020 16:03:52 +0800 Subject: [PATCH] HBASE-24471 The way we bootstrap meta table is confusing (#1806) Signed-off-by: Michael Stack --- .../server/master/MasterProcedure.proto | 5 +- .../hadoop/hbase/master/MasterFileSystem.java | 109 +++++------------- .../master/procedure/InitMetaProcedure.java | 89 +++++++++----- .../hbase/regionserver/HRegionServer.java | 9 +- .../hadoop/hbase/util/FSTableDescriptors.java | 1 + 5 files changed, 97 insertions(+), 116 deletions(-) diff --git a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto index 5171e66ea39..b51b5297bec 100644 --- a/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto +++ b/hbase-protocol-shaded/src/main/protobuf/server/master/MasterProcedure.proto @@ -489,8 +489,9 @@ message ReopenTableRegionsStateData { } enum InitMetaState { - INIT_META_ASSIGN_META = 1; - INIT_META_CREATE_NAMESPACES = 2; + INIT_META_WRITE_FS_LAYOUT = 1; + INIT_META_ASSIGN_META = 2; + INIT_META_CREATE_NAMESPACES = 3; } message InitMetaStateData { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java index b9dddaba24e..0bbfa4a180e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java @@ -18,34 +18,27 @@ */ package org.apache.hadoop.hbase.master; +import java.io.FileNotFoundException; import java.io.IOException; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hbase.ClusterId; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.backup.HFileArchiver; -import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; -import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; import org.apache.hadoop.hbase.client.RegionInfo; -import org.apache.hadoop.hbase.client.RegionInfoBuilder; -import org.apache.hadoop.hbase.client.TableDescriptor; -import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.log.HBaseMarkers; import org.apache.hadoop.hbase.mob.MobConstants; -import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.replication.ReplicationUtils; import org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclHelper; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.CommonFSUtils; -import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.FSUtils; -import org.apache.hadoop.ipc.RemoteException; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -244,15 +237,26 @@ public class MasterFileSystem { * @return hbase.rootdir (after checks for existence and bootstrapping if needed populating the * directory with necessary bootup files). */ - private Path checkRootDir(final Path rd, final Configuration c, final FileSystem fs) - throws IOException { + private void checkRootDir(final Path rd, final Configuration c, final FileSystem fs) + throws IOException { + int threadWakeFrequency = c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000); // If FS is in safe mode wait till out of it. - FSUtils.waitOnSafeMode(c, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000)); + FSUtils.waitOnSafeMode(c, threadWakeFrequency); // Filesystem is good. Go ahead and check for hbase.rootdir. + FileStatus status; try { - if (!fs.exists(rd)) { - fs.mkdirs(rd); + status = fs.getFileStatus(rd); + } catch (FileNotFoundException e) { + status = null; + } + int versionFileWriteAttempts = c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS, + HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS); + try { + if (status == null) { + if (!fs.mkdirs(rd)) { + throw new IOException("Can not create configured '" + HConstants.HBASE_DIR + "' " + rd); + } // DFS leaves safe mode with 0 DNs when there are 0 blocks. // We used to handle this by checking the current DN count and waiting until // it is nonzero. With security, the check for datanode count doesn't work -- @@ -260,47 +264,29 @@ public class MasterFileSystem { // and simply retry file creation during bootstrap indefinitely. As soon as // there is one datanode it will succeed. Permission problems should have // already been caught by mkdirs above. - FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, - 10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS, - HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS)); + FSUtils.setVersion(fs, rd, threadWakeFrequency, versionFileWriteAttempts); } else { - if (!fs.isDirectory(rd)) { - throw new IllegalArgumentException(rd.toString() + " is not a directory"); + if (!status.isDirectory()) { + throw new IllegalArgumentException( + "Configured '" + HConstants.HBASE_DIR + "' " + rd + " is not a directory."); } // as above - FSUtils.checkVersion(fs, rd, true, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, - 10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS, - HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS)); + FSUtils.checkVersion(fs, rd, true, threadWakeFrequency, versionFileWriteAttempts); } } catch (DeserializationException de) { - LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for " - + HConstants.HBASE_DIR, de); + LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for '{}' {}", + HConstants.HBASE_DIR, rd, de); throw new IOException(de); } catch (IllegalArgumentException iae) { - LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for " - + HConstants.HBASE_DIR + " " + rd.toString(), iae); + LOG.error(HBaseMarkers.FATAL, "Please fix invalid configuration for '{}' {}", + HConstants.HBASE_DIR, rd, iae); throw iae; } // Make sure cluster ID exists - if (!FSUtils.checkClusterIdExists(fs, rd, c.getInt( - HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000))) { - FSUtils.setClusterId(fs, rd, new ClusterId(), c.getInt(HConstants.THREAD_WAKE_FREQUENCY, 10 * 1000)); + if (!FSUtils.checkClusterIdExists(fs, rd, threadWakeFrequency)) { + FSUtils.setClusterId(fs, rd, new ClusterId(), threadWakeFrequency); } clusterId = FSUtils.getClusterId(fs, rd); - - // Make sure the meta region directory exists! - if (!FSUtils.metaRegionExists(fs, rd)) { - bootstrap(rd, c); - } - - // Create tableinfo-s for hbase:meta if not already there. - // assume, created table descriptor is for enabling table - // meta table is a system table, so descriptors are predefined, - // we should get them from registry. - FSTableDescriptors fsd = new FSTableDescriptors(fs, rd); - fsd.createTableDescriptor(fsd.get(TableName.META_TABLE_NAME)); - - return rd; } /** @@ -395,43 +381,6 @@ public class MasterFileSystem { } } - private static void bootstrap(final Path rd, final Configuration c) - throws IOException { - LOG.info("BOOTSTRAP: creating hbase:meta region"); - try { - // Bootstrapping, make sure blockcache is off. Else, one will be - // created here in bootstrap and it'll need to be cleaned up. Better to - // not make it in first place. Turn off block caching for bootstrap. - // Enable after. - FSTableDescriptors.tryUpdateMetaTableDescriptor(c); - TableDescriptor metaDescriptor = new FSTableDescriptors(c).get(TableName.META_TABLE_NAME); - HRegion meta = HRegion.createHRegion(RegionInfoBuilder.FIRST_META_REGIONINFO, rd, - c, setInfoFamilyCachingForMeta(metaDescriptor, false), null); - meta.close(); - } catch (IOException e) { - e = e instanceof RemoteException ? - ((RemoteException)e).unwrapRemoteException() : e; - LOG.error("bootstrap", e); - throw e; - } - } - - /** - * Enable in memory caching for hbase:meta - */ - public static TableDescriptor setInfoFamilyCachingForMeta(TableDescriptor metaDescriptor, final boolean b) { - TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(metaDescriptor); - for (ColumnFamilyDescriptor hcd: metaDescriptor.getColumnFamilies()) { - if (Bytes.equals(hcd.getName(), HConstants.CATALOG_FAMILY)) { - builder.modifyColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(hcd) - .setBlockCacheEnabled(b) - .setInMemory(b) - .build()); - } - } - return builder.build(); - } - public void deleteFamilyFromFS(RegionInfo region, byte[] familyName) throws IOException { deleteFamilyFromFS(rootdir, region, familyName); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java index 4cd305b043a..cdf78cc3017 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/InitMetaProcedure.java @@ -25,13 +25,21 @@ import static org.apache.hadoop.hbase.master.procedure.AbstractStateMachineNames import java.io.IOException; import java.util.Arrays; import java.util.concurrent.CountDownLatch; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure; import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; import org.apache.hadoop.hbase.procedure2.ProcedureUtil; import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.RetryCounter; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; @@ -64,20 +72,46 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure builder.setRegionReplication( + conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM))); + TableDescriptor metaDescriptor = new FSTableDescriptors(conf).get(TableName.META_TABLE_NAME); + HRegion + .createHRegion(RegionInfoBuilder.FIRST_META_REGIONINFO, rootDir, conf, metaDescriptor, null) + .close(); + } + @Override protected Flow executeFromState(MasterProcedureEnv env, InitMetaState state) - throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { + throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException { LOG.debug("Execute {}", this); - switch (state) { - case INIT_META_ASSIGN_META: - LOG.info("Going to assign meta"); - addChildProcedure(env.getAssignmentManager() - .createAssignProcedures(Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO))); - setNextState(InitMetaState.INIT_META_CREATE_NAMESPACES); - return Flow.HAS_MORE_STATE; - case INIT_META_CREATE_NAMESPACES: - LOG.info("Going to create {} and {} namespaces", DEFAULT_NAMESPACE, SYSTEM_NAMESPACE); - try { + try { + switch (state) { + case INIT_META_WRITE_FS_LAYOUT: + Configuration conf = env.getMasterConfiguration(); + Path rootDir = CommonFSUtils.getRootDir(conf); + writeFsLayout(rootDir, conf); + setNextState(InitMetaState.INIT_META_ASSIGN_META); + return Flow.HAS_MORE_STATE; + case INIT_META_ASSIGN_META: + LOG.info("Going to assign meta"); + addChildProcedure(env.getAssignmentManager() + .createAssignProcedures(Arrays.asList(RegionInfoBuilder.FIRST_META_REGIONINFO))); + setNextState(InitMetaState.INIT_META_CREATE_NAMESPACES); + return Flow.HAS_MORE_STATE; + case INIT_META_CREATE_NAMESPACES: + LOG.info("Going to create {} and {} namespaces", DEFAULT_NAMESPACE, SYSTEM_NAMESPACE); createDirectory(env, DEFAULT_NAMESPACE); createDirectory(env, SYSTEM_NAMESPACE); // here the TableNamespaceManager has not been initialized yet, so we have to insert the @@ -85,20 +119,21 @@ public class InitMetaProcedure extends AbstractStateMachineTableProcedure builder.setRegionReplication( - conf.getInt(HConstants.META_REPLICAS_NUM, HConstants.DEFAULT_META_REPLICA_NUM))); - } + new FSTableDescriptors(this.dataFs, this.dataRootDir, !canUpdateTableDescriptor(), false); } protected void login(UserProvider user, String host) throws IOException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java index 1dfdca5dcd8..bee3114c886 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java @@ -119,6 +119,7 @@ public class FSTableDescriptors implements TableDescriptors { this.usecache = usecache; } + @VisibleForTesting public static void tryUpdateMetaTableDescriptor(Configuration conf) throws IOException { tryUpdateMetaTableDescriptor(conf, CommonFSUtils.getCurrentFileSystem(conf), CommonFSUtils.getRootDir(conf), null);