From 4c7da496ad1444ac245a0d174bbcafd35c477fdb Mon Sep 17 00:00:00 2001
From: Viraj Jasani
Date: Thu, 1 Jul 2021 15:13:25 +0530
Subject: [PATCH] HBASE-25902 Add missing CFs in meta during HBase 1 to 2 Upgrade (#3441) (#3417)

Signed-off-by: Michael Stack
---
 .../hbase/PleaseRestartMasterException.java   | 34 ++++++++
 .../apache/hadoop/hbase/master/HMaster.java   | 78 ++++++++++++++++++-
 .../hadoop/hbase/util/FSTableDescriptors.java | 45 +++++++----
 3 files changed, 139 insertions(+), 18 deletions(-)
 create mode 100644 hbase-client/src/main/java/org/apache/hadoop/hbase/PleaseRestartMasterException.java

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/PleaseRestartMasterException.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/PleaseRestartMasterException.java
new file mode 100644
index 00000000000..62f84e9495b
--- /dev/null
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/PleaseRestartMasterException.java
@@ -0,0 +1,34 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase;
+
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * Thrown if the master requires restart.
+ */
+@InterfaceAudience.Public
+public class PleaseRestartMasterException extends HBaseIOException {
+
+  public PleaseRestartMasterException(final String s) {
+    super(s);
+  }
+
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index a0dfae7e237..ba38a19e8c5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -67,6 +67,7 @@ import org.apache.hadoop.hbase.MasterNotRunningException;
 import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.NamespaceDescriptor;
 import org.apache.hadoop.hbase.PleaseHoldException;
+import org.apache.hadoop.hbase.PleaseRestartMasterException;
 import org.apache.hadoop.hbase.RegionMetrics;
 import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
 import org.apache.hadoop.hbase.ServerMetrics;
@@ -176,6 +177,7 @@ import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifier;
 import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
 import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
 import org.apache.hadoop.hbase.regionserver.RSRpcServices;
 import org.apache.hadoop.hbase.replication.ReplicationException;
 import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
@@ -196,6 +198,7 @@ import org.apache.hadoop.hbase.security.UserProvider;
 import org.apache.hadoop.hbase.util.Addressing;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
+import org.apache.hadoop.hbase.util.FSTableDescriptors;
 import org.apache.hadoop.hbase.util.FutureUtils;
 import org.apache.hadoop.hbase.util.HBaseFsck;
 import org.apache.hadoop.hbase.util.HFileArchiveUtil;
@@ -965,6 +968,14 @@ public class HMaster extends HRegionServer implements MasterServices {
     if (!waitForMetaOnline()) {
       return;
     }
+
+    TableDescriptor metaDescriptor =
+      tableDescriptors.get(TableName.META_TABLE_NAME);
+    final ColumnFamilyDescriptor tableFamilyDesc =
+      metaDescriptor.getColumnFamily(HConstants.TABLE_FAMILY);
+    final ColumnFamilyDescriptor replBarrierFamilyDesc =
+      metaDescriptor.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
+
     this.assignmentManager.joinCluster();
     // The below depends on hbase:meta being online.
     this.assignmentManager.processOfflineRegions();
@@ -1032,7 +1043,17 @@
       return;
     }
     status.setStatus("Starting cluster schema service");
-    initClusterSchemaService();
+    try {
+      initClusterSchemaService();
+    } catch (IllegalStateException e) {
+      if (e.getCause() != null && e.getCause() instanceof NoSuchColumnFamilyException
+          && tableFamilyDesc == null && replBarrierFamilyDesc == null) {
+        LOG.info("ClusterSchema service could not be initialized. This is "
+          + "expected during HBase 1 to 2 upgrade", e);
+      } else {
+        throw e;
+      }
+    }
 
     if (this.cpHost != null) {
       try {
@@ -1054,6 +1075,29 @@
     // Set master as 'initialized'.
     setInitialized(true);
+
+    if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
+      // create missing CFs in meta table after master is set to 'initialized'.
+      createMissingCFsInMetaDuringUpgrade(metaDescriptor);
+
+      // Throwing this Exception to abort active master is painful but this
+      // seems the only way to add missing CFs in meta while upgrading from
+      // HBase 1 to 2 (where HBase 2 has HBASE-23055 & HBASE-23782 checked-in).
+      // So, why do we abort active master after adding missing CFs in meta?
+      // When we reach here, we would have already bypassed NoSuchColumnFamilyException
+      // in initClusterSchemaService(), meaning ClusterSchemaService is not
+      // correctly initialized but we bypassed it. Similarly, we bypassed
+      // tableStateManager.start() as well. Hence, we should better abort
+      // current active master because our main task - adding missing CFs
+      // in meta table is done (possible only after master state is set as
+      // initialized) at the expense of bypassing few important tasks as part
+      // of active master init routine. So now we abort active master so that
+      // next active master init will not face any issues and all mandatory
+      // services will be started during master init phase.
+      throw new PleaseRestartMasterException("Aborting active master after missing"
+        + " CFs are successfully added in meta. Subsequent active master "
+        + "initialization should be uninterrupted");
+    }
 
     if (maintenanceMode) {
       LOG.info("Detected repair mode, skipping final initialization steps.");
       return;
     }
@@ -1113,6 +1157,38 @@
     }
   }
 
+  private void createMissingCFsInMetaDuringUpgrade(
+      TableDescriptor metaDescriptor) throws IOException {
+    TableDescriptor newMetaDesc =
+      TableDescriptorBuilder.newBuilder(metaDescriptor)
+        .setColumnFamily(FSTableDescriptors.getTableFamilyDescForMeta(conf))
+        .setColumnFamily(FSTableDescriptors.getReplBarrierFamilyDescForMeta())
+        .build();
+    long pid = this.modifyTable(TableName.META_TABLE_NAME, () -> newMetaDesc,
+      0, 0, false);
+    int tries = 30;
+    while (!(getMasterProcedureExecutor().isFinished(pid))
+        && getMasterProcedureExecutor().isRunning() && tries > 0) {
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException e) {
+        throw new IOException("Wait interrupted", e);
+      }
+      tries--;
+    }
+    if (tries <= 0) {
+      throw new HBaseIOException(
+        "Failed to add table and rep_barrier CFs to meta in a given time.");
+    } else {
+      Procedure result = getMasterProcedureExecutor().getResult(pid);
+      if (result != null && result.isFailed()) {
+        throw new IOException(
+          "Failed to add table and rep_barrier CFs to meta. "
+            + MasterProcedureUtil.unwrapRemoteIOException(result));
+      }
+    }
+  }
+
   /**
    * Check hbase:meta is up and ready for reading. For use during Master startup only.
   * @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
index 4fb231234e6..91de8b01738 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.TableDescriptors;
 import org.apache.hadoop.hbase.TableInfoMissingException;
 import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
 import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
 import org.apache.hadoop.hbase.client.TableDescriptor;
@@ -139,6 +140,31 @@
     }
   }
 
+  public static ColumnFamilyDescriptor getTableFamilyDescForMeta(
+      final Configuration conf) {
+    return ColumnFamilyDescriptorBuilder
+      .newBuilder(HConstants.TABLE_FAMILY)
+      .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
+        HConstants.DEFAULT_HBASE_META_VERSIONS))
+      .setInMemory(true)
+      .setBlocksize(8 * 1024)
+      .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
+      .setDataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.ROW_INDEX_V1)
+      .setBloomFilterType(BloomType.ROWCOL)
+      .build();
+  }
+
+  public static ColumnFamilyDescriptor getReplBarrierFamilyDescForMeta() {
+    return ColumnFamilyDescriptorBuilder
+      .newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
+      .setMaxVersions(HConstants.ALL_VERSIONS)
+      .setInMemory(true)
+      .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
+      .setDataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.ROW_INDEX_V1)
+      .setBloomFilterType(BloomType.ROWCOL)
+      .build();
+  }
+
   private static TableDescriptorBuilder createMetaTableDescriptorBuilder(final Configuration conf)
       throws IOException {
     // TODO We used to set CacheDataInL1 for META table. When we have BucketCache in file mode, now
@@ -155,23 +181,8 @@
         .setBloomFilterType(BloomType.ROWCOL)
         .setDataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.ROW_INDEX_V1)
         .build())
-      .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.TABLE_FAMILY)
-        .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
-          HConstants.DEFAULT_HBASE_META_VERSIONS))
-        .setInMemory(true)
-        .setBlocksize(8 * 1024)
-        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
-        .setDataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.ROW_INDEX_V1)
-        .setBloomFilterType(BloomType.ROWCOL)
-        .build())
-      .setColumnFamily(ColumnFamilyDescriptorBuilder
-        .newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
-        .setMaxVersions(HConstants.ALL_VERSIONS)
-        .setInMemory(true)
-        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
-        .setDataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.ROW_INDEX_V1)
-        .setBloomFilterType(BloomType.ROWCOL)
-        .build())
+      .setColumnFamily(getTableFamilyDescForMeta(conf))
+      .setColumnFamily(getReplBarrierFamilyDescForMeta())
       .setColumnFamily(ColumnFamilyDescriptorBuilder
         .newBuilder(HConstants.NAMESPACE_FAMILY)
         .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
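
Note (not part of the patch): the master-side detection above boils down to asking the hbase:meta table descriptor whether the 'table' and 'rep_barrier' column families are present. Below is a minimal client-side sketch of the same probe, assuming a reachable cluster and the standard HBase 2.x client API; the class name MetaCfCheck is made up for illustration. On a cluster carried over from HBase 1 it should report both families as missing; after the patched master has added them and restarted itself via PleaseRestartMasterException, both should report true.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.TableDescriptor;

/** Illustrative, standalone check of the hbase:meta schema; not part of the patch above. */
public class MetaCfCheck {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    try (Connection conn = ConnectionFactory.createConnection(conf);
         Admin admin = conn.getAdmin()) {
      // Same probe the master performs in finishActiveMasterInitialization():
      // a meta descriptor written by HBase 1 has no 'table' or 'rep_barrier'
      // families, so these lookups report false until the upgrade completes.
      TableDescriptor meta = admin.getDescriptor(TableName.META_TABLE_NAME);
      boolean hasTableCf = meta.hasColumnFamily(HConstants.TABLE_FAMILY);
      boolean hasReplBarrierCf = meta.hasColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
      System.out.println("hbase:meta has 'table' CF: " + hasTableCf
          + ", 'rep_barrier' CF: " + hasReplBarrierCf);
    }
  }
}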