HBASE-25902 Add missing CFs in meta during HBase 1 to 2 Upgrade (#3417)
Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
parent
399b58e7c6
commit
5b5c92f427
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase;
|
||||
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* Thrown if the master requires restart.
|
||||
*/
|
||||
@InterfaceAudience.Public
|
||||
public class PleaseRestartMasterException extends HBaseIOException {
|
||||
|
||||
public PleaseRestartMasterException(final String s) {
|
||||
super(s);
|
||||
}
|
||||
|
||||
}
|
|
@ -70,6 +70,7 @@ import org.apache.hadoop.hbase.InvalidFamilyOperationException;
|
|||
import org.apache.hadoop.hbase.MasterNotRunningException;
|
||||
import org.apache.hadoop.hbase.NamespaceDescriptor;
|
||||
import org.apache.hadoop.hbase.PleaseHoldException;
|
||||
import org.apache.hadoop.hbase.PleaseRestartMasterException;
|
||||
import org.apache.hadoop.hbase.RegionMetrics;
|
||||
import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
|
||||
import org.apache.hadoop.hbase.ServerMetrics;
|
||||
|
@ -175,6 +176,7 @@ import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifier;
|
|||
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
|
||||
import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
|
||||
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
|
||||
import org.apache.hadoop.hbase.replication.ReplicationException;
|
||||
import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
|
||||
|
@ -191,6 +193,7 @@ import org.apache.hadoop.hbase.security.UserProvider;
|
|||
import org.apache.hadoop.hbase.trace.TraceUtil;
|
||||
import org.apache.hadoop.hbase.util.Addressing;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.FSTableDescriptors;
|
||||
import org.apache.hadoop.hbase.util.HBaseFsck;
|
||||
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
|
||||
import org.apache.hadoop.hbase.util.IdLock;
|
||||
|
@ -953,9 +956,26 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
if (!waitForMetaOnline()) {
|
||||
return;
|
||||
}
|
||||
TableDescriptor metaDescriptor = tableDescriptors.get(
|
||||
TableName.META_TABLE_NAME);
|
||||
final ColumnFamilyDescriptor tableFamilyDesc = metaDescriptor
|
||||
.getColumnFamily(HConstants.TABLE_FAMILY);
|
||||
final ColumnFamilyDescriptor replBarrierFamilyDesc =
|
||||
metaDescriptor.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
|
||||
|
||||
this.assignmentManager.joinCluster();
|
||||
// The below depends on hbase:meta being online.
|
||||
try {
|
||||
this.tableStateManager.start();
|
||||
} catch (NoSuchColumnFamilyException e) {
|
||||
if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
|
||||
LOG.info("TableStates manager could not be started. This is expected"
|
||||
+ " during HBase 1 to 2 upgrade.", e);
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
this.assignmentManager.processOfflineRegions();
|
||||
// this must be called after the above processOfflineRegions to prevent race
|
||||
this.assignmentManager.wakeMetaLoadedEvent();
|
||||
|
@ -1025,7 +1045,17 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
return;
|
||||
}
|
||||
status.setStatus("Starting cluster schema service");
|
||||
try {
|
||||
initClusterSchemaService();
|
||||
} catch (IllegalStateException e) {
|
||||
if (e.getCause() != null && e.getCause() instanceof NoSuchColumnFamilyException
|
||||
&& tableFamilyDesc == null && replBarrierFamilyDesc == null) {
|
||||
LOG.info("ClusterSchema service could not be initialized. This is "
|
||||
+ "expected during HBase 1 to 2 upgrade", e);
|
||||
} else {
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
if (this.cpHost != null) {
|
||||
try {
|
||||
|
@ -1047,6 +1077,29 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
// Set master as 'initialized'.
|
||||
setInitialized(true);
|
||||
|
||||
if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
|
||||
// create missing CFs in meta table after master is set to 'initialized'.
|
||||
createMissingCFsInMetaDuringUpgrade(metaDescriptor);
|
||||
|
||||
// Throwing this Exception to abort active master is painful but this
|
||||
// seems the only way to add missing CFs in meta while upgrading from
|
||||
// HBase 1 to 2 (where HBase 2 has HBASE-23055 & HBASE-23782 checked-in).
|
||||
// So, why do we abort active master after adding missing CFs in meta?
|
||||
// When we reach here, we would have already bypassed NoSuchColumnFamilyException
|
||||
// in initClusterSchemaService(), meaning ClusterSchemaService is not
|
||||
// correctly initialized but we bypassed it. Similarly, we bypassed
|
||||
// tableStateManager.start() as well. Hence, we should better abort
|
||||
// current active master because our main task - adding missing CFs
|
||||
// in meta table is done (possible only after master state is set as
|
||||
// initialized) at the expense of bypassing few important tasks as part
|
||||
// of active master init routine. So now we abort active master so that
|
||||
// next active master init will not face any issues and all mandatory
|
||||
// services will be started during master init phase.
|
||||
throw new PleaseRestartMasterException("Aborting active master after missing"
|
||||
+ " CFs are successfully added in meta. Subsequent active master "
|
||||
+ "initialization should be uninterrupted");
|
||||
}
|
||||
|
||||
if (maintenanceMode) {
|
||||
LOG.info("Detected repair mode, skipping final initialization steps.");
|
||||
return;
|
||||
|
@ -1106,6 +1159,38 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
}
|
||||
}
|
||||
|
||||
private void createMissingCFsInMetaDuringUpgrade(
|
||||
TableDescriptor metaDescriptor) throws IOException {
|
||||
TableDescriptor newMetaDesc =
|
||||
TableDescriptorBuilder.newBuilder(metaDescriptor)
|
||||
.setColumnFamily(FSTableDescriptors.getTableFamilyDescForMeta(conf))
|
||||
.setColumnFamily(FSTableDescriptors.getReplBarrierFamilyDescForMeta())
|
||||
.build();
|
||||
long pid = this.modifyTable(TableName.META_TABLE_NAME, () -> newMetaDesc,
|
||||
0, 0, false);
|
||||
int tries = 30;
|
||||
while (!(getMasterProcedureExecutor().isFinished(pid))
|
||||
&& getMasterProcedureExecutor().isRunning() && tries > 0) {
|
||||
try {
|
||||
Thread.sleep(1000);
|
||||
} catch (InterruptedException e) {
|
||||
throw new IOException("Wait interrupted", e);
|
||||
}
|
||||
tries--;
|
||||
}
|
||||
if (tries <= 0) {
|
||||
throw new HBaseIOException(
|
||||
"Failed to add table and rep_barrier CFs to meta in a given time.");
|
||||
} else {
|
||||
Procedure<?> result = getMasterProcedureExecutor().getResult(pid);
|
||||
if (result != null && result.isFailed()) {
|
||||
throw new IOException(
|
||||
"Failed to add table and rep_barrier CFs to meta. "
|
||||
+ MasterProcedureUtil.unwrapRemoteIOException(result));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check hbase:meta is up and ready for reading. For use during Master startup only.
|
||||
* @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
|
||||
|
|
|
@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.HConstants;
|
|||
import org.apache.hadoop.hbase.TableDescriptors;
|
||||
import org.apache.hadoop.hbase.TableInfoMissingException;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
|
||||
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
|
||||
import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
|
||||
import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||
|
@ -139,6 +140,31 @@ public class FSTableDescriptors implements TableDescriptors {
|
|||
}
|
||||
}
|
||||
|
||||
public static ColumnFamilyDescriptor getTableFamilyDescForMeta(
|
||||
final Configuration conf) {
|
||||
return ColumnFamilyDescriptorBuilder
|
||||
.newBuilder(HConstants.TABLE_FAMILY)
|
||||
.setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
|
||||
HConstants.DEFAULT_HBASE_META_VERSIONS))
|
||||
.setInMemory(true)
|
||||
.setBlocksize(8 * 1024)
|
||||
.setScope(HConstants.REPLICATION_SCOPE_LOCAL)
|
||||
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
|
||||
.setBloomFilterType(BloomType.NONE)
|
||||
.build();
|
||||
}
|
||||
|
||||
public static ColumnFamilyDescriptor getReplBarrierFamilyDescForMeta() {
|
||||
return ColumnFamilyDescriptorBuilder
|
||||
.newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
|
||||
.setMaxVersions(HConstants.ALL_VERSIONS)
|
||||
.setInMemory(true)
|
||||
.setScope(HConstants.REPLICATION_SCOPE_LOCAL)
|
||||
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
|
||||
.setBloomFilterType(BloomType.NONE)
|
||||
.build();
|
||||
}
|
||||
|
||||
public static TableDescriptorBuilder createMetaTableDescriptorBuilder(final Configuration conf)
|
||||
throws IOException {
|
||||
// TODO We used to set CacheDataInL1 for META table. When we have BucketCache in file mode, now
|
||||
|
@ -155,23 +181,8 @@ public class FSTableDescriptors implements TableDescriptors {
|
|||
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
|
||||
.setBloomFilterType(BloomType.NONE)
|
||||
.build())
|
||||
.setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.TABLE_FAMILY)
|
||||
.setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
|
||||
HConstants.DEFAULT_HBASE_META_VERSIONS))
|
||||
.setInMemory(true)
|
||||
.setBlocksize(8 * 1024)
|
||||
.setScope(HConstants.REPLICATION_SCOPE_LOCAL)
|
||||
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
|
||||
.setBloomFilterType(BloomType.NONE)
|
||||
.build())
|
||||
.setColumnFamily(ColumnFamilyDescriptorBuilder
|
||||
.newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
|
||||
.setMaxVersions(HConstants.ALL_VERSIONS)
|
||||
.setInMemory(true)
|
||||
.setScope(HConstants.REPLICATION_SCOPE_LOCAL)
|
||||
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
|
||||
.setBloomFilterType(BloomType.NONE)
|
||||
.build())
|
||||
.setColumnFamily(getTableFamilyDescForMeta(conf))
|
||||
.setColumnFamily(getReplBarrierFamilyDescForMeta())
|
||||
.setCoprocessor(CoprocessorDescriptorBuilder.newBuilder(
|
||||
MultiRowMutationEndpoint.class.getName())
|
||||
.setPriority(Coprocessor.PRIORITY_SYSTEM)
|
||||
|
|
Loading…
Reference in New Issue