HBASE-25902 Add missing CFs in meta during HBase 1 to 2 Upgrade (#3417)
Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
parent
399b58e7c6
commit
5b5c92f427
|
@ -0,0 +1,34 @@
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.hbase;
|
||||||
|
|
||||||
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Thrown if the master requires restart.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Public
|
||||||
|
public class PleaseRestartMasterException extends HBaseIOException {
|
||||||
|
|
||||||
|
public PleaseRestartMasterException(final String s) {
|
||||||
|
super(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -70,6 +70,7 @@ import org.apache.hadoop.hbase.InvalidFamilyOperationException;
|
||||||
import org.apache.hadoop.hbase.MasterNotRunningException;
|
import org.apache.hadoop.hbase.MasterNotRunningException;
|
||||||
import org.apache.hadoop.hbase.NamespaceDescriptor;
|
import org.apache.hadoop.hbase.NamespaceDescriptor;
|
||||||
import org.apache.hadoop.hbase.PleaseHoldException;
|
import org.apache.hadoop.hbase.PleaseHoldException;
|
||||||
|
import org.apache.hadoop.hbase.PleaseRestartMasterException;
|
||||||
import org.apache.hadoop.hbase.RegionMetrics;
|
import org.apache.hadoop.hbase.RegionMetrics;
|
||||||
import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
|
import org.apache.hadoop.hbase.ReplicationPeerNotFoundException;
|
||||||
import org.apache.hadoop.hbase.ServerMetrics;
|
import org.apache.hadoop.hbase.ServerMetrics;
|
||||||
|
@ -175,6 +176,7 @@ import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifier;
|
||||||
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
|
import org.apache.hadoop.hbase.quotas.SpaceQuotaSnapshotNotifierFactory;
|
||||||
import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
|
import org.apache.hadoop.hbase.quotas.SpaceViolationPolicy;
|
||||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.NoSuchColumnFamilyException;
|
||||||
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
|
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
|
||||||
import org.apache.hadoop.hbase.replication.ReplicationException;
|
import org.apache.hadoop.hbase.replication.ReplicationException;
|
||||||
import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
|
import org.apache.hadoop.hbase.replication.ReplicationLoadSource;
|
||||||
|
@ -191,6 +193,7 @@ import org.apache.hadoop.hbase.security.UserProvider;
|
||||||
import org.apache.hadoop.hbase.trace.TraceUtil;
|
import org.apache.hadoop.hbase.trace.TraceUtil;
|
||||||
import org.apache.hadoop.hbase.util.Addressing;
|
import org.apache.hadoop.hbase.util.Addressing;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.apache.hadoop.hbase.util.FSTableDescriptors;
|
||||||
import org.apache.hadoop.hbase.util.HBaseFsck;
|
import org.apache.hadoop.hbase.util.HBaseFsck;
|
||||||
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
|
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
|
||||||
import org.apache.hadoop.hbase.util.IdLock;
|
import org.apache.hadoop.hbase.util.IdLock;
|
||||||
|
@ -953,9 +956,26 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
if (!waitForMetaOnline()) {
|
if (!waitForMetaOnline()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
TableDescriptor metaDescriptor = tableDescriptors.get(
|
||||||
|
TableName.META_TABLE_NAME);
|
||||||
|
final ColumnFamilyDescriptor tableFamilyDesc = metaDescriptor
|
||||||
|
.getColumnFamily(HConstants.TABLE_FAMILY);
|
||||||
|
final ColumnFamilyDescriptor replBarrierFamilyDesc =
|
||||||
|
metaDescriptor.getColumnFamily(HConstants.REPLICATION_BARRIER_FAMILY);
|
||||||
|
|
||||||
this.assignmentManager.joinCluster();
|
this.assignmentManager.joinCluster();
|
||||||
// The below depends on hbase:meta being online.
|
// The below depends on hbase:meta being online.
|
||||||
this.tableStateManager.start();
|
try {
|
||||||
|
this.tableStateManager.start();
|
||||||
|
} catch (NoSuchColumnFamilyException e) {
|
||||||
|
if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
|
||||||
|
LOG.info("TableStates manager could not be started. This is expected"
|
||||||
|
+ " during HBase 1 to 2 upgrade.", e);
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
this.assignmentManager.processOfflineRegions();
|
this.assignmentManager.processOfflineRegions();
|
||||||
// this must be called after the above processOfflineRegions to prevent race
|
// this must be called after the above processOfflineRegions to prevent race
|
||||||
this.assignmentManager.wakeMetaLoadedEvent();
|
this.assignmentManager.wakeMetaLoadedEvent();
|
||||||
|
@ -1025,7 +1045,17 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
status.setStatus("Starting cluster schema service");
|
status.setStatus("Starting cluster schema service");
|
||||||
initClusterSchemaService();
|
try {
|
||||||
|
initClusterSchemaService();
|
||||||
|
} catch (IllegalStateException e) {
|
||||||
|
if (e.getCause() != null && e.getCause() instanceof NoSuchColumnFamilyException
|
||||||
|
&& tableFamilyDesc == null && replBarrierFamilyDesc == null) {
|
||||||
|
LOG.info("ClusterSchema service could not be initialized. This is "
|
||||||
|
+ "expected during HBase 1 to 2 upgrade", e);
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (this.cpHost != null) {
|
if (this.cpHost != null) {
|
||||||
try {
|
try {
|
||||||
|
@ -1047,6 +1077,29 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
// Set master as 'initialized'.
|
// Set master as 'initialized'.
|
||||||
setInitialized(true);
|
setInitialized(true);
|
||||||
|
|
||||||
|
if (tableFamilyDesc == null && replBarrierFamilyDesc == null) {
|
||||||
|
// create missing CFs in meta table after master is set to 'initialized'.
|
||||||
|
createMissingCFsInMetaDuringUpgrade(metaDescriptor);
|
||||||
|
|
||||||
|
// Throwing this Exception to abort active master is painful but this
|
||||||
|
// seems the only way to add missing CFs in meta while upgrading from
|
||||||
|
// HBase 1 to 2 (where HBase 2 has HBASE-23055 & HBASE-23782 checked-in).
|
||||||
|
// So, why do we abort active master after adding missing CFs in meta?
|
||||||
|
// When we reach here, we would have already bypassed NoSuchColumnFamilyException
|
||||||
|
// in initClusterSchemaService(), meaning ClusterSchemaService is not
|
||||||
|
// correctly initialized but we bypassed it. Similarly, we bypassed
|
||||||
|
// tableStateManager.start() as well. Hence, we should better abort
|
||||||
|
// current active master because our main task - adding missing CFs
|
||||||
|
// in meta table is done (possible only after master state is set as
|
||||||
|
// initialized) at the expense of bypassing a few important tasks as part
|
||||||
|
// of active master init routine. So now we abort active master so that
|
||||||
|
// next active master init will not face any issues and all mandatory
|
||||||
|
// services will be started during master init phase.
|
||||||
|
throw new PleaseRestartMasterException("Aborting active master after missing"
|
||||||
|
+ " CFs are successfully added in meta. Subsequent active master "
|
||||||
|
+ "initialization should be uninterrupted");
|
||||||
|
}
|
||||||
|
|
||||||
if (maintenanceMode) {
|
if (maintenanceMode) {
|
||||||
LOG.info("Detected repair mode, skipping final initialization steps.");
|
LOG.info("Detected repair mode, skipping final initialization steps.");
|
||||||
return;
|
return;
|
||||||
|
@ -1106,6 +1159,38 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void createMissingCFsInMetaDuringUpgrade(
|
||||||
|
TableDescriptor metaDescriptor) throws IOException {
|
||||||
|
TableDescriptor newMetaDesc =
|
||||||
|
TableDescriptorBuilder.newBuilder(metaDescriptor)
|
||||||
|
.setColumnFamily(FSTableDescriptors.getTableFamilyDescForMeta(conf))
|
||||||
|
.setColumnFamily(FSTableDescriptors.getReplBarrierFamilyDescForMeta())
|
||||||
|
.build();
|
||||||
|
long pid = this.modifyTable(TableName.META_TABLE_NAME, () -> newMetaDesc,
|
||||||
|
0, 0, false);
|
||||||
|
int tries = 30;
|
||||||
|
while (!(getMasterProcedureExecutor().isFinished(pid))
|
||||||
|
&& getMasterProcedureExecutor().isRunning() && tries > 0) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
throw new IOException("Wait interrupted", e);
|
||||||
|
}
|
||||||
|
tries--;
|
||||||
|
}
|
||||||
|
if (tries <= 0) {
|
||||||
|
throw new HBaseIOException(
|
||||||
|
"Failed to add table and rep_barrier CFs to meta in a given time.");
|
||||||
|
} else {
|
||||||
|
Procedure<?> result = getMasterProcedureExecutor().getResult(pid);
|
||||||
|
if (result != null && result.isFailed()) {
|
||||||
|
throw new IOException(
|
||||||
|
"Failed to add table and rep_barrier CFs to meta. "
|
||||||
|
+ MasterProcedureUtil.unwrapRemoteIOException(result));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check hbase:meta is up and ready for reading. For use during Master startup only.
|
* Check hbase:meta is up and ready for reading. For use during Master startup only.
|
||||||
* @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
|
* @return True if meta is UP and online and startup can progress. Otherwise, meta is not online
|
||||||
|
|
|
@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.TableDescriptors;
|
import org.apache.hadoop.hbase.TableDescriptors;
|
||||||
import org.apache.hadoop.hbase.TableInfoMissingException;
|
import org.apache.hadoop.hbase.TableInfoMissingException;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
|
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
|
||||||
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
|
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
|
||||||
import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
|
import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
|
||||||
import org.apache.hadoop.hbase.client.TableDescriptor;
|
import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||||
|
@ -139,6 +140,31 @@ public class FSTableDescriptors implements TableDescriptors {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static ColumnFamilyDescriptor getTableFamilyDescForMeta(
|
||||||
|
final Configuration conf) {
|
||||||
|
return ColumnFamilyDescriptorBuilder
|
||||||
|
.newBuilder(HConstants.TABLE_FAMILY)
|
||||||
|
.setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
|
||||||
|
HConstants.DEFAULT_HBASE_META_VERSIONS))
|
||||||
|
.setInMemory(true)
|
||||||
|
.setBlocksize(8 * 1024)
|
||||||
|
.setScope(HConstants.REPLICATION_SCOPE_LOCAL)
|
||||||
|
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
|
||||||
|
.setBloomFilterType(BloomType.NONE)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static ColumnFamilyDescriptor getReplBarrierFamilyDescForMeta() {
|
||||||
|
return ColumnFamilyDescriptorBuilder
|
||||||
|
.newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
|
||||||
|
.setMaxVersions(HConstants.ALL_VERSIONS)
|
||||||
|
.setInMemory(true)
|
||||||
|
.setScope(HConstants.REPLICATION_SCOPE_LOCAL)
|
||||||
|
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
|
||||||
|
.setBloomFilterType(BloomType.NONE)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
public static TableDescriptorBuilder createMetaTableDescriptorBuilder(final Configuration conf)
|
public static TableDescriptorBuilder createMetaTableDescriptorBuilder(final Configuration conf)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
// TODO We used to set CacheDataInL1 for META table. When we have BucketCache in file mode, now
|
// TODO We used to set CacheDataInL1 for META table. When we have BucketCache in file mode, now
|
||||||
|
@ -155,23 +181,8 @@ public class FSTableDescriptors implements TableDescriptors {
|
||||||
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
|
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
|
||||||
.setBloomFilterType(BloomType.NONE)
|
.setBloomFilterType(BloomType.NONE)
|
||||||
.build())
|
.build())
|
||||||
.setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.TABLE_FAMILY)
|
.setColumnFamily(getTableFamilyDescForMeta(conf))
|
||||||
.setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
|
.setColumnFamily(getReplBarrierFamilyDescForMeta())
|
||||||
HConstants.DEFAULT_HBASE_META_VERSIONS))
|
|
||||||
.setInMemory(true)
|
|
||||||
.setBlocksize(8 * 1024)
|
|
||||||
.setScope(HConstants.REPLICATION_SCOPE_LOCAL)
|
|
||||||
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
|
|
||||||
.setBloomFilterType(BloomType.NONE)
|
|
||||||
.build())
|
|
||||||
.setColumnFamily(ColumnFamilyDescriptorBuilder
|
|
||||||
.newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
|
|
||||||
.setMaxVersions(HConstants.ALL_VERSIONS)
|
|
||||||
.setInMemory(true)
|
|
||||||
.setScope(HConstants.REPLICATION_SCOPE_LOCAL)
|
|
||||||
// Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
|
|
||||||
.setBloomFilterType(BloomType.NONE)
|
|
||||||
.build())
|
|
||||||
.setCoprocessor(CoprocessorDescriptorBuilder.newBuilder(
|
.setCoprocessor(CoprocessorDescriptorBuilder.newBuilder(
|
||||||
MultiRowMutationEndpoint.class.getName())
|
MultiRowMutationEndpoint.class.getName())
|
||||||
.setPriority(Coprocessor.PRIORITY_SYSTEM)
|
.setPriority(Coprocessor.PRIORITY_SYSTEM)
|
||||||
|
|
Loading…
Reference in New Issue