HBASE-23652 Move the unsupported procedure type check before migrating to RegionProcedureStore (#1018)

Signed-off-by: Nick Dimiduk <ndimiduk@apache.org>
This commit is contained in:
Duo Zhang 2020-01-16 22:58:16 +08:00 committed by GitHub
parent 19d3bed1d4
commit 0321f56765
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 103 additions and 56 deletions

View File

@ -98,14 +98,11 @@ import org.apache.hadoop.hbase.ipc.RpcServer;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.log.HBaseMarkers;
import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure;
import org.apache.hadoop.hbase.master.assignment.MoveRegionProcedure;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.master.assignment.UnassignProcedure;
import org.apache.hadoop.hbase.master.balancer.BalancerChore;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
@ -135,7 +132,6 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil.NonceProcedu
import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure;
import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch;
import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
import org.apache.hadoop.hbase.master.procedure.RecoverMetaProcedure;
import org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.master.procedure.TruncateTableProcedure;
@ -225,7 +221,6 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
@ -833,45 +828,6 @@ public class HMaster extends HRegionServer implements MasterServices {
this.mpmHost.initialize(this, this.metricsMaster);
}
private static final ImmutableSet<Class<? extends Procedure>> UNSUPPORTED_PROCEDURES =
ImmutableSet.of(RecoverMetaProcedure.class, AssignProcedure.class, UnassignProcedure.class,
MoveRegionProcedure.class);
/**
* In HBASE-20811, we have introduced a new TRSP to assign/unassign/move regions, and it is
* incompatible with the old AssignProcedure/UnassignProcedure/MoveRegionProcedure. So we need to
* make sure that there are none these procedures when upgrading. If there are, the master will
* quit, you need to go back to the old version to finish these procedures first before upgrading.
*/
private void checkUnsupportedProcedure(
Map<Class<? extends Procedure>, List<Procedure<MasterProcedureEnv>>> procsByType)
throws HBaseIOException {
// Confirm that we do not have unfinished assign/unassign related procedures. It is not easy to
// support both the old assign/unassign procedures and the new TransitRegionStateProcedure as
// there will be conflict in the code for AM. We should finish all these procedures before
// upgrading.
for (Class<? extends Procedure> clazz : UNSUPPORTED_PROCEDURES) {
List<Procedure<MasterProcedureEnv>> procs = procsByType.get(clazz);
if (procs != null) {
LOG.error(
"Unsupported procedure type {} found, please rollback your master to the old" +
" version to finish them, and then try to upgrade again. The full procedure list: {}",
clazz, procs);
throw new HBaseIOException("Unsupported procedure type " + clazz + " found");
}
}
// A special check for SCP, as we do not support RecoverMetaProcedure any more so we need to
// make sure that no one will try to schedule it but SCP does have a state which will schedule
// it.
if (procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream()
.map(p -> (ServerCrashProcedure) p).anyMatch(ServerCrashProcedure::isInRecoverMetaState)) {
LOG.error("At least one ServerCrashProcedure is going to schedule a RecoverMetaProcedure," +
" which is not supported any more. Please rollback your master to the old version to" +
" finish them, and then try to upgrade again.");
throw new HBaseIOException("Unsupported procedure state found for ServerCrashProcedure");
}
}
// Will be overriden in test to inject customized AssignmentManager
@VisibleForTesting
protected AssignmentManager createAssignmentManager(MasterServices master) {
@ -965,13 +921,10 @@ public class HMaster extends HRegionServer implements MasterServices {
this.splitWALManager = new SplitWALManager(this);
}
createProcedureExecutor();
@SuppressWarnings("rawtypes")
Map<Class<? extends Procedure>, List<Procedure<MasterProcedureEnv>>> procsByType =
Map<Class<?>, List<Procedure<MasterProcedureEnv>>> procsByType =
procedureExecutor.getActiveProceduresNoCopy().stream()
.collect(Collectors.groupingBy(p -> p.getClass()));
checkUnsupportedProcedure(procsByType);
// Create Assignment Manager
this.assignmentManager = createAssignmentManager(this);
this.assignmentManager.start();

View File

@ -29,6 +29,8 @@ import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.AssignRegionStateData;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState;
@ -140,4 +142,10 @@ public class AssignProcedure extends RegionTransitionProcedure {
protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) {
return env.getAssignmentManager().getAssignmentManagerMetrics().getAssignProcMetrics();
}
@VisibleForTesting
@Override
public void setProcId(long procId) {
super.setProcId(procId);
}
}

View File

@ -69,7 +69,8 @@ public abstract class RegionTransitionProcedure extends Procedure<MasterProcedur
return regionInfo;
}
protected void setRegionInfo(final RegionInfo regionInfo) {
@VisibleForTesting
public void setRegionInfo(final RegionInfo regionInfo) {
this.regionInfo = regionInfo;
}

View File

@ -25,7 +25,10 @@ import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.mutable.MutableLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@ -45,6 +48,11 @@ import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.log.HBaseMarkers;
import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
import org.apache.hadoop.hbase.master.assignment.MoveRegionProcedure;
import org.apache.hadoop.hbase.master.assignment.UnassignProcedure;
import org.apache.hadoop.hbase.master.procedure.RecoverMetaProcedure;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
import org.apache.hadoop.hbase.procedure2.store.LeaseRecovery;
@ -65,6 +73,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet;
import org.apache.hbase.thirdparty.com.google.common.math.IntMath;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
@ -298,6 +307,46 @@ public class RegionProcedureStore extends ProcedureStoreBase {
null);
}
@SuppressWarnings("deprecation")
private static final ImmutableSet<Class<?>> UNSUPPORTED_PROCEDURES =
ImmutableSet.of(RecoverMetaProcedure.class, AssignProcedure.class, UnassignProcedure.class,
MoveRegionProcedure.class);
/**
* In HBASE-20811, we have introduced a new TRSP to assign/unassign/move regions, and it is
* incompatible with the old AssignProcedure/UnassignProcedure/MoveRegionProcedure. So we need to
* make sure that there are none these procedures when upgrading. If there are, the master will
* quit, you need to go back to the old version to finish these procedures first before upgrading.
*/
private void checkUnsupportedProcedure(Map<Class<?>, List<Procedure<?>>> procsByType)
throws HBaseIOException {
// Confirm that we do not have unfinished assign/unassign related procedures. It is not easy to
// support both the old assign/unassign procedures and the new TransitRegionStateProcedure as
// there will be conflict in the code for AM. We should finish all these procedures before
// upgrading.
for (Class<?> clazz : UNSUPPORTED_PROCEDURES) {
List<Procedure<?>> procs = procsByType.get(clazz);
if (procs != null) {
LOG.error("Unsupported procedure type {} found, please rollback your master to the old" +
" version to finish them, and then try to upgrade again." +
" See https://hbase.apache.org/book.html#upgrade2.2 for more details." +
" The full procedure list: {}", clazz, procs);
throw new HBaseIOException("Unsupported procedure type " + clazz + " found");
}
}
// A special check for SCP, as we do not support RecoverMetaProcedure any more so we need to
// make sure that no one will try to schedule it but SCP does have a state which will schedule
// it.
if (procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream()
.map(p -> (ServerCrashProcedure) p).anyMatch(ServerCrashProcedure::isInRecoverMetaState)) {
LOG.error("At least one ServerCrashProcedure is going to schedule a RecoverMetaProcedure," +
" which is not supported any more. Please rollback your master to the old version to" +
" finish them, and then try to upgrade again." +
" See https://hbase.apache.org/book.html#upgrade2.2 for more details.");
throw new HBaseIOException("Unsupported procedure state found for ServerCrashProcedure");
}
}
@SuppressWarnings("deprecation")
private void tryMigrate(FileSystem fs) throws IOException {
Configuration conf = server.getConfiguration();
@ -311,7 +360,8 @@ public class RegionProcedureStore extends ProcedureStoreBase {
store.start(numThreads);
store.recoverLease();
MutableLong maxProcIdSet = new MutableLong(-1);
MutableLong maxProcIdFromProcs = new MutableLong(-1);
List<Procedure<?>> procs = new ArrayList<>();
Map<Class<?>, List<Procedure<?>>> activeProcsByType = new HashMap<>();
store.load(new ProcedureLoader() {
@Override
@ -321,16 +371,13 @@ public class RegionProcedureStore extends ProcedureStoreBase {
@Override
public void load(ProcedureIterator procIter) throws IOException {
long procCount = 0;
while (procIter.hasNext()) {
Procedure<?> proc = procIter.next();
update(proc);
procCount++;
if (proc.getProcId() > maxProcIdFromProcs.longValue()) {
maxProcIdFromProcs.setValue(proc.getProcId());
procs.add(proc);
if (!proc.isFinished()) {
activeProcsByType.computeIfAbsent(proc.getClass(), k -> new ArrayList<>()).add(proc);
}
}
LOG.info("Migrated {} procedures", procCount);
}
@Override
@ -347,6 +394,22 @@ public class RegionProcedureStore extends ProcedureStoreBase {
}
}
});
// check whether there are unsupported procedures, this could happen when we are migrating from
// 2.1-. We used to do this in HMaster, after loading all the procedures from procedure store,
// but here we have to do it before migrating, otherwise, if we find some unsupported
// procedures, the users can not go back to 2.1 to finish them any more, as all the data are now
// in the new region based procedure store, which is not supported in 2.1-.
checkUnsupportedProcedure(activeProcsByType);
MutableLong maxProcIdFromProcs = new MutableLong(-1);
for (Procedure<?> proc : procs) {
update(proc);
if (proc.getProcId() > maxProcIdFromProcs.longValue()) {
maxProcIdFromProcs.setValue(proc.getProcId());
}
}
LOG.info("Migrated {} existing procedures from the old storage format.", procs.size());
LOG.info("The WALProcedureStore max pid is {}, and the max pid of all loaded procedures is {}",
maxProcIdSet.longValue(), maxProcIdFromProcs.longValue());
// Theoretically, the maxProcIdSet should be greater than or equal to maxProcIdFromProcs, but

View File

@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hbase.procedure2.store.region;
import static org.hamcrest.CoreMatchers.startsWith;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.fail;
@ -32,6 +34,10 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseCommonTestingUtility;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.LoadCounter;
import org.apache.hadoop.hbase.procedure2.store.LeaseRecovery;
import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureIterator;
@ -140,4 +146,20 @@ public class TestRegionProcedureStoreMigration {
// make sure the old proc wal directory has been deleted.
assertFalse(fs.exists(oldProcWALDir));
}
@Test
public void testMigrateWithUnsupportedProcedures() throws IOException {
AssignProcedure assignProc = new AssignProcedure();
assignProc.setProcId(1L);
assignProc.setRegionInfo(RegionInfoBuilder.newBuilder(TableName.valueOf("table")).build());
walStore.insert(assignProc, null);
walStore.stop(true);
try {
store = RegionProcedureStoreTestHelper.createStore(htu.getConfiguration(), new LoadCounter());
fail("Should fail since AssignProcedure is not supported");
} catch (HBaseIOException e) {
assertThat(e.getMessage(), startsWith("Unsupported"));
}
}
}