diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index b5f19d85d67..90299e566ef 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -98,14 +98,11 @@ import org.apache.hadoop.hbase.ipc.RpcServer; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.log.HBaseMarkers; import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode; -import org.apache.hadoop.hbase.master.assignment.AssignProcedure; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; import org.apache.hadoop.hbase.master.assignment.MergeTableRegionsProcedure; -import org.apache.hadoop.hbase.master.assignment.MoveRegionProcedure; import org.apache.hadoop.hbase.master.assignment.RegionStateNode; import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure; -import org.apache.hadoop.hbase.master.assignment.UnassignProcedure; import org.apache.hadoop.hbase.master.balancer.BalancerChore; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore; @@ -135,7 +132,6 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil.NonceProcedu import org.apache.hadoop.hbase.master.procedure.ModifyTableProcedure; import org.apache.hadoop.hbase.master.procedure.ProcedurePrepareLatch; import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait; -import org.apache.hadoop.hbase.master.procedure.RecoverMetaProcedure; import org.apache.hadoop.hbase.master.procedure.ReopenTableRegionsProcedure; import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; import org.apache.hadoop.hbase.master.procedure.TruncateTableProcedure; @@ -225,7 +221,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; -import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet; import org.apache.hbase.thirdparty.com.google.common.collect.Lists; import org.apache.hbase.thirdparty.com.google.common.collect.Maps; @@ -833,45 +828,6 @@ public class HMaster extends HRegionServer implements MasterServices { this.mpmHost.initialize(this, this.metricsMaster); } - private static final ImmutableSet> UNSUPPORTED_PROCEDURES = - ImmutableSet.of(RecoverMetaProcedure.class, AssignProcedure.class, UnassignProcedure.class, - MoveRegionProcedure.class); - - /** - * In HBASE-20811, we have introduced a new TRSP to assign/unassign/move regions, and it is - * incompatible with the old AssignProcedure/UnassignProcedure/MoveRegionProcedure. So we need to - * make sure that there are none these procedures when upgrading. If there are, the master will - * quit, you need to go back to the old version to finish these procedures first before upgrading. - */ - private void checkUnsupportedProcedure( - Map, List>> procsByType) - throws HBaseIOException { - // Confirm that we do not have unfinished assign/unassign related procedures. It is not easy to - // support both the old assign/unassign procedures and the new TransitRegionStateProcedure as - // there will be conflict in the code for AM. We should finish all these procedures before - // upgrading. - for (Class clazz : UNSUPPORTED_PROCEDURES) { - List> procs = procsByType.get(clazz); - if (procs != null) { - LOG.error( - "Unsupported procedure type {} found, please rollback your master to the old" + - " version to finish them, and then try to upgrade again. The full procedure list: {}", - clazz, procs); - throw new HBaseIOException("Unsupported procedure type " + clazz + " found"); - } - } - // A special check for SCP, as we do not support RecoverMetaProcedure any more so we need to - // make sure that no one will try to schedule it but SCP does have a state which will schedule - // it. - if (procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream() - .map(p -> (ServerCrashProcedure) p).anyMatch(ServerCrashProcedure::isInRecoverMetaState)) { - LOG.error("At least one ServerCrashProcedure is going to schedule a RecoverMetaProcedure," + - " which is not supported any more. Please rollback your master to the old version to" + - " finish them, and then try to upgrade again."); - throw new HBaseIOException("Unsupported procedure state found for ServerCrashProcedure"); - } - } - // Will be overriden in test to inject customized AssignmentManager @VisibleForTesting protected AssignmentManager createAssignmentManager(MasterServices master) { @@ -965,13 +921,10 @@ public class HMaster extends HRegionServer implements MasterServices { this.splitWALManager = new SplitWALManager(this); } createProcedureExecutor(); - @SuppressWarnings("rawtypes") - Map, List>> procsByType = + Map, List>> procsByType = procedureExecutor.getActiveProceduresNoCopy().stream() .collect(Collectors.groupingBy(p -> p.getClass())); - checkUnsupportedProcedure(procsByType); - // Create Assignment Manager this.assignmentManager = createAssignmentManager(this); this.assignmentManager.start(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java index 0079033e2c8..25c94ba5bbc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java @@ -29,6 +29,8 @@ import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException; import org.apache.hadoop.hbase.procedure2.RemoteProcedureDispatcher.RemoteOperation; import org.apache.yetus.audience.InterfaceAudience; +import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; + import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.AssignRegionStateData; import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RegionTransitionState; @@ -140,4 +142,10 @@ public class AssignProcedure extends RegionTransitionProcedure { protected ProcedureMetrics getProcedureMetrics(MasterProcedureEnv env) { return env.getAssignmentManager().getAssignmentManagerMetrics().getAssignProcMetrics(); } + + @VisibleForTesting + @Override + public void setProcId(long procId) { + super.setProcId(procId); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java index b94b45dc918..59f2fbfa69e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java @@ -69,7 +69,8 @@ public abstract class RegionTransitionProcedure extends Procedure> UNSUPPORTED_PROCEDURES = + ImmutableSet.of(RecoverMetaProcedure.class, AssignProcedure.class, UnassignProcedure.class, + MoveRegionProcedure.class); + + /** + * In HBASE-20811, we have introduced a new TRSP to assign/unassign/move regions, and it is + * incompatible with the old AssignProcedure/UnassignProcedure/MoveRegionProcedure. So we need to + * make sure that there are none these procedures when upgrading. If there are, the master will + * quit, you need to go back to the old version to finish these procedures first before upgrading. + */ + private void checkUnsupportedProcedure(Map, List>> procsByType) + throws HBaseIOException { + // Confirm that we do not have unfinished assign/unassign related procedures. It is not easy to + // support both the old assign/unassign procedures and the new TransitRegionStateProcedure as + // there will be conflict in the code for AM. We should finish all these procedures before + // upgrading. + for (Class clazz : UNSUPPORTED_PROCEDURES) { + List> procs = procsByType.get(clazz); + if (procs != null) { + LOG.error("Unsupported procedure type {} found, please rollback your master to the old" + + " version to finish them, and then try to upgrade again." + + " See https://hbase.apache.org/book.html#upgrade2.2 for more details." + + " The full procedure list: {}", clazz, procs); + throw new HBaseIOException("Unsupported procedure type " + clazz + " found"); + } + } + // A special check for SCP, as we do not support RecoverMetaProcedure any more so we need to + // make sure that no one will try to schedule it but SCP does have a state which will schedule + // it. + if (procsByType.getOrDefault(ServerCrashProcedure.class, Collections.emptyList()).stream() + .map(p -> (ServerCrashProcedure) p).anyMatch(ServerCrashProcedure::isInRecoverMetaState)) { + LOG.error("At least one ServerCrashProcedure is going to schedule a RecoverMetaProcedure," + + " which is not supported any more. Please rollback your master to the old version to" + + " finish them, and then try to upgrade again." + + " See https://hbase.apache.org/book.html#upgrade2.2 for more details."); + throw new HBaseIOException("Unsupported procedure state found for ServerCrashProcedure"); + } + } + @SuppressWarnings("deprecation") private void tryMigrate(FileSystem fs) throws IOException { Configuration conf = server.getConfiguration(); @@ -311,7 +360,8 @@ public class RegionProcedureStore extends ProcedureStoreBase { store.start(numThreads); store.recoverLease(); MutableLong maxProcIdSet = new MutableLong(-1); - MutableLong maxProcIdFromProcs = new MutableLong(-1); + List> procs = new ArrayList<>(); + Map, List>> activeProcsByType = new HashMap<>(); store.load(new ProcedureLoader() { @Override @@ -321,16 +371,13 @@ public class RegionProcedureStore extends ProcedureStoreBase { @Override public void load(ProcedureIterator procIter) throws IOException { - long procCount = 0; while (procIter.hasNext()) { Procedure proc = procIter.next(); - update(proc); - procCount++; - if (proc.getProcId() > maxProcIdFromProcs.longValue()) { - maxProcIdFromProcs.setValue(proc.getProcId()); + procs.add(proc); + if (!proc.isFinished()) { + activeProcsByType.computeIfAbsent(proc.getClass(), k -> new ArrayList<>()).add(proc); } } - LOG.info("Migrated {} procedures", procCount); } @Override @@ -347,6 +394,22 @@ public class RegionProcedureStore extends ProcedureStoreBase { } } }); + + // check whether there are unsupported procedures, this could happen when we are migrating from + // 2.1-. We used to do this in HMaster, after loading all the procedures from procedure store, + // but here we have to do it before migrating, otherwise, if we find some unsupported + // procedures, the users can not go back to 2.1 to finish them any more, as all the data are now + // in the new region based procedure store, which is not supported in 2.1-. + checkUnsupportedProcedure(activeProcsByType); + + MutableLong maxProcIdFromProcs = new MutableLong(-1); + for (Procedure proc : procs) { + update(proc); + if (proc.getProcId() > maxProcIdFromProcs.longValue()) { + maxProcIdFromProcs.setValue(proc.getProcId()); + } + } + LOG.info("Migrated {} existing procedures from the old storage format.", procs.size()); LOG.info("The WALProcedureStore max pid is {}, and the max pid of all loaded procedures is {}", maxProcIdSet.longValue(), maxProcIdFromProcs.longValue()); // Theoretically, the maxProcIdSet should be greater than or equal to maxProcIdFromProcs, but diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/procedure2/store/region/TestRegionProcedureStoreMigration.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/procedure2/store/region/TestRegionProcedureStoreMigration.java index 475ae595d26..44d7daaf6a8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/procedure2/store/region/TestRegionProcedureStoreMigration.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/procedure2/store/region/TestRegionProcedureStoreMigration.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hbase.procedure2.store.region; +import static org.hamcrest.CoreMatchers.startsWith; +import static org.hamcrest.MatcherAssert.assertThat; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.fail; @@ -32,6 +34,10 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseCommonTestingUtility; +import org.apache.hadoop.hbase.HBaseIOException; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.master.assignment.AssignProcedure; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility.LoadCounter; import org.apache.hadoop.hbase.procedure2.store.LeaseRecovery; import org.apache.hadoop.hbase.procedure2.store.ProcedureStore.ProcedureIterator; @@ -140,4 +146,20 @@ public class TestRegionProcedureStoreMigration { // make sure the old proc wal directory has been deleted. assertFalse(fs.exists(oldProcWALDir)); } + + @Test + public void testMigrateWithUnsupportedProcedures() throws IOException { + AssignProcedure assignProc = new AssignProcedure(); + assignProc.setProcId(1L); + assignProc.setRegionInfo(RegionInfoBuilder.newBuilder(TableName.valueOf("table")).build()); + walStore.insert(assignProc, null); + walStore.stop(true); + + try { + store = RegionProcedureStoreTestHelper.createStore(htu.getConfiguration(), new LoadCounter()); + fail("Should fail since AssignProcedure is not supported"); + } catch (HBaseIOException e) { + assertThat(e.getMessage(), startsWith("Unsupported")); + } + } }