HBASE-26864 SplitTableRegionProcedure calls openParentRegions() at a … (#4261)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
Signed-off-by: Xiaolin Ha <haxiaolin@apache.org>
This commit is contained in:
huaxiangsun 2022-03-26 12:02:16 -07:00 committed by GitHub
parent 4ac600b04b
commit 698937d32b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 117 additions and 4 deletions

View File

@ -180,9 +180,15 @@ final class AssignmentManagerUtil {
for (RegionInfo hri : regions) {
// start the index from 1
for (int i = 1; i < regionReplication; i++) {
replicaRegionInfos.add(RegionReplicaUtil.getRegionInfoForReplica(hri, i));
RegionInfo ri = RegionReplicaUtil.getRegionInfoForReplica(hri, i);
// apply ignoreRITs to replica regions as well.
if (!ignoreIfInTransition || !env.getAssignmentManager().getRegionStates().
getOrCreateRegionStateNode(ri).isInTransition()) {
replicaRegionInfos.add(ri);
}
}
}
// create round robin procs. Note that we exclude the primary region's target server
TransitRegionStateProcedure[] replicaRegionAssignProcs =
env.getAssignmentManager().createRoundRobinAssignProcedures(replicaRegionInfos,

View File

@ -302,6 +302,11 @@ public class SplitTableRegionProcedure
break;
case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
addChildProcedure(createUnassignProcedures(env));
// createUnassignProcedures() can throw out IOException. If this happens,
// it wont reach state SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGION and no parent regions
// is closed as all created UnassignProcedures are rolled back. If it rolls back with
// state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call openParentRegion(),
// otherwise, it will result in OpenRegionProcedure for an already open region.
setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS);
break;
case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
@ -379,11 +384,12 @@ public class SplitTableRegionProcedure
deleteDaughterRegions(env);
break;
case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS:
// Doing nothing, in SPLIT_TABLE_REGION_CLOSE_PARENT_REGION,
// we will bring parent region online
openParentRegion(env);
break;
case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION:
openParentRegion(env);
// If it rolls back with state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call
// openParentRegion(), otherwise, it will result in OpenRegionProcedure for an
// already open region.
break;
case SPLIT_TABLE_REGION_PRE_OPERATION:
postRollBackSplitRegion(env);

View File

@ -20,10 +20,12 @@ package org.apache.hadoop.hbase.master.assignment;
import static org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil.insertData;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
@ -37,9 +39,14 @@ import org.apache.hadoop.hbase.client.CompactionState;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
@ -98,6 +105,39 @@ public class TestSplitTableRegionProcedure {
private static void setupConf(Configuration conf) {
conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1);
conf.setLong(HConstants.MAJOR_COMPACTION_PERIOD, 0);
conf.set("hbase.coprocessor.region.classes",
RegionServerHostingReplicaSlowOpenCopro.class.getName());
conf.setInt("hbase.client.sync.wait.timeout.msec", 1500);
}
/**
* This copro is used to slow down opening of the replica regions.
*/
public static class RegionServerHostingReplicaSlowOpenCopro
implements RegionCoprocessor, RegionObserver {
static int countForReplica = 0;
static boolean slowDownReplicaOpen = false;
@Override
public Optional<RegionObserver> getRegionObserver() {
return Optional.of(this);
}
@Override
public void preOpen(ObserverContext<RegionCoprocessorEnvironment> c) throws IOException {
int replicaId = c.getEnvironment().getRegion().getRegionInfo().getReplicaId();
if ((replicaId != RegionInfo.DEFAULT_REPLICA_ID) && (countForReplica == 0)) {
countForReplica ++;
while (slowDownReplicaOpen) {
LOG.info("Slow down replica region open a bit");
try {
Thread.sleep(100);
} catch (InterruptedException ie) {
// Ingore
}
}
}
}
}
@BeforeClass
@ -137,6 +177,67 @@ public class TestSplitTableRegionProcedure {
}
}
@Test
public void testRollbackForSplitTableRegionWithReplica() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());
final ProcedureExecutor<MasterProcedureEnv> procExec = getMasterProcedureExecutor();
RegionServerHostingReplicaSlowOpenCopro.slowDownReplicaOpen = true;
RegionInfo [] regions = MasterProcedureTestingUtility.createTable(
procExec, tableName, null, columnFamilyName1);
try {
HBaseTestingUtil.setReplicas(UTIL.getAdmin(), tableName, 2);
} catch (IOException ioe) {
}
// wait until the primary region is online.
HBaseTestingUtil.await(2000, () -> {
try {
AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager();
if (am == null) return false;
if (am.getRegionStates().getRegionState(regions[0]).isOpened()) {
return true;
}
return false;
} catch (Exception e) {
throw new RuntimeException(e);
}
});
// Split region of the table, it will fail and rollback as replica parent region
// is still at OPENING state.
long procId = procExec.submitProcedure(new SplitTableRegionProcedure(
procExec.getEnvironment(), regions[0], HConstants.CATALOG_FAMILY));
// Wait for the completion.
ProcedureTestingUtility.waitProcedure(procExec, procId);
// Let replica parent region open.
RegionServerHostingReplicaSlowOpenCopro.slowDownReplicaOpen = false;
// wait until the replica region is online.
HBaseTestingUtil.await(2000, () -> {
try {
AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager();
if (am == null) return false;
RegionInfo replicaRegion = RegionReplicaUtil.getRegionInfoForReplica(regions[0], 1);
if (am.getRegionStates().getRegionState(replicaRegion).isOpened()) {
return true;
}
return false;
} catch (Exception e) {
throw new RuntimeException(e);
}
});
ProcedureTestingUtility.assertProcFailed(procExec, procId);
// There should not be any active OpenRegionProcedure
procExec.getActiveProceduresNoCopy().forEach(p -> assertFalse(p instanceof OpenRegionProcedure));
}
@Test
public void testSplitTableRegion() throws Exception {
final TableName tableName = TableName.valueOf(name.getMethodName());