diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java index e7be28bb932..6ab6f3630b8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManagerUtil.java @@ -190,9 +190,15 @@ final class AssignmentManagerUtil { for (RegionInfo hri : regions) { // start the index from 1 for (int i = 1; i < regionReplication; i++) { - replicaRegionInfos.add(RegionReplicaUtil.getRegionInfoForReplica(hri, i)); + RegionInfo ri = RegionReplicaUtil.getRegionInfoForReplica(hri, i); + // apply ignoreRITs to replica regions as well. + if (!ignoreIfInTransition || !env.getAssignmentManager().getRegionStates(). + getOrCreateRegionStateNode(ri).isInTransition()) { + replicaRegionInfos.add(ri); + } } } + // create round robin procs. Note that we exclude the primary region's target server TransitRegionStateProcedure[] replicaRegionAssignProcs = env.getAssignmentManager().createRoundRobinAssignProcedures(replicaRegionInfos, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index effdba4f201..825b9895560 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -302,6 +302,11 @@ public class SplitTableRegionProcedure break; case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: addChildProcedure(createUnassignProcedures(env)); + // createUnassignProcedures() can throw out IOException. If this happens, + // it wont reach state SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGION and no parent regions + // is closed as all created UnassignProcedures are rolled back. If it rolls back with + // state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call openParentRegion(), + // otherwise, it will result in OpenRegionProcedure for an already open region. setNextState(SplitTableRegionState.SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS); break; case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: @@ -379,11 +384,12 @@ public class SplitTableRegionProcedure deleteDaughterRegions(env); break; case SPLIT_TABLE_REGIONS_CHECK_CLOSED_REGIONS: - // Doing nothing, in SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, - // we will bring parent region online + openParentRegion(env); break; case SPLIT_TABLE_REGION_CLOSE_PARENT_REGION: - openParentRegion(env); + // If it rolls back with state SPLIT_TABLE_REGION_CLOSE_PARENT_REGION, no need to call + // openParentRegion(), otherwise, it will result in OpenRegionProcedure for an + // already open region. break; case SPLIT_TABLE_REGION_PRE_OPERATION: postRollBackSplitRegion(env); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java index 4e07b9a8808..4485f70c629 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSplitTableRegionProcedure.java @@ -20,10 +20,12 @@ package org.apache.hadoop.hbase.master.assignment; import static org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil.insertData; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.fail; import java.io.IOException; import java.util.List; +import java.util.Optional; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; @@ -37,9 +39,14 @@ import org.apache.hadoop.hbase.client.CompactionState; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.coprocessor.ObserverContext; +import org.apache.hadoop.hbase.coprocessor.RegionCoprocessor; +import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; +import org.apache.hadoop.hbase.coprocessor.RegionObserver; import org.apache.hadoop.hbase.master.procedure.MasterProcedureConstants; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility; @@ -98,6 +105,39 @@ public class TestSplitTableRegionProcedure { private static void setupConf(Configuration conf) { conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); conf.setLong(HConstants.MAJOR_COMPACTION_PERIOD, 0); + conf.set("hbase.coprocessor.region.classes", + RegionServerHostingReplicaSlowOpenCopro.class.getName()); + conf.setInt("hbase.client.sync.wait.timeout.msec", 1500); + } + + /** + * This copro is used to slow down opening of the replica regions. + */ + public static class RegionServerHostingReplicaSlowOpenCopro + implements RegionCoprocessor, RegionObserver { + static int countForReplica = 0; + static boolean slowDownReplicaOpen = false; + + @Override + public Optional getRegionObserver() { + return Optional.of(this); + } + + @Override + public void preOpen(ObserverContext c) throws IOException { + int replicaId = c.getEnvironment().getRegion().getRegionInfo().getReplicaId(); + if ((replicaId != RegionInfo.DEFAULT_REPLICA_ID) && (countForReplica == 0)) { + countForReplica ++; + while (slowDownReplicaOpen) { + LOG.info("Slow down replica region open a bit"); + try { + Thread.sleep(100); + } catch (InterruptedException ie) { + // Ingore + } + } + } + } } @BeforeClass @@ -137,6 +177,67 @@ public class TestSplitTableRegionProcedure { } } + + @Test + public void testRollbackForSplitTableRegionWithReplica() throws Exception { + final TableName tableName = TableName.valueOf(name.getMethodName()); + final ProcedureExecutor procExec = getMasterProcedureExecutor(); + + + RegionServerHostingReplicaSlowOpenCopro.slowDownReplicaOpen = true; + RegionInfo [] regions = MasterProcedureTestingUtility.createTable( + procExec, tableName, null, columnFamilyName1); + + try { + HBaseTestingUtility.setReplicas(UTIL.getAdmin(), tableName, 2); + } catch (IOException ioe) { + + } + + // wait until the primary region is online. + HBaseTestingUtility.await(2000, () -> { + try { + AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager(); + if (am == null) return false; + if (am.getRegionStates().getRegionState(regions[0]).isOpened()) { + return true; + } + return false; + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + + // Split region of the table, it will fail and rollback as replica parent region + // is still at OPENING state. + long procId = procExec.submitProcedure(new SplitTableRegionProcedure( + procExec.getEnvironment(), regions[0], HConstants.CATALOG_FAMILY)); + // Wait for the completion. + ProcedureTestingUtility.waitProcedure(procExec, procId); + + // Let replica parent region open. + RegionServerHostingReplicaSlowOpenCopro.slowDownReplicaOpen = false; + + // wait until the replica region is online. + HBaseTestingUtility.await(2000, () -> { + try { + AssignmentManager am = UTIL.getHBaseCluster().getMaster().getAssignmentManager(); + if (am == null) return false; + RegionInfo replicaRegion = RegionReplicaUtil.getRegionInfoForReplica(regions[0], 1); + if (am.getRegionStates().getRegionState(replicaRegion).isOpened()) { + return true; + } + return false; + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + + ProcedureTestingUtility.assertProcFailed(procExec, procId); + // There should not be any active OpenRegionProcedure + procExec.getActiveProceduresNoCopy().forEach(p -> assertFalse(p instanceof OpenRegionProcedure)); + } + @Test public void testSplitTableRegion() throws Exception { final TableName tableName = TableName.valueOf(name.getMethodName());