From 62ad94c2b5d278210d24329850402c22c26c928c Mon Sep 17 00:00:00 2001 From: zhangduo Date: Fri, 10 May 2019 20:50:59 +0800 Subject: [PATCH] HBASE-22365 Region may be opened on two RegionServers --- .../master/assignment/AssignmentManager.java | 101 ++++---- .../assignment/CloseRegionProcedure.java | 22 +- .../assignment/OpenRegionProcedure.java | 64 ++--- .../assignment/RegionRemoteProcedureBase.java | 49 +++- .../TransitRegionStateProcedure.java | 11 +- .../assignment/TestSCPGetRegionsRace.java | 218 ++++++++++++++++++ 6 files changed, 381 insertions(+), 84 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestSCPGetRegionsRace.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 5bdbb927690..921101bbfc9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -213,6 +213,9 @@ public class AssignmentManager { try { regionNode.setRegionLocation(regionState.getServerName()); regionNode.setState(regionState.getState()); + if (regionNode.getProcedure() != null) { + regionNode.getProcedure().stateLoaded(this, regionNode); + } setMetaAssigned(regionState.getRegion(), regionState.getState() == State.OPEN); } finally { regionNode.unlock(); @@ -235,14 +238,12 @@ public class AssignmentManager { TransitRegionStateProcedure existingProc = regionNode.getProcedure(); if (existingProc != null) { // This is possible, as we will detach the procedure from the RSN before we - // actually finish the procedure. This is because that, we will update the region state - // directly in the reportTransition method for TRSP, and theoretically the region transition - // has been done, so we need to detach the procedure from the RSN. But actually the - // procedure has not been marked as done in the pv2 framework yet, so it is possible that we - // schedule a new TRSP immediately and when arriving here, we will find out that there are - // multiple TRSPs for the region. But we can make sure that, only the last one can take the - // charge, the previous ones should have all been finished already. - // So here we will compare the proc id, the greater one will win. + // actually finish the procedure. This is because that, we will detach the TRSP from the RSN + // during execution, at that time, the procedure has not been marked as done in the pv2 + // framework yet, so it is possible that we schedule a new TRSP immediately and when + // arriving here, we will find out that there are multiple TRSPs for the region. But we can + // make sure that, only the last one can take the charge, the previous ones should have all + // been finished already. So here we will compare the proc id, the greater one will win. if (existingProc.getProcId() < proc.getProcId()) { // the new one wins, unset and set it to the new one below regionNode.unsetProcedure(existingProc); @@ -1307,7 +1308,6 @@ public class AssignmentManager { // In any of these cases, state is empty. For now, presume OFFLINE but there are probably // cases where we need to probe more to be sure this correct; TODO informed by experience. LOG.info(regionInfo.getEncodedName() + " regionState=null; presuming " + State.OFFLINE); - localState = State.OFFLINE; } RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo); @@ -1328,6 +1328,9 @@ public class AssignmentManager { } else if (localState == State.OFFLINE || regionInfo.isOffline()) { regionStates.addToOfflineRegions(regionNode); } + if (regionNode.getProcedure() != null) { + regionNode.getProcedure().stateLoaded(AssignmentManager.this, regionNode); + } } }); @@ -1488,8 +1491,9 @@ public class AssignmentManager { private static final State[] STATES_EXPECTED_ON_UNASSIGN_OR_MOVE = { State.OPEN }; // ============================================================================================ - // Region Status update - // Should only be called in TransitRegionStateProcedure + // Region Status update + // Should only be called in TransitRegionStateProcedure(and related procedures), as the locking + // and pre-assumptions are very tricky. // ============================================================================================ private void transitStateAndUpdate(RegionStateNode regionNode, RegionState.State newState, RegionState.State... expectedStates) throws IOException { @@ -1518,7 +1522,7 @@ public class AssignmentManager { metrics.incrementOperationCounter(); } - // should be called within the synchronized block of RegionStateNode. + // should be called under the RegionStateNode lock // The parameter 'giveUp' means whether we will try to open the region again, if it is true, then // we will persist the FAILED_OPEN state into hbase:meta. void regionFailedOpen(RegionStateNode regionNode, boolean giveUp) throws IOException { @@ -1544,24 +1548,7 @@ public class AssignmentManager { } } - // should be called within the synchronized block of RegionStateNode - void regionOpened(RegionStateNode regionNode) throws IOException { - // TODO: OPENING Updates hbase:meta too... we need to do both here and there? - // That is a lot of hbase:meta writing. - transitStateAndUpdate(regionNode, State.OPEN, STATES_EXPECTED_ON_OPEN); - RegionInfo hri = regionNode.getRegionInfo(); - if (isMetaRegion(hri)) { - // Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it - // can't be disabled -- so skip the RPC (besides... enabled is managed by TableStateManager - // which is backed by hbase:meta... Avoid setting ENABLED to avoid having to update state - // on table that contains state. - setMetaAssigned(hri, true); - } - regionStates.addRegionToServer(regionNode); - regionStates.removeFromFailedOpen(hri); - } - - // should be called within the synchronized block of RegionStateNode + // should be called under the RegionStateNode lock void regionClosing(RegionStateNode regionNode) throws IOException { transitStateAndUpdate(regionNode, State.CLOSING, STATES_EXPECTED_ON_CLOSING); @@ -1575,18 +1562,36 @@ public class AssignmentManager { metrics.incrementOperationCounter(); } - // should be called within the synchronized block of RegionStateNode - // The parameter 'normally' means whether we are closed cleanly, if it is true, then it means that - // we are closed due to a RS crash. - void regionClosed(RegionStateNode regionNode, boolean normally) throws IOException { + // for open and close, they will first be persist to the procedure store in + // RegionRemoteProcedureBase. So here we will first change the in memory state as it is considered + // as succeeded if the persistence to procedure store is succeeded, and then when the + // RegionRemoteProcedureBase is woken up, we will persist the RegionStateNode to hbase:meta. + + // should be called under the RegionStateNode lock + void regionOpenedWithoutPersistingToMeta(RegionStateNode regionNode) throws IOException { + regionNode.transitionState(State.OPEN, STATES_EXPECTED_ON_OPEN); + RegionInfo regionInfo = regionNode.getRegionInfo(); + regionStates.addRegionToServer(regionNode); + regionStates.removeFromFailedOpen(regionInfo); + } + + // should be called under the RegionStateNode lock + void regionClosedWithoutPersistingToMeta(RegionStateNode regionNode) throws IOException { + ServerName regionLocation = regionNode.getRegionLocation(); + regionNode.transitionState(State.CLOSED, STATES_EXPECTED_ON_CLOSED); + regionNode.setRegionLocation(null); + if (regionLocation != null) { + regionNode.setLastHost(regionLocation); + regionStates.removeRegionFromServer(regionLocation, regionNode); + } + } + + // should be called under the RegionStateNode lock + // for SCP + void regionClosedAbnormally(RegionStateNode regionNode) throws IOException { RegionState.State state = regionNode.getState(); ServerName regionLocation = regionNode.getRegionLocation(); - if (normally) { - regionNode.transitionState(State.CLOSED, STATES_EXPECTED_ON_CLOSED); - } else { - // For SCP - regionNode.transitionState(State.ABNORMALLY_CLOSED); - } + regionNode.transitionState(State.ABNORMALLY_CLOSED); regionNode.setRegionLocation(null); boolean succ = false; try { @@ -1605,6 +1610,22 @@ public class AssignmentManager { } } + void persistToMeta(RegionStateNode regionNode) throws IOException { + regionStateStore.updateRegionLocation(regionNode); + RegionInfo regionInfo = regionNode.getRegionInfo(); + if (isMetaRegion(regionInfo) && regionNode.getState() == State.OPEN) { + // Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it + // can't be disabled -- so skip the RPC (besides... enabled is managed by TableStateManager + // which is backed by hbase:meta... Avoid setting ENABLED to avoid having to update state + // on table that contains state. + setMetaAssigned(regionInfo, true); + } + } + + // ============================================================================================ + // The above methods can only be called in TransitRegionStateProcedure(and related procedures) + // ============================================================================================ + public void markRegionAsSplit(final RegionInfo parent, final ServerName serverName, final RegionInfo daughterA, final RegionInfo daughterB) throws IOException { // Update hbase:meta. Parent will be marked offline and split up in hbase:meta. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java index 4fd60f0eade..af319665f98 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; +import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionCloseOperation; import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; @@ -90,8 +91,8 @@ public class CloseRegionProcedure extends RegionRemoteProcedureBase { } @Override - protected void reportTransition(RegionStateNode regionNode, TransitionCode transitionCode, - long seqId) throws IOException { + protected void checkTransition(RegionStateNode regionNode, TransitionCode transitionCode, + long seqId) throws UnexpectedStateException { if (transitionCode != TransitionCode.CLOSED) { throw new UnexpectedStateException("Received report unexpected " + transitionCode + " transition, " + regionNode.toShortString() + ", " + this + ", expected CLOSED."); @@ -99,8 +100,19 @@ public class CloseRegionProcedure extends RegionRemoteProcedureBase { } @Override - protected void updateTransition(MasterProcedureEnv env, RegionStateNode regionNode, - TransitionCode transitionCode, long seqId) throws IOException { - env.getAssignmentManager().regionClosed(regionNode, true); + protected void updateTransitionWithoutPersistingToMeta(MasterProcedureEnv env, + RegionStateNode regionNode, TransitionCode transitionCode, long seqId) throws IOException { + assert transitionCode == TransitionCode.CLOSED; + env.getAssignmentManager().regionClosedWithoutPersistingToMeta(regionNode); + } + + @Override + protected void restoreSucceedState(AssignmentManager am, RegionStateNode regionNode, long seqId) + throws IOException { + if (regionNode.getState() == State.CLOSED) { + // should have already been persisted, ignore + return; + } + am.regionClosedWithoutPersistingToMeta(regionNode); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java index 579b7570b09..4f7fcef2414 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; +import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionOpenOperation; import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; @@ -77,19 +78,31 @@ public class OpenRegionProcedure extends RegionRemoteProcedureBase { return env.getAssignmentManager().getAssignmentManagerMetrics().getOpenProcMetrics(); } + private void regionOpenedWithoutPersistingToMeta(AssignmentManager am, RegionStateNode regionNode, + TransitionCode transitionCode, long openSeqNum) throws IOException { + if (openSeqNum < regionNode.getOpenSeqNum()) { + LOG.warn( + "Received report {} transition from {} for {}, pid={} but the new openSeqNum {}" + + " is less than the current one {}, ignoring...", + transitionCode, targetServer, regionNode, getProcId(), openSeqNum, + regionNode.getOpenSeqNum()); + } else { + regionNode.setOpenSeqNum(openSeqNum); + } + am.regionOpenedWithoutPersistingToMeta(regionNode); + } + @Override - protected void reportTransition(RegionStateNode regionNode, TransitionCode transitionCode, - long seqId) throws IOException { + protected void checkTransition(RegionStateNode regionNode, TransitionCode transitionCode, + long openSeqNum) throws UnexpectedStateException { switch (transitionCode) { case OPENED: - // this is the openSeqNum - if (seqId < 0) { + if (openSeqNum < 0) { throw new UnexpectedStateException("Received report unexpected " + TransitionCode.OPENED + - " transition openSeqNum=" + seqId + ", " + regionNode + ", proc=" + this); + " transition openSeqNum=" + openSeqNum + ", " + regionNode + ", proc=" + this); } break; case FAILED_OPEN: - // nothing to check break; default: throw new UnexpectedStateException( @@ -99,27 +112,26 @@ public class OpenRegionProcedure extends RegionRemoteProcedureBase { } @Override - protected void updateTransition(MasterProcedureEnv env, RegionStateNode regionNode, - TransitionCode transitionCode, long openSeqNum) throws IOException { - switch (transitionCode) { - case OPENED: - if (openSeqNum < regionNode.getOpenSeqNum()) { - LOG.warn( - "Received report {} transition from {} for {}, pid={} but the new openSeqNum {}" + - " is less than the current one {}, ignoring...", - transitionCode, targetServer, regionNode, getProcId(), openSeqNum, - regionNode.getOpenSeqNum()); - } else { - regionNode.setOpenSeqNum(openSeqNum); - } - env.getAssignmentManager().regionOpened(regionNode); - break; - case FAILED_OPEN: - env.getAssignmentManager().regionFailedOpen(regionNode, false); - break; - default: - throw new UnexpectedStateException("Unexpected transition code: " + transitionCode); + protected void updateTransitionWithoutPersistingToMeta(MasterProcedureEnv env, + RegionStateNode regionNode, TransitionCode transitionCode, long openSeqNum) + throws IOException { + if (transitionCode == TransitionCode.OPENED) { + regionOpenedWithoutPersistingToMeta(env.getAssignmentManager(), regionNode, transitionCode, + openSeqNum); + } else { + assert transitionCode == TransitionCode.FAILED_OPEN; + // will not persist to meta if giveUp is false + env.getAssignmentManager().regionFailedOpen(regionNode, false); } + } + @Override + protected void restoreSucceedState(AssignmentManager am, RegionStateNode regionNode, + long openSeqNum) throws IOException { + if (regionNode.getState() == State.OPEN) { + // should have already been persisted, ignore + return; + } + regionOpenedWithoutPersistingToMeta(am, regionNode, TransitionCode.OPENED, openSeqNum); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java index 9377d895d6b..fa009589c77 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java @@ -153,9 +153,13 @@ public abstract class RegionRemoteProcedureBase extends Procedure[] execute(MasterProcedureEnv env) throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException { @@ -254,7 +279,7 @@ public abstract class RegionRemoteProcedureBase extends Procedure EXCLUDE_SERVERS = new ArrayList<>(); + + private static final class ServerManagerForTest extends ServerManager { + + public ServerManagerForTest(MasterServices master) { + super(master); + } + + @Override + public List createDestinationServersList() { + return super.createDestinationServersList(EXCLUDE_SERVERS); + } + } + + private static CountDownLatch ARRIVE_REPORT; + + private static CountDownLatch RESUME_REPORT; + + private static CountDownLatch ARRIVE_GET; + + private static CountDownLatch RESUME_GET; + + private static final class AssignmentManagerForTest extends AssignmentManager { + + public AssignmentManagerForTest(MasterServices master) { + super(master); + } + + @Override + public ReportRegionStateTransitionResponse reportRegionStateTransition( + ReportRegionStateTransitionRequest req) throws PleaseHoldException { + if (req.getTransition(0).getTransitionCode() == TransitionCode.CLOSED) { + if (ARRIVE_REPORT != null) { + ARRIVE_REPORT.countDown(); + try { + RESUME_REPORT.await(); + RESUME_REPORT = null; + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + } + return super.reportRegionStateTransition(req); + } + + @Override + public List getRegionsOnServer(ServerName serverName) { + List regions = super.getRegionsOnServer(serverName); + if (ARRIVE_GET != null) { + ARRIVE_GET.countDown(); + try { + RESUME_GET.await(); + RESUME_GET = null; + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + return regions; + } + + } + + public static final class HMasterForTest extends HMaster { + + public HMasterForTest(Configuration conf) throws IOException, KeeperException { + super(conf); + } + + @Override + protected AssignmentManager createAssignmentManager(MasterServices master) { + return new AssignmentManagerForTest(master); + } + + @Override + protected ServerManager createServerManager(MasterServices master) throws IOException { + setupClusterConnection(); + return new ServerManagerForTest(master); + } + } + + private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); + + private static TableName NAME = TableName.valueOf("Assign"); + + private static byte[] CF = Bytes.toBytes("cf"); + + @BeforeClass + public static void setUp() throws Exception { + UTIL.startMiniCluster(StartMiniClusterOption.builder().masterClass(HMasterForTest.class) + .numMasters(1).numRegionServers(3).build()); + UTIL.createTable(NAME, CF); + UTIL.waitTableAvailable(NAME); + UTIL.getAdmin().balancerSwitch(false, true); + } + + @AfterClass + public static void tearDown() throws Exception { + UTIL.shutdownMiniCluster(); + } + + @Test + public void test() throws Exception { + RegionInfo region = + Iterables.getOnlyElement(UTIL.getMiniHBaseCluster().getRegions(NAME)).getRegionInfo(); + HMaster master = UTIL.getMiniHBaseCluster().getMaster(); + AssignmentManager am = master.getAssignmentManager(); + RegionStateNode rsn = am.getRegionStates().getRegionStateNode(region); + ServerName source = rsn.getRegionLocation(); + ServerName dest = + UTIL.getAdmin().getRegionServers().stream().filter(sn -> !sn.equals(source)).findAny().get(); + + ARRIVE_REPORT = new CountDownLatch(1); + RESUME_REPORT = new CountDownLatch(1); + + Future future = am.moveAsync(new RegionPlan(region, source, dest)); + + ARRIVE_REPORT.await(); + ARRIVE_REPORT = null; + // let's get procedure lock to stop the TRSP + IdLock procExecutionLock = master.getMasterProcedureExecutor().getProcExecutionLock(); + long procId = master.getProcedures().stream() + .filter(p -> p instanceof RegionRemoteProcedureBase).findAny().get().getProcId(); + IdLock.Entry lockEntry = procExecutionLock.getLockEntry(procId); + RESUME_REPORT.countDown(); + + // kill the source region server + ARRIVE_GET = new CountDownLatch(1); + RESUME_GET = new CountDownLatch(1); + UTIL.getMiniHBaseCluster().killRegionServer(source); + + // wait until we try to get the region list of the region server + ARRIVE_GET.await(); + ARRIVE_GET = null; + // release the procedure lock and let the TRSP to finish + procExecutionLock.releaseLockEntry(lockEntry); + future.get(); + + // resume the SCP + EXCLUDE_SERVERS.add(dest); + RESUME_GET.countDown(); + // wait until there are no SCPs and TRSPs + UTIL.waitFor(60000, () -> master.getProcedures().stream().allMatch(p -> p.isFinished() || + (!(p instanceof ServerCrashProcedure) && !(p instanceof TransitRegionStateProcedure)))); + + // assert the region is only on the dest server. + HRegionServer rs = UTIL.getMiniHBaseCluster().getRegionServer(dest); + assertNotNull(rs.getRegion(region.getEncodedName())); + assertNull(UTIL.getOtherRegionServer(rs).getRegion(region.getEncodedName())); + } +}