From fc21dc854bce453302ecbe5388657597c201f268 Mon Sep 17 00:00:00 2001 From: zhangduo Date: Tue, 28 Aug 2018 06:46:00 +0800 Subject: [PATCH] HBASE-21017 Revisit the expected states for open/close --- .../master/assignment/AssignmentManager.java | 55 ++++++++++++--- .../assignment/CloseRegionProcedure.java | 6 ++ .../assignment/OpenRegionProcedure.java | 6 ++ .../assignment/RegionRemoteProcedureBase.java | 40 ++++++++--- .../hbase/master/assignment/RegionStates.java | 16 ----- .../hadoop/hbase/client/TestEnableTable.java | 68 ------------------- 6 files changed, 88 insertions(+), 103 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index a91f8e45a4e..745703f8065 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -552,7 +552,7 @@ public class AssignmentManager implements ServerListener { TransitRegionStateProcedure proc; regionNode.lock(); try { - preTransitCheck(regionNode, RegionStates.STATES_EXPECTED_ON_OPEN); + preTransitCheck(regionNode, STATES_EXPECTED_ON_ASSIGN); proc = TransitRegionStateProcedure.assign(getProcedureEnvironment(), regionInfo, sn); regionNode.setProcedure(proc); } finally { @@ -573,7 +573,7 @@ public class AssignmentManager implements ServerListener { TransitRegionStateProcedure proc; regionNode.lock(); try { - preTransitCheck(regionNode, RegionStates.STATES_EXPECTED_ON_CLOSE); + preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE); proc = TransitRegionStateProcedure.unassign(getProcedureEnvironment(), regionInfo); regionNode.setProcedure(proc); } finally { @@ -591,7 +591,7 @@ public class AssignmentManager implements ServerListener { TransitRegionStateProcedure proc; regionNode.lock(); try { - preTransitCheck(regionNode, RegionStates.STATES_EXPECTED_ON_CLOSE); + preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE); regionNode.checkOnline(); proc = TransitRegionStateProcedure.move(getProcedureEnvironment(), regionInfo, targetServer); regionNode.setProcedure(proc); @@ -1410,6 +1410,35 @@ public class AssignmentManager implements ServerListener { return regionState != null ? regionState.getRegionInfo() : null; } + // ============================================================================================ + // Expected states on region state transition. + // Notice that there is expected states for transiting to OPENING state, this is because SCP. + // See the comments in regionOpening method for more details. + // ============================================================================================ + private static final State[] STATES_EXPECTED_ON_OPEN = { + State.OPENING, // Normal case + State.OPEN // Retrying + }; + + private static final State[] STATES_EXPECTED_ON_CLOSING = { + State.OPEN, // Normal case + State.CLOSING, // Retrying + State.SPLITTING, // Offline the split parent + State.MERGING // Offline the merge parents + }; + + private static final State[] STATES_EXPECTED_ON_CLOSED = { + State.CLOSING, // Normal case + State.CLOSED // Retrying + }; + + // This is for manually scheduled region assign, can add other states later if we find out other + // usages + private static final State[] STATES_EXPECTED_ON_ASSIGN = { State.CLOSED, State.OFFLINE }; + + // We only allow unassign or move a region which is in OPEN state. + private static final State[] STATES_EXPECTED_ON_UNASSIGN_OR_MOVE = { State.OPEN }; + // ============================================================================================ // Region Status update // Should only be called in TransitRegionStateProcedure @@ -1432,7 +1461,10 @@ public class AssignmentManager implements ServerListener { // should be called within the synchronized block of RegionStateNode void regionOpening(RegionStateNode regionNode) throws IOException { - transitStateAndUpdate(regionNode, State.OPENING, RegionStates.STATES_EXPECTED_ON_OPEN); + // As in SCP, for performance reason, there is no TRSP attached with this region, we will not + // update the region state, which means that the region could be in any state when we want to + // assign it after a RS crash. So here we do not pass the expectedStates parameter. + transitStateAndUpdate(regionNode, State.OPENING); regionStates.addRegionToServer(regionNode); // update the operation count metrics metrics.incrementOperationCounter(); @@ -1468,7 +1500,7 @@ public class AssignmentManager implements ServerListener { void regionOpened(RegionStateNode regionNode) throws IOException { // TODO: OPENING Updates hbase:meta too... we need to do both here and there? // That is a lot of hbase:meta writing. - transitStateAndUpdate(regionNode, State.OPEN, RegionStates.STATES_EXPECTED_ON_OPEN); + transitStateAndUpdate(regionNode, State.OPEN, STATES_EXPECTED_ON_OPEN); RegionInfo hri = regionNode.getRegionInfo(); if (isMetaRegion(hri)) { // Usually we'd set a table ENABLED at this stage but hbase:meta is ALWAYs enabled, it @@ -1483,8 +1515,7 @@ public class AssignmentManager implements ServerListener { // should be called within the synchronized block of RegionStateNode void regionClosing(RegionStateNode regionNode) throws IOException { - transitStateAndUpdate(regionNode, State.CLOSING, RegionStates.STATES_EXPECTED_ON_CLOSE); - regionStateStore.updateRegionLocation(regionNode); + transitStateAndUpdate(regionNode, State.CLOSING, STATES_EXPECTED_ON_CLOSING); RegionInfo hri = regionNode.getRegionInfo(); // Set meta has not initialized early. so people trying to create/edit tables will wait @@ -1502,8 +1533,13 @@ public class AssignmentManager implements ServerListener { void regionClosed(RegionStateNode regionNode, boolean normally) throws IOException { RegionState.State state = regionNode.getState(); ServerName regionLocation = regionNode.getRegionLocation(); - regionNode.transitionState(normally ? State.CLOSED : State.ABNORMALLY_CLOSED, - RegionStates.STATES_EXPECTED_ON_CLOSE); + if (normally) { + regionNode.transitionState(State.CLOSED, STATES_EXPECTED_ON_CLOSED); + } else { + // For SCP + regionNode.transitionState(State.ABNORMALLY_CLOSED); + } + regionNode.setRegionLocation(null); boolean succ = false; try { regionStateStore.updateRegionLocation(regionNode); @@ -1517,7 +1553,6 @@ public class AssignmentManager implements ServerListener { } if (regionLocation != null) { regionNode.setLastHost(regionLocation); - regionNode.setRegionLocation(null); regionStates.removeRegionFromServer(regionLocation, regionNode); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java index e446e176f6f..0d22a9c0164 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/CloseRegionProcedure.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master.assignment; import java.io.IOException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionCloseOperation; import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; @@ -79,4 +80,9 @@ public class CloseRegionProcedure extends RegionRemoteProcedureBase { assignCandidate = ProtobufUtil.toServerName(data.getAssignCandidate()); } } + + @Override + protected boolean shouldDispatch(RegionStateNode regionNode) { + return !regionNode.isInState(RegionState.State.CLOSED, RegionState.State.ABNORMALLY_CLOSED); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java index 1a7969745ef..125ef115735 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/OpenRegionProcedure.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master.assignment; import java.io.IOException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.RSProcedureDispatcher.RegionOpenOperation; import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; @@ -64,4 +65,9 @@ public class OpenRegionProcedure extends RegionRemoteProcedureBase { super.deserializeStateData(serializer); serializer.deserialize(OpenRegionProcedureStateData.class); } + + @Override + protected boolean shouldDispatch(RegionStateNode regionNode) { + return !regionNode.isInState(RegionState.State.OPEN); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java index 6770e6836dd..3e05cde1d21 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionRemoteProcedureBase.java @@ -77,16 +77,16 @@ public abstract class RegionRemoteProcedureBase extends Procedure getRegionEvent(MasterProcedureEnv env) { - return env.getAssignmentManager().getRegionStates().getOrCreateRegionStateNode(region) - .getProcedureEvent(); + private RegionStateNode getRegionNode(MasterProcedureEnv env) { + return env.getAssignmentManager().getRegionStates().getRegionStateNode(region); } @Override - public void remoteCallFailed(MasterProcedureEnv env, ServerName remote, - IOException exception) { - ProcedureEvent event = getRegionEvent(env); - synchronized (event) { + public void remoteCallFailed(MasterProcedureEnv env, ServerName remote, IOException exception) { + RegionStateNode regionNode = getRegionNode(env); + regionNode.lock(); + try { + ProcedureEvent event = regionNode.getProcedureEvent(); if (event.isReady()) { LOG.warn( "The procedure event of procedure {} for region {} to server {} is not suspended, " + @@ -97,6 +97,8 @@ public abstract class RegionRemoteProcedureBase extends Procedure + * Usually this will not happen if we do not allow assigning a already onlined region. But if we + * have something wrong in the RSProcedureDispatcher, where we have already sent the request to + * RS, but then we tell the upper layer the remote call is failed due to rpc timeout or connection + * closed or anything else, then this issue can still happen. So here we add a check to make it + * more robust. + */ + protected abstract boolean shouldDispatch(RegionStateNode regionNode); + @Override protected Procedure[] execute(MasterProcedureEnv env) throws ProcedureYieldException, ProcedureSuspendedException, InterruptedException { @@ -122,8 +135,15 @@ public abstract class RegionRemoteProcedureBase extends Procedure event = getRegionEvent(env); - synchronized (event) { + RegionStateNode regionNode = getRegionNode(env); + regionNode.lock(); + try { + if (!shouldDispatch(regionNode)) { + return null; + } + // The code which wakes us up also needs to lock the RSN so here we do not need to synchronize + // on the event. + ProcedureEvent event = regionNode.getProcedureEvent(); try { env.getRemoteDispatcher().addOperationToNode(targetServer, this); } catch (FailedRemoteDispatchException e) { @@ -136,6 +156,8 @@ public abstract class RegionRemoteProcedureBase extends Procedure regions = TEST_UTIL.getAdmin().getTableRegions(tableName); - assertEquals(1, regions.size()); - for (HRegionInfo region : regions) { - TEST_UTIL.getAdmin().assign(region.getEncodedNameAsBytes()); - } - LOG.debug("Waiting for table assigned " + tableName); - TEST_UTIL.waitUntilAllRegionsAssigned(tableName); - List onlineRegions = admin.getOnlineRegions( - rs2.getRegionServer().getServerName()); - ArrayList tableRegions = filterTableRegions(tableName, onlineRegions); - assertEquals(1, tableRegions.size()); - } - - private ArrayList filterTableRegions(final TableName tableName, - List onlineRegions) { - return Lists.newArrayList(Iterables.filter(onlineRegions, new Predicate() { - @Override - public boolean apply(HRegionInfo input) { - return input.getTable().equals(tableName); - } - })); - } - /** * We were only clearing rows that had a hregioninfo column in hbase:meta. Mangled rows that * were missing the hregioninfo because of error were being left behind messing up any