diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 72040ae0b95..580a0b4b3a0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -72,6 +72,7 @@ import org.apache.hadoop.hbase.ipc.RpcServerInterface; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.ipc.ServerRpcController; import org.apache.hadoop.hbase.master.assignment.RegionStates; +import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure; import org.apache.hadoop.hbase.master.locking.LockProcedure; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; @@ -2582,30 +2583,40 @@ public class MasterRpcServices extends RSRpcServices implements } /** - * A 'raw' version of assign that does bulk and skirts Master state checks (assigns can be made - * during Master startup). For use by Hbck2. + * @throws ServiceException If no MasterProcedureExecutor */ - @Override - public MasterProtos.AssignsResponse assigns(RpcController controller, - MasterProtos.AssignsRequest request) - throws ServiceException { + private void checkMasterProcedureExecutor() throws ServiceException { if (this.master.getMasterProcedureExecutor() == null) { throw new ServiceException("Master's ProcedureExecutor not initialized; retry later"); } + } + + /** + * A 'raw' version of assign that does bulk and can skirt Master state checks if override + * is set; i.e. assigns can be forced during Master startup or if RegionState is unclean. + * Used by HBCK2. + */ + @Override + public MasterProtos.AssignsResponse assigns(RpcController controller, + MasterProtos.AssignsRequest request) throws ServiceException { + checkMasterProcedureExecutor(); MasterProtos.AssignsResponse.Builder responseBuilder = - MasterProtos.AssignsResponse.newBuilder(); + MasterProtos.AssignsResponse.newBuilder(); try { boolean override = request.getOverride(); LOG.info("{} assigns, override={}", master.getClientIdAuditPrefix(), override); for (HBaseProtos.RegionSpecifier rs: request.getRegionList()) { + long pid = Procedure.NO_PROC_ID; RegionInfo ri = getRegionInfo(rs); if (ri == null) { LOG.info("Unknown={}", rs); - responseBuilder.addPid(Procedure.NO_PROC_ID); - continue; + } else { + Procedure p = this.master.getAssignmentManager().createOneAssignProcedure(ri, override); + if (p != null) { + pid = this.master.getMasterProcedureExecutor().submitProcedure(p); + } } - responseBuilder.addPid(this.master.getMasterProcedureExecutor().submitProcedure(this.master - .getAssignmentManager().createOneAssignProcedure(ri, override))); + responseBuilder.addPid(pid); } return responseBuilder.build(); } catch (IOException ioe) { @@ -2614,30 +2625,31 @@ public class MasterRpcServices extends RSRpcServices implements } /** - * A 'raw' version of unassign that does bulk and skirts Master state checks (unassigns can be - * made during Master startup). For use by Hbck2. + * A 'raw' version of unassign that does bulk and can skirt Master state checks if override + * is set; i.e. unassigns can be forced during Master startup or if RegionState is unclean. + * Used by HBCK2. */ @Override public MasterProtos.UnassignsResponse unassigns(RpcController controller, - MasterProtos.UnassignsRequest request) - throws ServiceException { - if (this.master.getMasterProcedureExecutor() == null) { - throw new ServiceException("Master's ProcedureExecutor not initialized; retry later"); - } + MasterProtos.UnassignsRequest request) throws ServiceException { + checkMasterProcedureExecutor(); MasterProtos.UnassignsResponse.Builder responseBuilder = MasterProtos.UnassignsResponse.newBuilder(); try { boolean override = request.getOverride(); LOG.info("{} unassigns, override={}", master.getClientIdAuditPrefix(), override); for (HBaseProtos.RegionSpecifier rs: request.getRegionList()) { + long pid = Procedure.NO_PROC_ID; RegionInfo ri = getRegionInfo(rs); if (ri == null) { LOG.info("Unknown={}", rs); - responseBuilder.addPid(Procedure.NO_PROC_ID); - continue; + } else { + Procedure p = this.master.getAssignmentManager().createOneUnassignProcedure(ri, override); + if (p != null) { + pid = this.master.getMasterProcedureExecutor().submitProcedure(p); + } } - responseBuilder.addPid(this.master.getMasterProcedureExecutor().submitProcedure(this.master - .getAssignmentManager().createOneUnassignProcedure(ri, override))); + responseBuilder.addPid(pid); } return responseBuilder.build(); } catch (IOException ioe) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 360f7d28ef3..17e4c988b1c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -32,7 +32,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; import java.util.stream.Collectors; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CatalogFamilyFormat; import org.apache.hadoop.hbase.DoNotRetryIOException; @@ -84,9 +83,7 @@ import org.apache.yetus.audience.InterfaceAudience; import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; - import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition; import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; @@ -586,7 +583,8 @@ public class AssignmentManager { private void preTransitCheck(RegionStateNode regionNode, RegionState.State[] expectedStates) throws HBaseIOException { if (regionNode.getProcedure() != null) { - throw new HBaseIOException(regionNode + " is currently in transition"); + throw new HBaseIOException(regionNode + " is currently in transition; pid=" + + regionNode.getProcedure().getProcId()); } if (!regionNode.isInState(expectedStates)) { throw new DoNotRetryRegionException(UNEXPECTED_STATE_REGION + regionNode); @@ -596,25 +594,53 @@ public class AssignmentManager { } } - private TransitRegionStateProcedure createAssignProcedure(RegionInfo regionInfo, ServerName sn) - throws IOException { - // TODO: should we use getRegionStateNode? + /** + * Create an assign TransitRegionStateProcedure. Makes sure of RegionState. + * Throws exception if not appropriate UNLESS override is set. Used by hbck2 but also by + * straightline {@link #assign(RegionInfo, ServerName)} and + * {@link #assignAsync(RegionInfo, ServerName)}. + * @see #createAssignProcedure(RegionStateNode, ServerName) for a version that does NO checking + * used when only when no checking needed. + * @param override If false, check RegionState is appropriate for assign; if not throw exception. + */ + private TransitRegionStateProcedure createAssignProcedure(RegionInfo regionInfo, ServerName sn, + boolean override) throws IOException { RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(regionInfo); - TransitRegionStateProcedure proc; regionNode.lock(); try { - preTransitCheck(regionNode, STATES_EXPECTED_ON_ASSIGN); - proc = TransitRegionStateProcedure.assign(getProcedureEnvironment(), regionInfo, sn); - regionNode.setProcedure(proc); + if (override) { + if (regionNode.getProcedure() != null) { + regionNode.unsetProcedure(regionNode.getProcedure()); + } + } else { + preTransitCheck(regionNode, STATES_EXPECTED_ON_ASSIGN); + } + assert regionNode.getProcedure() == null; + return regionNode.setProcedure(TransitRegionStateProcedure.assign(getProcedureEnvironment(), + regionInfo, sn)); + } finally { + regionNode.unlock(); + } + } + + /** + * Create an assign TransitRegionStateProcedure. Does NO checking of RegionState. + * Presumes appriopriate state ripe for assign. + * @see #createAssignProcedure(RegionInfo, ServerName, boolean) + */ + private TransitRegionStateProcedure createAssignProcedure(RegionStateNode regionNode, + ServerName targetServer) { + regionNode.lock(); + try { + return regionNode.setProcedure(TransitRegionStateProcedure.assign(getProcedureEnvironment(), + regionNode.getRegionInfo(), targetServer)); } finally { regionNode.unlock(); } - return proc; } - // TODO: Need an async version of this for hbck2. public long assign(RegionInfo regionInfo, ServerName sn) throws IOException { - TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn); + TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn, false); ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), proc); return proc.getProcId(); } @@ -627,19 +653,15 @@ public class AssignmentManager { * Submits a procedure that assigns a region to a target server without waiting for it to finish * @param regionInfo the region we would like to assign * @param sn target server name - * @return - * @throws IOException */ public Future assignAsync(RegionInfo regionInfo, ServerName sn) throws IOException { - TransitRegionStateProcedure proc = createAssignProcedure(regionInfo, sn); - return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc); + return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), + createAssignProcedure(regionInfo, sn, false)); } /** * Submits a procedure that assigns a region without waiting for it to finish * @param regionInfo the region we would like to assign - * @return - * @throws IOException */ public Future assignAsync(RegionInfo regionInfo) throws IOException { return assignAsync(regionInfo, null); @@ -762,84 +784,86 @@ public class AssignmentManager { return RegionInfo.COMPARATOR.compare(left.getRegion(), right.getRegion()); } - private TransitRegionStateProcedure createAssignProcedure(RegionStateNode regionNode, - ServerName targetServer, boolean override) { - TransitRegionStateProcedure proc; - regionNode.lock(); - try { - if(override && regionNode.getProcedure() != null) { - regionNode.unsetProcedure(regionNode.getProcedure()); - } - assert regionNode.getProcedure() == null; - proc = TransitRegionStateProcedure.assign(getProcedureEnvironment(), - regionNode.getRegionInfo(), targetServer); - regionNode.setProcedure(proc); - } finally { - regionNode.unlock(); - } - return proc; - } - - private TransitRegionStateProcedure createUnassignProcedure(RegionStateNode regionNode, - boolean override) { - TransitRegionStateProcedure proc; - regionNode.lock(); - try { - if(override && regionNode.getProcedure() != null) { - regionNode.unsetProcedure(regionNode.getProcedure()); - } - assert regionNode.getProcedure() == null; - proc = TransitRegionStateProcedure.unassign(getProcedureEnvironment(), - regionNode.getRegionInfo()); - regionNode.setProcedure(proc); - } finally { - regionNode.unlock(); - } - return proc; - } - /** * Create one TransitRegionStateProcedure to assign a region w/o specifying a target server. - * This method is specified for HBCK2 + * This method is called from HBCK2. + * @return an assign or null */ - public TransitRegionStateProcedure createOneAssignProcedure(RegionInfo hri, boolean override) { - RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(hri); - return createAssignProcedure(regionNode, null, override); + public TransitRegionStateProcedure createOneAssignProcedure(RegionInfo ri, boolean override) { + TransitRegionStateProcedure trsp = null; + try { + trsp = createAssignProcedure(ri, null, override); + } catch (IOException ioe) { + LOG.info("Failed {} assign, override={}" + + (override? "": "; set override to by-pass state checks."), + ri.getEncodedName(), override, ioe); + } + return trsp; } /** * Create one TransitRegionStateProcedure to unassign a region. - * This method is specified for HBCK2 + * This method is called from HBCK2. + * @return an unassign or null */ - public TransitRegionStateProcedure createOneUnassignProcedure(RegionInfo hri, boolean override) { - RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(hri); - return createUnassignProcedure(regionNode, override); + public TransitRegionStateProcedure createOneUnassignProcedure(RegionInfo ri, boolean override) { + RegionStateNode regionNode = regionStates.getOrCreateRegionStateNode(ri); + TransitRegionStateProcedure trsp = null; + regionNode.lock(); + try { + if (override) { + if (regionNode.getProcedure() != null) { + regionNode.unsetProcedure(regionNode.getProcedure()); + } + } else { + // This is where we could throw an exception; i.e. override is false. + preTransitCheck(regionNode, STATES_EXPECTED_ON_UNASSIGN_OR_MOVE); + } + assert regionNode.getProcedure() == null; + trsp = TransitRegionStateProcedure.unassign(getProcedureEnvironment(), + regionNode.getRegionInfo()); + regionNode.setProcedure(trsp); + } catch (IOException ioe) { + // 'override' must be false here. + LOG.info("Failed {} unassign, override=false; set override to by-pass state checks.", + ri.getEncodedName(), ioe); + } finally{ + regionNode.unlock(); + } + return trsp; } /** * Create an array of TransitRegionStateProcedure w/o specifying a target server. + * Used as fallback of caller is unable to do {@link #createAssignProcedures(Map)}. *

* If no target server, at assign time, we will try to use the former location of the region if * one exists. This is how we 'retain' the old location across a server restart. *

* Should only be called when you can make sure that no one can touch these regions other than - * you. For example, when you are creating table. + * you. For example, when you are creating or enabling table. Presumes all Regions are in + * appropriate state ripe for assign; no checking of Region state is done in here. + * @see #createAssignProcedures(Map) */ public TransitRegionStateProcedure[] createAssignProcedures(List hris) { return hris.stream().map(hri -> regionStates.getOrCreateRegionStateNode(hri)) - .map(regionNode -> createAssignProcedure(regionNode, null, false)) + .map(regionNode -> createAssignProcedure(regionNode, null)) .sorted(AssignmentManager::compare).toArray(TransitRegionStateProcedure[]::new); } /** + * Tied to {@link #createAssignProcedures(List)} in that it is called if caller is unable to run + * this method. Presumes all Regions are in appropriate state ripe for assign; no checking + * of Region state is done in here. * @param assignments Map of assignments from which we produce an array of AssignProcedures. * @return Assignments made from the passed in assignments + * @see #createAssignProcedures(List) */ private TransitRegionStateProcedure[] createAssignProcedures( Map> assignments) { return assignments.entrySet().stream() .flatMap(e -> e.getValue().stream().map(hri -> regionStates.getOrCreateRegionStateNode(hri)) - .map(regionNode -> createAssignProcedure(regionNode, e.getKey(), false))) + .map(regionNode -> createAssignProcedure(regionNode, e.getKey()))) .sorted(AssignmentManager::compare).toArray(TransitRegionStateProcedure[]::new); } @@ -1060,8 +1084,8 @@ public class AssignmentManager { // If the RS is < 2.0 throw an exception to abort the operation, we are handling the split if (master.getServerManager().getVersionNumber(serverName) < 0x0200000) { - throw new UnsupportedOperationException(String.format( - "Split handled by the master: parent=%s hriA=%s hriB=%s", parent.getShortNameToLog(), hriA, hriB)); + throw new UnsupportedOperationException(String.format("Split handled by the master: " + + "parent=%s hriA=%s hriB=%s", parent.getShortNameToLog(), hriA, hriB)); } } @@ -1352,9 +1376,13 @@ public class AssignmentManager { public boolean isRegionTwiceOverThreshold(final RegionInfo regionInfo) { Map m = this.ritsOverThreshold; - if (m == null) return false; + if (m == null) { + return false; + } final RegionState state = m.get(regionInfo.getEncodedName()); - if (state == null) return false; + if (state == null) { + return false; + } return (statTimestamp - state.getStamp()) > (ritThreshold * 2); } @@ -1650,13 +1678,12 @@ public class AssignmentManager { // ============================================================================================ /** * Used by the client (via master) to identify if all regions have the schema updates - * - * @param tableName * @return Pair indicating the status of the alter command (pending/total) - * @throws IOException */ public Pair getReopenStatus(TableName tableName) { - if (isTableDisabled(tableName)) return new Pair(0, 0); + if (isTableDisabled(tableName)) { + return new Pair(0, 0); + } final List states = regionStates.getTableRegionStates(tableName); int ritCount = 0; @@ -1984,7 +2011,9 @@ public class AssignmentManager { assignQueueFullCond.await(); } - if (!isRunning()) return null; + if (!isRunning()) { + return null; + } assignQueueFullCond.await(assignDispatchWaitMillis, TimeUnit.MILLISECONDS); regions = new HashMap(pendingAssignQueue.size()); for (RegionStateNode regionNode: pendingAssignQueue) { @@ -2121,7 +2150,9 @@ public class AssignmentManager { throw new HBaseIOException("unable to compute plans for regions=" + regions.size()); } - if (plan.isEmpty()) return; + if (plan.isEmpty()) { + return; + } int evcount = 0; for (Map.Entry> entry: plan.entrySet()) { @@ -2178,7 +2209,7 @@ public class AssignmentManager { return Collections.emptyList(); } String highestVersion = Collections.max(serverList, - (o1, o2) -> VersionInfo.compareVersion(o1.getSecond(), o2.getSecond())).getSecond(); + (o1, o2) -> VersionInfo.compareVersion(o1.getSecond(), o2.getSecond())).getSecond(); return serverList.stream() .filter((p)->!p.getSecond().equals(highestVersion)) .map(Pair::getFirst) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java index e4a18d07098..b7fcdab96b9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStateNode.java @@ -194,10 +194,11 @@ public class RegionStateNode implements Comparable { return lastRegionLocation; } - public void setProcedure(TransitRegionStateProcedure proc) { + public TransitRegionStateProcedure setProcedure(TransitRegionStateProcedure proc) { assert this.procedure == null; this.procedure = proc; ritMap.put(regionInfo, this); + return proc; } public void unsetProcedure(TransitRegionStateProcedure proc) { @@ -316,4 +317,4 @@ public class RegionStateNode implements Comparable { public void unlock() { lock.unlock(); } -} \ No newline at end of file +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java index d4b2b5427a0..9376a113ce8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,8 +18,8 @@ package org.apache.hadoop.hbase.client; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertTrue; - import java.io.IOException; import java.util.Arrays; import java.util.HashMap; @@ -66,9 +66,7 @@ import org.junit.runners.Parameterized.Parameter; import org.junit.runners.Parameterized.Parameters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hbase.thirdparty.com.google.common.io.Closeables; - import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; /** @@ -87,7 +85,7 @@ public class TestHbck { @Rule public TestName name = new TestName(); - @Parameter + @SuppressWarnings("checkstyle:VisibilityModifier") @Parameter public boolean async; private static final TableName TABLE_NAME = TableName.valueOf(TestHbck.class.getSimpleName()); @@ -228,6 +226,26 @@ public class TestHbck { List pids = hbck.unassigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList())); waitOnPids(pids); + // Rerun the unassign. Should fail for all Regions since they already unassigned; failed + // unassign will manifest as all pids being -1 (ever since HBASE-24885). + pids = + hbck.unassigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList())); + waitOnPids(pids); + for (long pid: pids) { + assertEquals(Procedure.NO_PROC_ID, pid); + } + // If we pass override, then we should be able to unassign EVEN THOUGH Regions already + // unassigned.... makes for a mess but operator might want to do this at an extreme when + // doing fixup of broke cluster. + pids = + hbck.unassigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList()), + true); + waitOnPids(pids); + for (long pid: pids) { + assertNotEquals(Procedure.NO_PROC_ID, pid); + } + // Clean-up by bypassing all the unassigns we just made so tests can continue. + hbck.bypassProcedure(pids, 10000, true, true); for (RegionInfo ri : regions) { RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager() .getRegionStates().getRegionState(ri.getEncodedName()); @@ -237,6 +255,13 @@ public class TestHbck { pids = hbck.assigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList())); waitOnPids(pids); + // Rerun the assign. Should fail for all Regions since they already assigned; failed + // assign will manifest as all pids being -1 (ever since HBASE-24885). + pids = + hbck.assigns(regions.stream().map(r -> r.getEncodedName()).collect(Collectors.toList())); + for (long pid: pids) { + assertEquals(Procedure.NO_PROC_ID, pid); + } for (RegionInfo ri : regions) { RegionState rs = TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager() .getRegionStates().getRegionState(ri.getEncodedName()); @@ -247,7 +272,7 @@ public class TestHbck { pids = hbck.assigns( Arrays.stream(new String[] { "a", "some rubbish name" }).collect(Collectors.toList())); for (long pid : pids) { - assertEquals(org.apache.hadoop.hbase.procedure2.Procedure.NO_PROC_ID, pid); + assertEquals(Procedure.NO_PROC_ID, pid); } } } @@ -287,7 +312,7 @@ public class TestHbck { public static class FailingSplitAfterMetaUpdatedMasterObserver implements MasterCoprocessor, MasterObserver { - public volatile CountDownLatch latch; + @SuppressWarnings("checkstyle:VisibilityModifier") public volatile CountDownLatch latch; @Override public void start(CoprocessorEnvironment e) throws IOException { @@ -314,7 +339,7 @@ public class TestHbck { public static class FailingMergeAfterMetaUpdatedMasterObserver implements MasterCoprocessor, MasterObserver { - public volatile CountDownLatch latch; + @SuppressWarnings("checkstyle:VisibilityModifier") public volatile CountDownLatch latch; @Override public void start(CoprocessorEnvironment e) throws IOException {