From 259d12f7397679c6b0d0a4788e5a37f65fd49f20 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Sat, 29 Sep 2018 04:06:46 -0700 Subject: [PATCH] Revert "Revert "Revert "HBASE-21213 [hbck2] bypass leaves behind state in RegionStates when assign/unassign""" This reverts commit 2174461cf729b61a844950278773ee0802ced158. Revert because not ready to port to other branches. --- .../apache/hadoop/hbase/client/HBaseHbck.java | 15 +- .../org/apache/hadoop/hbase/client/Hbck.java | 34 +--- .../shaded/protobuf/RequestConverter.java | 10 +- .../hadoop/hbase/procedure2/Procedure.java | 29 ++- .../hbase/procedure2/ProcedureExecutor.java | 58 ++---- .../hbase/procedure2/ProcedureUtil.java | 2 +- .../procedure2/RemoteProcedureException.java | 6 +- .../hbase/procedure2/TestProcedureBypass.java | 23 +-- .../src/main/protobuf/Master.proto | 6 +- .../src/main/protobuf/MasterProcedure.proto | 2 - .../hbase/rsgroup/RSGroupInfoManagerImpl.java | 3 +- .../TestRSGroupBasedLoadBalancer.java | 3 - .../apache/hadoop/hbase/master/HMaster.java | 8 +- .../hbase/master/MasterRpcServices.java | 25 ++- .../master/assignment/AssignProcedure.java | 14 +- .../master/assignment/AssignmentManager.java | 29 +-- .../assignment/RegionTransitionProcedure.java | 78 ++------ .../master/assignment/UnassignProcedure.java | 14 +- .../master/balancer/BaseLoadBalancer.java | 13 +- .../master/procedure/MasterProcedureUtil.java | 16 -- .../master/procedure/ProcedureDescriber.java | 3 +- .../procedure/ProcedurePrepareLatch.java | 2 +- .../master/procedure/ProcedureSyncWait.java | 5 +- .../hadoop/hbase/TestMetaTableAccessor.java | 4 +- .../apache/hadoop/hbase/client/TestHbck.java | 3 +- .../master/assignment/TestRegionBypass.java | 181 ------------------ .../ruby/shell/commands/list_procedures.rb | 7 +- .../apache/hadoop/hbase/client/TestShell.java | 1 + hbase-shell/src/test/ruby/shell/shell_test.rb | 6 +- 29 files changed, 122 insertions(+), 478 deletions(-) delete mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionBypass.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java index 2d088253a4b..a8daa7b5db7 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseHbck.java @@ -102,12 +102,11 @@ public class HBaseHbck implements Hbck { } @Override - public List assigns(List encodedRegionNames, boolean override) - throws IOException { + public List assigns(List encodedRegionNames) throws IOException { try { MasterProtos.AssignsResponse response = this.hbck.assigns(rpcControllerFactory.newController(), - RequestConverter.toAssignRegionsRequest(encodedRegionNames, override)); + RequestConverter.toAssignRegionsRequest(encodedRegionNames)); return response.getPidList(); } catch (ServiceException se) { LOG.debug(toCommaDelimitedString(encodedRegionNames), se); @@ -116,12 +115,11 @@ public class HBaseHbck implements Hbck { } @Override - public List unassigns(List encodedRegionNames, boolean override) - throws IOException { + public List unassigns(List encodedRegionNames) throws IOException { try { MasterProtos.UnassignsResponse response = this.hbck.unassigns(rpcControllerFactory.newController(), - RequestConverter.toUnassignRegionsRequest(encodedRegionNames, override)); + RequestConverter.toUnassignRegionsRequest(encodedRegionNames)); return response.getPidList(); } catch (ServiceException se) { LOG.debug(toCommaDelimitedString(encodedRegionNames), se); @@ -134,8 +132,7 @@ public class HBaseHbck implements Hbck { } @Override - public List bypassProcedure(List pids, long waitTime, boolean override, - boolean recursive) + public List bypassProcedure(List pids, long waitTime, boolean force) throws IOException { MasterProtos.BypassProcedureResponse response = ProtobufUtil.call( new Callable() { @@ -144,7 +141,7 @@ public class HBaseHbck implements Hbck { try { return hbck.bypassProcedure(rpcControllerFactory.newController(), MasterProtos.BypassProcedureRequest.newBuilder().addAllProcId(pids). - setWaitTime(waitTime).setOverride(override).setRecursive(recursive).build()); + setWaitTime(waitTime).setForce(force).build()); } catch (Throwable t) { LOG.error(pids.stream().map(i -> i.toString()). collect(Collectors.joining(", ")), t); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java index b31e38ba789..5c9a862feb5 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Hbck.java @@ -28,14 +28,11 @@ import org.apache.yetus.audience.InterfaceAudience; /** * Hbck fixup tool APIs. Obtain an instance from {@link ClusterConnection#getHbck()} and call * {@link #close()} when done. - *

WARNING: the below methods can damage the cluster. It may leave the cluster in an - * indeterminate state, e.g. region not assigned, or some hdfs files left behind. After running - * any of the below, operators may have to do some clean up on hdfs or schedule some assign - * procedures to get regions back online. DO AT YOUR OWN RISK. For experienced users only. + *

WARNING: the below methods can damage the cluster. For experienced users only. * * @see ConnectionFactory * @see ClusterConnection - * @since 2.0.2, 2.1.1 + * @since 2.2.0 */ @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.HBCK) public interface Hbck extends Abortable, Closeable { @@ -52,38 +49,22 @@ public interface Hbck extends Abortable, Closeable { * -- good if many Regions to online -- and it will schedule the assigns even in the case where * Master is initializing (as long as the ProcedureExecutor is up). Does NOT call Coprocessor * hooks. - * @param override You need to add the override for case where a region has previously been - * bypassed. When a Procedure has been bypassed, a Procedure will have completed - * but no other Procedure will be able to make progress on the target entity - * (intentionally). This override flag will override this fencing mechanism. * @param encodedRegionNames Region encoded names; e.g. 1588230740 is the hard-coded encoding * for hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an * example of what a random user-space encoded Region name looks like. */ - List assigns(List encodedRegionNames, boolean override) throws IOException; - - default List assigns(List encodedRegionNames) throws IOException { - return assigns(encodedRegionNames, false); - } + List assigns(List encodedRegionNames) throws IOException; /** * Like {@link Admin#unassign(byte[], boolean)} but 'raw' in that it can do more than one Region * at a time -- good if many Regions to offline -- and it will schedule the assigns even in the * case where Master is initializing (as long as the ProcedureExecutor is up). Does NOT call * Coprocessor hooks. - * @param override You need to add the override for case where a region has previously been - * bypassed. When a Procedure has been bypassed, a Procedure will have completed - * but no other Procedure will be able to make progress on the target entity - * (intentionally). This override flag will override this fencing mechanism. * @param encodedRegionNames Region encoded names; e.g. 1588230740 is the hard-coded encoding * for hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an * example of what a random user-space encoded Region name looks like. */ - List unassigns(List encodedRegionNames, boolean override) throws IOException; - - default List unassigns(List encodedRegionNames) throws IOException { - return unassigns(encodedRegionNames, false); - } + List unassigns(List encodedRegionNames) throws IOException; /** * Bypass specified procedure and move it to completion. Procedure is marked completed but @@ -92,12 +73,9 @@ public interface Hbck extends Abortable, Closeable { * * @param pids of procedures to complete. * @param waitTime wait time in ms for acquiring lock for a procedure - * @param override if override set to true, we will bypass the procedure even if it is executing. + * @param force if force set to true, we will bypass the procedure even if it is executing. * This is for procedures which can't break out during execution (bugs?). - * @param recursive If set, if a parent procedure, we will find and bypass children and then - * the parent procedure (Dangerous but useful in case where child procedure has been 'lost'). * @return true if procedure is marked for bypass successfully, false otherwise */ - List bypassProcedure(List pids, long waitTime, boolean override, boolean recursive) - throws IOException; + List bypassProcedure(List pids, long waitTime, boolean force) throws IOException; } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java index 7db8b0225d5..6ab81e22f2c 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/RequestConverter.java @@ -1883,18 +1883,16 @@ public final class RequestConverter { // HBCK2 public static MasterProtos.AssignsRequest toAssignRegionsRequest( - List encodedRegionNames, boolean override) { + List encodedRegionNames) { MasterProtos.AssignsRequest.Builder b = MasterProtos.AssignsRequest.newBuilder(); - return b.addAllRegion(toEncodedRegionNameRegionSpecifiers(encodedRegionNames)). - setOverride(override).build(); + return b.addAllRegion(toEncodedRegionNameRegionSpecifiers(encodedRegionNames)).build(); } public static MasterProtos.UnassignsRequest toUnassignRegionsRequest( - List encodedRegionNames, boolean override) { + List encodedRegionNames) { MasterProtos.UnassignsRequest.Builder b = MasterProtos.UnassignsRequest.newBuilder(); - return b.addAllRegion(toEncodedRegionNameRegionSpecifiers(encodedRegionNames)). - setOverride(override).build(); + return b.addAllRegion(toEncodedRegionNameRegionSpecifiers(encodedRegionNames)).build(); } private static List toEncodedRegionNameRegionSpecifiers( diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java index aa305bfd17e..08c7ce3babd 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java @@ -145,16 +145,12 @@ public abstract class Procedure implements Comparable

Bypassing a procedure is not like aborting. Aborting a procedure will trigger + * {@link #doExecute(Object)} to finish the procedure and releasing any locks + * it may currently hold. + * Bypassing a procedure is not like aborting. Aborting a procedure will trigger * a rollback. And since the {@link #abort(Object)} method is overrideable * Some procedures may have chosen to ignore the aborting. */ @@ -179,15 +175,12 @@ public abstract class Procedure implements Comparable implements Comparable { private Configuration conf; /** - * Created in the {@link #init(int, boolean)} method. Destroyed in {@link #join()} (FIX! Doing + * Created in the {@link #start(int, boolean)} method. Destroyed in {@link #join()} (FIX! Doing * resource handling rather than observing in a #join is unexpected). * Overridden when we do the ProcedureTestingUtility.testRecoveryAndDoubleExecution trickery * (Should be ok). @@ -314,7 +314,7 @@ public class ProcedureExecutor { private ThreadGroup threadGroup; /** - * Created in the {@link #init(int, boolean)} method. Terminated in {@link #join()} (FIX! Doing + * Created in the {@link #start(int, boolean)} method. Terminated in {@link #join()} (FIX! Doing * resource handling rather than observing in a #join is unexpected). * Overridden when we do the ProcedureTestingUtility.testRecoveryAndDoubleExecution trickery * (Should be ok). @@ -322,7 +322,7 @@ public class ProcedureExecutor { private CopyOnWriteArrayList workerThreads; /** - * Created in the {@link #init(int, boolean)} method. Terminated in {@link #join()} (FIX! Doing + * Created in the {@link #start(int, boolean)} method. Terminated in {@link #join()} (FIX! Doing * resource handling rather than observing in a #join is unexpected). * Overridden when we do the ProcedureTestingUtility.testRecoveryAndDoubleExecution trickery * (Should be ok). @@ -966,7 +966,7 @@ public class ProcedureExecutor { * Bypass a procedure. If the procedure is set to bypass, all the logic in * execute/rollback will be ignored and it will return success, whatever. * It is used to recover buggy stuck procedures, releasing the lock resources - * and letting other procedures run. Bypassing one procedure (and its ancestors will + * and letting other procedures to run. Bypassing one procedure (and its ancestors will * be bypassed automatically) may leave the cluster in a middle state, e.g. region * not assigned, or some hdfs files left behind. After getting rid of those stuck procedures, * the operators may have to do some clean up on hdfs or schedule some assign procedures @@ -993,38 +993,34 @@ public class ProcedureExecutor { * there. We need to restart the master after bypassing, and letting the problematic * procedure to execute wth bypass=true, so in that condition, the procedure can be * successfully bypassed. - * @param recursive We will do an expensive search for children of each pid. EXPENSIVE! * @return true if bypass success * @throws IOException IOException */ - public List bypassProcedure(List pids, long lockWait, boolean force, - boolean recursive) + public List bypassProcedure(List pids, long lockWait, boolean force) throws IOException { List result = new ArrayList(pids.size()); for(long pid: pids) { - result.add(bypassProcedure(pid, lockWait, force, recursive)); + result.add(bypassProcedure(pid, lockWait, force)); } return result; } - boolean bypassProcedure(long pid, long lockWait, boolean override, boolean recursive) - throws IOException { - final Procedure procedure = getProcedure(pid); + boolean bypassProcedure(long pid, long lockWait, boolean force) throws IOException { + Procedure procedure = getProcedure(pid); if (procedure == null) { - LOG.debug("Procedure pid={} does not exist, skipping bypass", pid); + LOG.debug("Procedure with id={} does not exist, skipping bypass", pid); return false; } - LOG.debug("Begin bypass {} with lockWait={}, override={}, recursive={}", - procedure, lockWait, override, recursive); + LOG.debug("Begin bypass {} with lockWait={}, force={}", procedure, lockWait, force); IdLock.Entry lockEntry = procExecutionLock.tryLockEntry(procedure.getProcId(), lockWait); - if (lockEntry == null && !override) { + if (lockEntry == null && !force) { LOG.debug("Waited {} ms, but {} is still running, skipping bypass with force={}", - lockWait, procedure, override); + lockWait, procedure, force); return false; } else if (lockEntry == null) { LOG.debug("Waited {} ms, but {} is still running, begin bypass with force={}", - lockWait, procedure, override); + lockWait, procedure, force); } try { // check whether the procedure is already finished @@ -1034,29 +1030,8 @@ public class ProcedureExecutor { } if (procedure.hasChildren()) { - if (recursive) { - // EXPENSIVE. Checks each live procedure of which there could be many!!! - // Is there another way to get children of a procedure? - LOG.info("Recursive bypass on children of pid={}", procedure.getProcId()); - this.procedures.forEachValue(1 /*Single-threaded*/, - // Transformer - v -> { - return v.getParentProcId() == procedure.getProcId()? v: null; - }, - // Consumer - v -> { - boolean result = false; - IOException ioe = null; - try { - result = bypassProcedure(v.getProcId(), lockWait, override, recursive); - } catch (IOException e) { - LOG.warn("Recursive bypass of pid={}", v.getProcId(), e); - } - }); - } else { - LOG.debug("{} has children, skipping bypass", procedure); - return false; - } + LOG.debug("{} has children, skipping bypass", procedure); + return false; } // If the procedure has no parent or no child, we are safe to bypass it in whatever state @@ -1066,7 +1041,6 @@ public class ProcedureExecutor { LOG.debug("Bypassing procedures in RUNNABLE, WAITING and WAITING_TIMEOUT states " + "(with no parent), {}", procedure); - // Question: how is the bypass done here? return false; } @@ -1076,7 +1050,7 @@ public class ProcedureExecutor { Procedure current = procedure; while (current != null) { LOG.debug("Bypassing {}", current); - current.bypass(getEnvironment()); + current.bypass(); store.update(procedure); long parentID = current.getParentProcId(); current = getProcedure(parentID); diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureUtil.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureUtil.java index b7362200d0a..27802efecd5 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureUtil.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureUtil.java @@ -272,7 +272,7 @@ public final class ProcedureUtil { } if (proto.getBypass()) { - proc.bypass(null); + proc.bypass(); } ProcedureStateSerializer serializer = null; diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureException.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureException.java index 612ccf27cc4..6f4795d9dc4 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureException.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/RemoteProcedureException.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.hadoop.ipc.RemoteException; import org.apache.yetus.audience.InterfaceAudience; +import org.apache.yetus.audience.InterfaceStability; import org.apache.hadoop.hbase.shaded.protobuf.generated.ErrorHandlingProtos.ForeignExceptionMessage; import org.apache.hadoop.hbase.util.ForeignExceptionUtil; @@ -36,6 +37,7 @@ import org.apache.hadoop.hbase.util.ForeignExceptionUtil; * their stacks traces and messages overridden to reflect the original 'remote' exception. */ @InterfaceAudience.Private +@InterfaceStability.Evolving @SuppressWarnings("serial") public class RemoteProcedureException extends ProcedureException { @@ -72,10 +74,6 @@ public class RemoteProcedureException extends ProcedureException { return new Exception(cause); } - // NOTE: Does not throw DoNotRetryIOE because it does not - // have access (DNRIOE is in the client module). Use - // MasterProcedureUtil.unwrapRemoteIOException if need to - // throw DNRIOE. public IOException unwrapRemoteIOException() { final Exception cause = unwrapRemoteException(); if (cause instanceof IOException) { diff --git a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestProcedureBypass.java b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestProcedureBypass.java index 739d1612c35..d58d57e76e8 100644 --- a/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestProcedureBypass.java +++ b/hbase-procedure/src/test/java/org/apache/hadoop/hbase/procedure2/TestProcedureBypass.java @@ -87,7 +87,7 @@ public class TestProcedureBypass { long id = procExecutor.submitProcedure(proc); Thread.sleep(500); //bypass the procedure - assertTrue(procExecutor.bypassProcedure(id, 30000, false, false)); + assertTrue(procExecutor.bypassProcedure(id, 30000, false)); htu.waitFor(5000, () -> proc.isSuccess() && proc.isBypass()); LOG.info("{} finished", proc); } @@ -98,7 +98,7 @@ public class TestProcedureBypass { long id = procExecutor.submitProcedure(proc); Thread.sleep(500); //bypass the procedure - assertTrue(procExecutor.bypassProcedure(id, 1000, true, false)); + assertTrue(procExecutor.bypassProcedure(id, 1000, true)); //Since the procedure is stuck there, we need to restart the executor to recovery. ProcedureTestingUtility.restart(procExecutor); htu.waitFor(5000, () -> proc.isSuccess() && proc.isBypass()); @@ -114,24 +114,12 @@ public class TestProcedureBypass { .size() > 0); SuspendProcedure suspendProcedure = (SuspendProcedure)procExecutor.getProcedures().stream() .filter(p -> p.getParentProcId() == rootId).collect(Collectors.toList()).get(0); - assertTrue(procExecutor.bypassProcedure(suspendProcedure.getProcId(), 1000, false, false)); + assertTrue(procExecutor.bypassProcedure(suspendProcedure.getProcId(), 1000, false)); htu.waitFor(5000, () -> proc.isSuccess() && proc.isBypass()); LOG.info("{} finished", proc); } - @Test - public void testBypassingProcedureWithParentRecursive() throws Exception { - final RootProcedure proc = new RootProcedure(); - long rootId = procExecutor.submitProcedure(proc); - htu.waitFor(5000, () -> procExecutor.getProcedures().stream() - .filter(p -> p.getParentProcId() == rootId).collect(Collectors.toList()) - .size() > 0); - SuspendProcedure suspendProcedure = (SuspendProcedure)procExecutor.getProcedures().stream() - .filter(p -> p.getParentProcId() == rootId).collect(Collectors.toList()).get(0); - assertTrue(procExecutor.bypassProcedure(rootId, 1000, false, true)); - htu.waitFor(5000, () -> proc.isSuccess() && proc.isBypass()); - LOG.info("{} finished", proc); - } + @AfterClass public static void tearDown() throws Exception { @@ -191,4 +179,7 @@ public class TestProcedureBypass { } } } + + + } diff --git a/hbase-protocol-shaded/src/main/protobuf/Master.proto b/hbase-protocol-shaded/src/main/protobuf/Master.proto index afc6e64cd32..b2a3bda095d 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Master.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Master.proto @@ -99,7 +99,6 @@ message MergeTableRegionsResponse { message AssignRegionRequest { required RegionSpecifier region = 1; - optional bool override = 2 [default = false]; } message AssignRegionResponse { @@ -1006,7 +1005,6 @@ message SetTableStateInMetaRequest { // Region at a time. message AssignsRequest { repeated RegionSpecifier region = 1; - optional bool override = 2 [default = false]; } /** Like Admin's AssignRegionResponse except it can @@ -1021,7 +1019,6 @@ message AssignsResponse { */ message UnassignsRequest { repeated RegionSpecifier region = 1; - optional bool override = 2 [default = false]; } /** Like Admin's UnassignRegionResponse except it can @@ -1034,8 +1031,7 @@ message UnassignsResponse { message BypassProcedureRequest { repeated uint64 proc_id = 1; optional uint64 waitTime = 2; // wait time in ms to acquire lock on a procedure - optional bool override = 3 [default = false]; // if true, procedure is marked for bypass even if its executing - optional bool recursive = 4; + optional bool force = 3; // if true, procedure is marked for bypass even if its executing } message BypassProcedureResponse { diff --git a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto index d5525f67112..3108f9f57ab 100644 --- a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto +++ b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto @@ -330,7 +330,6 @@ message AssignRegionStateData { optional ServerName target_server = 4; // Current attempt index used for expotential backoff when stuck optional int32 attempt = 5; - optional bool override = 6 [default = false]; } message UnassignRegionStateData { @@ -342,7 +341,6 @@ message UnassignRegionStateData { // This is the server currently hosting the Region, the // server we will send the unassign rpc too. optional ServerName hosting_server = 5; - // We hijacked an old param named 'force' and use it as 'override'. optional bool force = 4 [default = false]; optional bool remove_after_unassigning = 6 [default = false]; // Current attempt index used for expotential backoff when stuck diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java index 6c8cd9f64a4..b34ba8957e8 100644 --- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java +++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupInfoManagerImpl.java @@ -65,7 +65,6 @@ import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.ServerListener; import org.apache.hadoop.hbase.master.TableStateManager; import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; -import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; import org.apache.hadoop.hbase.net.Address; import org.apache.hadoop.hbase.procedure2.Procedure; import org.apache.hadoop.hbase.protobuf.ProtobufMagic; @@ -874,7 +873,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager { Procedure result = masterServices.getMasterProcedureExecutor().getResult(procId); if (result != null && result.isFailed()) { throw new IOException("Failed to create group table. " + - MasterProcedureUtil.unwrapRemoteIOException(result)); + result.getException().unwrapRemoteIOException()); } } } diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancer.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancer.java index 67923602f75..0ef0a0118c4 100644 --- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancer.java +++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancer.java @@ -49,7 +49,6 @@ import org.apache.hadoop.hbase.master.LoadBalancer; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; -import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.net.Address; import org.apache.hadoop.hbase.rsgroup.RSGroupBasedLoadBalancer; import org.apache.hadoop.hbase.rsgroup.RSGroupInfo; @@ -570,8 +569,6 @@ public class TestRSGroupBasedLoadBalancer { Mockito.when(services.getTableDescriptors()).thenReturn(tds); AssignmentManager am = Mockito.mock(AssignmentManager.class); Mockito.when(services.getAssignmentManager()).thenReturn(am); - RegionStates rss = Mockito.mock(RegionStates.class); - Mockito.when(am.getRegionStates()).thenReturn(rss); return services; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 2fbcd30df75..475b0ca1680 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -978,7 +978,7 @@ public class HMaster extends HRegionServer implements MasterServices { // as procedures run -- in particular SCPs for crashed servers... One should put up hbase:meta // if it is down. It may take a while to come online. So, wait here until meta if for sure // available. Thats what waitUntilMetaOnline does. - if (!waitForMetaOnline()) { + if (!waitUntilMetaOnline()) { return; } this.assignmentManager.joinCluster(); @@ -1010,7 +1010,7 @@ public class HMaster extends HRegionServer implements MasterServices { // Here we expect hbase:namespace to be online. See inside initClusterSchemaService. // TODO: Fix this. Namespace is a pain being a sort-of system table. Fold it in to hbase:meta. // isNamespace does like isMeta and waits until namespace is onlined before allowing progress. - if (!waitForNamespaceOnline()) { + if (!waitUntilNamespaceOnline()) { return; } status.setStatus("Starting cluster schema service"); @@ -1094,7 +1094,7 @@ public class HMaster extends HRegionServer implements MasterServices { * and we will hold here until operator intervention. */ @VisibleForTesting - public boolean waitForMetaOnline() throws InterruptedException { + public boolean waitUntilMetaOnline() throws InterruptedException { return isRegionOnline(RegionInfoBuilder.FIRST_META_REGIONINFO); } @@ -1135,7 +1135,7 @@ public class HMaster extends HRegionServer implements MasterServices { * @return True if namespace table is up/online. */ @VisibleForTesting - public boolean waitForNamespaceOnline() throws InterruptedException { + public boolean waitUntilNamespaceOnline() throws InterruptedException { List ris = this.assignmentManager.getRegionStates(). getRegionsOfTable(TableName.NAMESPACE_TABLE_NAME); if (ris.isEmpty()) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 294e604f403..6bce9e54e66 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -29,7 +29,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; -import java.util.function.BiFunction; +import java.util.function.Function; import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.ClusterMetricsBuilder; @@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.MasterSwitchType; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; @@ -594,7 +595,7 @@ public class MasterRpcServices extends RSRpcServices BalanceRequest request) throws ServiceException { try { return BalanceResponse.newBuilder().setBalancerRan(master.balance( - request.hasForce()? request.getForce(): false)).build(); + request.hasForce() ? request.getForce() : false)).build(); } catch (IOException ex) { throw new ServiceException(ex); } @@ -1180,8 +1181,7 @@ public class MasterRpcServices extends RSRpcServices if (executor.isFinished(procId)) { builder.setState(GetProcedureResultResponse.State.FINISHED); if (result.isFailed()) { - IOException exception = - MasterProcedureUtil.unwrapRemoteIOException(result); + IOException exception = result.getException().unwrapRemoteIOException(); builder.setException(ForeignExceptionUtil.toProtoForeignException(exception)); } byte[] resultData = result.getResult(); @@ -2335,15 +2335,14 @@ public class MasterRpcServices extends RSRpcServices * Submit the Procedure that gets created by f * @return pid of the submitted Procedure. */ - private long submitProcedure(HBaseProtos.RegionSpecifier rs, boolean override, - BiFunction f) + private long submitProcedure(HBaseProtos.RegionSpecifier rs, Function f) throws UnknownRegionException { RegionInfo ri = getRegionInfo(rs); long pid = Procedure.NO_PROC_ID; if (ri == null) { LOG.warn("No RegionInfo found to match {}", rs); } else { - pid = this.master.getMasterProcedureExecutor().submitProcedure(f.apply(ri, override)); + pid = this.master.getMasterProcedureExecutor().submitProcedure(f.apply(ri)); } return pid; } @@ -2363,10 +2362,9 @@ public class MasterRpcServices extends RSRpcServices MasterProtos.AssignsResponse.Builder responseBuilder = MasterProtos.AssignsResponse.newBuilder(); try { - boolean override = request.getOverride(); for (HBaseProtos.RegionSpecifier rs: request.getRegionList()) { - long pid = submitProcedure(rs, override, - (r, b) -> this.master.getAssignmentManager().createAssignProcedure(r, b)); + long pid = submitProcedure(rs, + r -> this.master.getAssignmentManager().createAssignProcedure(r)); responseBuilder.addPid(pid); } return responseBuilder.build(); @@ -2390,10 +2388,9 @@ public class MasterRpcServices extends RSRpcServices MasterProtos.UnassignsResponse.Builder responseBuilder = MasterProtos.UnassignsResponse.newBuilder(); try { - boolean override = request.getOverride(); for (HBaseProtos.RegionSpecifier rs: request.getRegionList()) { - long pid = submitProcedure(rs, override, - (r, b) -> this.master.getAssignmentManager().createUnassignProcedure(r, b)); + long pid = submitProcedure(rs, + ri -> this.master.getAssignmentManager().createUnassignProcedure(ri)); responseBuilder.addPid(pid); } return responseBuilder.build(); @@ -2419,7 +2416,7 @@ public class MasterRpcServices extends RSRpcServices try { List ret = master.getMasterProcedureExecutor().bypassProcedure(request.getProcIdList(), - request.getWaitTime(), request.getOverride(), request.getRecursive()); + request.getWaitTime(), request.getForce()); return MasterProtos.BypassProcedureResponse.newBuilder().addAllBypassed(ret).build(); } catch (IOException e) { throw new ServiceException(e); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java index ac4a67f323d..55aee4ad071 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignProcedure.java @@ -99,16 +99,12 @@ public class AssignProcedure extends RegionTransitionProcedure { } public AssignProcedure(final RegionInfo regionInfo) { - this(regionInfo, null); + super(regionInfo); + this.targetServer = null; } public AssignProcedure(final RegionInfo regionInfo, final ServerName destinationServer) { - this(regionInfo, destinationServer, false); - } - - public AssignProcedure(final RegionInfo regionInfo, final ServerName destinationServer, - boolean override) { - super(regionInfo, override); + super(regionInfo); this.targetServer = destinationServer; } @@ -142,9 +138,6 @@ public class AssignProcedure extends RegionTransitionProcedure { if (getAttempt() > 0) { state.setAttempt(getAttempt()); } - if (isOverride()) { - state.setOverride(isOverride()); - } serializer.serialize(state.build()); } @@ -155,7 +148,6 @@ public class AssignProcedure extends RegionTransitionProcedure { setTransitionState(state.getTransitionState()); setRegionInfo(ProtobufUtil.toRegionInfo(state.getRegionInfo())); forceNewPlan = state.getForceNewPlan(); - setOverride(state.getOverride()); if (state.hasTargetServer()) { this.targetServer = ProtobufUtil.toServerName(state.getTargetServer()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 04f888c9710..123abc28e98 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -677,37 +677,26 @@ public class AssignmentManager implements ServerListener { * Called by things like DisableTableProcedure to get a list of UnassignProcedure * to unassign the regions of the table. */ + public UnassignProcedure[] createUnassignProcedures(final TableName tableName) { + return createUnassignProcedures(regionStates.getTableRegionStateNodes(tableName)); + } + public AssignProcedure createAssignProcedure(final RegionInfo regionInfo) { - return createAssignProcedure(regionInfo, null, false); - } - - public AssignProcedure createAssignProcedure(final RegionInfo regionInfo, boolean override) { - return createAssignProcedure(regionInfo, null, override); + AssignProcedure proc = new AssignProcedure(regionInfo); + proc.setOwner(getProcedureEnvironment().getRequestUser().getShortName()); + return proc; } public AssignProcedure createAssignProcedure(final RegionInfo regionInfo, - ServerName targetServer) { - return createAssignProcedure(regionInfo, targetServer, false); - } - - public AssignProcedure createAssignProcedure(final RegionInfo regionInfo, - final ServerName targetServer, boolean override) { - AssignProcedure proc = new AssignProcedure(regionInfo, targetServer, override); + final ServerName targetServer) { + AssignProcedure proc = new AssignProcedure(regionInfo, targetServer); proc.setOwner(getProcedureEnvironment().getRequestUser().getShortName()); return proc; } public UnassignProcedure createUnassignProcedure(final RegionInfo regionInfo) { return createUnassignProcedure(regionInfo, null, false); - } - public UnassignProcedure createUnassignProcedure(final RegionInfo regionInfo, - boolean override) { - return createUnassignProcedure(regionInfo, null, override); - } - - public UnassignProcedure[] createUnassignProcedures(final TableName tableName) { - return createUnassignProcedures(regionStates.getTableRegionStateNodes(tableName)); } UnassignProcedure createUnassignProcedure(final RegionInfo regionInfo, diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java index e80cdf333a4..3b0735e46e4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java @@ -111,12 +111,6 @@ public abstract class RegionTransitionProcedure */ private RegionInfo regionInfo; - /** - * this data member must also be persisted. - * @see #regionInfo - */ - private boolean override; - /** * Like {@link #regionInfo}, the expectation is that subclasses persist the value of this * data member. It is used doing backoff when Procedure gets stuck. @@ -126,9 +120,8 @@ public abstract class RegionTransitionProcedure // Required by the Procedure framework to create the procedure on replay public RegionTransitionProcedure() {} - public RegionTransitionProcedure(final RegionInfo regionInfo, boolean override) { + public RegionTransitionProcedure(final RegionInfo regionInfo) { this.regionInfo = regionInfo; - this.override = override; } @VisibleForTesting @@ -141,7 +134,7 @@ public abstract class RegionTransitionProcedure * {@link #deserializeStateData(ProcedureStateSerializer)} method. Expectation is that * subclasses will persist `regioninfo` in their * {@link #serializeStateData(ProcedureStateSerializer)} method and then restore `regionInfo` on - * deserialization by calling this. + * deserialization by calling. */ protected void setRegionInfo(final RegionInfo regionInfo) { this.regionInfo = regionInfo; @@ -149,21 +142,9 @@ public abstract class RegionTransitionProcedure /** * This setter is for subclasses to call in their - * {@link #deserializeStateData(ProcedureStateSerializer)} method. Expectation is that - * subclasses will persist `override` in their - * {@link #serializeStateData(ProcedureStateSerializer)} method and then restore `override` on - * deserialization by calling this. + * {@link #deserializeStateData(ProcedureStateSerializer)} method. + * @see #setRegionInfo(RegionInfo) */ - protected void setOverride(boolean override) { - this.override = override; - } - - - /** - * This setter is for subclasses to call in their - * {@link #deserializeStateData(ProcedureStateSerializer)} method. - * @see #setRegionInfo(RegionInfo) - */ protected void setAttempt(int attempt) { this.attempt = attempt; } @@ -189,11 +170,6 @@ public abstract class RegionTransitionProcedure sb.append(getTableName()); sb.append(", region="); sb.append(getRegionInfo() == null? null: getRegionInfo().getEncodedName()); - if (isOverride()) { - // Only log if set. - sb.append(", override="); - sb.append(isOverride()); - } } public RegionStateNode getRegionState(final MasterProcedureEnv env) { @@ -332,19 +308,12 @@ public abstract class RegionTransitionProcedure final AssignmentManager am = env.getAssignmentManager(); final RegionStateNode regionNode = getRegionState(env); if (!am.addRegionInTransition(regionNode, this)) { - if (this.isOverride()) { - LOG.info("{} owned by pid={}, OVERRIDDEN by 'this' (pid={}, override=true).", - regionNode.getRegionInfo().getEncodedName(), - regionNode.getProcedure().getProcId(), getProcId()); - regionNode.unsetProcedure(regionNode.getProcedure()); - } else { - String msg = String.format("%s owned by pid=%d, CANNOT run 'this' (pid=%d).", - regionNode.getRegionInfo().getEncodedName(), - regionNode.getProcedure().getProcId(), getProcId()); - LOG.warn(msg); - setAbortFailure(getClass().getSimpleName(), msg); - return null; - } + String msg = String.format( + "There is already another procedure running on this region this=%s owner=%s", + this, regionNode.getProcedure()); + LOG.warn(msg + " " + this + "; " + regionNode.toShortString()); + setAbortFailure(getClass().getSimpleName(), msg); + return null; } try { boolean retry; @@ -456,12 +425,8 @@ public abstract class RegionTransitionProcedure // TODO: Revisit this and move it to the executor if (env.getProcedureScheduler().waitRegion(this, getRegionInfo())) { try { - // Enable TRACE on this class to see lock dump. Can be really large when cluster is big - // or big tables being enabled/disabled. - if (LOG.isTraceEnabled()) { - LOG.trace("{} pid={} {}", LockState.LOCK_EVENT_WAIT, getProcId(), - env.getProcedureScheduler().dumpLocks()); - } + LOG.debug(LockState.LOCK_EVENT_WAIT + " pid=" + getProcId() + " " + + env.getProcedureScheduler().dumpLocks()); } catch (IOException e) { // ignore, just for logging } @@ -504,23 +469,4 @@ public abstract class RegionTransitionProcedure // should not be called for region operation until we modified the open/close region procedure throw new UnsupportedOperationException(); } - - @Override - protected void bypass(MasterProcedureEnv env) { - // This override is just so I can write a note on how bypass is done in - // RTP. For RTP procedures -- i.e. assign/unassign -- if bypass is called, - // we intentionally do NOT cleanup our state. We leave a reference to the - // bypassed Procedure in the RegionStateNode. Doing this makes it so the - // RSN is in an odd state. The bypassed Procedure is finished but no one - // else can make progress on this RSN entity (see the #execute above where - // we check the RSN to see if an already registered procedure and if so, - // we exit without proceeding). This is done to intentionally block - // subsequent Procedures from running. Only a Procedure with the 'override' flag - // set can overwrite the RSN and make progress. - super.bypass(env); - } - - boolean isOverride() { - return this.override; - } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java index b001885ee9f..4f58a0f305c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/UnassignProcedure.java @@ -84,6 +84,10 @@ public class UnassignProcedure extends RegionTransitionProcedure { */ protected volatile ServerName destinationServer; + // TODO: should this be in a reassign procedure? + // ...and keep unassign for 'disable' case? + private boolean force; + /** * Whether deleting the region from in-memory states after unassigning the region. */ @@ -105,11 +109,12 @@ public class UnassignProcedure extends RegionTransitionProcedure { } public UnassignProcedure(final RegionInfo regionInfo, final ServerName hostingServer, - final ServerName destinationServer, final boolean override, + final ServerName destinationServer, final boolean force, final boolean removeAfterUnassigning) { - super(regionInfo, override); + super(regionInfo); this.hostingServer = hostingServer; this.destinationServer = destinationServer; + this.force = force; this.removeAfterUnassigning = removeAfterUnassigning; // we don't need REGION_TRANSITION_QUEUE, we jump directly to sending the request @@ -142,7 +147,7 @@ public class UnassignProcedure extends RegionTransitionProcedure { if (this.destinationServer != null) { state.setDestinationServer(ProtobufUtil.toServerName(destinationServer)); } - if (isOverride()) { + if (force) { state.setForce(true); } if (removeAfterUnassigning) { @@ -162,8 +167,7 @@ public class UnassignProcedure extends RegionTransitionProcedure { setTransitionState(state.getTransitionState()); setRegionInfo(ProtobufUtil.toRegionInfo(state.getRegionInfo())); this.hostingServer = ProtobufUtil.toServerName(state.getHostingServer()); - // The 'force' flag is the override flag in unassign. - setOverride(state.getForce()); + force = state.getForce(); if (state.hasDestinationServer()) { this.destinationServer = ProtobufUtil.toServerName(state.getDestinationServer()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 4ec927835f1..ef5c49ab020 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -51,8 +51,6 @@ import org.apache.hadoop.hbase.master.LoadBalancer; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.RackManager; import org.apache.hadoop.hbase.master.RegionPlan; -import org.apache.hadoop.hbase.master.assignment.AssignmentManager; -import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type; import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hbase.thirdparty.com.google.common.base.Joiner; @@ -1458,13 +1456,10 @@ public abstract class BaseLoadBalancer implements LoadBalancer { ServerName oldServerName = entry.getValue(); // In the current set of regions even if one has region replica let us go with // getting the entire snapshot - if (this.services != null) { // for tests - AssignmentManager am = this.services.getAssignmentManager(); - if (am != null) { - RegionStates rss = am.getRegionStates(); - if (!hasRegionReplica && rss.isReplicaAvailableForRegion(region)) { - hasRegionReplica = true; - } + if (this.services != null && this.services.getAssignmentManager() != null) { // for tests + if (!hasRegionReplica && this.services.getAssignmentManager().getRegionStates() + .isReplicaAvailableForRegion(region)) { + hasRegionReplica = true; } } List localServers = new ArrayList<>(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureUtil.java index 1e488b61857..58263d30445 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureUtil.java @@ -19,12 +19,9 @@ package org.apache.hadoop.hbase.master.procedure; import java.io.IOException; import java.util.regex.Pattern; - -import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.procedure2.Procedure; -import org.apache.hadoop.hbase.procedure2.ProcedureException; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.util.NonceKey; @@ -179,17 +176,4 @@ public final class MasterProcedureUtil { public static int getServerPriority(ServerProcedureInterface proc) { return proc.hasMetaTableRegion() ? 100 : 1; } - - /** - * This is a version of unwrapRemoteIOException that can do DoNotRetryIOE. - * We need to throw DNRIOE to clients if a failed Procedure else they will - * keep trying. The default proc.getException().unwrapRemoteException - * doesn't have access to DNRIOE from the procedure2 module. - */ - public static IOException unwrapRemoteIOException(Procedure proc) { - Exception e = proc.getException().unwrapRemoteException(); - // Do not retry ProcedureExceptions! - return (e instanceof ProcedureException)? new DoNotRetryIOException(e): - proc.getException().unwrapRemoteIOException(); - } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureDescriber.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureDescriber.java index f9fdad8abae..41b30ad5a2f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureDescriber.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureDescriber.java @@ -69,8 +69,7 @@ public class ProcedureDescriber { description.put("LAST_UPDATE", new Date(proc.getLastUpdate())); if (proc.isFailed()) { - description.put("ERRORS", - MasterProcedureUtil.unwrapRemoteIOException(proc).getMessage()); + description.put("ERRORS", proc.getException().unwrapRemoteIOException().getMessage()); } description.put("PARAMETERS", parametersToObject(proc)); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedurePrepareLatch.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedurePrepareLatch.java index f771210b871..2011c0b9de9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedurePrepareLatch.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedurePrepareLatch.java @@ -99,7 +99,7 @@ public abstract class ProcedurePrepareLatch { @Override protected void countDown(final Procedure proc) { if (proc.hasException()) { - exception = MasterProcedureUtil.unwrapRemoteIOException(proc); + exception = proc.getException().unwrapRemoteIOException(); } latch.countDown(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java index c8ff9f80452..f72905bb52d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java @@ -160,10 +160,9 @@ public final class ProcedureSyncWait { throw new IOException("The Master is Aborting"); } - // If the procedure fails, we should always have an exception captured. Throw it. - // Needs to be an IOE to get out of here. if (proc.hasException()) { - throw MasterProcedureUtil.unwrapRemoteIOException(proc); + // If the procedure fails, we should always have an exception captured. Throw it. + throw proc.getException().unwrapRemoteIOException(); } else { return proc.getResult(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java index 0ccf26e9c24..2916cc410ca 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMetaTableAccessor.java @@ -108,7 +108,7 @@ public class TestMetaTableAccessor { @Test public void testIsMetaWhenAllHealthy() throws InterruptedException { HMaster m = UTIL.getMiniHBaseCluster().getMaster(); - assertTrue(m.waitForMetaOnline()); + assertTrue(m.waitUntilMetaOnline()); } @Test @@ -117,7 +117,7 @@ public class TestMetaTableAccessor { int index = UTIL.getMiniHBaseCluster().getServerWithMeta(); HRegionServer rsWithMeta = UTIL.getMiniHBaseCluster().getRegionServer(index); rsWithMeta.abort("TESTING"); - assertTrue(m.waitForMetaOnline()); + assertTrue(m.waitUntilMetaOnline()); } /** diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java index 7680845cce5..80c5a36521d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHbck.java @@ -116,8 +116,7 @@ public class TestHbck { //bypass the procedure List pids = Arrays.asList(procId); - List results = - TEST_UTIL.getHbck().bypassProcedure(pids, 30000, false, false); + List results = TEST_UTIL.getHbck().bypassProcedure(pids, 30000, false); assertTrue("Failed to by pass procedure!", results.get(0)); TEST_UTIL.waitFor(5000, () -> proc.isSuccess() && proc.isBypass()); LOG.info("{} finished", proc); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionBypass.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionBypass.java deleted file mode 100644 index 4bc9f272da4..00000000000 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionBypass.java +++ /dev/null @@ -1,181 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.master.assignment; - -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.concurrent.CountDownLatch; - -import org.apache.hadoop.hbase.HBaseClassTestRule; -import org.apache.hadoop.hbase.HBaseTestingUtility; -import org.apache.hadoop.hbase.TableName; -import org.apache.hadoop.hbase.client.Admin; -import org.apache.hadoop.hbase.client.RegionInfo; -import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; -import org.apache.hadoop.hbase.procedure2.Procedure; -import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos; -import org.apache.hadoop.hbase.testclassification.LargeTests; -import org.apache.hadoop.hbase.util.Bytes; - -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.rules.TestName; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - -/** - * Tests bypass on a region assign/unassign - */ -@Category({LargeTests.class}) -public class TestRegionBypass { - private final static Logger LOG = LoggerFactory.getLogger(TestRegionBypass.class); - - @ClassRule - public static final HBaseClassTestRule CLASS_RULE = - HBaseClassTestRule.forClass(TestRegionBypass.class); - - @Rule - public TestName name = new TestName(); - - private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); - private TableName tableName; - - @BeforeClass - public static void startCluster() throws Exception { - TEST_UTIL.startMiniCluster(2); - } - - @AfterClass - public static void stopCluster() throws Exception { - TEST_UTIL.shutdownMiniCluster(); - } - - @Before - public void before() throws IOException { - this.tableName = TableName.valueOf(this.name.getMethodName()); - // Create a table. Has one region at least. - TEST_UTIL.createTable(this.tableName, Bytes.toBytes("cf")); - - } - - @Test - public void testBypass() throws IOException { - Admin admin = TEST_UTIL.getAdmin(); - List regions = admin.getRegions(this.tableName); - for (RegionInfo ri: regions) { - admin.unassign(ri.getRegionName(), false); - } - List pids = new ArrayList<>(regions.size()); - for (RegionInfo ri: regions) { - Procedure p = new StallingAssignProcedure(ri); - pids.add(TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(). - submitProcedure(p)); - } - for (Long pid: pids) { - while (!TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor().isStarted(pid)) { - Thread.currentThread().yield(); - } - } - // Call bypass on all. We should be stuck in the dispatch at this stage. - List> ps = - TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor().getProcedures(); - for (Procedure p: ps) { - if (p instanceof StallingAssignProcedure) { - List bs = TEST_UTIL.getHbck(). - bypassProcedure(Arrays.asList(p.getProcId()), 0, false, false); - for (Boolean b: bs) { - LOG.info("BYPASSED {} {}", p.getProcId(), b); - } - } - } - // Countdown the latch so its not hanging out. - for (Procedure p: ps) { - if (p instanceof StallingAssignProcedure) { - ((StallingAssignProcedure)p).latch.countDown(); - } - } - // Try and assign WITHOUT override flag. Should fail!. - for (RegionInfo ri: regions) { - try { - admin.assign(ri.getRegionName()); - } catch (Throwable dnrioe) { - // Expected - LOG.info("Expected {}", dnrioe); - } - } - while (!TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(). - getActiveProcIds().isEmpty()) { - Thread.currentThread().yield(); - } - // Now assign with the override flag. - for (RegionInfo ri: regions) { - TEST_UTIL.getHbck().assigns(Arrays.asList(ri.getEncodedName()), true); - } - while (!TEST_UTIL.getHBaseCluster().getMaster().getMasterProcedureExecutor(). - getActiveProcIds().isEmpty()) { - Thread.currentThread().yield(); - } - for (RegionInfo ri: regions) { - assertTrue(ri.toString(), TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager(). - getRegionStates().isRegionOnline(ri)); - } - } - - /** - * An AssignProcedure that Stalls just before the finish. - */ - public static class StallingAssignProcedure extends AssignProcedure { - public final CountDownLatch latch = new CountDownLatch(2); - - public StallingAssignProcedure() { - super(); - } - - public StallingAssignProcedure(RegionInfo regionInfo) { - super(regionInfo); - } - - @Override - void setTransitionState(MasterProcedureProtos.RegionTransitionState state) { - if (state == MasterProcedureProtos.RegionTransitionState.REGION_TRANSITION_DISPATCH) { - try { - LOG.info("LATCH2 {}", this.latch.getCount()); - this.latch.await(); - LOG.info("LATCH3 {}", this.latch.getCount()); - } catch (InterruptedException e) { - e.printStackTrace(); - } - } else if (state == MasterProcedureProtos.RegionTransitionState.REGION_TRANSITION_QUEUE) { - // Set latch. - LOG.info("LATCH1 {}", this.latch.getCount()); - this.latch.countDown(); - } - super.setTransitionState(state); - } - } -} diff --git a/hbase-shell/src/main/ruby/shell/commands/list_procedures.rb b/hbase-shell/src/main/ruby/shell/commands/list_procedures.rb index cd6e7516f99..68842a0410d 100644 --- a/hbase-shell/src/main/ruby/shell/commands/list_procedures.rb +++ b/hbase-shell/src/main/ruby/shell/commands/list_procedures.rb @@ -31,11 +31,12 @@ EOF end def command - formatter.header(%w[PID Name State Submitted Last_Update Parameters]) + formatter.header(%w[Id Name State Submitted_Time Last_Update Parameters]) + list = JSON.parse(admin.list_procedures) list.each do |proc| - submitted_time = Time.at(Integer(proc['submittedTime'])/1000).to_s - last_update = Time.at(Integer(proc['lastUpdate'])/1000).to_s + submitted_time = Time.at(Integer(proc['submittedTime']) / 1000).to_s + last_update = Time.at(Integer(proc['lastUpdate']) / 1000).to_s formatter.row([proc['procId'], proc['className'], proc['state'], submitted_time, last_update, proc['stateMessage']]) end diff --git a/hbase-shell/src/test/java/org/apache/hadoop/hbase/client/TestShell.java b/hbase-shell/src/test/java/org/apache/hadoop/hbase/client/TestShell.java index 9bb8eacce89..ab36edc278b 100644 --- a/hbase-shell/src/test/java/org/apache/hadoop/hbase/client/TestShell.java +++ b/hbase-shell/src/test/java/org/apache/hadoop/hbase/client/TestShell.java @@ -39,4 +39,5 @@ public class TestShell extends AbstractTestShell { // Start all ruby tests jruby.runScriptlet(PathType.ABSOLUTE, "src/test/ruby/tests_runner.rb"); } + } diff --git a/hbase-shell/src/test/ruby/shell/shell_test.rb b/hbase-shell/src/test/ruby/shell/shell_test.rb index abf92357c49..c1e9017edbb 100644 --- a/hbase-shell/src/test/ruby/shell/shell_test.rb +++ b/hbase-shell/src/test/ruby/shell/shell_test.rb @@ -85,9 +85,9 @@ class ShellTest < Test::Unit::TestCase define_test "Shell::Shell interactive mode should not throw" do # incorrect number of arguments - @shell.command('create', 'nothrow_table') - @shell.command('create', 'nothrow_table', 'family_1') + @shell.command('create', 'foo') + @shell.command('create', 'foo', 'family_1') # create a table that exists - @shell.command('create', 'nothrow_table', 'family_1') + @shell.command('create', 'foo', 'family_1') end end