diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index f0a723dae16..ce33e5204c7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -37,7 +37,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.PleaseHoldException; -import org.apache.hadoop.hbase.RegionException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.YouAreDeadException; @@ -53,7 +52,6 @@ import org.apache.hadoop.hbase.master.AssignmentListener; import org.apache.hadoop.hbase.master.LoadBalancer; import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.MetricsAssignmentManager; -import org.apache.hadoop.hbase.master.NoSuchProcedureException; import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.RegionState.State; @@ -561,11 +559,6 @@ public class AssignmentManager implements ServerListener { return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc); } - @VisibleForTesting - public boolean waitForAssignment(final RegionInfo regionInfo) throws IOException { - return waitForAssignment(regionInfo, Long.MAX_VALUE); - } - /** * Create round-robin assigns. Use on table creation to distribute out regions across cluster. * @return AssignProcedures made out of the passed in hris and a call to the balancer @@ -577,38 +570,6 @@ public class AssignmentManager implements ServerListener { return createRoundRobinAssignProcedures(hris, null); } - @VisibleForTesting - // TODO: Remove this? - public boolean waitForAssignment(final RegionInfo regionInfo, final long timeout) - throws IOException { - RegionStateNode node = null; - // This method can be called before the regionInfo has made it into the regionStateMap - // so wait around here a while. - long startTime = System.currentTimeMillis(); - // Something badly wrong if takes ten seconds to register a region. - long endTime = startTime + 10000; - while ((node = regionStates.getRegionStateNode(regionInfo)) == null && isRunning() && - System.currentTimeMillis() < endTime) { - // Presume it not yet added but will be added soon. Let it spew a lot so we can tell if - // we are waiting here alot. - LOG.debug("Waiting on " + regionInfo + " to be added to regionStateMap"); - Threads.sleep(10); - } - if (node == null) { - if (!isRunning()) return false; - throw new RegionException(regionInfo.getRegionNameAsString() + " never registered with Assigment."); - } - - RegionTransitionProcedure proc = node.getProcedure(); - if (proc == null) { - throw new NoSuchProcedureException(node.toString()); - } - - ProcedureSyncWait.waitForProcedureToCompleteIOE( - master.getMasterProcedureExecutor(), proc, timeout); - return true; - } - // ============================================================================================ // RegionTransition procedures helpers // ============================================================================================ @@ -1918,4 +1879,9 @@ public class AssignmentManager implements ServerListener { new ServerCrashException(rtp.getProcId(), serverName)); } } + + @VisibleForTesting + MasterServices getMaster() { + return master; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java index 4bf88c89f98..5c349e5aaa3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java @@ -497,7 +497,7 @@ public class RegionStates { return regionsMap.get(regionName); } - protected RegionStateNode getRegionStateNode(final RegionInfo regionInfo) { + public RegionStateNode getRegionStateNode(final RegionInfo regionInfo) { return getRegionStateNodeFromName(regionInfo.getRegionName()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java index 328ac000b76..f72905bb52d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ProcedureSyncWait.java @@ -40,6 +40,8 @@ import org.apache.yetus.audience.InterfaceStability; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos.ProcedureState; + /** * Helper to synchronously wait on conditions. * This will be removed in the future (mainly when the AssignmentManager will be @@ -135,18 +137,25 @@ public final class ProcedureSyncWait { } public static byte[] waitForProcedureToComplete( - final ProcedureExecutor procExec, - final Procedure proc, final long timeout) - throws IOException { + final ProcedureExecutor procExec, final Procedure proc, + final long timeout) throws IOException { waitFor(procExec.getEnvironment(), "pid=" + proc.getProcId(), new ProcedureSyncWait.Predicate() { @Override public Boolean evaluate() throws IOException { - return !procExec.isRunning() || procExec.isFinished(proc.getProcId()); + if (!procExec.isRunning()) { + return true; + } + ProcedureState state = proc.getState(); + if (state == ProcedureState.INITIALIZING || state == ProcedureState.RUNNABLE) { + // under these states the procedure may have not been added to procExec yet, so do not + // use isFinished to test whether it is finished, as this method will just check if the + // procedure is in the running procedure list + return false; + } + return procExec.isFinished(proc.getProcId()); } - } - ); - + }); if (!procExec.isRunning()) { throw new IOException("The Master is Aborting"); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index 03d5d76c0f2..2bfcbca825c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -99,6 +99,7 @@ import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; +import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; import org.apache.hadoop.hbase.master.assignment.RegionStateStore; import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.regionserver.BloomType; @@ -3210,17 +3211,15 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility { } /** - * Uses directly the assignment manager to assign the region. - * and waits until the specified region has completed assignment. - * @throws IOException - * @throw InterruptedException + * Uses directly the assignment manager to assign the region. and waits until the specified region + * has completed assignment. * @return true if the region is assigned false otherwise. */ public boolean assignRegion(final RegionInfo regionInfo) throws IOException, InterruptedException { final AssignmentManager am = getHBaseCluster().getMaster().getAssignmentManager(); am.assign(regionInfo); - return am.waitForAssignment(regionInfo); + return AssignmentTestingUtil.waitForAssignment(am, regionInfo); } /** diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java index 1a156ec09a8..ef9714cab21 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi.java @@ -33,7 +33,6 @@ import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.HMaster; -import org.apache.hadoop.hbase.master.NoSuchProcedureException; import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; @@ -82,22 +81,10 @@ public class TestAsyncRegionAdminApi extends TestAsyncAdminBase { // Region is assigned now. Let's assign it again. // Master should not abort, and region should stay assigned. admin.assign(hri.getRegionName()).get(); - try { - am.waitForAssignment(hri); - fail("Expected NoSuchProcedureException"); - } catch (NoSuchProcedureException e) { - // Expected - } assertTrue(regionStates.getRegionState(hri).isOpened()); // unassign region admin.unassign(hri.getRegionName(), true).get(); - try { - am.waitForAssignment(hri); - fail("Expected NoSuchProcedureException"); - } catch (NoSuchProcedureException e) { - // Expected - } assertTrue(regionStates.getRegionState(hri).isClosed()); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java index 23187b31ff0..63ae6dc7a84 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.client; import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors; import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck; import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -46,8 +47,8 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.Waiter; -import org.apache.hadoop.hbase.master.NoSuchProcedureException; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; +import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore; import org.apache.hadoop.hbase.testclassification.LargeTests; @@ -103,18 +104,14 @@ public class TestMetaWithReplicas { getMetaRegionLocation(TEST_UTIL.getZooKeeperWatcher()); LOG.info("HBASE:META DEPLOY: on " + hbaseMetaServerName); sns.add(hbaseMetaServerName); - for (int replicaId = 1; replicaId < 3; replicaId ++) { - RegionInfo h = - RegionReplicaUtil.getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO, - replicaId); - try { - am.waitForAssignment(h); - ServerName sn = am.getRegionStates().getRegionServerOfRegion(h); - LOG.info("HBASE:META DEPLOY: " + h.getRegionNameAsString() + " on " + sn); - sns.add(sn); - } catch (NoSuchProcedureException e) { - LOG.info("Presume the procedure has been cleaned up so just proceed: " + e.toString()); - } + for (int replicaId = 1; replicaId < 3; replicaId++) { + RegionInfo h = RegionReplicaUtil + .getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId); + AssignmentTestingUtil.waitForAssignment(am, h); + ServerName sn = am.getRegionStates().getRegionServerOfRegion(h); + assertNotNull(sn); + LOG.info("HBASE:META DEPLOY: " + h.getRegionNameAsString() + " on " + sn); + sns.add(sn); } // Fun. All meta region replicas have ended up on the one server. This will cause this test // to fail ... sometimes. diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSeparateClientZKCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSeparateClientZKCluster.java index 533af935b5c..c63fcadd140 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSeparateClientZKCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestSeparateClientZKCluster.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hbase.client; import java.io.File; - import org.apache.commons.io.FileUtils; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -27,7 +26,7 @@ import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.HMaster; -import org.apache.hadoop.hbase.master.NoSuchProcedureException; +import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; @@ -41,7 +40,6 @@ import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.rules.TestName; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -92,7 +90,7 @@ public class TestSeparateClientZKCluster { FileUtils.deleteDirectory(clientZkDir); } - @Test(timeout = 60000) + @Test public void testBasicOperation() throws Exception { TableName tn = TableName.valueOf(name.getMethodName()); // create table @@ -119,7 +117,7 @@ public class TestSeparateClientZKCluster { } } - @Test(timeout = 60000) + @Test public void testMasterSwitch() throws Exception { // get an admin instance and issue some request first Connection conn = TEST_UTIL.getConnection(); @@ -143,7 +141,7 @@ public class TestSeparateClientZKCluster { } } - @Test(timeout = 60000) + @Test public void testMetaRegionMove() throws Exception { TableName tn = TableName.valueOf(name.getMethodName()); // create table @@ -199,7 +197,7 @@ public class TestSeparateClientZKCluster { } } - @Test(timeout = 120000) + @Test public void testMetaMoveDuringClientZkClusterRestart() throws Exception { TableName tn = TableName.valueOf(name.getMethodName()); // create table @@ -230,12 +228,8 @@ public class TestSeparateClientZKCluster { Thread.sleep(200); } // wait for meta region online - try { - cluster.getMaster().getAssignmentManager() - .waitForAssignment(RegionInfoBuilder.FIRST_META_REGIONINFO); - } catch (NoSuchProcedureException e) { - // we don't need to take any further action - } + AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), + RegionInfoBuilder.FIRST_META_REGIONINFO); // wait some long time to make sure we will retry sync data to client ZK until data set Thread.sleep(10000); clientZkCluster.startup(clientZkDir); @@ -250,7 +244,7 @@ public class TestSeparateClientZKCluster { } } - @Test(timeout = 60000) + @Test public void testAsyncTable() throws Exception { TableName tn = TableName.valueOf(name.getMethodName()); ColumnFamilyDescriptorBuilder cfDescBuilder = ColumnFamilyDescriptorBuilder.newBuilder(family); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java index 3799d739181..e7b6e849e2c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/AssignmentTestingUtil.java @@ -18,14 +18,19 @@ package org.apache.hadoop.hbase.master.assignment; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import java.io.IOException; import java.util.Set; - import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.Waiter.ExplainingPredicate; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.master.RegionState.State; +import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; +import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait; import org.apache.hadoop.hbase.util.Threads; import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceStability; @@ -34,7 +39,7 @@ import org.slf4j.LoggerFactory; @InterfaceAudience.Private @InterfaceStability.Evolving -public abstract class AssignmentTestingUtil { +public final class AssignmentTestingUtil { private static final Logger LOG = LoggerFactory.getLogger(AssignmentTestingUtil.class); private AssignmentTestingUtil() {} @@ -122,4 +127,27 @@ public abstract class AssignmentTestingUtil { private static HMaster getMaster(final HBaseTestingUtility util) { return util.getMiniHBaseCluster().getMaster(); } + + public static boolean waitForAssignment(AssignmentManager am, RegionInfo regionInfo) + throws IOException { + // This method can be called before the regionInfo has made it into the regionStateMap + // so wait around here a while. + Waiter.waitFor(am.getConfiguration(), 10000, + () -> am.getRegionStates().getRegionStateNode(regionInfo) != null); + RegionStateNode regionNode = am.getRegionStates().getRegionStateNode(regionInfo); + // Wait until the region has already been open, or we have a TRSP along with it. + Waiter.waitFor(am.getConfiguration(), 30000, + () -> regionNode.isInState(State.OPEN) || regionNode.isInTransition()); + RegionTransitionProcedure proc = regionNode.getProcedure(); + synchronized (regionNode) { + if (regionNode.isInState(State.OPEN)) { + return true; + } + proc = regionNode.getProcedure(); + } + assertNotNull(proc); + ProcedureSyncWait.waitForProcedureToCompleteIOE(am.getMaster().getMasterProcedureExecutor(), + proc, 5L * 60 * 1000); + return true; + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java index eb162de7736..29a91075cfd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java @@ -33,7 +33,6 @@ import java.util.Optional; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -73,18 +72,14 @@ import org.apache.hadoop.hbase.exceptions.UnexpectedStateException; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.LoadBalancer; import org.apache.hadoop.hbase.master.MasterRpcServices; -import org.apache.hadoop.hbase.master.NoSuchProcedureException; import org.apache.hadoop.hbase.master.RegionState; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.assignment.AssignmentManager; +import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; -import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; -import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; -import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; -import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.RegionServerTests; import org.apache.hadoop.hbase.util.Bytes; @@ -94,8 +89,6 @@ import org.apache.hadoop.hbase.util.HBaseFsck; import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; import org.apache.hadoop.hbase.util.RetryCounter; import org.apache.hadoop.hbase.util.Threads; -import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; -import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NodeExistsException; import org.junit.After; @@ -111,6 +104,14 @@ import org.junit.rules.TestName; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; +import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; + +import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode; +import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest; +import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionResponse; + /** * The below tests are testing split region against a running cluster */ @@ -161,11 +162,7 @@ public class TestSplitTransactionOnCluster { throws IOException, InterruptedException { assertEquals(1, regions.size()); RegionInfo hri = regions.get(0).getRegionInfo(); - try { - cluster.getMaster().getAssignmentManager().waitForAssignment(hri, 600000); - } catch (NoSuchProcedureException e) { - LOG.info("Presume the procedure has been cleaned up so just proceed: " + e.toString()); - } + AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri); return hri; }