From b82cd670c3f473fde937c5b8445acc83ca76c5c6 Mon Sep 17 00:00:00 2001 From: Allan Yang Date: Fri, 24 Aug 2018 12:19:47 -0700 Subject: [PATCH] HBASE-21095 The timeout retry logic for several procedures is broken after master restarts --- .../assignment/RegionTransitionProcedure.java | 11 ++++------- .../TestUnexpectedStateException.java | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java index 0db86766843..c10bf2d05da 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionTransitionProcedure.java @@ -364,12 +364,9 @@ public abstract class RegionTransitionProcedure LOG.warn("Failed transition, suspend {}secs {}; {}; waiting on rectified condition fixed " + "by other Procedure or operator intervention", backoff / 1000, this, regionNode.toShortString(), e); - getRegionState(env).getProcedureEvent().suspend(); - if (getRegionState(env).getProcedureEvent().suspendIfNotReady(this)) { - setTimeout(Math.toIntExact(backoff)); - setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); - throw new ProcedureSuspendedException(); - } + setTimeout(Math.toIntExact(backoff)); + setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT); + throw new ProcedureSuspendedException(); } return new Procedure[] {this}; @@ -387,7 +384,7 @@ public abstract class RegionTransitionProcedure @Override protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) { setState(ProcedureProtos.ProcedureState.RUNNABLE); - getRegionState(env).getProcedureEvent().wake(env.getProcedureScheduler()); + env.getProcedureScheduler().addFront(this); return false; // 'false' means that this procedure handled the timeout } diff 
--git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestUnexpectedStateException.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestUnexpectedStateException.java index 0f62f8ed7ce..16648c0848b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestUnexpectedStateException.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestUnexpectedStateException.java @@ -24,17 +24,15 @@ import java.util.Iterator; import java.util.List; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Threads; -import org.apache.hbase.thirdparty.com.google.gson.JsonArray; -import org.apache.hbase.thirdparty.com.google.gson.JsonElement; -import org.apache.hbase.thirdparty.com.google.gson.JsonObject; -import org.apache.hbase.thirdparty.com.google.gson.JsonParser; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; @@ -46,6 +44,11 @@ import org.junit.rules.TestName; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.gson.JsonArray; +import org.apache.hbase.thirdparty.com.google.gson.JsonElement; +import org.apache.hbase.thirdparty.com.google.gson.JsonObject; +import org.apache.hbase.thirdparty.com.google.gson.JsonParser; + /** * Tests for HBASE-18408 "AM consumes CPU and fills up the logs really fast when there is no RS to * assign". 
If an {@link org.apache.hadoop.hbase.exceptions.UnexpectedStateException}, we'd spin on @@ -66,6 +69,8 @@ public class TestUnexpectedStateException { @BeforeClass public static void beforeClass() throws Exception { + TEST_UTIL.getConfiguration().setBoolean("hbase.localcluster.assign.random.ports", false); + TEST_UTIL.getConfiguration().setInt(HConstants.MASTER_INFO_PORT, 50655); TEST_UTIL.startMiniCluster(); } @@ -139,6 +144,11 @@ public class TestUnexpectedStateException { } Thread.sleep(1000); } + TEST_UTIL.getMiniHBaseCluster().stopMaster(0).join(); + HMaster master = TEST_UTIL.getMiniHBaseCluster().startMaster().getMaster(); + TEST_UTIL.waitFor(30000, () -> master.isInitialized()); + am = master.getAssignmentManager(); + rsn = am.getRegionStates().getRegionStateNode(region); am.markRegionAsOpened(rsn); t.join(); }