HBASE-21095 The timeout retry logic for several procedures is broken after master restarts
This commit is contained in:
parent
66add55234
commit
b82cd670c3
|
@ -364,12 +364,9 @@ public abstract class RegionTransitionProcedure
|
|||
LOG.warn("Failed transition, suspend {}secs {}; {}; waiting on rectified condition fixed " +
|
||||
"by other Procedure or operator intervention", backoff / 1000, this,
|
||||
regionNode.toShortString(), e);
|
||||
getRegionState(env).getProcedureEvent().suspend();
|
||||
if (getRegionState(env).getProcedureEvent().suspendIfNotReady(this)) {
|
||||
setTimeout(Math.toIntExact(backoff));
|
||||
setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
|
||||
throw new ProcedureSuspendedException();
|
||||
}
|
||||
setTimeout(Math.toIntExact(backoff));
|
||||
setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
|
||||
throw new ProcedureSuspendedException();
|
||||
}
|
||||
|
||||
return new Procedure[] {this};
|
||||
|
@ -387,7 +384,7 @@ public abstract class RegionTransitionProcedure
|
|||
@Override
|
||||
protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
|
||||
setState(ProcedureProtos.ProcedureState.RUNNABLE);
|
||||
getRegionState(env).getProcedureEvent().wake(env.getProcedureScheduler());
|
||||
env.getProcedureScheduler().addFront(this);
|
||||
return false; // 'false' means that this procedure handled the timeout
|
||||
}
|
||||
|
||||
|
|
|
@ -24,17 +24,15 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.Admin;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.Threads;
|
||||
import org.apache.hbase.thirdparty.com.google.gson.JsonArray;
|
||||
import org.apache.hbase.thirdparty.com.google.gson.JsonElement;
|
||||
import org.apache.hbase.thirdparty.com.google.gson.JsonObject;
|
||||
import org.apache.hbase.thirdparty.com.google.gson.JsonParser;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
|
@ -46,6 +44,11 @@ import org.junit.rules.TestName;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import org.apache.hbase.thirdparty.com.google.gson.JsonArray;
|
||||
import org.apache.hbase.thirdparty.com.google.gson.JsonElement;
|
||||
import org.apache.hbase.thirdparty.com.google.gson.JsonObject;
|
||||
import org.apache.hbase.thirdparty.com.google.gson.JsonParser;
|
||||
|
||||
/**
|
||||
* Tests for HBASE-18408 "AM consumes CPU and fills up the logs really fast when there is no RS to
|
||||
* assign". If an {@link org.apache.hadoop.hbase.exceptions.UnexpectedStateException}, we'd spin on
|
||||
|
@ -66,6 +69,8 @@ public class TestUnexpectedStateException {
|
|||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
TEST_UTIL.getConfiguration().setBoolean("hbase.localcluster.assign.random.ports", false);
|
||||
TEST_UTIL.getConfiguration().setInt(HConstants.MASTER_INFO_PORT, 50655);
|
||||
TEST_UTIL.startMiniCluster();
|
||||
}
|
||||
|
||||
|
@ -139,6 +144,11 @@ public class TestUnexpectedStateException {
|
|||
}
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
TEST_UTIL.getMiniHBaseCluster().stopMaster(0).join();
|
||||
HMaster master = TEST_UTIL.getMiniHBaseCluster().startMaster().getMaster();
|
||||
TEST_UTIL.waitFor(30000, () -> master.isInitialized());
|
||||
am = master.getAssignmentManager();
|
||||
rsn = am.getRegionStates().getRegionStateNode(region);
|
||||
am.markRegionAsOpened(rsn);
|
||||
t.join();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue