Revert "Merge pull request #133 from infraio/retry-backoff

HBASE-22193 Add backoff when region failed open too many times"

This reverts commit 249ac58d4f, reversing
changes made to f7867c4ffb.
This commit is contained in:
Guanghao Zhang 2019-04-13 11:08:12 +08:00
parent 249ac58d4f
commit e4e561b37f
2 changed files with 11 additions and 26 deletions

View File

@@ -131,10 +131,6 @@ public class AssignmentManager {
"hbase.assignment.maximum.attempts";
private static final int DEFAULT_ASSIGN_MAX_ATTEMPTS = Integer.MAX_VALUE;
public static final String ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS =
"hbase.assignment.retry.immediately.maximum.attempts";
private static final int DEFAULT_ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS = 3;
/** Region in Transition metrics threshold time */
public static final String METRICS_RIT_STUCK_WARNING_THRESHOLD =
"hbase.metrics.rit.stuck.warning.threshold";
@@ -155,7 +151,6 @@ public class AssignmentManager {
private final int assignDispatchWaitQueueMaxSize;
private final int assignDispatchWaitMillis;
private final int assignMaxAttempts;
private final int assignRetryImmediatelyMaxAttempts;
private final Object checkIfShouldMoveSystemRegionLock = new Object();
@@ -184,8 +179,6 @@ public class AssignmentManager {
this.assignMaxAttempts = Math.max(1, conf.getInt(ASSIGN_MAX_ATTEMPTS,
DEFAULT_ASSIGN_MAX_ATTEMPTS));
this.assignRetryImmediatelyMaxAttempts = conf.getInt(ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS,
DEFAULT_ASSIGN_RETRY_IMMEDIATELY_MAX_ATTEMPTS);
int ritChoreInterval = conf.getInt(RIT_CHORE_INTERVAL_MSEC_CONF_KEY,
DEFAULT_RIT_CHORE_INTERVAL_MSEC);
@@ -315,10 +308,6 @@ public class AssignmentManager {
return assignMaxAttempts;
}
/**
 * Returns the number of assign attempts that are retried immediately, before
 * failed opens start to back off. Package-private; read from the value loaded
 * out of configuration at construction time.
 */
int getAssignRetryImmediatelyMaxAttempts() {
  return this.assignRetryImmediatelyMaxAttempts;
}
/**
 * Exposes the in-memory region state tracker held by this assignment manager.
 *
 * @return the {@link RegionStates} instance backing this manager
 */
public RegionStates getRegionStates() {
  return this.regionStates;
}

View File

@@ -226,32 +226,20 @@ public class TransitRegionStateProcedure
return Flow.HAS_MORE_STATE;
}
int retries = env.getAssignmentManager().getRegionStates().addToFailedOpen(regionNode)
.incrementAndGetRetries();
int maxAttempts = env.getAssignmentManager().getAssignMaxAttempts();
LOG.info("Retry={} of max={}; {}; {}", retries, maxAttempts, this, regionNode.toShortString());
if (retries >= maxAttempts) {
if (incrementAndCheckMaxAttempts(env, regionNode)) {
env.getAssignmentManager().regionFailedOpen(regionNode, true);
setFailure(getClass().getSimpleName(), new RetriesExhaustedException(
"Max attempts " + env.getAssignmentManager().getAssignMaxAttempts() + " exceeded"));
regionNode.unsetProcedure(this);
return Flow.NO_MORE_STATE;
}
env.getAssignmentManager().regionFailedOpen(regionNode, false);
// we failed to assign the region, force a new plan
forceNewPlan = true;
regionNode.setRegionLocation(null);
setNextState(RegionStateTransitionState.REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE);
if (retries > env.getAssignmentManager().getAssignRetryImmediatelyMaxAttempts()) {
// Throw an exception so the procedure backs off before retrying, once the region has failed to open too many times
throw new HBaseIOException("Failed to open region");
} else {
// Here we do not throw an exception because we want the region to be online ASAP
return Flow.HAS_MORE_STATE;
}
// Here we do not throw an exception because we want the region to be online ASAP
return Flow.HAS_MORE_STATE;
}
private void closeRegion(MasterProcedureEnv env, RegionStateNode regionNode) throws IOException {
@@ -412,6 +400,14 @@ public class TransitRegionStateProcedure
this.remoteProc = null;
}
/**
 * Bumps the failed-open retry counter for {@code regionNode} and reports whether
 * the configured maximum number of assign attempts has now been reached.
 *
 * @param env the master procedure environment providing the assignment manager
 * @param regionNode the region whose failed-open count is incremented
 * @return {@code true} when the attempt count has reached the configured maximum
 */
private boolean incrementAndCheckMaxAttempts(MasterProcedureEnv env, RegionStateNode regionNode) {
  AssignmentManager am = env.getAssignmentManager();
  // Record this failure first so the attempt count reflects the current try.
  int attempts = am.getRegionStates().addToFailedOpen(regionNode).incrementAndGetRetries();
  int maxAttempts = am.getAssignMaxAttempts();
  LOG.info("Retry={} of max={}; {}; {}", attempts, maxAttempts, this, regionNode.toShortString());
  return attempts >= maxAttempts;
}
@Override
protected void rollbackState(MasterProcedureEnv env, RegionStateTransitionState state)
throws IOException, InterruptedException {