HBASE-25379 Make retry pause time configurable for regionserver short operation RPC (reportRegionStateTransition/reportProcedureDone) (#2757)
* HBASE-25379 Make retry pause time configurable for regionserver short operation RPC (reportRegionStateTransition/reportProcedureDone)
* HBASE-25379 RemoteProcedureResultReporter should also retry after the configured pause time
* Addressed the review comments

Signed-off-by: Yulin Niu <niuyulin@apache.org>
This commit is contained in:
parent
d963342f8a
commit
c96fbf0407
|
@ -965,6 +965,17 @@ public final class HConstants {
|
||||||
*/
|
*/
|
||||||
public static final int DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT = 10000;
|
public static final int DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT = 10000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retry pause time for short operation RPC
|
||||||
|
*/
|
||||||
|
public static final String HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME =
|
||||||
|
"hbase.rpc.shortoperation.retry.pause.time";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default value of {@link #HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME}
|
||||||
|
*/
|
||||||
|
public static final long DEFAULT_HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME = 1000;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Value indicating the server name was saved with no sequence number.
|
* Value indicating the server name was saved with no sequence number.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -435,6 +435,9 @@ public class HRegionServer extends Thread implements
|
||||||
|
|
||||||
private final int shortOperationTimeout;
|
private final int shortOperationTimeout;
|
||||||
|
|
||||||
|
// Time to pause if master says 'please hold'
|
||||||
|
private final long retryPauseTime;
|
||||||
|
|
||||||
private final RegionServerAccounting regionServerAccounting;
|
private final RegionServerAccounting regionServerAccounting;
|
||||||
|
|
||||||
private SlowLogTableOpsChore slowLogTableOpsChore = null;
|
private SlowLogTableOpsChore slowLogTableOpsChore = null;
|
||||||
|
@ -615,6 +618,9 @@ public class HRegionServer extends Thread implements
|
||||||
this.shortOperationTimeout = conf.getInt(HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY,
|
this.shortOperationTimeout = conf.getInt(HConstants.HBASE_RPC_SHORTOPERATION_TIMEOUT_KEY,
|
||||||
HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT);
|
HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_TIMEOUT);
|
||||||
|
|
||||||
|
this.retryPauseTime = conf.getLong(HConstants.HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME,
|
||||||
|
HConstants.DEFAULT_HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME);
|
||||||
|
|
||||||
this.abortRequested = new AtomicBoolean(false);
|
this.abortRequested = new AtomicBoolean(false);
|
||||||
this.stopped = false;
|
this.stopped = false;
|
||||||
|
|
||||||
|
@ -2436,10 +2442,8 @@ public class HRegionServer extends Thread implements
|
||||||
final ReportRegionStateTransitionRequest request =
|
final ReportRegionStateTransitionRequest request =
|
||||||
createReportRegionStateTransitionRequest(context);
|
createReportRegionStateTransitionRequest(context);
|
||||||
|
|
||||||
// Time to pause if master says 'please hold'. Make configurable if needed.
|
|
||||||
final long initPauseTime = 1000;
|
|
||||||
int tries = 0;
|
int tries = 0;
|
||||||
long pauseTime;
|
long pauseTime = this.retryPauseTime;
|
||||||
// Keep looping till we get an error. We want to send reports even though server is going down.
|
// Keep looping till we get an error. We want to send reports even though server is going down.
|
||||||
// Only go down if clusterConnection is null. It is set to null almost as last thing as the
|
// Only go down if clusterConnection is null. It is set to null almost as last thing as the
|
||||||
// HRegionServer goes down.
|
// HRegionServer goes down.
|
||||||
|
@ -2470,9 +2474,9 @@ public class HRegionServer extends Thread implements
|
||||||
|| ioe instanceof CallQueueTooBigException;
|
|| ioe instanceof CallQueueTooBigException;
|
||||||
if (pause) {
|
if (pause) {
|
||||||
// Do backoff else we flood the Master with requests.
|
// Do backoff else we flood the Master with requests.
|
||||||
pauseTime = ConnectionUtils.getPauseTime(initPauseTime, tries);
|
pauseTime = ConnectionUtils.getPauseTime(this.retryPauseTime, tries);
|
||||||
} else {
|
} else {
|
||||||
pauseTime = initPauseTime; // Reset.
|
pauseTime = this.retryPauseTime; // Reset.
|
||||||
}
|
}
|
||||||
LOG.info("Failed report transition " +
|
LOG.info("Failed report transition " +
|
||||||
TextFormat.shortDebugString(request) + "; retry (#" + tries + ")" +
|
TextFormat.shortDebugString(request) + "; retry (#" + tries + ")" +
|
||||||
|
@ -3938,4 +3942,13 @@ public class HRegionServer extends Thread implements
|
||||||
public CompactedHFilesDischarger getCompactedHFilesDischarger() {
|
public CompactedHFilesDischarger getCompactedHFilesDischarger() {
|
||||||
return compactedFileDischarger;
|
return compactedFileDischarger;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return pause time configured in {@link HConstants#HBASE_RPC_SHORTOPERATION_RETRY_PAUSE_TIME}
|
||||||
|
* @return pause time
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public long getRetryPauseTime() {
|
||||||
|
return this.retryPauseTime;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -41,9 +41,6 @@ class RemoteProcedureResultReporter extends Thread {
|
||||||
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(RemoteProcedureResultReporter.class);
|
private static final Logger LOG = LoggerFactory.getLogger(RemoteProcedureResultReporter.class);
|
||||||
|
|
||||||
// Time to pause if master says 'please hold'. Make configurable if needed.
|
|
||||||
private static final int INIT_PAUSE_TIME_MS = 1000;
|
|
||||||
|
|
||||||
private static final int MAX_BATCH = 100;
|
private static final int MAX_BATCH = 100;
|
||||||
|
|
||||||
private final HRegionServer server;
|
private final HRegionServer server;
|
||||||
|
@ -98,9 +95,9 @@ class RemoteProcedureResultReporter extends Thread {
|
||||||
long pauseTime;
|
long pauseTime;
|
||||||
if (pause) {
|
if (pause) {
|
||||||
// Do backoff else we flood the Master with requests.
|
// Do backoff else we flood the Master with requests.
|
||||||
pauseTime = ConnectionUtils.getPauseTime(INIT_PAUSE_TIME_MS, tries);
|
pauseTime = ConnectionUtils.getPauseTime(server.getRetryPauseTime(), tries);
|
||||||
} else {
|
} else {
|
||||||
pauseTime = INIT_PAUSE_TIME_MS; // Reset.
|
pauseTime = server.getRetryPauseTime(); // Reset.
|
||||||
}
|
}
|
||||||
LOG.info("Failed procedure report " + TextFormat.shortDebugString(request) + "; retry (#" +
|
LOG.info("Failed procedure report " + TextFormat.shortDebugString(request) + "; retry (#" +
|
||||||
tries + ")" + (pause ? " after " + pauseTime + "ms delay (Master is coming online...)."
|
tries + ")" + (pause ? " after " + pauseTime + "ms delay (Master is coming online...)."
|
||||||
|
|
Loading…
Reference in New Issue