HBASE-18143 [AMv2] Backoff on failed report of region transition quickly goes to astronomical time scale
M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Rather than compounding the pause time, just have the backoff multiply the original INIT_PAUSE_TIME_MS so we go 1, 2, 5, 10, ... rather than 1, 2, 30, 600, ... and so on. Minor fixup around logging so that the report of a failed transition is no longer logged at trace level.
This commit is contained in:
parent
c7a7f880dd
commit
e1f3c89b3b
|
@ -2200,15 +2200,16 @@ public class HRegionServer extends HasThread implements
|
|||
ReportRegionStateTransitionResponse response =
|
||||
rss.reportRegionStateTransition(null, request);
|
||||
if (response.hasErrorMessage()) {
|
||||
LOG.info("Failed transition " + hris[0]
|
||||
+ " to " + code + ": " + response.getErrorMessage());
|
||||
LOG.info("TRANSITION FAILED " + request + ": " + response.getErrorMessage());
|
||||
// NOTE: Return mid-method!!!
|
||||
return false;
|
||||
}
|
||||
// Log if we had to retry else don't log unless TRACE. We want to
|
||||
// know if we were successful after an attempt showed in logs as failed.
|
||||
if (tries > 0 || LOG.isTraceEnabled()) {
|
||||
LOG.trace("TRANSITION REPORTED " + request);
|
||||
LOG.info("TRANSITION REPORTED " + request);
|
||||
}
|
||||
// NOTE: Return mid-method!!!
|
||||
return true;
|
||||
} catch (ServiceException se) {
|
||||
IOException ioe = ProtobufUtil.getRemoteException(se);
|
||||
|
@ -2216,11 +2217,11 @@ public class HRegionServer extends HasThread implements
|
|||
ioe instanceof PleaseHoldException;
|
||||
if (pause) {
|
||||
// Do backoff else we flood the Master with requests.
|
||||
pauseTime = ConnectionUtils.getPauseTime(pauseTime, tries);
|
||||
pauseTime = ConnectionUtils.getPauseTime(INIT_PAUSE_TIME_MS, tries);
|
||||
} else {
|
||||
pauseTime = INIT_PAUSE_TIME_MS; // Reset.
|
||||
}
|
||||
LOG.info("Failed report of region transition " +
|
||||
LOG.info("Failed report transition " +
|
||||
TextFormat.shortDebugString(request) + "; retry (#" + tries + ")" +
|
||||
(pause?
|
||||
" after " + pauseTime + "ms delay (Master is coming online...).":
|
||||
|
@ -2233,9 +2234,7 @@ public class HRegionServer extends HasThread implements
|
|||
}
|
||||
}
|
||||
}
|
||||
if (LOG.isTraceEnabled()) {
|
||||
LOG.trace("TRANSITION NOT REPORTED " + request);
|
||||
}
|
||||
LOG.info("TRANSITION NOT REPORTED " + request);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue