HBASE-18143 [AMv2] Backoff on failed report of region transition quickly goes to astronomical time scale

M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
  Rather than compound the pause time, just have the backoff multiply the
  original INIT_PAUSE_TIME_MS so we go 1, 2, 5, 10, ... rather than
  1, 2, 30, 600, ... and so on.

  Minor logging fixup so that a failed transition report is no longer
  logged at trace level.
This commit is contained in:
Michael Stack 2017-05-31 22:06:21 -07:00
parent c7a7f880dd
commit e1f3c89b3b
1 changed files with 7 additions and 8 deletions

View File

@ -2200,15 +2200,16 @@ public class HRegionServer extends HasThread implements
ReportRegionStateTransitionResponse response = ReportRegionStateTransitionResponse response =
rss.reportRegionStateTransition(null, request); rss.reportRegionStateTransition(null, request);
if (response.hasErrorMessage()) { if (response.hasErrorMessage()) {
LOG.info("Failed transition " + hris[0] LOG.info("TRANSITION FAILED " + request + ": " + response.getErrorMessage());
+ " to " + code + ": " + response.getErrorMessage()); // NOTE: Return mid-method!!!
return false; return false;
} }
// Log if we had to retry else don't log unless TRACE. We want to // Log if we had to retry else don't log unless TRACE. We want to
// know if were successful after an attempt showed in logs as failed. // know if were successful after an attempt showed in logs as failed.
if (tries > 0 || LOG.isTraceEnabled()) { if (tries > 0 || LOG.isTraceEnabled()) {
LOG.trace("TRANSITION REPORTED " + request); LOG.info("TRANSITION REPORTED " + request);
} }
// NOTE: Return mid-method!!!
return true; return true;
} catch (ServiceException se) { } catch (ServiceException se) {
IOException ioe = ProtobufUtil.getRemoteException(se); IOException ioe = ProtobufUtil.getRemoteException(se);
@ -2216,11 +2217,11 @@ public class HRegionServer extends HasThread implements
ioe instanceof PleaseHoldException; ioe instanceof PleaseHoldException;
if (pause) { if (pause) {
// Do backoff else we flood the Master with requests. // Do backoff else we flood the Master with requests.
pauseTime = ConnectionUtils.getPauseTime(pauseTime, tries); pauseTime = ConnectionUtils.getPauseTime(INIT_PAUSE_TIME_MS, tries);
} else { } else {
pauseTime = INIT_PAUSE_TIME_MS; // Reset. pauseTime = INIT_PAUSE_TIME_MS; // Reset.
} }
LOG.info("Failed report of region transition " + LOG.info("Failed report transition " +
TextFormat.shortDebugString(request) + "; retry (#" + tries + ")" + TextFormat.shortDebugString(request) + "; retry (#" + tries + ")" +
(pause? (pause?
" after " + pauseTime + "ms delay (Master is coming online...).": " after " + pauseTime + "ms delay (Master is coming online...).":
@ -2233,9 +2234,7 @@ public class HRegionServer extends HasThread implements
} }
} }
} }
if (LOG.isTraceEnabled()) { LOG.info("TRANSITION NOT REPORTED " + request);
LOG.trace("TRANSITION NOT REPORTED " + request);
}
return false; return false;
} }