HBASE-17557 HRegionServer#reportRegionSizesForQuotas() should respond to UnsupportedOperationException
This commit is contained in:
parent
6b334cd817
commit
7fb0ac26e3
|
@ -53,6 +53,9 @@ public class FileSystemUtilizationChore extends ScheduledChore {
|
|||
static final String FS_UTILIZATION_MAX_ITERATION_DURATION_KEY = "hbase.regionserver.quotas.fs.utilization.chore.max.iteration.millis";
|
||||
static final long FS_UTILIZATION_MAX_ITERATION_DURATION_DEFAULT = 5000L;
|
||||
|
||||
private int numberOfCyclesToSkip = 0, prevNumberOfCyclesToSkip = 0;
|
||||
private static final int CYCLE_UPPER_BOUND = 32;
|
||||
|
||||
private final HRegionServer rs;
|
||||
private final long maxIterationMillis;
|
||||
private Iterator<Region> leftoverRegions;
|
||||
|
@ -67,6 +70,10 @@ public class FileSystemUtilizationChore extends ScheduledChore {
|
|||
|
||||
@Override
|
||||
protected void chore() {
|
||||
if (numberOfCyclesToSkip > 0) {
|
||||
numberOfCyclesToSkip--;
|
||||
return;
|
||||
}
|
||||
final Map<HRegionInfo,Long> onlineRegionSizes = new HashMap<>();
|
||||
final Set<Region> onlineRegions = new HashSet<>(rs.getOnlineRegions());
|
||||
// Process the regions from the last run if we have any. If we are somehow having difficulty
|
||||
|
@ -126,7 +133,14 @@ public class FileSystemUtilizationChore extends ScheduledChore {
|
|||
+ skippedSplitParents + " regions due to being the parent of a split, and"
|
||||
+ skippedRegionReplicas + " regions due to being region replicas.");
|
||||
}
|
||||
reportRegionSizesToMaster(onlineRegionSizes);
|
||||
if (!reportRegionSizesToMaster(onlineRegionSizes)) {
|
||||
// Back off reporting: skip 1 cycle, doubling on each further failure, up to CYCLE_UPPER_BOUND.
|
||||
numberOfCyclesToSkip = prevNumberOfCyclesToSkip > 0 ? 2 * prevNumberOfCyclesToSkip : 1;
|
||||
if (numberOfCyclesToSkip > CYCLE_UPPER_BOUND) {
|
||||
numberOfCyclesToSkip = CYCLE_UPPER_BOUND;
|
||||
}
|
||||
prevNumberOfCyclesToSkip = numberOfCyclesToSkip;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -166,8 +180,8 @@ public class FileSystemUtilizationChore extends ScheduledChore {
|
|||
*
|
||||
* @param onlineRegionSizes The computed region sizes to report.
|
||||
*/
|
||||
void reportRegionSizesToMaster(Map<HRegionInfo,Long> onlineRegionSizes) {
|
||||
this.rs.reportRegionSizesForQuotas(onlineRegionSizes);
|
||||
boolean reportRegionSizesToMaster(Map<HRegionInfo,Long> onlineRegionSizes) {
|
||||
return this.rs.reportRegionSizesForQuotas(onlineRegionSizes);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -66,6 +66,7 @@ import org.apache.hadoop.hbase.ChoreService;
|
|||
import org.apache.hadoop.hbase.ClockOutOfSyncException;
|
||||
import org.apache.hadoop.hbase.CoordinatedStateManager;
|
||||
import org.apache.hadoop.hbase.CoordinatedStateManagerFactory;
|
||||
import org.apache.hadoop.hbase.DoNotRetryIOException;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
|
@ -1248,13 +1249,14 @@ public class HRegionServer extends HasThread implements
|
|||
* Reports the given map of Regions and their size on the filesystem to the active Master.
|
||||
*
|
||||
* @param onlineRegionSizes A map of region info to size in bytes
|
||||
* @return false if FileSystemUtilizationChore should pause reporting to the Master, true otherwise
|
||||
*/
|
||||
public void reportRegionSizesForQuotas(final Map<HRegionInfo, Long> onlineRegionSizes) {
|
||||
public boolean reportRegionSizesForQuotas(final Map<HRegionInfo, Long> onlineRegionSizes) {
|
||||
RegionServerStatusService.BlockingInterface rss = rssStub;
|
||||
if (rss == null) {
|
||||
// the current server could be stopping.
|
||||
LOG.trace("Skipping Region size report to HMaster as stub is null");
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
try {
|
||||
RegionSpaceUseReportRequest request = buildRegionSpaceUseReportRequest(
|
||||
|
@ -1263,16 +1265,28 @@ public class HRegionServer extends HasThread implements
|
|||
} catch (ServiceException se) {
|
||||
IOException ioe = ProtobufUtil.getRemoteException(se);
|
||||
if (ioe instanceof PleaseHoldException) {
|
||||
LOG.trace("Failed to report region sizes to Master because it is initializing. This will be retried.", ioe);
|
||||
LOG.trace("Failed to report region sizes to Master because it is initializing."
|
||||
+ " This will be retried.", ioe);
|
||||
// The Master is coming up. Will retry the report later. Avoid re-creating the stub.
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
LOG.debug("Failed to report region sizes to Master. This will be retried.", ioe);
|
||||
if (rssStub == rss) {
|
||||
rssStub = null;
|
||||
}
|
||||
createRegionServerStatusStub(true);
|
||||
if (ioe instanceof DoNotRetryIOException) {
|
||||
DoNotRetryIOException doNotRetryEx = (DoNotRetryIOException) ioe;
|
||||
if (doNotRetryEx.getCause() != null) {
|
||||
Throwable t = doNotRetryEx.getCause();
|
||||
if (t instanceof UnsupportedOperationException) {
|
||||
LOG.debug("master doesn't support ReportRegionSpaceUse, pause before retrying");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
LOG.debug("Failed to report region sizes to Master. This will be retried.", ioe);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue