HBASE-27048 Server side scanner time limit should account for time in queue (#4562)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
Bryan Beaudreault 2022-07-06 11:57:37 -04:00
parent c725dc98e5
commit 87f22813f4
2 changed files with 92 additions and 32 deletions

View File

@ -288,7 +288,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
/**
* Default value of {@link RSRpcServices#REGION_SERVER_RPC_MINIMUM_SCAN_TIME_LIMIT_DELTA}
*/
private static final long DEFAULT_REGION_SERVER_RPC_MINIMUM_SCAN_TIME_LIMIT_DELTA = 10;
static final long DEFAULT_REGION_SERVER_RPC_MINIMUM_SCAN_TIME_LIMIT_DELTA = 10;
/*
* Whether to reject rows with size > threshold defined by {@link
@ -3209,33 +3209,53 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
}
}
private long getTimeLimit(HBaseRpcController controller, boolean allowHeartbeatMessages) {
// visible for testing only
long getTimeLimit(RpcCall rpcCall, HBaseRpcController controller,
boolean allowHeartbeatMessages) {
// Set the time limit to be half of the more restrictive timeout value (one of the
// timeout values must be positive). In the event that both values are positive, the
// more restrictive of the two is used to calculate the limit.
if (allowHeartbeatMessages && (scannerLeaseTimeoutPeriod > 0 || rpcTimeout > 0)) {
long timeLimitDelta;
if (scannerLeaseTimeoutPeriod > 0 && rpcTimeout > 0) {
timeLimitDelta = Math.min(scannerLeaseTimeoutPeriod, rpcTimeout);
} else {
timeLimitDelta = scannerLeaseTimeoutPeriod > 0 ? scannerLeaseTimeoutPeriod : rpcTimeout;
if (allowHeartbeatMessages) {
long now = EnvironmentEdgeManager.currentTime();
long remainingTimeout = getRemainingRpcTimeout(rpcCall, controller, now);
if (scannerLeaseTimeoutPeriod > 0 || remainingTimeout > 0) {
long timeLimitDelta;
if (scannerLeaseTimeoutPeriod > 0 && remainingTimeout > 0) {
timeLimitDelta = Math.min(scannerLeaseTimeoutPeriod, remainingTimeout);
} else {
timeLimitDelta =
scannerLeaseTimeoutPeriod > 0 ? scannerLeaseTimeoutPeriod : remainingTimeout;
}
// Use half of whichever timeout value was more restrictive... But don't allow
// the time limit to be less than the allowable minimum (could cause an
// immediate timeout before scanning any data).
timeLimitDelta = Math.max(timeLimitDelta / 2, minimumScanTimeLimitDelta);
return now + timeLimitDelta;
}
if (controller != null && controller.getCallTimeout() > 0) {
timeLimitDelta = Math.min(timeLimitDelta, controller.getCallTimeout());
}
// Use half of whichever timeout value was more restrictive... But don't allow
// the time limit to be less than the allowable minimum (could cause an
// immediatate timeout before scanning any data).
timeLimitDelta = Math.max(timeLimitDelta / 2, minimumScanTimeLimitDelta);
// XXX: Can not use EnvironmentEdge here because TestIncrementTimeRange use a
// ManualEnvironmentEdge. Consider using System.nanoTime instead.
return System.currentTimeMillis() + timeLimitDelta;
}
// Default value of timeLimit is negative to indicate no timeLimit should be
// enforced.
return -1L;
}
private long getRemainingRpcTimeout(RpcCall call, HBaseRpcController controller, long now) {
long timeout;
if (controller != null && controller.getCallTimeout() > 0) {
timeout = controller.getCallTimeout();
} else if (rpcTimeout > 0) {
timeout = rpcTimeout;
} else {
return -1;
}
if (call != null) {
timeout -= (now - call.getReceiveTime());
}
// getTimeLimit ignores values <= 0, but timeout may now be negative if queue time was high.
// return minimum value here in that case so we count this in calculating the final delta.
return Math.max(minimumScanTimeLimitDelta, timeout);
}
private void checkLimitOfRows(int numOfCompleteRows, int limitOfRows, boolean moreRows,
ScannerContext scannerContext, ScanResponse.Builder builder) {
if (numOfCompleteRows >= limitOfRows) {
@ -3250,7 +3270,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
// return whether we have more results in region.
private void scan(HBaseRpcController controller, ScanRequest request, RegionScannerHolder rsh,
long maxQuotaResultSize, int maxResults, int limitOfRows, List<Result> results,
ScanResponse.Builder builder, MutableObject<Object> lastBlock, RpcCallContext context)
ScanResponse.Builder builder, MutableObject<Object> lastBlock, RpcCall rpcCall)
throws IOException {
HRegion region = rsh.r;
RegionScanner scanner = rsh.s;
@ -3297,7 +3317,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
// heartbeats AND partials
boolean allowHeartbeatMessages = clientHandlesHeartbeats && allowPartialResults;
long timeLimit = getTimeLimit(controller, allowHeartbeatMessages);
long timeLimit = getTimeLimit(rpcCall, controller, allowHeartbeatMessages);
final LimitScope sizeScope =
allowPartialResults ? LimitScope.BETWEEN_CELLS : LimitScope.BETWEEN_ROWS;
@ -3328,7 +3348,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
// Collect values to be returned here
moreRows = scanner.nextRaw(values, scannerContext);
if (context == null) {
if (rpcCall == null) {
// When there is no RpcCallContext,copy EC to heap, then the scanner would close,
// This can be an EXPENSIVE call. It may make an extra copy from offheap to onheap
// buffers.See more details in HBASE-26036.
@ -3366,7 +3386,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
}
boolean mayHaveMoreCellsInRow = scannerContext.mayHaveMoreCellsInRow();
Result r = Result.create(values, null, stale, mayHaveMoreCellsInRow);
lastBlock.setValue(addSize(context, r, lastBlock.getValue()));
lastBlock.setValue(addSize(rpcCall, r, lastBlock.getValue()));
results.add(r);
numOfResults++;
if (!mayHaveMoreCellsInRow && limitOfRows > 0) {
@ -3434,7 +3454,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
region.closeRegionOperation();
// Update serverside metrics, even on error.
long end = EnvironmentEdgeManager.currentTime();
long responseCellSize = context != null ? context.getResponseCellSize() : 0;
long responseCellSize = rpcCall != null ? rpcCall.getResponseCellSize() : 0;
region.getMetrics().updateScanTime(end - before);
final MetricsRegionServer metricsRegionServer = regionServer.getMetrics();
if (metricsRegionServer != null) {
@ -3562,7 +3582,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
} else {
rows = closeScanner ? 0 : 1;
}
RpcCallContext context = RpcServer.getCurrentCall().orElse(null);
RpcCall rpcCall = RpcServer.getCurrentCall().orElse(null);
// now let's do the real scan.
long maxQuotaResultSize = Math.min(maxScannerResultSize, quota.getReadAvailable());
RegionScanner scanner = rsh.s;
@ -3585,7 +3605,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
Boolean bypass = region.getCoprocessorHost().preScannerNext(scanner, results, rows);
if (!results.isEmpty()) {
for (Result r : results) {
lastBlock.setValue(addSize(context, r, lastBlock.getValue()));
lastBlock.setValue(addSize(rpcCall, r, lastBlock.getValue()));
}
}
if (bypass != null && bypass.booleanValue()) {
@ -3594,7 +3614,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
}
if (!done) {
scan((HBaseRpcController) controller, request, rsh, maxQuotaResultSize, rows, limitOfRows,
results, builder, lastBlock, context);
results, builder, lastBlock, rpcCall);
} else {
builder.setMoreResultsInRegion(!results.isEmpty());
}
@ -3606,7 +3626,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
quota.addScanResult(results);
addResults(builder, results, (HBaseRpcController) controller,
RegionReplicaUtil.isDefaultReplica(region.getRegionInfo()),
isClientCellBlockSupport(context));
isClientCellBlockSupport(rpcCall));
if (scanner.isFilterDone() && results.isEmpty()) {
// If the scanner's filter - if any - is done with the scan
// only set moreResults to false if the results is empty. This is used to keep compatible
@ -3638,7 +3658,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
}
if (!builder.getMoreResults() || !builder.getMoreResultsInRegion() || closeScanner) {
scannerClosed = true;
closeScanner(region, scanner, scannerName, context);
closeScanner(region, scanner, scannerName, rpcCall);
}
return builder.build();
} catch (IOException e) {
@ -3648,7 +3668,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
// The scanner state might be left in a dirty state, so we will tell the Client to
// fail this RPC and close the scanner while opening up another one from the start of
// row that the client has last seen.
closeScanner(region, scanner, scannerName, context);
closeScanner(region, scanner, scannerName, rpcCall);
// If it is a DoNotRetryIOException already, throw as it is. Unfortunately, DNRIOE is
// used in two different semantics.
@ -3674,7 +3694,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
// We closed the scanner already. Instead of throwing the IOException, and client
// retrying with the same scannerId only to get USE on the next RPC, we directly throw
// a special exception to save an RPC.
if (VersionInfoUtil.hasMinimumVersion(context.getClientVersionInfo(), 1, 4)) {
if (VersionInfoUtil.hasMinimumVersion(rpcCall.getClientVersionInfo(), 1, 4)) {
// 1.4.0+ clients know how to handle
throw new ScannerResetException("Scanner is closed on the server-side", e);
} else {
@ -3689,8 +3709,8 @@ public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.Blockin
if (!scannerClosed) {
// Adding resets expiration time on lease.
// the closeCallBack will be set in closeScanner so here we only care about shippedCallback
if (context != null) {
context.setCallBack(rsh.shippedCallback);
if (rpcCall != null) {
rpcCall.setCallBack(rsh.shippedCallback);
} else {
// If context is null,here we call rsh.shippedCallback directly to reuse the logic in
// rsh.shippedCallback to release the internal resources in rsh,and lease is also added

View File

@ -17,9 +17,11 @@
*/
package org.apache.hadoop.hbase.regionserver;
import static org.apache.hadoop.hbase.regionserver.RSRpcServices.DEFAULT_REGION_SERVER_RPC_MINIMUM_SCAN_TIME_LIMIT_DELTA;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.when;
import java.io.IOException;
import java.util.ArrayList;
@ -48,9 +50,12 @@ import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterBase;
import org.apache.hadoop.hbase.ipc.HBaseRpcController;
import org.apache.hadoop.hbase.ipc.RpcCall;
import org.apache.hadoop.hbase.regionserver.HRegion.RegionScannerImpl;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL;
import org.junit.After;
@ -60,6 +65,7 @@ import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
@ -139,6 +145,40 @@ public class TestScannerHeartbeatMessages {
TABLE = createTestTable(TABLE_NAME, ROWS, FAMILIES, QUALIFIERS, VALUE);
}
@Test
public void testTimeLimitAccountsForQueueTime() throws IOException, InterruptedException {
HRegionServer rs = TEST_UTIL.getMiniHBaseCluster().getRegionServer(0);
RSRpcServices services = new RSRpcServices(rs);
RpcCall mockRpcCall = Mockito.mock(RpcCall.class);
// first return 180 (minimal queuing), then 120 (more queueing), then 101 (heavy queueing)
// finally, 25 is fatal levels of queueing -- exceeding timeout
when(mockRpcCall.getReceiveTime()).thenReturn(180L, 120L, 101L, 25L);
// assume timeout of 100ms
HBaseRpcController mockController = Mockito.mock(HBaseRpcController.class);
when(mockController.getCallTimeout()).thenReturn(100);
// current time is 100, which we'll subtract from 90 and 50 to generate some time deltas
EnvironmentEdgeManager.injectEdge(() -> 200L);
try {
// we queued for 20ms, leaving 80ms of timeout, which we divide by 2
assertEquals(200 + (100 - 20) / 2, services.getTimeLimit(mockRpcCall, mockController, true));
// we queued for 80ms, leaving 20ms of timeout, which we divide by 2
assertEquals(200 + (100 - 80) / 2, services.getTimeLimit(mockRpcCall, mockController, true));
// we queued for 99ms of 100ms timeout, leaving only 1ms. we fall back to default minimum
assertEquals(200 + DEFAULT_REGION_SERVER_RPC_MINIMUM_SCAN_TIME_LIMIT_DELTA,
services.getTimeLimit(mockRpcCall, mockController, true));
// lastly, we queue for 175ms of 100ms timeout. this should be very rare since we drop
// timed out calls in the queue. in this case we still fallback on default minimum for now.
assertEquals(200 + DEFAULT_REGION_SERVER_RPC_MINIMUM_SCAN_TIME_LIMIT_DELTA,
services.getTimeLimit(mockRpcCall, mockController, true));
} finally {
EnvironmentEdgeManager.reset();
}
}
static Table createTestTable(TableName name, byte[][] rows, byte[][] families,
byte[][] qualifiers, byte[] cellValue) throws IOException {
Table ht = TEST_UTIL.createTable(name, families);