HBASE-25735 Add target Region to connection exceptions

Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
This commit is contained in:
stack 2021-04-05 20:10:44 -07:00
parent 41b4e6e5c4
commit f120067f17
11 changed files with 120 additions and 58 deletions

View File

@ -25,6 +25,7 @@ import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.reflect.UndeclaredThrowableException;
import java.net.SocketTimeoutException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
@ -246,7 +247,8 @@ public class RpcRetryingCallerImpl<T> implements RpcRetryingCaller<T> {
@Override
public String toString() {
return "RpcRetryingCaller{" + "globalStartTime=" + tracker.getStartTime() +
return "RpcRetryingCaller{" + "globalStartTime=" +
Instant.ofEpochMilli(tracker.getStartTime()) +
", pause=" + pause + ", maxAttempts=" + maxAttempts + '}';
}
}

View File

@ -380,7 +380,7 @@ public abstract class AbstractRpcClient<T extends RpcConnection> implements RpcC
call.error.fillInStackTrace();
hrc.setFailed(call.error);
} else {
hrc.setFailed(wrapException(addr, call.error));
hrc.setFailed(wrapException(addr, hrc.getRegionInfo(), call.error));
}
callback.run(null);
} else {

View File

@ -88,6 +88,7 @@ class Call {
@Override
public String toString() {
// Call[id=32153218,methodName=Get]
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
.appendSuper(toShortString())
.append("param", Optional.ofNullable(param)

View File

@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.ipc;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcCallback;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
@ -32,7 +33,9 @@ import org.apache.yetus.audience.InterfaceAudience;
* Optionally carries Cells across the proxy/service interface down into ipc. On its way out it
* optionally carries a set of result Cell data. We stick the Cells here when we want to avoid
* having to protobuf them (for performance reasons). This class is used ferrying data across the
* proxy/protobuf service chasm. Also does call timeout. Used by client and server ipc'ing.
* proxy/protobuf service chasm. Also does call timeout and on client-side, carries the target
* RegionInfo we're making the call against if relevant (useful adding info to exceptions and logs).
* Used by client and server ipc'ing.
*/
@InterfaceAudience.Private
public interface HBaseRpcController extends RpcController, CellScannable {
@ -103,4 +106,18 @@ public interface HBaseRpcController extends RpcController, CellScannable {
* cancellation state does not change during this call.
*/
void notifyOnCancel(RpcCallback<Object> callback, CancellationCallback action) throws IOException;
/**
* @return True if this Controller is carrying the RPC target Region's RegionInfo.
*/
default boolean hasRegionInfo() {
return false;
}
/**
* @return Target Region's RegionInfo or null if not available or pertinent.
*/
default RegionInfo getRegionInfo() {
return null;
}
}

View File

@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.ipc;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcCallback;
import java.io.IOException;
@ -31,10 +32,8 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.yetus.audience.InterfaceAudience;
/**
* Optionally carries Cells across the proxy/service interface down into ipc. On its way out it
* optionally carries a set of result Cell data. We stick the Cells here when we want to avoid
* having to protobuf them (for performance reasons). This class is used ferrying data across the
* proxy/protobuf service chasm. Also does call timeout. Used by client and server ipc'ing.
* Get instances via {@link RpcControllerFactory} on client-side.
* @see RpcControllerFactory
*/
@InterfaceAudience.Private
public class HBaseRpcControllerImpl implements HBaseRpcController {
@ -51,6 +50,12 @@ public class HBaseRpcControllerImpl implements HBaseRpcController {
private IOException exception;
/**
* Rpc target Region's RegionInfo we are going against. May be null.
* @see #hasRegionInfo()
*/
private RegionInfo regionInfo;
/**
* Priority to set on this request. Set it here in controller so available composing the request.
* This is the ordained way of setting priorities going forward. We will be undoing the old
@ -67,15 +72,34 @@ public class HBaseRpcControllerImpl implements HBaseRpcController {
private CellScanner cellScanner;
public HBaseRpcControllerImpl() {
this((CellScanner) null);
this(null, (CellScanner) null);
}
/**
* Used server-side. Clients should go via {@link RpcControllerFactory}
*/
public HBaseRpcControllerImpl(final CellScanner cellScanner) {
this.cellScanner = cellScanner;
this(null, cellScanner);
}
public HBaseRpcControllerImpl(final List<CellScannable> cellIterables) {
HBaseRpcControllerImpl(RegionInfo regionInfo, final CellScanner cellScanner) {
this.cellScanner = cellScanner;
this.regionInfo = regionInfo;
}
HBaseRpcControllerImpl(RegionInfo regionInfo, final List<CellScannable> cellIterables) {
this.cellScanner = cellIterables == null ? null : CellUtil.createCellScanner(cellIterables);
this.regionInfo = null;
}
@Override
public boolean hasRegionInfo() {
return this.regionInfo != null;
}
@Override
public RegionInfo getRegionInfo() {
return this.regionInfo;
}
/**
@ -118,6 +142,7 @@ public class HBaseRpcControllerImpl implements HBaseRpcController {
cellScanner = null;
exception = null;
callTimeout = null;
regionInfo = null;
// In the implementations of some callable with replicas, rpc calls are executed in a executor
// and we could cancel the operation from outside which means there could be a race between
// reset and startCancel. Although I think the race should be handled by the callable since the
@ -131,11 +156,7 @@ public class HBaseRpcControllerImpl implements HBaseRpcController {
@Override
public int getCallTimeout() {
if (callTimeout != null) {
return callTimeout.intValue();
} else {
return 0;
}
return callTimeout != null? callTimeout: 0;
}
@Override
@ -241,5 +262,4 @@ public class HBaseRpcControllerImpl implements HBaseRpcController {
action.run(false);
}
}
}

View File

@ -29,6 +29,7 @@ import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.exceptions.ClientExceptionsUtil;
import org.apache.hadoop.hbase.exceptions.ConnectionClosedException;
import org.apache.hadoop.hbase.exceptions.ConnectionClosingException;
@ -159,15 +160,21 @@ class IPCUtil {
}
}
private static String getCallTarget(InetSocketAddress addr, RegionInfo regionInfo) {
return "address=" + addr +
(regionInfo != null? ", region=" + regionInfo.getRegionNameAsString(): null);
}
/**
* Takes an Exception and the address we were trying to connect to and return an IOException with
* the input exception as the cause. The new exception provides the stack trace of the place where
* the exception is thrown and some extra diagnostics information.
* Takes an Exception, the address, and if pertinent, the RegionInfo for the Region we were trying
* to connect to and returns an IOException with the input exception as the cause. The new
* exception provides the stack trace of the place where the exception is thrown and some extra
* diagnostics information.
* <p/>
* Notice that we will try our best to keep the original exception type when creating a new
* exception, especially for the 'connection' exceptions, as it is used to determine whether this
* is a network issue or the remote side tells us clearly what is wrong, which is very important
* to decide whether to retry. If it is not possible to create a new exception with the same type,
* is a network issue or the remote side tells us clearly what is wrong, which is important
* deciding whether to retry. If it is not possible to create a new exception with the same type,
* for example, the {@code error} is not an {@link IOException}, an {@link IOException} will be
* created.
* @param addr target address
@ -175,17 +182,18 @@ class IPCUtil {
* @return an exception to throw
* @see ClientExceptionsUtil#isConnectionException(Throwable)
*/
static IOException wrapException(InetSocketAddress addr, Throwable error) {
static IOException wrapException(InetSocketAddress addr, RegionInfo regionInfo,
Throwable error) {
if (error instanceof ConnectException) {
// connection refused; include the host:port in the error
return (IOException) new ConnectException(
"Call to " + addr + " failed on connection exception: " + error).initCause(error);
return (IOException) new ConnectException("Call to " + getCallTarget(addr, regionInfo) +
" failed on connection exception: " + error).initCause(error);
} else if (error instanceof SocketTimeoutException) {
return (IOException) new SocketTimeoutException(
"Call to " + addr + " failed because " + error).initCause(error);
"Call to " + getCallTarget(addr, regionInfo) + " failed because " + error).initCause(error);
} else if (error instanceof ConnectionClosingException) {
return new ConnectionClosingException("Call to " + addr + " failed on local exception: "
+ error, error);
return new ConnectionClosingException("Call to " + getCallTarget(addr, regionInfo) +
" failed on local exception: " + error, error);
} else if (error instanceof ServerTooBusyException) {
// we already have address in the exception message
return (IOException) error;
@ -194,42 +202,44 @@ class IPCUtil {
try {
return (IOException) error.getClass().asSubclass(DoNotRetryIOException.class)
.getConstructor(String.class)
.newInstance("Call to " + addr + " failed on local exception: " + error).initCause(error);
.newInstance("Call to " + getCallTarget(addr, regionInfo) +
" failed on local exception: " + error).initCause(error);
} catch (InstantiationException | IllegalAccessException | IllegalArgumentException
| InvocationTargetException | NoSuchMethodException | SecurityException e) {
// just ignore, will just new a DoNotRetryIOException instead below
}
return new DoNotRetryIOException("Call to " + addr + " failed on local exception: "
+ error, error);
return new DoNotRetryIOException("Call to " + getCallTarget(addr, regionInfo) +
" failed on local exception: " + error, error);
} else if (error instanceof ConnectionClosedException) {
return new ConnectionClosedException("Call to " + addr + " failed on local exception: "
+ error, error);
return new ConnectionClosedException("Call to " + getCallTarget(addr, regionInfo) +
" failed on local exception: " + error, error);
} else if (error instanceof CallTimeoutException) {
return new CallTimeoutException("Call to " + addr + " failed on local exception: "
+ error, error);
return new CallTimeoutException("Call to " + getCallTarget(addr, regionInfo) +
" failed on local exception: " + error, error);
} else if (error instanceof ClosedChannelException) {
// ClosedChannelException does not have a constructor which takes a String but it is a
// connection exception so we keep its original type
return (IOException) error;
} else if (error instanceof TimeoutException) {
// TimeoutException is not an IOException, let's convert it to TimeoutIOException.
return new TimeoutIOException("Call to " + addr + " failed on local exception: "
+ error, error);
return new TimeoutIOException("Call to " + getCallTarget(addr, regionInfo) +
" failed on local exception: " + error, error);
} else {
// try our best to keep the original exception type
if (error instanceof IOException) {
try {
return (IOException) error.getClass().asSubclass(IOException.class)
.getConstructor(String.class)
.newInstance("Call to " + addr + " failed on local exception: " + error)
.newInstance("Call to " + getCallTarget(addr, regionInfo) +
" failed on local exception: " + error)
.initCause(error);
} catch (InstantiationException | IllegalAccessException | IllegalArgumentException
| InvocationTargetException | NoSuchMethodException | SecurityException e) {
// just ignore, will just new an IOException instead below
}
}
return new HBaseIOException("Call to " + addr + " failed on local exception: "
+ error, error);
return new HBaseIOException("Call to " + getCallTarget(addr, regionInfo) +
" failed on local exception: " + error, error);
}
}

View File

@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -134,8 +134,8 @@ abstract class RpcConnection {
@Override
public void run(Timeout timeout) throws Exception {
call.setTimeout(new CallTimeoutException(call.toShortString() + ", waitTime="
+ (EnvironmentEdgeManager.currentTime() - call.getStartTime()) + ", rpcTimeout="
+ call.timeout));
+ (EnvironmentEdgeManager.currentTime() - call.getStartTime()) + "ms, rpcTimeout="
+ call.timeout + "ms"));
callTimeout(call);
}
}, call.timeout, TimeUnit.MILLISECONDS);

View File

@ -1,4 +1,4 @@
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -22,6 +22,7 @@ import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CellScannable;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -51,12 +52,13 @@ public class RpcControllerFactory {
return new HBaseRpcControllerImpl();
}
public HBaseRpcController newController(final CellScanner cellScanner) {
return new HBaseRpcControllerImpl(cellScanner);
public HBaseRpcController newController(RegionInfo regionInfo, CellScanner cellScanner) {
return new HBaseRpcControllerImpl(regionInfo, cellScanner);
}
public HBaseRpcController newController(final List<CellScannable> cellIterables) {
return new HBaseRpcControllerImpl(cellIterables);
public HBaseRpcController newController(RegionInfo regionInfo,
final List<CellScannable> cellIterables) {
return new HBaseRpcControllerImpl(regionInfo, cellIterables);
}

View File

@ -48,7 +48,7 @@ public class TestHBaseRpcControllerImpl {
for (int i = 0; i < count; i++) {
cells.add(createCell(i));
}
HBaseRpcController controller = new HBaseRpcControllerImpl(cells);
HBaseRpcController controller = new HBaseRpcControllerImpl(null, cells);
CellScanner cellScanner = controller.cellScanner();
int index = 0;
for (; cellScanner.advance(); index++) {

View File

@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.ipc;
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.lang.reflect.Constructor;
@ -30,6 +31,7 @@ import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeoutException;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.exceptions.ClientExceptionsUtil;
import org.apache.hadoop.hbase.exceptions.TimeoutIOException;
import org.apache.hadoop.hbase.testclassification.ClientTests;
@ -91,8 +93,7 @@ public class TestIPCUtil {
}
/**
* See HBASE-21862, it is very important to keep the original exception type for connection
* exceptions.
* See HBASE-21862, it is important to keep original exception type for connection exceptions.
*/
@Test
public void testWrapConnectionException() throws Exception {
@ -103,9 +104,17 @@ public class TestIPCUtil {
InetSocketAddress addr = InetSocketAddress.createUnresolved("127.0.0.1", 12345);
for (Throwable exception : exceptions) {
if (exception instanceof TimeoutException) {
assertThat(IPCUtil.wrapException(addr, exception), instanceOf(TimeoutIOException.class));
assertThat(IPCUtil.wrapException(addr, null, exception), instanceOf(TimeoutIOException.class));
} else {
assertThat(IPCUtil.wrapException(addr, exception), instanceOf(exception.getClass()));
IOException ioe = IPCUtil.wrapException(addr, RegionInfoBuilder.FIRST_META_REGIONINFO,
exception);
// Assert that the exception contains the Region name if supplied. HBASE-25735.
// Not all exceptions get the region stuffed into it.
if (ioe.getMessage() != null) {
assertTrue(ioe.getMessage().
contains(RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionNameAsString()));
}
assertThat(ioe, instanceOf(exception.getClass()));
}
}
}

View File

@ -71,13 +71,14 @@ public class TestRpcControllerFactory {
}
@Override
public HBaseRpcController newController(final CellScanner cellScanner) {
return new CountingRpcController(super.newController(cellScanner));
public HBaseRpcController newController(RegionInfo regionInfo, CellScanner cellScanner) {
return new CountingRpcController(super.newController(regionInfo, cellScanner));
}
@Override
public HBaseRpcController newController(final List<CellScannable> cellIterables) {
return new CountingRpcController(super.newController(cellIterables));
public HBaseRpcController newController(RegionInfo regionInfo,
List<CellScannable> cellIterables) {
return new CountingRpcController(super.newController(regionInfo, cellIterables));
}
}