HBASE-25735 Add target Region to connection exceptions

Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
This commit is contained in:
stack 2021-04-05 20:10:44 -07:00
parent 048ca4e43f
commit f4e123630d
10 changed files with 116 additions and 57 deletions

View File

@ -379,7 +379,7 @@ public abstract class AbstractRpcClient<T extends RpcConnection> implements RpcC
call.error.fillInStackTrace(); call.error.fillInStackTrace();
hrc.setFailed(call.error); hrc.setFailed(call.error);
} else { } else {
hrc.setFailed(wrapException(addr, call.error)); hrc.setFailed(wrapException(addr, hrc.getRegionInfo(), call.error));
} }
callback.run(null); callback.run(null);
} else { } else {

View File

@ -88,6 +88,7 @@ class Call {
@Override @Override
public String toString() { public String toString() {
// Call[id=32153218,methodName=Get]
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
.appendSuper(toShortString()) .appendSuper(toShortString())
.append("param", Optional.ofNullable(param) .append("param", Optional.ofNullable(param)

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,6 +17,7 @@
*/ */
package org.apache.hadoop.hbase.ipc; package org.apache.hadoop.hbase.ipc;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcCallback; import org.apache.hbase.thirdparty.com.google.protobuf.RpcCallback;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
@ -32,7 +33,9 @@ import org.apache.yetus.audience.InterfaceAudience;
* Optionally carries Cells across the proxy/service interface down into ipc. On its way out it * Optionally carries Cells across the proxy/service interface down into ipc. On its way out it
* optionally carries a set of result Cell data. We stick the Cells here when we want to avoid * optionally carries a set of result Cell data. We stick the Cells here when we want to avoid
* having to protobuf them (for performance reasons). This class is used ferrying data across the * having to protobuf them (for performance reasons). This class is used ferrying data across the
* proxy/protobuf service chasm. Also does call timeout. Used by client and server ipc'ing. * proxy/protobuf service chasm. Also does call timeout and on client-side, carries the target
* RegionInfo we're making the call against if relevant (useful adding info to exceptions and logs).
* Used by client and server ipc'ing.
*/ */
@InterfaceAudience.Private @InterfaceAudience.Private
public interface HBaseRpcController extends RpcController, CellScannable { public interface HBaseRpcController extends RpcController, CellScannable {
@ -103,4 +106,18 @@ public interface HBaseRpcController extends RpcController, CellScannable {
* cancellation state does not change during this call. * cancellation state does not change during this call.
*/ */
void notifyOnCancel(RpcCallback<Object> callback, CancellationCallback action) throws IOException; void notifyOnCancel(RpcCallback<Object> callback, CancellationCallback action) throws IOException;
/**
* @return True if this Controller is carrying the RPC target Region's RegionInfo.
*/
default boolean hasRegionInfo() {
return false;
}
/**
* @return Target Region's RegionInfo or null if not available or pertinent.
*/
default RegionInfo getRegionInfo() {
return null;
}
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -17,6 +17,7 @@
*/ */
package org.apache.hadoop.hbase.ipc; package org.apache.hadoop.hbase.ipc;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hbase.thirdparty.com.google.protobuf.RpcCallback; import org.apache.hbase.thirdparty.com.google.protobuf.RpcCallback;
import java.io.IOException; import java.io.IOException;
@ -31,10 +32,8 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
/** /**
* Optionally carries Cells across the proxy/service interface down into ipc. On its way out it * Get instances via {@link RpcControllerFactory} on client-side.
* optionally carries a set of result Cell data. We stick the Cells here when we want to avoid * @see RpcControllerFactory
* having to protobuf them (for performance reasons). This class is used ferrying data across the
* proxy/protobuf service chasm. Also does call timeout. Used by client and server ipc'ing.
*/ */
@InterfaceAudience.Private @InterfaceAudience.Private
public class HBaseRpcControllerImpl implements HBaseRpcController { public class HBaseRpcControllerImpl implements HBaseRpcController {
@ -51,6 +50,12 @@ public class HBaseRpcControllerImpl implements HBaseRpcController {
private IOException exception; private IOException exception;
/**
* Rpc target Region's RegionInfo we are going against. May be null.
* @see #hasRegionInfo()
*/
private RegionInfo regionInfo;
/** /**
* Priority to set on this request. Set it here in controller so available composing the request. * Priority to set on this request. Set it here in controller so available composing the request.
* This is the ordained way of setting priorities going forward. We will be undoing the old * This is the ordained way of setting priorities going forward. We will be undoing the old
@ -67,15 +72,34 @@ public class HBaseRpcControllerImpl implements HBaseRpcController {
private CellScanner cellScanner; private CellScanner cellScanner;
public HBaseRpcControllerImpl() { public HBaseRpcControllerImpl() {
this((CellScanner) null); this(null, (CellScanner) null);
} }
/**
* Used server-side. Clients should go via {@link RpcControllerFactory}
*/
public HBaseRpcControllerImpl(final CellScanner cellScanner) { public HBaseRpcControllerImpl(final CellScanner cellScanner) {
this.cellScanner = cellScanner; this(null, cellScanner);
} }
public HBaseRpcControllerImpl(final List<CellScannable> cellIterables) { HBaseRpcControllerImpl(RegionInfo regionInfo, final CellScanner cellScanner) {
this.cellScanner = cellScanner;
this.regionInfo = regionInfo;
}
HBaseRpcControllerImpl(RegionInfo regionInfo, final List<CellScannable> cellIterables) {
this.cellScanner = cellIterables == null ? null : CellUtil.createCellScanner(cellIterables); this.cellScanner = cellIterables == null ? null : CellUtil.createCellScanner(cellIterables);
this.regionInfo = null;
}
@Override
public boolean hasRegionInfo() {
return this.regionInfo != null;
}
@Override
public RegionInfo getRegionInfo() {
return this.regionInfo;
} }
/** /**
@ -118,6 +142,7 @@ public class HBaseRpcControllerImpl implements HBaseRpcController {
cellScanner = null; cellScanner = null;
exception = null; exception = null;
callTimeout = null; callTimeout = null;
regionInfo = null;
// In the implementations of some callable with replicas, rpc calls are executed in a executor // In the implementations of some callable with replicas, rpc calls are executed in a executor
// and we could cancel the operation from outside which means there could be a race between // and we could cancel the operation from outside which means there could be a race between
// reset and startCancel. Although I think the race should be handled by the callable since the // reset and startCancel. Although I think the race should be handled by the callable since the
@ -131,11 +156,7 @@ public class HBaseRpcControllerImpl implements HBaseRpcController {
@Override @Override
public int getCallTimeout() { public int getCallTimeout() {
if (callTimeout != null) { return callTimeout != null? callTimeout: 0;
return callTimeout.intValue();
} else {
return 0;
}
} }
@Override @Override
@ -241,5 +262,4 @@ public class HBaseRpcControllerImpl implements HBaseRpcController {
action.run(false); action.run(false);
} }
} }
} }

View File

@ -28,6 +28,7 @@ import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.exceptions.ClientExceptionsUtil; import org.apache.hadoop.hbase.exceptions.ClientExceptionsUtil;
import org.apache.hadoop.hbase.exceptions.ConnectionClosedException; import org.apache.hadoop.hbase.exceptions.ConnectionClosedException;
import org.apache.hadoop.hbase.exceptions.ConnectionClosingException; import org.apache.hadoop.hbase.exceptions.ConnectionClosingException;
@ -159,15 +160,21 @@ class IPCUtil {
} }
} }
private static String getCallTarget(Address addr, RegionInfo regionInfo) {
return "address=" + addr +
(regionInfo != null? ", region=" + regionInfo.getRegionNameAsString(): null);
}
/** /**
* Takes an Exception and the address we were trying to connect to and return an IOException with * Takes an Exception, the address, and if pertinent, the RegionInfo for the Region we were trying
* the input exception as the cause. The new exception provides the stack trace of the place where * to connect to and returns an IOException with the input exception as the cause. The new
* the exception is thrown and some extra diagnostics information. * exception provides the stack trace of the place where the exception is thrown and some extra
* diagnostics information.
* <p/> * <p/>
* Notice that we will try our best to keep the original exception type when creating a new * Notice that we will try our best to keep the original exception type when creating a new
* exception, especially for the 'connection' exceptions, as it is used to determine whether this * exception, especially for the 'connection' exceptions, as it is used to determine whether this
* is a network issue or the remote side tells us clearly what is wrong, which is very important * is a network issue or the remote side tells us clearly what is wrong, which is important
* to decide whether to retry. If it is not possible to create a new exception with the same type, * deciding whether to retry. If it is not possible to create a new exception with the same type,
* for example, the {@code error} is not an {@link IOException}, an {@link IOException} will be * for example, the {@code error} is not an {@link IOException}, an {@link IOException} will be
* created. * created.
* @param addr target address * @param addr target address
@ -175,17 +182,17 @@ class IPCUtil {
* @return an exception to throw * @return an exception to throw
* @see ClientExceptionsUtil#isConnectionException(Throwable) * @see ClientExceptionsUtil#isConnectionException(Throwable)
*/ */
static IOException wrapException(Address addr, Throwable error) { static IOException wrapException(Address addr, RegionInfo regionInfo, Throwable error) {
if (error instanceof ConnectException) { if (error instanceof ConnectException) {
// connection refused; include the host:port in the error // connection refused; include the host:port in the error
return (IOException) new ConnectException( return (IOException) new ConnectException("Call to " + getCallTarget(addr, regionInfo) +
"Call to " + addr + " failed on connection exception: " + error).initCause(error); " failed on connection exception: " + error).initCause(error);
} else if (error instanceof SocketTimeoutException) { } else if (error instanceof SocketTimeoutException) {
return (IOException) new SocketTimeoutException( return (IOException) new SocketTimeoutException(
"Call to " + addr + " failed because " + error).initCause(error); "Call to " + getCallTarget(addr, regionInfo) + " failed because " + error).initCause(error);
} else if (error instanceof ConnectionClosingException) { } else if (error instanceof ConnectionClosingException) {
return new ConnectionClosingException("Call to " + addr + " failed on local exception: " return new ConnectionClosingException("Call to " + getCallTarget(addr, regionInfo) +
+ error, error); " failed on local exception: " + error, error);
} else if (error instanceof ServerTooBusyException) { } else if (error instanceof ServerTooBusyException) {
// we already have address in the exception message // we already have address in the exception message
return (IOException) error; return (IOException) error;
@ -194,42 +201,44 @@ class IPCUtil {
try { try {
return (IOException) error.getClass().asSubclass(DoNotRetryIOException.class) return (IOException) error.getClass().asSubclass(DoNotRetryIOException.class)
.getConstructor(String.class) .getConstructor(String.class)
.newInstance("Call to " + addr + " failed on local exception: " + error).initCause(error); .newInstance("Call to " + getCallTarget(addr, regionInfo) +
" failed on local exception: " + error).initCause(error);
} catch (InstantiationException | IllegalAccessException | IllegalArgumentException } catch (InstantiationException | IllegalAccessException | IllegalArgumentException
| InvocationTargetException | NoSuchMethodException | SecurityException e) { | InvocationTargetException | NoSuchMethodException | SecurityException e) {
// just ignore, will just new a DoNotRetryIOException instead below // just ignore, will just new a DoNotRetryIOException instead below
} }
return new DoNotRetryIOException("Call to " + addr + " failed on local exception: " return new DoNotRetryIOException("Call to " + getCallTarget(addr, regionInfo) +
+ error, error); " failed on local exception: " + error, error);
} else if (error instanceof ConnectionClosedException) { } else if (error instanceof ConnectionClosedException) {
return new ConnectionClosedException("Call to " + addr + " failed on local exception: " return new ConnectionClosedException("Call to " + getCallTarget(addr, regionInfo) +
+ error, error); " failed on local exception: " + error, error);
} else if (error instanceof CallTimeoutException) { } else if (error instanceof CallTimeoutException) {
return new CallTimeoutException("Call to " + addr + " failed on local exception: " return new CallTimeoutException("Call to " + getCallTarget(addr, regionInfo) +
+ error, error); " failed on local exception: " + error, error);
} else if (error instanceof ClosedChannelException) { } else if (error instanceof ClosedChannelException) {
// ClosedChannelException does not have a constructor which takes a String but it is a // ClosedChannelException does not have a constructor which takes a String but it is a
// connection exception so we keep its original type // connection exception so we keep its original type
return (IOException) error; return (IOException) error;
} else if (error instanceof TimeoutException) { } else if (error instanceof TimeoutException) {
// TimeoutException is not an IOException, let's convert it to TimeoutIOException. // TimeoutException is not an IOException, let's convert it to TimeoutIOException.
return new TimeoutIOException("Call to " + addr + " failed on local exception: " return new TimeoutIOException("Call to " + getCallTarget(addr, regionInfo) +
+ error, error); " failed on local exception: " + error, error);
} else { } else {
// try our best to keep the original exception type // try our best to keep the original exception type
if (error instanceof IOException) { if (error instanceof IOException) {
try { try {
return (IOException) error.getClass().asSubclass(IOException.class) return (IOException) error.getClass().asSubclass(IOException.class)
.getConstructor(String.class) .getConstructor(String.class)
.newInstance("Call to " + addr + " failed on local exception: " + error) .newInstance("Call to " + getCallTarget(addr, regionInfo) +
" failed on local exception: " + error)
.initCause(error); .initCause(error);
} catch (InstantiationException | IllegalAccessException | IllegalArgumentException } catch (InstantiationException | IllegalAccessException | IllegalArgumentException
| InvocationTargetException | NoSuchMethodException | SecurityException e) { | InvocationTargetException | NoSuchMethodException | SecurityException e) {
// just ignore, will just new an IOException instead below // just ignore, will just new an IOException instead below
} }
} }
return new HBaseIOException("Call to " + addr + " failed on local exception: " return new HBaseIOException("Call to " + getCallTarget(addr, regionInfo) +
+ error, error); " failed on local exception: " + error, error);
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -132,8 +132,8 @@ abstract class RpcConnection {
@Override @Override
public void run(Timeout timeout) throws Exception { public void run(Timeout timeout) throws Exception {
call.setTimeout(new CallTimeoutException(call.toShortString() + ", waitTime=" call.setTimeout(new CallTimeoutException(call.toShortString() + ", waitTime="
+ (EnvironmentEdgeManager.currentTime() - call.getStartTime()) + ", rpcTimeout=" + (EnvironmentEdgeManager.currentTime() - call.getStartTime()) + "ms, rpcTimeout="
+ call.timeout)); + call.timeout + "ms"));
callTimeout(call); callTimeout(call);
} }
}, call.timeout, TimeUnit.MILLISECONDS); }, call.timeout, TimeUnit.MILLISECONDS);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -22,6 +22,7 @@ import java.util.List;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CellScannable; import org.apache.hadoop.hbase.CellScannable;
import org.apache.hadoop.hbase.CellScanner; import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -51,12 +52,13 @@ public class RpcControllerFactory {
return new HBaseRpcControllerImpl(); return new HBaseRpcControllerImpl();
} }
public HBaseRpcController newController(final CellScanner cellScanner) { public HBaseRpcController newController(RegionInfo regionInfo, CellScanner cellScanner) {
return new HBaseRpcControllerImpl(cellScanner); return new HBaseRpcControllerImpl(regionInfo, cellScanner);
} }
public HBaseRpcController newController(final List<CellScannable> cellIterables) { public HBaseRpcController newController(RegionInfo regionInfo,
return new HBaseRpcControllerImpl(cellIterables); final List<CellScannable> cellIterables) {
return new HBaseRpcControllerImpl(regionInfo, cellIterables);
} }

View File

@ -48,7 +48,7 @@ public class TestHBaseRpcControllerImpl {
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
cells.add(createCell(i)); cells.add(createCell(i));
} }
HBaseRpcController controller = new HBaseRpcControllerImpl(cells); HBaseRpcController controller = new HBaseRpcControllerImpl(null, cells);
CellScanner cellScanner = controller.cellScanner(); CellScanner cellScanner = controller.cellScanner();
int index = 0; int index = 0;
for (; cellScanner.advance(); index++) { for (; cellScanner.advance(); index++) {

View File

@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.ipc;
import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.instanceOf;
import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.Assert.assertTrue;
import java.io.IOException; import java.io.IOException;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
@ -29,6 +30,7 @@ import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeoutException; import java.util.concurrent.TimeoutException;
import org.apache.commons.lang3.mutable.MutableInt; import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.exceptions.ClientExceptionsUtil; import org.apache.hadoop.hbase.exceptions.ClientExceptionsUtil;
import org.apache.hadoop.hbase.exceptions.TimeoutIOException; import org.apache.hadoop.hbase.exceptions.TimeoutIOException;
import org.apache.hadoop.hbase.net.Address; import org.apache.hadoop.hbase.net.Address;
@ -91,8 +93,7 @@ public class TestIPCUtil {
} }
/** /**
* See HBASE-21862, it is very important to keep the original exception type for connection * See HBASE-21862, it is important to keep original exception type for connection exceptions.
* exceptions.
*/ */
@Test @Test
public void testWrapConnectionException() throws Exception { public void testWrapConnectionException() throws Exception {
@ -103,9 +104,17 @@ public class TestIPCUtil {
Address addr = Address.fromParts("127.0.0.1", 12345); Address addr = Address.fromParts("127.0.0.1", 12345);
for (Throwable exception : exceptions) { for (Throwable exception : exceptions) {
if (exception instanceof TimeoutException) { if (exception instanceof TimeoutException) {
assertThat(IPCUtil.wrapException(addr, exception), instanceOf(TimeoutIOException.class)); assertThat(IPCUtil.wrapException(addr, null, exception), instanceOf(TimeoutIOException.class));
} else { } else {
assertThat(IPCUtil.wrapException(addr, exception), instanceOf(exception.getClass())); IOException ioe = IPCUtil.wrapException(addr, RegionInfoBuilder.FIRST_META_REGIONINFO,
exception);
// Assert that the exception contains the Region name if supplied. HBASE-25735.
// Not all exceptions get the region stuffed into it.
if (ioe.getMessage() != null) {
assertTrue(ioe.getMessage().
contains(RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionNameAsString()));
}
assertThat(ioe, instanceOf(exception.getClass()));
} }
} }
} }

View File

@ -72,13 +72,14 @@ public class TestRpcControllerFactory {
} }
@Override @Override
public HBaseRpcController newController(final CellScanner cellScanner) { public HBaseRpcController newController(RegionInfo regionInfo, CellScanner cellScanner) {
return new CountingRpcController(super.newController(cellScanner)); return new CountingRpcController(super.newController(regionInfo, cellScanner));
} }
@Override @Override
public HBaseRpcController newController(final List<CellScannable> cellIterables) { public HBaseRpcController newController(RegionInfo regionInfo,
return new CountingRpcController(super.newController(cellIterables)); List<CellScannable> cellIterables) {
return new CountingRpcController(super.newController(regionInfo, cellIterables));
} }
} }