HBASE-22041 Master should re-resolve the region server's IP address on ConnectException
This commit is contained in:
parent
7d6a79b768
commit
a5f2691b71
|
@ -301,6 +301,10 @@ public class AsyncConnectionImpl implements AsyncConnection {
|
|||
() -> createAdminServerStub(serverName));
|
||||
}
|
||||
|
||||
/**
 * Removes the entry for the given server from {@code adminStubs}.
 * <p>
 * The entry is looked up under the key produced by {@code getStubKey} from the
 * {@code AdminService} descriptor name and {@code serverName}.
 * @param serverName the server whose admin stub entry should be removed
 */
void removeAdminStub(ServerName serverName) {
|
||||
adminStubs.remove(getStubKey(AdminService.getDescriptor().getName(), serverName));
|
||||
}
|
||||
|
||||
CompletableFuture<MasterService.Interface> getMasterStub() {
|
||||
return ConnectionUtils.getOrFetch(masterStub, masterStubMakeFuture, false, () -> {
|
||||
CompletableFuture<MasterService.Interface> future = new CompletableFuture<>();
|
||||
|
|
|
@ -216,4 +216,8 @@ public class AsyncRegionServerAdmin {
|
|||
executeProcedures(ExecuteProceduresRequest request) {
|
||||
return call((stub, controller, done) -> stub.executeProcedures(controller, request, done));
|
||||
}
|
||||
|
||||
/**
 * Asks the underlying connection to remove its admin stub for the given server.
 * <p>
 * This is a plain delegate to {@code conn.removeAdminStub(serverName)}; the argument is
 * forwarded unchanged.
 * @param serverName the server whose admin stub should be removed from the connection
 */
public void removeRsStub(ServerName serverName) {
|
||||
conn.removeAdminStub(serverName);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master.procedure;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.lang.Thread.UncaughtExceptionHandler;
|
||||
import java.net.ConnectException;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
@ -257,7 +258,7 @@ public class RSProcedureDispatcher extends RemoteProcedureDispatcher<MasterProce
|
|||
DEFAULT_RS_RPC_RETRY_INTERVAL);
|
||||
}
|
||||
|
||||
private AsyncRegionServerAdmin getRsAdmin() throws IOException {
|
||||
/**
 * Obtains the {@code AsyncRegionServerAdmin} for {@code serverName} from the master's
 * async cluster connection.
 * @return the admin returned by {@code getRegionServerAdmin(serverName)}
 */
private AsyncRegionServerAdmin getRsAdmin() {
|
||||
return master.getAsyncClusterConnection().getRegionServerAdmin(serverName);
|
||||
}
|
||||
|
||||
|
@ -306,6 +307,14 @@ public class RSProcedureDispatcher extends RemoteProcedureDispatcher<MasterProce
|
|||
serverName, e.toString(), numberOfAttemptsSoFar);
|
||||
return false;
|
||||
}
|
||||
// A ConnectException may mean that the master resolved the region server to a wrong IP address.
|
||||
// Remove the cached stub so that the master resolves the region server's IP address again.
|
||||
if (e instanceof ConnectException) {
|
||||
getRsAdmin().removeRsStub(serverName);
|
||||
LOG.warn("Request to {} failed due to {}, try={} retry get new rs admin... ",
|
||||
serverName, e.toString(), numberOfAttemptsSoFar);
|
||||
}
|
||||
|
||||
if (e instanceof RegionServerAbortedException || e instanceof RegionServerStoppedException) {
|
||||
// A better way is to return true here to let the upper layer quit, and then schedule a
|
||||
// background task to check whether the region server is dead. And if it is dead, call
|
||||
|
@ -313,7 +322,7 @@ public class RSProcedureDispatcher extends RemoteProcedureDispatcher<MasterProce
|
|||
// result, but waste some resources.
|
||||
LOG.warn("{} is aborted or stopped, for safety we still need to"
|
||||
+ " wait until it is fully dead, try={}", serverName, numberOfAttemptsSoFar);
|
||||
} else {
|
||||
} else if (!(e instanceof ConnectException)) {
|
||||
LOG.warn("request to {} failed due to {}, try={}, retrying...", serverName, e.toString(),
|
||||
numberOfAttemptsSoFar);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue