HDFS-16369. RBF: Fix the retry logic of RouterRpcServer#invokeAtAvailableNs. (#3745). Contributed by Ayush Saxena.

Reviewed-by: litao <tomleescut@gmail.com>
Reviewed-by: Inigo Goiri <inigoiri@apache.org>
This commit is contained in:
Ayush Saxena 2021-12-04 10:54:29 +05:30 committed by GitHub
parent c0f405a46b
commit cab7086fbc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 39 deletions

View File

@ -43,7 +43,6 @@ import java.net.URISyntaxException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashSet;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
@ -671,8 +670,8 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol,
/** /**
* Invokes the method at default namespace, if default namespace is not * Invokes the method at default namespace, if default namespace is not
* available then at the first available namespace. * available then at the other available namespaces.
* If the namespace is unavailable, retry once with other namespace. * If the namespace is unavailable, retry with other namespaces.
* @param <T> expected return type. * @param <T> expected return type.
* @param method the remote method. * @param method the remote method.
* @return the response received after invoking method. * @return the response received after invoking method.
@ -681,28 +680,29 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol,
<T> T invokeAtAvailableNs(RemoteMethod method, Class<T> clazz) <T> T invokeAtAvailableNs(RemoteMethod method, Class<T> clazz)
throws IOException { throws IOException {
String nsId = subclusterResolver.getDefaultNamespace(); String nsId = subclusterResolver.getDefaultNamespace();
// If default Ns is not present return result from first namespace.
Set<FederationNamespaceInfo> nss = namenodeResolver.getNamespaces(); Set<FederationNamespaceInfo> nss = namenodeResolver.getNamespaces();
try { // If no namespace is available, then throw this IOException.
if (!nsId.isEmpty()) { IOException io = new IOException("No namespace available.");
// If default Ns is present return result from that namespace.
if (!nsId.isEmpty()) {
try {
return rpcClient.invokeSingle(nsId, method, clazz); return rpcClient.invokeSingle(nsId, method, clazz);
} catch (IOException ioe) {
if (!clientProto.isUnavailableSubclusterException(ioe)) {
LOG.debug("{} exception cannot be retried",
ioe.getClass().getSimpleName());
throw ioe;
}
// Remove the already tried namespace.
nss.removeIf(n -> n.getNameserviceId().equals(nsId));
return invokeOnNs(method, clazz, io, nss);
} }
// If no namespace is available, throw IOException.
IOException io = new IOException("No namespace available.");
return invokeOnNs(method, clazz, io, nss);
} catch (IOException ioe) {
if (!clientProto.isUnavailableSubclusterException(ioe)) {
LOG.debug("{} exception cannot be retried",
ioe.getClass().getSimpleName());
throw ioe;
}
Set<FederationNamespaceInfo> nssWithoutFailed = getNameSpaceInfo(nss, nsId);
return invokeOnNs(method, clazz, ioe, nssWithoutFailed);
} }
return invokeOnNs(method, clazz, io, nss);
} }
/** /**
* Invoke the method on first available namespace, * Invoke the method sequentially on available namespaces,
* throw no namespace available exception, if no namespaces are available. * throw no namespace available exception, if no namespaces are available.
* @param method the remote method. * @param method the remote method.
* @param clazz Class for the return type. * @param clazz Class for the return type.
@ -716,26 +716,22 @@ public class RouterRpcServer extends AbstractService implements ClientProtocol,
if (nss.isEmpty()) { if (nss.isEmpty()) {
throw ioe; throw ioe;
} }
String nsId = nss.iterator().next().getNameserviceId(); for (FederationNamespaceInfo fnInfo : nss) {
return rpcClient.invokeSingle(nsId, method, clazz); String nsId = fnInfo.getNameserviceId();
} LOG.debug("Invoking {} on namespace {}", method, nsId);
try {
/** return rpcClient.invokeSingle(nsId, method, clazz);
* Get set of namespace info's removing the already invoked namespaceinfo. } catch (IOException e) {
* @param nss List of namespaces in the federation. LOG.debug("Failed to invoke {} on namespace {}", method, nsId, e);
* @param nsId Already invoked namespace id. // Ignore the exception and try on other namespace, if the tried
* @return List of name spaces in the federation on // namespace is unavailable, else throw the received exception.
* removing the already invoked namespaceinfo. if (!clientProto.isUnavailableSubclusterException(e)) {
*/ throw e;
private static Set<FederationNamespaceInfo> getNameSpaceInfo( }
final Set<FederationNamespaceInfo> nss, final String nsId) {
Set<FederationNamespaceInfo> namespaceInfos = new HashSet<>();
for (FederationNamespaceInfo ns : nss) {
if (!nsId.equals(ns.getNameserviceId())) {
namespaceInfos.add(ns);
} }
} }
return namespaceInfos; // Couldn't get a response from any of the namespace, throw ioe.
throw ioe;
} }
@Override // ClientProtocol @Override // ClientProtocol

View File

@ -76,13 +76,14 @@ import org.junit.Test;
* Tests router rpc with multiple destination mount table resolver. * Tests router rpc with multiple destination mount table resolver.
*/ */
public class TestRouterRPCMultipleDestinationMountTableResolver { public class TestRouterRPCMultipleDestinationMountTableResolver {
private static final List<String> NS_IDS = Arrays.asList("ns0", "ns1"); private static final List<String> NS_IDS = Arrays.asList("ns0", "ns1", "ns2");
private static StateStoreDFSCluster cluster; private static StateStoreDFSCluster cluster;
private static RouterContext routerContext; private static RouterContext routerContext;
private static MountTableResolver resolver; private static MountTableResolver resolver;
private static DistributedFileSystem nnFs0; private static DistributedFileSystem nnFs0;
private static DistributedFileSystem nnFs1; private static DistributedFileSystem nnFs1;
private static DistributedFileSystem nnFs2;
private static DistributedFileSystem routerFs; private static DistributedFileSystem routerFs;
private static RouterRpcServer rpcServer; private static RouterRpcServer rpcServer;
@ -90,7 +91,7 @@ public class TestRouterRPCMultipleDestinationMountTableResolver {
public static void setUp() throws Exception { public static void setUp() throws Exception {
// Build and start a federated cluster // Build and start a federated cluster
cluster = new StateStoreDFSCluster(false, 2, cluster = new StateStoreDFSCluster(false, 3,
MultipleDestinationMountTableResolver.class); MultipleDestinationMountTableResolver.class);
Configuration routerConf = Configuration routerConf =
new RouterConfigBuilder().stateStore().admin().quota().rpc().build(); new RouterConfigBuilder().stateStore().admin().quota().rpc().build();
@ -111,6 +112,8 @@ public class TestRouterRPCMultipleDestinationMountTableResolver {
.getNamenode(cluster.getNameservices().get(0), null).getFileSystem(); .getNamenode(cluster.getNameservices().get(0), null).getFileSystem();
nnFs1 = (DistributedFileSystem) cluster nnFs1 = (DistributedFileSystem) cluster
.getNamenode(cluster.getNameservices().get(1), null).getFileSystem(); .getNamenode(cluster.getNameservices().get(1), null).getFileSystem();
nnFs2 = (DistributedFileSystem) cluster
.getNamenode(cluster.getNameservices().get(2), null).getFileSystem();
routerFs = (DistributedFileSystem) routerContext.getFileSystem(); routerFs = (DistributedFileSystem) routerContext.getFileSystem();
rpcServer =routerContext.getRouter().getRpcServer(); rpcServer =routerContext.getRouter().getRpcServer();
} }
@ -668,6 +671,7 @@ public class TestRouterRPCMultipleDestinationMountTableResolver {
// Make one subcluster unavailable. // Make one subcluster unavailable.
MiniDFSCluster dfsCluster = cluster.getCluster(); MiniDFSCluster dfsCluster = cluster.getCluster();
dfsCluster.shutdownNameNode(0); dfsCluster.shutdownNameNode(0);
dfsCluster.shutdownNameNode(1);
try { try {
// Verify that #invokeAtAvailableNs works by calling #getServerDefaults. // Verify that #invokeAtAvailableNs works by calling #getServerDefaults.
RemoteMethod method = new RemoteMethod("getServerDefaults"); RemoteMethod method = new RemoteMethod("getServerDefaults");
@ -675,7 +679,8 @@ public class TestRouterRPCMultipleDestinationMountTableResolver {
rpcServer.invokeAtAvailableNs(method, FsServerDefaults.class); rpcServer.invokeAtAvailableNs(method, FsServerDefaults.class);
assertNotNull(serverDefaults); assertNotNull(serverDefaults);
} finally { } finally {
dfsCluster.restartNameNode(0); dfsCluster.restartNameNode(0, false);
dfsCluster.restartNameNode(1);
} }
} }
@ -893,6 +898,9 @@ public class TestRouterRPCMultipleDestinationMountTableResolver {
if (nsId.equals("ns1")) { if (nsId.equals("ns1")) {
return nnFs1; return nnFs1;
} }
if (nsId.equals("ns2")) {
return nnFs2;
}
return null; return null;
} }