YARN-11238. Optimizing FederationClientInterceptor Call with Parallelism. (#4904)

slfan1989 2022-10-05 00:17:00 +08:00 committed by GitHub
parent 07581f1ab2
commit 22bd5e3b53
4 changed files with 99 additions and 120 deletions
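
The core of the change is a reworked FederationClientInterceptor#invokeConcurrent that looks up the active sub-clusters itself, fans one Callable out per sub-cluster through the interceptor's executor, pairs every result with its SubClusterId, and throws a YarnException naming the sub-clusters that failed. The sketch below illustrates that fan-out pattern in isolation; it is a simplified stand-in, not the Hadoop code: ParallelFanOutSketch, RemoteCall, the String sub-cluster ids, and the fixed-size pool are assumptions for the example, while the real helper resolves the target method on ApplicationClientProtocol via reflection.

import java.util.AbstractMap.SimpleImmutableEntry;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

public class ParallelFanOutSketch {

  /** Placeholder for a remote call made against one sub-cluster. */
  interface RemoteCall<R> {
    R run(String subClusterId) throws Exception;
  }

  private final ExecutorService executor = Executors.newFixedThreadPool(8);

  /** Run the same call against every sub-cluster in parallel; fail if any sub-cluster fails. */
  <R> Collection<R> invokeConcurrent(List<String> subClusterIds, RemoteCall<R> call)
      throws Exception {
    // One task per sub-cluster; each task returns (id, result) so outcomes stay attributable.
    List<Callable<Map.Entry<String, R>>> tasks = new ArrayList<>();
    for (String id : subClusterIds) {
      tasks.add(() -> new SimpleImmutableEntry<>(id, call.run(id)));
    }

    Map<String, R> results = new TreeMap<>();
    List<String> failed = new ArrayList<>();
    // invokeAll blocks until every task has finished and preserves task order.
    List<Future<Map.Entry<String, R>>> futures = executor.invokeAll(tasks);
    for (int i = 0; i < futures.size(); i++) {
      try {
        Map.Entry<String, R> entry = futures.get(i).get();
        results.put(entry.getKey(), entry.getValue());
      } catch (ExecutionException e) {
        failed.add(subClusterIds.get(i));
      }
    }

    // Same policy as the new helper: every sub-cluster must answer, otherwise throw.
    if (!failed.isEmpty()) {
      throw new Exception("invokeConcurrent failed for sub-clusters " + failed);
    }
    return results.values();
  }
}

Compared with the removed invokeAppClientProtocolMethod, which called the sub-clusters one after another on the caller's thread, the parallel version bounds the latency of fan-out calls such as getApplications and getClusterMetrics by the slowest sub-cluster rather than the sum of all of them.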


@@ -497,4 +497,14 @@ public class MemoryFederationStateStore implements FederationStateStore {
   public RouterRMDTSecretManagerState getRouterRMSecretManagerState() {
     return routerRMSecretManagerState;
   }
+
+  @VisibleForTesting
+  public Map<SubClusterId, SubClusterInfo> getMembership() {
+    return membership;
+  }
+
+  @VisibleForTesting
+  public void setMembership(Map<SubClusterId, SubClusterInfo> membership) {
+    this.membership = membership;
+  }
 }


@@ -727,7 +727,7 @@ public final class FederationStateStoreFacade {
     return stateStore;
   }
 
-  /**
+  /*
    * The Router Supports Store NewMasterKey (RouterMasterKey{@link RouterMasterKey}).
    *
    * @param newKey Key used for generating and verifying delegation tokens


@@ -18,10 +18,10 @@
 package org.apache.hadoop.yarn.server.router.clientrm;
 
-import org.apache.hadoop.thirdparty.com.google.common.collect.Maps;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.tuple.Pair;
 import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
 
 import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -29,6 +29,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Random;
 import java.util.TreeMap;
+import java.util.Set;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentHashMap;
@@ -39,7 +40,6 @@ import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
-import java.util.stream.Collectors;
 import org.apache.commons.lang3.NotImplementedException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
@@ -661,14 +661,11 @@ public class FederationClientInterceptor
       RouterServerUtil.logAndThrowException("Missing getApplications request.", null);
     }
     long startTime = clock.getTime();
-    Map<SubClusterId, SubClusterInfo> subclusters =
-        federationFacade.getSubClusters(true);
     ClientMethod remoteMethod = new ClientMethod("getApplications",
         new Class[] {GetApplicationsRequest.class}, new Object[] {request});
-    Map<SubClusterId, GetApplicationsResponse> applications = null;
+    Collection<GetApplicationsResponse> applications = null;
     try {
-      applications = invokeConcurrent(subclusters.keySet(), remoteMethod,
-          GetApplicationsResponse.class);
+      applications = invokeConcurrent(remoteMethod, GetApplicationsResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrMultipleAppsFailedRetrieved();
       RouterServerUtil.logAndThrowException("Unable to get applications due to exception.", ex);
@@ -676,7 +673,7 @@ public class FederationClientInterceptor
     long stopTime = clock.getTime();
     routerMetrics.succeededMultipleAppsRetrieved(stopTime - startTime);
     // Merge the Application Reports
-    return RouterYarnClientUtils.mergeApplications(applications.values(), returnPartialReport);
+    return RouterYarnClientUtils.mergeApplications(applications, returnPartialReport);
   }
 
   @Override
@@ -691,8 +688,7 @@ public class FederationClientInterceptor
         new Class[] {GetClusterMetricsRequest.class}, new Object[] {request});
     Collection<GetClusterMetricsResponse> clusterMetrics = null;
     try {
-      clusterMetrics = invokeAppClientProtocolMethod(
-          true, remoteMethod, GetClusterMetricsResponse.class);
+      clusterMetrics = invokeConcurrent(remoteMethod, GetClusterMetricsResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrGetClusterMetricsFailedRetrieved();
       RouterServerUtil.logAndThrowException("Unable to get cluster metrics due to exception.", ex);
@@ -702,67 +698,62 @@ public class FederationClientInterceptor
     return RouterYarnClientUtils.merge(clusterMetrics);
   }
 
-  <R> Map<SubClusterId, R> invokeConcurrent(ArrayList<SubClusterId> clusterIds,
-      ClientMethod request, Class<R> clazz) throws YarnException, IOException {
-    List<Callable<Object>> callables = new ArrayList<>();
-    List<Future<Object>> futures = new ArrayList<>();
-    Map<SubClusterId, IOException> exceptions = new TreeMap<>();
-    for (SubClusterId subClusterId : clusterIds) {
-      callables.add(new Callable<Object>() {
-        @Override
-        public Object call() throws Exception {
-          ApplicationClientProtocol protocol =
-              getClientRMProxyForSubCluster(subClusterId);
-          Method method = ApplicationClientProtocol.class
-              .getMethod(request.getMethodName(), request.getTypes());
-          return method.invoke(protocol, request.getParams());
-        }
+  <R> Collection<R> invokeConcurrent(ClientMethod request, Class<R> clazz)
+      throws YarnException {
+
+    // Get Active SubClusters
+    Map<SubClusterId, SubClusterInfo> subClusterInfo = federationFacade.getSubClusters(true);
+    Collection<SubClusterId> subClusterIds = subClusterInfo.keySet();
+
+    List<Callable<Pair<SubClusterId, Object>>> callables = new ArrayList<>();
+    List<Future<Pair<SubClusterId, Object>>> futures = new ArrayList<>();
+    Map<SubClusterId, Exception> exceptions = new TreeMap<>();
+
+    // Generate parallel Callable tasks
+    for (SubClusterId subClusterId : subClusterIds) {
+      callables.add(() -> {
+        ApplicationClientProtocol protocol = getClientRMProxyForSubCluster(subClusterId);
+        String methodName = request.getMethodName();
+        Class<?>[] types = request.getTypes();
+        Object[] params = request.getParams();
+        Method method = ApplicationClientProtocol.class.getMethod(methodName, types);
+        Object result = method.invoke(protocol, params);
+        return Pair.of(subClusterId, result);
       });
     }
+
+    // Get results from multiple threads
     Map<SubClusterId, R> results = new TreeMap<>();
     try {
       futures.addAll(executorService.invokeAll(callables));
-      for (int i = 0; i < futures.size(); i++) {
-        SubClusterId subClusterId = clusterIds.get(i);
+      futures.stream().forEach(future -> {
+        SubClusterId subClusterId = null;
         try {
-          Future<Object> future = futures.get(i);
-          Object result = future.get();
+          Pair<SubClusterId, Object> pair = future.get();
+          subClusterId = pair.getKey();
+          Object result = pair.getValue();
           results.put(subClusterId, clazz.cast(result));
-        } catch (ExecutionException ex) {
-          Throwable cause = ex.getCause();
-          LOG.debug("Cannot execute {} on {}: {}", request.getMethodName(),
+        } catch (InterruptedException | ExecutionException e) {
+          Throwable cause = e.getCause();
+          LOG.error("Cannot execute {} on {}: {}", request.getMethodName(),
               subClusterId.getId(), cause.getMessage());
-          IOException ioe;
-          if (cause instanceof IOException) {
-            ioe = (IOException) cause;
-          } else if (cause instanceof YarnException) {
-            throw (YarnException) cause;
-          } else {
-            ioe = new IOException(
-                "Unhandled exception while calling " + request.getMethodName()
-                    + ": " + cause.getMessage(), cause);
-          }
-          // Store the exceptions
-          exceptions.put(subClusterId, ioe);
+          exceptions.put(subClusterId, e);
         }
-      }
-      if (results.isEmpty() && !clusterIds.isEmpty()) {
-        SubClusterId subClusterId = clusterIds.get(0);
-        IOException ioe = exceptions.get(subClusterId);
-        if (ioe != null) {
-          throw ioe;
-        }
-      }
+      });
     } catch (InterruptedException e) {
-      throw new YarnException(e);
+      throw new YarnException("invokeConcurrent Failed.", e);
     }
-    return results;
-  }
-
-  <R> Map<SubClusterId, R> invokeConcurrent(Collection<SubClusterId> clusterIds,
-      ClientMethod request, Class<R> clazz) throws YarnException, IOException {
-    ArrayList<SubClusterId> clusterIdList = new ArrayList<>(clusterIds);
-    return invokeConcurrent(clusterIdList, request, clazz);
+
+    // All sub-clusters return results to be considered successful,
+    // otherwise an exception will be thrown.
+    if (exceptions != null && !exceptions.isEmpty()) {
+      Set<SubClusterId> subClusterIdSets = exceptions.keySet();
+      throw new YarnException("invokeConcurrent Failed, An exception occurred in subClusterIds = " +
+          StringUtils.join(subClusterIdSets, ","));
+    }
+
+    // return result
+    return results.values();
   }
 
   @Override
@@ -773,24 +764,19 @@ public class FederationClientInterceptor
       RouterServerUtil.logAndThrowException("Missing getClusterNodes request.", null);
     }
     long startTime = clock.getTime();
-    Map<SubClusterId, SubClusterInfo> subClusters =
-        federationFacade.getSubClusters(true);
-    Map<SubClusterId, GetClusterNodesResponse> clusterNodes = Maps.newHashMap();
-    for (SubClusterId subClusterId : subClusters.keySet()) {
-      ApplicationClientProtocol client;
-      try {
-        client = getClientRMProxyForSubCluster(subClusterId);
-        GetClusterNodesResponse response = client.getClusterNodes(request);
-        clusterNodes.put(subClusterId, response);
-      } catch (Exception ex) {
-        routerMetrics.incrClusterNodesFailedRetrieved();
-        RouterServerUtil.logAndThrowException("Unable to get cluster nodes due to exception.", ex);
-      }
+    ClientMethod remoteMethod = new ClientMethod("getClusterNodes",
+        new Class[]{GetClusterNodesRequest.class}, new Object[]{request});
+    try {
+      Collection<GetClusterNodesResponse> clusterNodes =
+          invokeConcurrent(remoteMethod, GetClusterNodesResponse.class);
+      long stopTime = clock.getTime();
+      routerMetrics.succeededGetClusterNodesRetrieved(stopTime - startTime);
+      return RouterYarnClientUtils.mergeClusterNodesResponse(clusterNodes);
+    } catch (Exception ex) {
+      routerMetrics.incrClusterNodesFailedRetrieved();
+      RouterServerUtil.logAndThrowException("Unable to get cluster nodes due to exception.", ex);
     }
-    long stopTime = clock.getTime();
-    routerMetrics.succeededGetClusterNodesRetrieved(stopTime - startTime);
-    // Merge the NodesResponse
-    return RouterYarnClientUtils.mergeClusterNodesResponse(clusterNodes.values());
+    throw new YarnException("Unable to get cluster nodes.");
   }
 
   @Override
@@ -806,8 +792,7 @@ public class FederationClientInterceptor
         new Class[]{GetQueueInfoRequest.class}, new Object[]{request});
     Collection<GetQueueInfoResponse> queues = null;
     try {
-      queues = invokeAppClientProtocolMethod(true, remoteMethod,
-          GetQueueInfoResponse.class);
+      queues = invokeConcurrent(remoteMethod, GetQueueInfoResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrGetQueueInfoFailedRetrieved();
       RouterServerUtil.logAndThrowException("Unable to get queue [" +
@@ -831,8 +816,7 @@ public class FederationClientInterceptor
         new Class[] {GetQueueUserAclsInfoRequest.class}, new Object[] {request});
     Collection<GetQueueUserAclsInfoResponse> queueUserAcls = null;
     try {
-      queueUserAcls = invokeAppClientProtocolMethod(true, remoteMethod,
-          GetQueueUserAclsInfoResponse.class);
+      queueUserAcls = invokeConcurrent(remoteMethod, GetQueueUserAclsInfoResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrQueueUserAclsFailedRetrieved();
       RouterServerUtil.logAndThrowException("Unable to get queue user Acls due to exception.", ex);
@@ -992,8 +976,7 @@ public class FederationClientInterceptor
         new Class[] {ReservationListRequest.class}, new Object[] {request});
     Collection<ReservationListResponse> listResponses = null;
     try {
-      listResponses = invokeAppClientProtocolMethod(true, remoteMethod,
-          ReservationListResponse.class);
+      listResponses = invokeConcurrent(remoteMethod, ReservationListResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrListReservationsFailedRetrieved();
       RouterServerUtil.logAndThrowException(
@@ -1072,24 +1055,6 @@ public class FederationClientInterceptor
     throw new YarnException(msg);
   }
 
-  private <R> Collection<R> invokeAppClientProtocolMethod(
-      Boolean filterInactiveSubClusters, ClientMethod request, Class<R> clazz)
-      throws YarnException, RuntimeException {
-    Map<SubClusterId, SubClusterInfo> subClusters =
-        federationFacade.getSubClusters(filterInactiveSubClusters);
-    return subClusters.keySet().stream().map(subClusterId -> {
-      try {
-        ApplicationClientProtocol protocol = getClientRMProxyForSubCluster(subClusterId);
-        Method method = ApplicationClientProtocol.class.
-            getMethod(request.getMethodName(), request.getTypes());
-        return clazz.cast(method.invoke(protocol, request.getParams()));
-      } catch (YarnException | NoSuchMethodException |
-          IllegalAccessException | InvocationTargetException ex) {
-        throw new RuntimeException(ex);
-      }
-    }).collect(Collectors.toList());
-  }
-
   @Override
   public GetNodesToLabelsResponse getNodeToLabels(
       GetNodesToLabelsRequest request) throws YarnException, IOException {
@@ -1102,8 +1067,7 @@ public class FederationClientInterceptor
         new Class[] {GetNodesToLabelsRequest.class}, new Object[] {request});
     Collection<GetNodesToLabelsResponse> clusterNodes = null;
     try {
-      clusterNodes = invokeAppClientProtocolMethod(true, remoteMethod,
-          GetNodesToLabelsResponse.class);
+      clusterNodes = invokeConcurrent(remoteMethod, GetNodesToLabelsResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrNodeToLabelsFailedRetrieved();
       RouterServerUtil.logAndThrowException("Unable to get node label due to exception.", ex);
@@ -1126,8 +1090,7 @@ public class FederationClientInterceptor
         new Class[] {GetLabelsToNodesRequest.class}, new Object[] {request});
     Collection<GetLabelsToNodesResponse> labelNodes = null;
     try {
-      labelNodes = invokeAppClientProtocolMethod(true, remoteMethod,
-          GetLabelsToNodesResponse.class);
+      labelNodes = invokeConcurrent(remoteMethod, GetLabelsToNodesResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrLabelsToNodesFailedRetrieved();
       RouterServerUtil.logAndThrowException("Unable to get label node due to exception.", ex);
@@ -1150,8 +1113,7 @@ public class FederationClientInterceptor
         new Class[] {GetClusterNodeLabelsRequest.class}, new Object[] {request});
     Collection<GetClusterNodeLabelsResponse> nodeLabels = null;
     try {
-      nodeLabels = invokeAppClientProtocolMethod(true, remoteMethod,
-          GetClusterNodeLabelsResponse.class);
+      nodeLabels = invokeConcurrent(remoteMethod, GetClusterNodeLabelsResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrClusterNodeLabelsFailedRetrieved();
       RouterServerUtil.logAndThrowException("Unable to get cluster nodeLabels due to exception.",
@@ -1563,8 +1525,7 @@ public class FederationClientInterceptor
         new Class[] {GetAllResourceProfilesRequest.class}, new Object[] {request});
     Collection<GetAllResourceProfilesResponse> resourceProfiles = null;
     try {
-      resourceProfiles = invokeAppClientProtocolMethod(true, remoteMethod,
-          GetAllResourceProfilesResponse.class);
+      resourceProfiles = invokeConcurrent(remoteMethod, GetAllResourceProfilesResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrGetResourceProfilesFailedRetrieved();
       RouterServerUtil.logAndThrowException("Unable to get resource profiles due to exception.",
@@ -1588,8 +1549,7 @@ public class FederationClientInterceptor
         new Class[] {GetResourceProfileRequest.class}, new Object[] {request});
     Collection<GetResourceProfileResponse> resourceProfile = null;
     try {
-      resourceProfile = invokeAppClientProtocolMethod(true, remoteMethod,
-          GetResourceProfileResponse.class);
+      resourceProfile = invokeConcurrent(remoteMethod, GetResourceProfileResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrGetResourceProfileFailedRetrieved();
       RouterServerUtil.logAndThrowException("Unable to get resource profile due to exception.",
@@ -1612,8 +1572,7 @@ public class FederationClientInterceptor
         new Class[] {GetAllResourceTypeInfoRequest.class}, new Object[] {request});
     Collection<GetAllResourceTypeInfoResponse> listResourceTypeInfo;
     try {
-      listResourceTypeInfo = invokeAppClientProtocolMethod(true, remoteMethod,
-          GetAllResourceTypeInfoResponse.class);
+      listResourceTypeInfo = invokeConcurrent(remoteMethod, GetAllResourceTypeInfoResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrResourceTypeInfoFailedRetrieved();
       LOG.error("Unable to get all resource type info node due to exception.", ex);
@@ -1644,8 +1603,8 @@ public class FederationClientInterceptor
         new Class[] {GetAttributesToNodesRequest.class}, new Object[] {request});
     Collection<GetAttributesToNodesResponse> attributesToNodesResponses = null;
     try {
-      attributesToNodesResponses = invokeAppClientProtocolMethod(true, remoteMethod,
-          GetAttributesToNodesResponse.class);
+      attributesToNodesResponses =
+          invokeConcurrent(remoteMethod, GetAttributesToNodesResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrGetAttributesToNodesFailedRetrieved();
       RouterServerUtil.logAndThrowException("Unable to get attributes to nodes due to exception.",
@@ -1668,7 +1627,7 @@ public class FederationClientInterceptor
         new Class[] {GetClusterNodeAttributesRequest.class}, new Object[] {request});
     Collection<GetClusterNodeAttributesResponse> clusterNodeAttributesResponses = null;
     try {
-      clusterNodeAttributesResponses = invokeAppClientProtocolMethod(true, remoteMethod,
+      clusterNodeAttributesResponses = invokeConcurrent(remoteMethod,
           GetClusterNodeAttributesResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrGetClusterNodeAttributesFailedRetrieved();
@@ -1693,7 +1652,7 @@ public class FederationClientInterceptor
         new Class[] {GetNodesToAttributesRequest.class}, new Object[] {request});
     Collection<GetNodesToAttributesResponse> nodesToAttributesResponses = null;
     try {
-      nodesToAttributesResponses = invokeAppClientProtocolMethod(true, remoteMethod,
+      nodesToAttributesResponses = invokeConcurrent(remoteMethod,
           GetNodesToAttributesResponse.class);
     } catch (Exception ex) {
       routerMetrics.incrGetNodesToAttributesFailedRetrieved();


@@ -30,6 +30,7 @@ import java.util.HashMap;
 import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.Arrays;
+import java.util.Collection;
 
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.test.LambdaTestUtils;
@@ -127,6 +128,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
 import org.apache.hadoop.yarn.server.federation.policies.manager.UniformBroadcastPolicyManager;
 import org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore;
 import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
+import org.apache.hadoop.yarn.server.federation.store.records.SubClusterInfo;
 import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
 import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreTestUtil;
 import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
@@ -579,12 +581,20 @@ public class TestFederationClientInterceptor extends BaseRouterClientRMTest {
     Assert.assertEquals(subClusters.size(),
         response.getClusterMetrics().getNumNodeManagers());
 
+    // Clear Membership
+    Map<SubClusterId, SubClusterInfo> membership = new HashMap<>();
+    membership.putAll(stateStore.getMembership());
+    stateStore.getMembership().clear();
+
     ClientMethod remoteMethod = new ClientMethod("getClusterMetrics",
         new Class[] {GetClusterMetricsRequest.class},
         new Object[] {GetClusterMetricsRequest.newInstance()});
-    Map<SubClusterId, GetClusterMetricsResponse> clusterMetrics = interceptor.
-        invokeConcurrent(new ArrayList<>(), remoteMethod, GetClusterMetricsResponse.class);
+    Collection<GetClusterMetricsResponse> clusterMetrics = interceptor.invokeConcurrent(
+        remoteMethod, GetClusterMetricsResponse.class);
     Assert.assertTrue(clusterMetrics.isEmpty());
+
+    // Restore membership
+    stateStore.setMembership(membership);
   }
 
   /**