YARN-11238. Optimizing FederationClientInterceptor Call with Parallelism. (#4904)
This commit is contained in:
parent
07581f1ab2
commit
22bd5e3b53
|
@ -497,4 +497,14 @@ public class MemoryFederationStateStore implements FederationStateStore {
|
||||||
public RouterRMDTSecretManagerState getRouterRMSecretManagerState() {
|
public RouterRMDTSecretManagerState getRouterRMSecretManagerState() {
|
||||||
return routerRMSecretManagerState;
|
return routerRMSecretManagerState;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public Map<SubClusterId, SubClusterInfo> getMembership() {
|
||||||
|
return membership;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public void setMembership(Map<SubClusterId, SubClusterInfo> membership) {
|
||||||
|
this.membership = membership;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -727,7 +727,7 @@ public final class FederationStateStoreFacade {
|
||||||
return stateStore;
|
return stateStore;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/*
|
||||||
* The Router Supports Store NewMasterKey (RouterMasterKey{@link RouterMasterKey}).
|
* The Router Supports Store NewMasterKey (RouterMasterKey{@link RouterMasterKey}).
|
||||||
*
|
*
|
||||||
* @param newKey Key used for generating and verifying delegation tokens
|
* @param newKey Key used for generating and verifying delegation tokens
|
||||||
|
|
|
@ -18,10 +18,10 @@
|
||||||
|
|
||||||
package org.apache.hadoop.yarn.server.router.clientrm;
|
package org.apache.hadoop.yarn.server.router.clientrm;
|
||||||
|
|
||||||
import org.apache.hadoop.thirdparty.com.google.common.collect.Maps;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
|
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
|
||||||
import java.lang.reflect.Method;
|
import java.lang.reflect.Method;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
@ -29,6 +29,7 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.BlockingQueue;
|
import java.util.concurrent.BlockingQueue;
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
@ -39,7 +40,6 @@ import java.util.concurrent.LinkedBlockingQueue;
|
||||||
import java.util.concurrent.ThreadFactory;
|
import java.util.concurrent.ThreadFactory;
|
||||||
import java.util.concurrent.ThreadPoolExecutor;
|
import java.util.concurrent.ThreadPoolExecutor;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import org.apache.commons.lang3.NotImplementedException;
|
import org.apache.commons.lang3.NotImplementedException;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||||
|
@ -661,14 +661,11 @@ public class FederationClientInterceptor
|
||||||
RouterServerUtil.logAndThrowException("Missing getApplications request.", null);
|
RouterServerUtil.logAndThrowException("Missing getApplications request.", null);
|
||||||
}
|
}
|
||||||
long startTime = clock.getTime();
|
long startTime = clock.getTime();
|
||||||
Map<SubClusterId, SubClusterInfo> subclusters =
|
|
||||||
federationFacade.getSubClusters(true);
|
|
||||||
ClientMethod remoteMethod = new ClientMethod("getApplications",
|
ClientMethod remoteMethod = new ClientMethod("getApplications",
|
||||||
new Class[] {GetApplicationsRequest.class}, new Object[] {request});
|
new Class[] {GetApplicationsRequest.class}, new Object[] {request});
|
||||||
Map<SubClusterId, GetApplicationsResponse> applications = null;
|
Collection<GetApplicationsResponse> applications = null;
|
||||||
try {
|
try {
|
||||||
applications = invokeConcurrent(subclusters.keySet(), remoteMethod,
|
applications = invokeConcurrent(remoteMethod, GetApplicationsResponse.class);
|
||||||
GetApplicationsResponse.class);
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrMultipleAppsFailedRetrieved();
|
routerMetrics.incrMultipleAppsFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to get applications due to exception.", ex);
|
RouterServerUtil.logAndThrowException("Unable to get applications due to exception.", ex);
|
||||||
|
@ -676,7 +673,7 @@ public class FederationClientInterceptor
|
||||||
long stopTime = clock.getTime();
|
long stopTime = clock.getTime();
|
||||||
routerMetrics.succeededMultipleAppsRetrieved(stopTime - startTime);
|
routerMetrics.succeededMultipleAppsRetrieved(stopTime - startTime);
|
||||||
// Merge the Application Reports
|
// Merge the Application Reports
|
||||||
return RouterYarnClientUtils.mergeApplications(applications.values(), returnPartialReport);
|
return RouterYarnClientUtils.mergeApplications(applications, returnPartialReport);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -691,8 +688,7 @@ public class FederationClientInterceptor
|
||||||
new Class[] {GetClusterMetricsRequest.class}, new Object[] {request});
|
new Class[] {GetClusterMetricsRequest.class}, new Object[] {request});
|
||||||
Collection<GetClusterMetricsResponse> clusterMetrics = null;
|
Collection<GetClusterMetricsResponse> clusterMetrics = null;
|
||||||
try {
|
try {
|
||||||
clusterMetrics = invokeAppClientProtocolMethod(
|
clusterMetrics = invokeConcurrent(remoteMethod, GetClusterMetricsResponse.class);
|
||||||
true, remoteMethod, GetClusterMetricsResponse.class);
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrGetClusterMetricsFailedRetrieved();
|
routerMetrics.incrGetClusterMetricsFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to get cluster metrics due to exception.", ex);
|
RouterServerUtil.logAndThrowException("Unable to get cluster metrics due to exception.", ex);
|
||||||
|
@ -702,67 +698,62 @@ public class FederationClientInterceptor
|
||||||
return RouterYarnClientUtils.merge(clusterMetrics);
|
return RouterYarnClientUtils.merge(clusterMetrics);
|
||||||
}
|
}
|
||||||
|
|
||||||
<R> Map<SubClusterId, R> invokeConcurrent(ArrayList<SubClusterId> clusterIds,
|
<R> Collection<R> invokeConcurrent(ClientMethod request, Class<R> clazz)
|
||||||
ClientMethod request, Class<R> clazz) throws YarnException, IOException {
|
throws YarnException {
|
||||||
List<Callable<Object>> callables = new ArrayList<>();
|
|
||||||
List<Future<Object>> futures = new ArrayList<>();
|
// Get Active SubClusters
|
||||||
Map<SubClusterId, IOException> exceptions = new TreeMap<>();
|
Map<SubClusterId, SubClusterInfo> subClusterInfo = federationFacade.getSubClusters(true);
|
||||||
for (SubClusterId subClusterId : clusterIds) {
|
Collection<SubClusterId> subClusterIds = subClusterInfo.keySet();
|
||||||
callables.add(new Callable<Object>() {
|
|
||||||
@Override
|
List<Callable<Pair<SubClusterId, Object>>> callables = new ArrayList<>();
|
||||||
public Object call() throws Exception {
|
List<Future<Pair<SubClusterId, Object>>> futures = new ArrayList<>();
|
||||||
ApplicationClientProtocol protocol =
|
Map<SubClusterId, Exception> exceptions = new TreeMap<>();
|
||||||
getClientRMProxyForSubCluster(subClusterId);
|
|
||||||
Method method = ApplicationClientProtocol.class
|
// Generate parallel Callable tasks
|
||||||
.getMethod(request.getMethodName(), request.getTypes());
|
for (SubClusterId subClusterId : subClusterIds) {
|
||||||
return method.invoke(protocol, request.getParams());
|
callables.add(() -> {
|
||||||
}
|
ApplicationClientProtocol protocol = getClientRMProxyForSubCluster(subClusterId);
|
||||||
|
String methodName = request.getMethodName();
|
||||||
|
Class<?>[] types = request.getTypes();
|
||||||
|
Object[] params = request.getParams();
|
||||||
|
Method method = ApplicationClientProtocol.class.getMethod(methodName, types);
|
||||||
|
Object result = method.invoke(protocol, params);
|
||||||
|
return Pair.of(subClusterId, result);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get results from multiple threads
|
||||||
Map<SubClusterId, R> results = new TreeMap<>();
|
Map<SubClusterId, R> results = new TreeMap<>();
|
||||||
try {
|
try {
|
||||||
futures.addAll(executorService.invokeAll(callables));
|
futures.addAll(executorService.invokeAll(callables));
|
||||||
for (int i = 0; i < futures.size(); i++) {
|
futures.stream().forEach(future -> {
|
||||||
SubClusterId subClusterId = clusterIds.get(i);
|
SubClusterId subClusterId = null;
|
||||||
try {
|
try {
|
||||||
Future<Object> future = futures.get(i);
|
Pair<SubClusterId, Object> pair = future.get();
|
||||||
Object result = future.get();
|
subClusterId = pair.getKey();
|
||||||
|
Object result = pair.getValue();
|
||||||
results.put(subClusterId, clazz.cast(result));
|
results.put(subClusterId, clazz.cast(result));
|
||||||
} catch (ExecutionException ex) {
|
} catch (InterruptedException | ExecutionException e) {
|
||||||
Throwable cause = ex.getCause();
|
Throwable cause = e.getCause();
|
||||||
LOG.debug("Cannot execute {} on {}: {}", request.getMethodName(),
|
LOG.error("Cannot execute {} on {}: {}", request.getMethodName(),
|
||||||
subClusterId.getId(), cause.getMessage());
|
subClusterId.getId(), cause.getMessage());
|
||||||
IOException ioe;
|
exceptions.put(subClusterId, e);
|
||||||
if (cause instanceof IOException) {
|
|
||||||
ioe = (IOException) cause;
|
|
||||||
} else if (cause instanceof YarnException) {
|
|
||||||
throw (YarnException) cause;
|
|
||||||
} else {
|
|
||||||
ioe = new IOException(
|
|
||||||
"Unhandled exception while calling " + request.getMethodName()
|
|
||||||
+ ": " + cause.getMessage(), cause);
|
|
||||||
}
|
|
||||||
// Store the exceptions
|
|
||||||
exceptions.put(subClusterId, ioe);
|
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
if (results.isEmpty() && !clusterIds.isEmpty()) {
|
|
||||||
SubClusterId subClusterId = clusterIds.get(0);
|
|
||||||
IOException ioe = exceptions.get(subClusterId);
|
|
||||||
if (ioe != null) {
|
|
||||||
throw ioe;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
throw new YarnException(e);
|
throw new YarnException("invokeConcurrent Failed.", e);
|
||||||
}
|
}
|
||||||
return results;
|
|
||||||
}
|
|
||||||
|
|
||||||
<R> Map<SubClusterId, R> invokeConcurrent(Collection<SubClusterId> clusterIds,
|
// All sub-clusters return results to be considered successful,
|
||||||
ClientMethod request, Class<R> clazz) throws YarnException, IOException {
|
// otherwise an exception will be thrown.
|
||||||
ArrayList<SubClusterId> clusterIdList = new ArrayList<>(clusterIds);
|
if (exceptions != null && !exceptions.isEmpty()) {
|
||||||
return invokeConcurrent(clusterIdList, request, clazz);
|
Set<SubClusterId> subClusterIdSets = exceptions.keySet();
|
||||||
|
throw new YarnException("invokeConcurrent Failed, An exception occurred in subClusterIds = " +
|
||||||
|
StringUtils.join(subClusterIdSets, ","));
|
||||||
|
}
|
||||||
|
|
||||||
|
// return result
|
||||||
|
return results.values();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -773,24 +764,19 @@ public class FederationClientInterceptor
|
||||||
RouterServerUtil.logAndThrowException("Missing getClusterNodes request.", null);
|
RouterServerUtil.logAndThrowException("Missing getClusterNodes request.", null);
|
||||||
}
|
}
|
||||||
long startTime = clock.getTime();
|
long startTime = clock.getTime();
|
||||||
Map<SubClusterId, SubClusterInfo> subClusters =
|
ClientMethod remoteMethod = new ClientMethod("getClusterNodes",
|
||||||
federationFacade.getSubClusters(true);
|
new Class[]{GetClusterNodesRequest.class}, new Object[]{request});
|
||||||
Map<SubClusterId, GetClusterNodesResponse> clusterNodes = Maps.newHashMap();
|
try {
|
||||||
for (SubClusterId subClusterId : subClusters.keySet()) {
|
Collection<GetClusterNodesResponse> clusterNodes =
|
||||||
ApplicationClientProtocol client;
|
invokeConcurrent(remoteMethod, GetClusterNodesResponse.class);
|
||||||
try {
|
long stopTime = clock.getTime();
|
||||||
client = getClientRMProxyForSubCluster(subClusterId);
|
routerMetrics.succeededGetClusterNodesRetrieved(stopTime - startTime);
|
||||||
GetClusterNodesResponse response = client.getClusterNodes(request);
|
return RouterYarnClientUtils.mergeClusterNodesResponse(clusterNodes);
|
||||||
clusterNodes.put(subClusterId, response);
|
} catch (Exception ex) {
|
||||||
} catch (Exception ex) {
|
routerMetrics.incrClusterNodesFailedRetrieved();
|
||||||
routerMetrics.incrClusterNodesFailedRetrieved();
|
RouterServerUtil.logAndThrowException("Unable to get cluster nodes due to exception.", ex);
|
||||||
RouterServerUtil.logAndThrowException("Unable to get cluster nodes due to exception.", ex);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
long stopTime = clock.getTime();
|
throw new YarnException("Unable to get cluster nodes.");
|
||||||
routerMetrics.succeededGetClusterNodesRetrieved(stopTime - startTime);
|
|
||||||
// Merge the NodesResponse
|
|
||||||
return RouterYarnClientUtils.mergeClusterNodesResponse(clusterNodes.values());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -806,8 +792,7 @@ public class FederationClientInterceptor
|
||||||
new Class[]{GetQueueInfoRequest.class}, new Object[]{request});
|
new Class[]{GetQueueInfoRequest.class}, new Object[]{request});
|
||||||
Collection<GetQueueInfoResponse> queues = null;
|
Collection<GetQueueInfoResponse> queues = null;
|
||||||
try {
|
try {
|
||||||
queues = invokeAppClientProtocolMethod(true, remoteMethod,
|
queues = invokeConcurrent(remoteMethod, GetQueueInfoResponse.class);
|
||||||
GetQueueInfoResponse.class);
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrGetQueueInfoFailedRetrieved();
|
routerMetrics.incrGetQueueInfoFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to get queue [" +
|
RouterServerUtil.logAndThrowException("Unable to get queue [" +
|
||||||
|
@ -831,8 +816,7 @@ public class FederationClientInterceptor
|
||||||
new Class[] {GetQueueUserAclsInfoRequest.class}, new Object[] {request});
|
new Class[] {GetQueueUserAclsInfoRequest.class}, new Object[] {request});
|
||||||
Collection<GetQueueUserAclsInfoResponse> queueUserAcls = null;
|
Collection<GetQueueUserAclsInfoResponse> queueUserAcls = null;
|
||||||
try {
|
try {
|
||||||
queueUserAcls = invokeAppClientProtocolMethod(true, remoteMethod,
|
queueUserAcls = invokeConcurrent(remoteMethod, GetQueueUserAclsInfoResponse.class);
|
||||||
GetQueueUserAclsInfoResponse.class);
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrQueueUserAclsFailedRetrieved();
|
routerMetrics.incrQueueUserAclsFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to get queue user Acls due to exception.", ex);
|
RouterServerUtil.logAndThrowException("Unable to get queue user Acls due to exception.", ex);
|
||||||
|
@ -992,8 +976,7 @@ public class FederationClientInterceptor
|
||||||
new Class[] {ReservationListRequest.class}, new Object[] {request});
|
new Class[] {ReservationListRequest.class}, new Object[] {request});
|
||||||
Collection<ReservationListResponse> listResponses = null;
|
Collection<ReservationListResponse> listResponses = null;
|
||||||
try {
|
try {
|
||||||
listResponses = invokeAppClientProtocolMethod(true, remoteMethod,
|
listResponses = invokeConcurrent(remoteMethod, ReservationListResponse.class);
|
||||||
ReservationListResponse.class);
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrListReservationsFailedRetrieved();
|
routerMetrics.incrListReservationsFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException(
|
RouterServerUtil.logAndThrowException(
|
||||||
|
@ -1072,24 +1055,6 @@ public class FederationClientInterceptor
|
||||||
throw new YarnException(msg);
|
throw new YarnException(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
private <R> Collection<R> invokeAppClientProtocolMethod(
|
|
||||||
Boolean filterInactiveSubClusters, ClientMethod request, Class<R> clazz)
|
|
||||||
throws YarnException, RuntimeException {
|
|
||||||
Map<SubClusterId, SubClusterInfo> subClusters =
|
|
||||||
federationFacade.getSubClusters(filterInactiveSubClusters);
|
|
||||||
return subClusters.keySet().stream().map(subClusterId -> {
|
|
||||||
try {
|
|
||||||
ApplicationClientProtocol protocol = getClientRMProxyForSubCluster(subClusterId);
|
|
||||||
Method method = ApplicationClientProtocol.class.
|
|
||||||
getMethod(request.getMethodName(), request.getTypes());
|
|
||||||
return clazz.cast(method.invoke(protocol, request.getParams()));
|
|
||||||
} catch (YarnException | NoSuchMethodException |
|
|
||||||
IllegalAccessException | InvocationTargetException ex) {
|
|
||||||
throw new RuntimeException(ex);
|
|
||||||
}
|
|
||||||
}).collect(Collectors.toList());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public GetNodesToLabelsResponse getNodeToLabels(
|
public GetNodesToLabelsResponse getNodeToLabels(
|
||||||
GetNodesToLabelsRequest request) throws YarnException, IOException {
|
GetNodesToLabelsRequest request) throws YarnException, IOException {
|
||||||
|
@ -1102,8 +1067,7 @@ public class FederationClientInterceptor
|
||||||
new Class[] {GetNodesToLabelsRequest.class}, new Object[] {request});
|
new Class[] {GetNodesToLabelsRequest.class}, new Object[] {request});
|
||||||
Collection<GetNodesToLabelsResponse> clusterNodes = null;
|
Collection<GetNodesToLabelsResponse> clusterNodes = null;
|
||||||
try {
|
try {
|
||||||
clusterNodes = invokeAppClientProtocolMethod(true, remoteMethod,
|
clusterNodes = invokeConcurrent(remoteMethod, GetNodesToLabelsResponse.class);
|
||||||
GetNodesToLabelsResponse.class);
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrNodeToLabelsFailedRetrieved();
|
routerMetrics.incrNodeToLabelsFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to get node label due to exception.", ex);
|
RouterServerUtil.logAndThrowException("Unable to get node label due to exception.", ex);
|
||||||
|
@ -1126,8 +1090,7 @@ public class FederationClientInterceptor
|
||||||
new Class[] {GetLabelsToNodesRequest.class}, new Object[] {request});
|
new Class[] {GetLabelsToNodesRequest.class}, new Object[] {request});
|
||||||
Collection<GetLabelsToNodesResponse> labelNodes = null;
|
Collection<GetLabelsToNodesResponse> labelNodes = null;
|
||||||
try {
|
try {
|
||||||
labelNodes = invokeAppClientProtocolMethod(true, remoteMethod,
|
labelNodes = invokeConcurrent(remoteMethod, GetLabelsToNodesResponse.class);
|
||||||
GetLabelsToNodesResponse.class);
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrLabelsToNodesFailedRetrieved();
|
routerMetrics.incrLabelsToNodesFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to get label node due to exception.", ex);
|
RouterServerUtil.logAndThrowException("Unable to get label node due to exception.", ex);
|
||||||
|
@ -1150,8 +1113,7 @@ public class FederationClientInterceptor
|
||||||
new Class[] {GetClusterNodeLabelsRequest.class}, new Object[] {request});
|
new Class[] {GetClusterNodeLabelsRequest.class}, new Object[] {request});
|
||||||
Collection<GetClusterNodeLabelsResponse> nodeLabels = null;
|
Collection<GetClusterNodeLabelsResponse> nodeLabels = null;
|
||||||
try {
|
try {
|
||||||
nodeLabels = invokeAppClientProtocolMethod(true, remoteMethod,
|
nodeLabels = invokeConcurrent(remoteMethod, GetClusterNodeLabelsResponse.class);
|
||||||
GetClusterNodeLabelsResponse.class);
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrClusterNodeLabelsFailedRetrieved();
|
routerMetrics.incrClusterNodeLabelsFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to get cluster nodeLabels due to exception.",
|
RouterServerUtil.logAndThrowException("Unable to get cluster nodeLabels due to exception.",
|
||||||
|
@ -1563,8 +1525,7 @@ public class FederationClientInterceptor
|
||||||
new Class[] {GetAllResourceProfilesRequest.class}, new Object[] {request});
|
new Class[] {GetAllResourceProfilesRequest.class}, new Object[] {request});
|
||||||
Collection<GetAllResourceProfilesResponse> resourceProfiles = null;
|
Collection<GetAllResourceProfilesResponse> resourceProfiles = null;
|
||||||
try {
|
try {
|
||||||
resourceProfiles = invokeAppClientProtocolMethod(true, remoteMethod,
|
resourceProfiles = invokeConcurrent(remoteMethod, GetAllResourceProfilesResponse.class);
|
||||||
GetAllResourceProfilesResponse.class);
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrGetResourceProfilesFailedRetrieved();
|
routerMetrics.incrGetResourceProfilesFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to get resource profiles due to exception.",
|
RouterServerUtil.logAndThrowException("Unable to get resource profiles due to exception.",
|
||||||
|
@ -1588,8 +1549,7 @@ public class FederationClientInterceptor
|
||||||
new Class[] {GetResourceProfileRequest.class}, new Object[] {request});
|
new Class[] {GetResourceProfileRequest.class}, new Object[] {request});
|
||||||
Collection<GetResourceProfileResponse> resourceProfile = null;
|
Collection<GetResourceProfileResponse> resourceProfile = null;
|
||||||
try {
|
try {
|
||||||
resourceProfile = invokeAppClientProtocolMethod(true, remoteMethod,
|
resourceProfile = invokeConcurrent(remoteMethod, GetResourceProfileResponse.class);
|
||||||
GetResourceProfileResponse.class);
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrGetResourceProfileFailedRetrieved();
|
routerMetrics.incrGetResourceProfileFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to get resource profile due to exception.",
|
RouterServerUtil.logAndThrowException("Unable to get resource profile due to exception.",
|
||||||
|
@ -1612,8 +1572,7 @@ public class FederationClientInterceptor
|
||||||
new Class[] {GetAllResourceTypeInfoRequest.class}, new Object[] {request});
|
new Class[] {GetAllResourceTypeInfoRequest.class}, new Object[] {request});
|
||||||
Collection<GetAllResourceTypeInfoResponse> listResourceTypeInfo;
|
Collection<GetAllResourceTypeInfoResponse> listResourceTypeInfo;
|
||||||
try {
|
try {
|
||||||
listResourceTypeInfo = invokeAppClientProtocolMethod(true, remoteMethod,
|
listResourceTypeInfo = invokeConcurrent(remoteMethod, GetAllResourceTypeInfoResponse.class);
|
||||||
GetAllResourceTypeInfoResponse.class);
|
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrResourceTypeInfoFailedRetrieved();
|
routerMetrics.incrResourceTypeInfoFailedRetrieved();
|
||||||
LOG.error("Unable to get all resource type info node due to exception.", ex);
|
LOG.error("Unable to get all resource type info node due to exception.", ex);
|
||||||
|
@ -1644,8 +1603,8 @@ public class FederationClientInterceptor
|
||||||
new Class[] {GetAttributesToNodesRequest.class}, new Object[] {request});
|
new Class[] {GetAttributesToNodesRequest.class}, new Object[] {request});
|
||||||
Collection<GetAttributesToNodesResponse> attributesToNodesResponses = null;
|
Collection<GetAttributesToNodesResponse> attributesToNodesResponses = null;
|
||||||
try {
|
try {
|
||||||
attributesToNodesResponses = invokeAppClientProtocolMethod(true, remoteMethod,
|
attributesToNodesResponses =
|
||||||
GetAttributesToNodesResponse.class);
|
invokeConcurrent(remoteMethod, GetAttributesToNodesResponse.class);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrGetAttributesToNodesFailedRetrieved();
|
routerMetrics.incrGetAttributesToNodesFailedRetrieved();
|
||||||
RouterServerUtil.logAndThrowException("Unable to get attributes to nodes due to exception.",
|
RouterServerUtil.logAndThrowException("Unable to get attributes to nodes due to exception.",
|
||||||
|
@ -1668,7 +1627,7 @@ public class FederationClientInterceptor
|
||||||
new Class[] {GetClusterNodeAttributesRequest.class}, new Object[] {request});
|
new Class[] {GetClusterNodeAttributesRequest.class}, new Object[] {request});
|
||||||
Collection<GetClusterNodeAttributesResponse> clusterNodeAttributesResponses = null;
|
Collection<GetClusterNodeAttributesResponse> clusterNodeAttributesResponses = null;
|
||||||
try {
|
try {
|
||||||
clusterNodeAttributesResponses = invokeAppClientProtocolMethod(true, remoteMethod,
|
clusterNodeAttributesResponses = invokeConcurrent(remoteMethod,
|
||||||
GetClusterNodeAttributesResponse.class);
|
GetClusterNodeAttributesResponse.class);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrGetClusterNodeAttributesFailedRetrieved();
|
routerMetrics.incrGetClusterNodeAttributesFailedRetrieved();
|
||||||
|
@ -1693,7 +1652,7 @@ public class FederationClientInterceptor
|
||||||
new Class[] {GetNodesToAttributesRequest.class}, new Object[] {request});
|
new Class[] {GetNodesToAttributesRequest.class}, new Object[] {request});
|
||||||
Collection<GetNodesToAttributesResponse> nodesToAttributesResponses = null;
|
Collection<GetNodesToAttributesResponse> nodesToAttributesResponses = null;
|
||||||
try {
|
try {
|
||||||
nodesToAttributesResponses = invokeAppClientProtocolMethod(true, remoteMethod,
|
nodesToAttributesResponses = invokeConcurrent(remoteMethod,
|
||||||
GetNodesToAttributesResponse.class);
|
GetNodesToAttributesResponse.class);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
routerMetrics.incrGetNodesToAttributesFailedRetrieved();
|
routerMetrics.incrGetNodesToAttributesFailedRetrieved();
|
||||||
|
|
|
@ -30,6 +30,7 @@ import java.util.HashMap;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
import org.apache.hadoop.test.LambdaTestUtils;
|
import org.apache.hadoop.test.LambdaTestUtils;
|
||||||
|
@ -127,6 +128,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||||
import org.apache.hadoop.yarn.server.federation.policies.manager.UniformBroadcastPolicyManager;
|
import org.apache.hadoop.yarn.server.federation.policies.manager.UniformBroadcastPolicyManager;
|
||||||
import org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore;
|
import org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore;
|
||||||
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
|
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
|
||||||
|
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterInfo;
|
||||||
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
|
||||||
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreTestUtil;
|
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreTestUtil;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
|
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
|
||||||
|
@ -579,12 +581,20 @@ public class TestFederationClientInterceptor extends BaseRouterClientRMTest {
|
||||||
Assert.assertEquals(subClusters.size(),
|
Assert.assertEquals(subClusters.size(),
|
||||||
response.getClusterMetrics().getNumNodeManagers());
|
response.getClusterMetrics().getNumNodeManagers());
|
||||||
|
|
||||||
|
// Clear Membership
|
||||||
|
Map<SubClusterId, SubClusterInfo> membership = new HashMap<>();
|
||||||
|
membership.putAll(stateStore.getMembership());
|
||||||
|
stateStore.getMembership().clear();
|
||||||
|
|
||||||
ClientMethod remoteMethod = new ClientMethod("getClusterMetrics",
|
ClientMethod remoteMethod = new ClientMethod("getClusterMetrics",
|
||||||
new Class[] {GetClusterMetricsRequest.class},
|
new Class[] {GetClusterMetricsRequest.class},
|
||||||
new Object[] {GetClusterMetricsRequest.newInstance()});
|
new Object[] {GetClusterMetricsRequest.newInstance()});
|
||||||
Map<SubClusterId, GetClusterMetricsResponse> clusterMetrics = interceptor.
|
Collection<GetClusterMetricsResponse> clusterMetrics = interceptor.invokeConcurrent(
|
||||||
invokeConcurrent(new ArrayList<>(), remoteMethod, GetClusterMetricsResponse.class);
|
remoteMethod, GetClusterMetricsResponse.class);
|
||||||
Assert.assertTrue(clusterMetrics.isEmpty());
|
Assert.assertTrue(clusterMetrics.isEmpty());
|
||||||
|
|
||||||
|
// Restore membership
|
||||||
|
stateStore.setMembership(membership);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue