YARN-11370. [Federation] Refactor MemoryFederationStateStore code. (#5126)

This commit is contained in:
slfan1989 2023-02-23 04:37:35 +08:00 committed by GitHub
parent e8a6b2c2c4
commit 2e997d818d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 66 additions and 65 deletions

View File

@ -41,6 +41,7 @@ import org.apache.hadoop.yarn.api.records.ReservationId;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.store.FederationStateStore; import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
import org.apache.hadoop.yarn.server.federation.store.metrics.FederationStateStoreClientMetrics;
import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterRequest; import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterRequest;
import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterResponse; import org.apache.hadoop.yarn.server.federation.store.records.AddApplicationHomeSubClusterResponse;
import org.apache.hadoop.yarn.server.federation.store.records.ApplicationHomeSubCluster; import org.apache.hadoop.yarn.server.federation.store.records.ApplicationHomeSubCluster;
@ -107,7 +108,7 @@ import static org.apache.hadoop.yarn.server.federation.store.utils.FederationSta
public class MemoryFederationStateStore implements FederationStateStore { public class MemoryFederationStateStore implements FederationStateStore {
private Map<SubClusterId, SubClusterInfo> membership; private Map<SubClusterId, SubClusterInfo> membership;
private Map<ApplicationId, SubClusterId> applications; private Map<ApplicationId, ApplicationHomeSubCluster> applications;
private Map<ReservationId, SubClusterId> reservations; private Map<ReservationId, SubClusterId> reservations;
private Map<String, SubClusterPolicyConfiguration> policies; private Map<String, SubClusterPolicyConfiguration> policies;
private RouterRMDTSecretManagerState routerRMSecretManagerState; private RouterRMDTSecretManagerState routerRMSecretManagerState;
@ -122,10 +123,10 @@ public class MemoryFederationStateStore implements FederationStateStore {
@Override @Override
public void init(Configuration conf) { public void init(Configuration conf) {
membership = new ConcurrentHashMap<SubClusterId, SubClusterInfo>(); membership = new ConcurrentHashMap<>();
applications = new ConcurrentHashMap<ApplicationId, SubClusterId>(); applications = new ConcurrentHashMap<>();
reservations = new ConcurrentHashMap<ReservationId, SubClusterId>(); reservations = new ConcurrentHashMap<>();
policies = new ConcurrentHashMap<String, SubClusterPolicyConfiguration>(); policies = new ConcurrentHashMap<>();
routerRMSecretManagerState = new RouterRMDTSecretManagerState(); routerRMSecretManagerState = new RouterRMDTSecretManagerState();
maxAppsInStateStore = conf.getInt( maxAppsInStateStore = conf.getInt(
YarnConfiguration.FEDERATION_STATESTORE_MAX_APPLICATIONS, YarnConfiguration.FEDERATION_STATESTORE_MAX_APPLICATIONS,
@ -143,14 +144,15 @@ public class MemoryFederationStateStore implements FederationStateStore {
} }
@Override @Override
public SubClusterRegisterResponse registerSubCluster( public SubClusterRegisterResponse registerSubCluster(SubClusterRegisterRequest request)
SubClusterRegisterRequest request) throws YarnException { throws YarnException {
long startTime = clock.getTime();
FederationMembershipStateStoreInputValidator.validate(request); FederationMembershipStateStoreInputValidator.validate(request);
SubClusterInfo subClusterInfo = request.getSubClusterInfo(); SubClusterInfo subClusterInfo = request.getSubClusterInfo();
long currentTime = long currentTime =
Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTimeInMillis(); Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTimeInMillis();
SubClusterInfo subClusterInfoToSave = SubClusterInfo subClusterInfoToSave =
SubClusterInfo.newInstance(subClusterInfo.getSubClusterId(), SubClusterInfo.newInstance(subClusterInfo.getSubClusterId(),
subClusterInfo.getAMRMServiceAddress(), subClusterInfo.getAMRMServiceAddress(),
@ -161,18 +163,21 @@ public class MemoryFederationStateStore implements FederationStateStore {
subClusterInfo.getCapability()); subClusterInfo.getCapability());
membership.put(subClusterInfo.getSubClusterId(), subClusterInfoToSave); membership.put(subClusterInfo.getSubClusterId(), subClusterInfoToSave);
long stopTime = clock.getTime();
FederationStateStoreClientMetrics.succeededStateStoreCall(stopTime - startTime);
return SubClusterRegisterResponse.newInstance(); return SubClusterRegisterResponse.newInstance();
} }
@Override @Override
public SubClusterDeregisterResponse deregisterSubCluster( public SubClusterDeregisterResponse deregisterSubCluster(SubClusterDeregisterRequest request)
SubClusterDeregisterRequest request) throws YarnException { throws YarnException {
FederationMembershipStateStoreInputValidator.validate(request); FederationMembershipStateStoreInputValidator.validate(request);
SubClusterInfo subClusterInfo = membership.get(request.getSubClusterId()); SubClusterInfo subClusterInfo = membership.get(request.getSubClusterId());
if (subClusterInfo == null) { if (subClusterInfo == null) {
String errMsg = FederationStateStoreUtils.logAndThrowStoreException(
"SubCluster " + request.getSubClusterId().toString() + " not found"; LOG, "SubCluster %s not found", request.getSubClusterId());
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
} else { } else {
subClusterInfo.setState(request.getState()); subClusterInfo.setState(request.getState());
} }
@ -181,17 +186,16 @@ public class MemoryFederationStateStore implements FederationStateStore {
} }
@Override @Override
public SubClusterHeartbeatResponse subClusterHeartbeat( public SubClusterHeartbeatResponse subClusterHeartbeat(SubClusterHeartbeatRequest request)
SubClusterHeartbeatRequest request) throws YarnException { throws YarnException {
FederationMembershipStateStoreInputValidator.validate(request); FederationMembershipStateStoreInputValidator.validate(request);
SubClusterId subClusterId = request.getSubClusterId(); SubClusterId subClusterId = request.getSubClusterId();
SubClusterInfo subClusterInfo = membership.get(subClusterId); SubClusterInfo subClusterInfo = membership.get(subClusterId);
if (subClusterInfo == null) { if (subClusterInfo == null) {
String errMsg = "SubCluster " + subClusterId.toString() FederationStateStoreUtils.logAndThrowStoreException(
+ " does not exist; cannot heartbeat"; LOG, "SubCluster %s does not exist; cannot heartbeat.", request.getSubClusterId());
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
} }
long currentTime = long currentTime =
@ -205,11 +209,12 @@ public class MemoryFederationStateStore implements FederationStateStore {
} }
@Override @Override
public GetSubClusterInfoResponse getSubCluster( public GetSubClusterInfoResponse getSubCluster(GetSubClusterInfoRequest request)
GetSubClusterInfoRequest request) throws YarnException { throws YarnException {
FederationMembershipStateStoreInputValidator.validate(request); FederationMembershipStateStoreInputValidator.validate(request);
SubClusterId subClusterId = request.getSubClusterId(); SubClusterId subClusterId = request.getSubClusterId();
if (!membership.containsKey(subClusterId)) { if (!membership.containsKey(subClusterId)) {
LOG.warn("The queried SubCluster: {} does not exist.", subClusterId); LOG.warn("The queried SubCluster: {} does not exist.", subClusterId);
return null; return null;
@ -219,16 +224,17 @@ public class MemoryFederationStateStore implements FederationStateStore {
} }
@Override @Override
public GetSubClustersInfoResponse getSubClusters( public GetSubClustersInfoResponse getSubClusters(GetSubClustersInfoRequest request)
GetSubClustersInfoRequest request) throws YarnException { throws YarnException {
List<SubClusterInfo> result = new ArrayList<SubClusterInfo>();
List<SubClusterInfo> result = new ArrayList<>();
for (SubClusterInfo info : membership.values()) { for (SubClusterInfo info : membership.values()) {
if (!request.getFilterInactiveSubClusters() if (!request.getFilterInactiveSubClusters() || info.getState().isActive()) {
|| info.getState().isActive()) {
result.add(info); result.add(info);
} }
} }
return GetSubClustersInfoResponse.newInstance(result); return GetSubClustersInfoResponse.newInstance(result);
} }
@ -239,16 +245,16 @@ public class MemoryFederationStateStore implements FederationStateStore {
AddApplicationHomeSubClusterRequest request) throws YarnException { AddApplicationHomeSubClusterRequest request) throws YarnException {
FederationApplicationHomeSubClusterStoreInputValidator.validate(request); FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationId appId = ApplicationHomeSubCluster homeSubCluster = request.getApplicationHomeSubCluster();
request.getApplicationHomeSubCluster().getApplicationId();
ApplicationId appId = homeSubCluster.getApplicationId();
if (!applications.containsKey(appId)) { if (!applications.containsKey(appId)) {
applications.put(appId, applications.put(appId, homeSubCluster);
request.getApplicationHomeSubCluster().getHomeSubCluster());
} }
return AddApplicationHomeSubClusterResponse ApplicationHomeSubCluster respHomeSubCluster = applications.get(appId);
.newInstance(applications.get(appId)); return AddApplicationHomeSubClusterResponse.newInstance(respHomeSubCluster.getHomeSubCluster());
} }
@Override @Override
@ -256,15 +262,16 @@ public class MemoryFederationStateStore implements FederationStateStore {
UpdateApplicationHomeSubClusterRequest request) throws YarnException { UpdateApplicationHomeSubClusterRequest request) throws YarnException {
FederationApplicationHomeSubClusterStoreInputValidator.validate(request); FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationId appId = ApplicationId appId =
request.getApplicationHomeSubCluster().getApplicationId(); request.getApplicationHomeSubCluster().getApplicationId();
if (!applications.containsKey(appId)) { if (!applications.containsKey(appId)) {
String errMsg = "Application " + appId + " does not exist"; FederationStateStoreUtils.logAndThrowStoreException(LOG,
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg); "Application %s does not exist.", appId);
} }
applications.put(appId, applications.put(appId, request.getApplicationHomeSubCluster());
request.getApplicationHomeSubCluster().getHomeSubCluster());
return UpdateApplicationHomeSubClusterResponse.newInstance(); return UpdateApplicationHomeSubClusterResponse.newInstance();
} }
@ -275,11 +282,12 @@ public class MemoryFederationStateStore implements FederationStateStore {
FederationApplicationHomeSubClusterStoreInputValidator.validate(request); FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationId appId = request.getApplicationId(); ApplicationId appId = request.getApplicationId();
if (!applications.containsKey(appId)) { if (!applications.containsKey(appId)) {
String errMsg = "Application " + appId + " does not exist"; FederationStateStoreUtils.logAndThrowStoreException(LOG,
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg); "Application %s does not exist.", appId);
} }
return GetApplicationHomeSubClusterResponse.newInstance(appId, applications.get(appId)); return GetApplicationHomeSubClusterResponse.newInstance(appId,
applications.get(appId).getHomeSubCluster());
} }
@Override @Override
@ -303,7 +311,7 @@ public class MemoryFederationStateStore implements FederationStateStore {
} }
private ApplicationHomeSubCluster generateAppHomeSC(ApplicationId applicationId) { private ApplicationHomeSubCluster generateAppHomeSC(ApplicationId applicationId) {
SubClusterId subClusterId = applications.get(applicationId); SubClusterId subClusterId = applications.get(applicationId).getHomeSubCluster();
return ApplicationHomeSubCluster.newInstance(applicationId, subClusterId); return ApplicationHomeSubCluster.newInstance(applicationId, subClusterId);
} }
@ -314,8 +322,8 @@ public class MemoryFederationStateStore implements FederationStateStore {
FederationApplicationHomeSubClusterStoreInputValidator.validate(request); FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationId appId = request.getApplicationId(); ApplicationId appId = request.getApplicationId();
if (!applications.containsKey(appId)) { if (!applications.containsKey(appId)) {
String errMsg = "Application " + appId + " does not exist"; FederationStateStoreUtils.logAndThrowStoreException(LOG,
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg); "Application %s does not exist.", appId);
} }
applications.remove(appId); applications.remove(appId);
@ -329,12 +337,11 @@ public class MemoryFederationStateStore implements FederationStateStore {
FederationPolicyStoreInputValidator.validate(request); FederationPolicyStoreInputValidator.validate(request);
String queue = request.getQueue(); String queue = request.getQueue();
if (!policies.containsKey(queue)) { if (!policies.containsKey(queue)) {
LOG.warn("Policy for queue: {} does not exist.", queue); LOG.warn("Policy for queue : {} does not exist.", queue);
return null; return null;
} }
return GetSubClusterPolicyConfigurationResponse return GetSubClusterPolicyConfigurationResponse.newInstance(policies.get(queue));
.newInstance(policies.get(queue));
} }
@Override @Override
@ -350,8 +357,7 @@ public class MemoryFederationStateStore implements FederationStateStore {
@Override @Override
public GetSubClusterPoliciesConfigurationsResponse getPoliciesConfigurations( public GetSubClusterPoliciesConfigurationsResponse getPoliciesConfigurations(
GetSubClusterPoliciesConfigurationsRequest request) throws YarnException { GetSubClusterPoliciesConfigurationsRequest request) throws YarnException {
ArrayList<SubClusterPolicyConfiguration> result = ArrayList<SubClusterPolicyConfiguration> result = new ArrayList<>();
new ArrayList<SubClusterPolicyConfiguration>();
for (SubClusterPolicyConfiguration policy : policies.values()) { for (SubClusterPolicyConfiguration policy : policies.values()) {
result.add(policy); result.add(policy);
} }
@ -386,7 +392,8 @@ public class MemoryFederationStateStore implements FederationStateStore {
FederationReservationHomeSubClusterStoreInputValidator.validate(request); FederationReservationHomeSubClusterStoreInputValidator.validate(request);
ReservationId reservationId = request.getReservationId(); ReservationId reservationId = request.getReservationId();
if (!reservations.containsKey(reservationId)) { if (!reservations.containsKey(reservationId)) {
throw new YarnException("Reservation " + reservationId + " does not exist"); FederationStateStoreUtils.logAndThrowStoreException(LOG,
"Reservation %s does not exist.", reservationId);
} }
SubClusterId subClusterId = reservations.get(reservationId); SubClusterId subClusterId = reservations.get(reservationId);
ReservationHomeSubCluster homeSubCluster = ReservationHomeSubCluster homeSubCluster =
@ -417,7 +424,8 @@ public class MemoryFederationStateStore implements FederationStateStore {
ReservationId reservationId = request.getReservationHomeSubCluster().getReservationId(); ReservationId reservationId = request.getReservationHomeSubCluster().getReservationId();
if (!reservations.containsKey(reservationId)) { if (!reservations.containsKey(reservationId)) {
throw new YarnException("Reservation " + reservationId + " does not exist."); FederationStateStoreUtils.logAndThrowStoreException(LOG,
"Reservation %s does not exist.", reservationId);
} }
SubClusterId subClusterId = request.getReservationHomeSubCluster().getHomeSubCluster(); SubClusterId subClusterId = request.getReservationHomeSubCluster().getHomeSubCluster();
@ -431,7 +439,8 @@ public class MemoryFederationStateStore implements FederationStateStore {
FederationReservationHomeSubClusterStoreInputValidator.validate(request); FederationReservationHomeSubClusterStoreInputValidator.validate(request);
ReservationId reservationId = request.getReservationId(); ReservationId reservationId = request.getReservationId();
if (!reservations.containsKey(reservationId)) { if (!reservations.containsKey(reservationId)) {
throw new YarnException("Reservation " + reservationId + " does not exist"); FederationStateStoreUtils.logAndThrowStoreException(LOG,
"Reservation %s does not exist.", reservationId);
} }
reservations.remove(reservationId); reservations.remove(reservationId);
return DeleteReservationHomeSubClusterResponse.newInstance(); return DeleteReservationHomeSubClusterResponse.newInstance();
@ -446,9 +455,8 @@ public class MemoryFederationStateStore implements FederationStateStore {
Set<DelegationKey> rmDTMasterKeyState = routerRMSecretManagerState.getMasterKeyState(); Set<DelegationKey> rmDTMasterKeyState = routerRMSecretManagerState.getMasterKeyState();
if (rmDTMasterKeyState.contains(delegationKey)) { if (rmDTMasterKeyState.contains(delegationKey)) {
LOG.info("Error storing info for RMDTMasterKey with keyID: {}.", delegationKey.getKeyId()); FederationStateStoreUtils.logAndThrowStoreException(LOG,
throw new IOException("RMDTMasterKey with keyID: " + delegationKey.getKeyId() + "Error storing info for RMDTMasterKey with keyID: %s.", delegationKey.getKeyId());
" is already stored");
} }
routerRMSecretManagerState.getMasterKeyState().add(delegationKey); routerRMSecretManagerState.getMasterKeyState().add(delegationKey);

View File

@ -164,12 +164,9 @@ public abstract class FederationStateStoreBaseTest {
SubClusterDeregisterRequest deregisterRequest = SubClusterDeregisterRequest SubClusterDeregisterRequest deregisterRequest = SubClusterDeregisterRequest
.newInstance(subClusterId, SubClusterState.SC_UNREGISTERED); .newInstance(subClusterId, SubClusterState.SC_UNREGISTERED);
try {
stateStore.deregisterSubCluster(deregisterRequest); LambdaTestUtils.intercept(YarnException.class,
Assert.fail(); "SubCluster SC not found", () -> stateStore.deregisterSubCluster(deregisterRequest));
} catch (FederationStateStoreException e) {
Assert.assertTrue(e.getMessage().startsWith("SubCluster SC not found"));
}
} }
@Test @Test
@ -266,13 +263,9 @@ public abstract class FederationStateStoreBaseTest {
SubClusterHeartbeatRequest heartbeatRequest = SubClusterHeartbeatRequest SubClusterHeartbeatRequest heartbeatRequest = SubClusterHeartbeatRequest
.newInstance(subClusterId, SubClusterState.SC_RUNNING, "capability"); .newInstance(subClusterId, SubClusterState.SC_RUNNING, "capability");
try { LambdaTestUtils.intercept(YarnException.class,
stateStore.subClusterHeartbeat(heartbeatRequest); "SubCluster SC does not exist; cannot heartbeat",
Assert.fail(); () -> stateStore.subClusterHeartbeat(heartbeatRequest));
} catch (FederationStateStoreException e) {
Assert.assertTrue(e.getMessage()
.startsWith("SubCluster SC does not exist; cannot heartbeat"));
}
} }
// Test FederationApplicationHomeSubClusterStore // Test FederationApplicationHomeSubClusterStore