YARN-11479. [Federation] ZookeeperFederationStateStore Support Store ApplicationSubmitData. (#5631)

This commit is contained in:
slfan1989 2023-05-11 00:37:47 +08:00 committed by GitHub
parent be50d221f5
commit 690db3c34b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 140 additions and 49 deletions

View File

@ -37,11 +37,13 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.token.delegation.DelegationKey; import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.apache.hadoop.util.curator.ZKCuratorManager; import org.apache.hadoop.util.curator.ZKCuratorManager;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.federation.proto.YarnServerFederationProtos.SubClusterIdProto; import org.apache.hadoop.yarn.federation.proto.YarnServerFederationProtos.SubClusterIdProto;
import org.apache.hadoop.yarn.federation.proto.YarnServerFederationProtos.SubClusterInfoProto; import org.apache.hadoop.yarn.federation.proto.YarnServerFederationProtos.SubClusterInfoProto;
import org.apache.hadoop.yarn.federation.proto.YarnServerFederationProtos.SubClusterPolicyConfigurationProto; import org.apache.hadoop.yarn.federation.proto.YarnServerFederationProtos.SubClusterPolicyConfigurationProto;
import org.apache.hadoop.yarn.federation.proto.YarnServerFederationProtos.ApplicationHomeSubClusterProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto;
import org.apache.hadoop.yarn.security.client.YARNDelegationTokenIdentifier; import org.apache.hadoop.yarn.security.client.YARNDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.federation.store.FederationStateStore; import org.apache.hadoop.yarn.server.federation.store.FederationStateStore;
@ -94,6 +96,7 @@ import org.apache.hadoop.yarn.server.federation.store.records.RouterRMTokenReque
import org.apache.hadoop.yarn.server.federation.store.records.impl.pb.SubClusterIdPBImpl; import org.apache.hadoop.yarn.server.federation.store.records.impl.pb.SubClusterIdPBImpl;
import org.apache.hadoop.yarn.server.federation.store.records.impl.pb.SubClusterInfoPBImpl; import org.apache.hadoop.yarn.server.federation.store.records.impl.pb.SubClusterInfoPBImpl;
import org.apache.hadoop.yarn.server.federation.store.records.impl.pb.SubClusterPolicyConfigurationPBImpl; import org.apache.hadoop.yarn.server.federation.store.records.impl.pb.SubClusterPolicyConfigurationPBImpl;
import org.apache.hadoop.yarn.server.federation.store.records.impl.pb.ApplicationHomeSubClusterPBImpl;
import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKey; import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKey;
import org.apache.hadoop.yarn.server.federation.store.records.RouterStoreToken; import org.apache.hadoop.yarn.server.federation.store.records.RouterStoreToken;
import org.apache.hadoop.yarn.server.federation.store.utils.FederationApplicationHomeSubClusterStoreInputValidator; import org.apache.hadoop.yarn.server.federation.store.utils.FederationApplicationHomeSubClusterStoreInputValidator;
@ -323,49 +326,76 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
long start = clock.getTime(); long start = clock.getTime();
FederationApplicationHomeSubClusterStoreInputValidator.validate(request); FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationHomeSubCluster app = request.getApplicationHomeSubCluster();
ApplicationId appId = app.getApplicationId(); // parse parameters
// We need to get applicationId, subClusterId, appSubmissionContext 3 parameters.
ApplicationHomeSubCluster requestApplicationHomeSubCluster =
request.getApplicationHomeSubCluster();
ApplicationId requestApplicationId = requestApplicationHomeSubCluster.getApplicationId();
SubClusterId requestSubClusterId = requestApplicationHomeSubCluster.getHomeSubCluster();
ApplicationSubmissionContext requestApplicationSubmissionContext =
requestApplicationHomeSubCluster.getApplicationSubmissionContext();
LOG.debug("applicationId = {}, subClusterId = {}, appSubmissionContext = {}.",
requestApplicationId, requestSubClusterId, requestApplicationSubmissionContext);
// Try to write the subcluster // Try to write the subcluster
SubClusterId homeSubCluster = app.getHomeSubCluster();
try { try {
putApp(appId, homeSubCluster, false); storeOrUpdateApplicationHomeSubCluster(requestApplicationId,
requestApplicationHomeSubCluster, false);
} catch (Exception e) { } catch (Exception e) {
String errMsg = "Cannot add application home subcluster for " + appId; String errMsg = "Cannot add application home subcluster for " + requestApplicationId;
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg); FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
} }
// Check for the actual subcluster // Check for the actual subcluster
try { try {
homeSubCluster = getApp(appId); // We try to get the ApplicationHomeSubCluster actually stored in ZK
// according to the applicationId.
ApplicationHomeSubCluster actualAppHomeSubCluster =
getApplicationHomeSubCluster(requestApplicationId);
SubClusterId responseSubClusterId = actualAppHomeSubCluster.getHomeSubCluster();
long end = clock.getTime();
opDurations.addAppHomeSubClusterDuration(start, end);
return AddApplicationHomeSubClusterResponse.newInstance(responseSubClusterId);
} catch (Exception e) { } catch (Exception e) {
String errMsg = "Cannot check app home subcluster for " + appId; String errMsg = "Cannot check app home subcluster for " + requestApplicationId;
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg); FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
} }
long end = clock.getTime();
opDurations.addAppHomeSubClusterDuration(start, end); // Throw YarnException.
return AddApplicationHomeSubClusterResponse throw new YarnException("Cannot addApplicationHomeSubCluster by request");
.newInstance(homeSubCluster);
} }
@Override @Override
public UpdateApplicationHomeSubClusterResponse public UpdateApplicationHomeSubClusterResponse updateApplicationHomeSubCluster(
updateApplicationHomeSubCluster( UpdateApplicationHomeSubClusterRequest request) throws YarnException {
UpdateApplicationHomeSubClusterRequest request)
throws YarnException {
long start = clock.getTime(); long start = clock.getTime();
FederationApplicationHomeSubClusterStoreInputValidator.validate(request); FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationHomeSubCluster app = request.getApplicationHomeSubCluster(); ApplicationHomeSubCluster requestApplicationHomeSubCluster =
ApplicationId appId = app.getApplicationId(); request.getApplicationHomeSubCluster();
SubClusterId homeSubCluster = getApp(appId); ApplicationId requestApplicationId = requestApplicationHomeSubCluster.getApplicationId();
if (homeSubCluster == null) { ApplicationHomeSubCluster zkStoreApplicationHomeSubCluster =
String errMsg = "Application " + appId + " does not exist"; getApplicationHomeSubCluster(requestApplicationId);
if (zkStoreApplicationHomeSubCluster == null) {
String errMsg = "Application " + requestApplicationId + " does not exist";
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg); FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
} }
SubClusterId newSubClusterId =
request.getApplicationHomeSubCluster().getHomeSubCluster(); SubClusterId oldSubClusterId = zkStoreApplicationHomeSubCluster.getHomeSubCluster();
putApp(appId, newSubClusterId, true); SubClusterId newSubClusterId = requestApplicationHomeSubCluster.getHomeSubCluster();
ApplicationSubmissionContext requestApplicationSubmissionContext =
requestApplicationHomeSubCluster.getApplicationSubmissionContext();
LOG.debug("applicationId = {}, oldHomeSubClusterId = {}, newHomeSubClusterId = {}, " +
"appSubmissionContext = {}.", requestApplicationId, oldSubClusterId, newSubClusterId,
requestApplicationSubmissionContext);
// update stored ApplicationHomeSubCluster
storeOrUpdateApplicationHomeSubCluster(requestApplicationId,
requestApplicationHomeSubCluster, true);
long end = clock.getTime(); long end = clock.getTime();
opDurations.addUpdateAppHomeSubClusterDuration(start, end); opDurations.addUpdateAppHomeSubClusterDuration(start, end);
@ -378,15 +408,33 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
long start = clock.getTime(); long start = clock.getTime();
FederationApplicationHomeSubClusterStoreInputValidator.validate(request); FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationId appId = request.getApplicationId(); ApplicationId requestApplicationId = request.getApplicationId();
SubClusterId homeSubCluster = getApp(appId);
if (homeSubCluster == null) { ApplicationHomeSubCluster zkStoreApplicationHomeSubCluster =
String errMsg = "Application " + appId + " does not exist"; getApplicationHomeSubCluster(requestApplicationId);
if (zkStoreApplicationHomeSubCluster == null) {
String errMsg = "Application " + requestApplicationId + " does not exist";
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg); FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
} }
// Prepare to return data
SubClusterId subClusterId = zkStoreApplicationHomeSubCluster.getHomeSubCluster();
long createTime = zkStoreApplicationHomeSubCluster.getCreateTime();
long end = clock.getTime(); long end = clock.getTime();
opDurations.addGetAppHomeSubClusterDuration(start, end); opDurations.addGetAppHomeSubClusterDuration(start, end);
return GetApplicationHomeSubClusterResponse.newInstance(appId, homeSubCluster);
// If the request asks for an ApplicationSubmissionContext to be returned,
// we will return
if (request.getContainsAppSubmissionContext()) {
ApplicationSubmissionContext submissionContext =
zkStoreApplicationHomeSubCluster.getApplicationSubmissionContext();
return GetApplicationHomeSubClusterResponse.newInstance(
requestApplicationId, subClusterId, createTime, submissionContext);
}
return GetApplicationHomeSubClusterResponse.newInstance(requestApplicationId,
subClusterId, createTime);
} }
@Override @Override
@ -421,13 +469,18 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
private ApplicationHomeSubCluster generateAppHomeSC(String appId) { private ApplicationHomeSubCluster generateAppHomeSC(String appId) {
try { try {
// Parse ApplicationHomeSubCluster
ApplicationId applicationId = ApplicationId.fromString(appId); ApplicationId applicationId = ApplicationId.fromString(appId);
SubClusterId homeSubCluster = getApp(applicationId); ApplicationHomeSubCluster zkStoreApplicationHomeSubCluster =
ApplicationHomeSubCluster app = getApplicationHomeSubCluster(applicationId);
ApplicationHomeSubCluster.newInstance(applicationId, homeSubCluster);
return app; // Prepare to return data
SubClusterId subClusterId = zkStoreApplicationHomeSubCluster.getHomeSubCluster();
ApplicationHomeSubCluster resultApplicationHomeSubCluster =
ApplicationHomeSubCluster.newInstance(applicationId, subClusterId);
return resultApplicationHomeSubCluster;
} catch (Exception ex) { } catch (Exception ex) {
LOG.error("get homeSubCluster by appId = {}.", appId); LOG.error("get homeSubCluster by appId = {}.", appId, ex);
} }
return null; return null;
} }
@ -674,39 +727,43 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
/** /**
* Get the subcluster for an application. * Get the subcluster for an application.
*
* @param appId Application identifier. * @param appId Application identifier.
* @return Subcluster identifier. * @return ApplicationHomeSubCluster identifier.
* @throws Exception If it cannot contact ZooKeeper. * @throws Exception If it cannot contact ZooKeeper.
*/ */
private SubClusterId getApp(final ApplicationId appId) throws YarnException { private ApplicationHomeSubCluster getApplicationHomeSubCluster(
final ApplicationId appId) throws YarnException {
String appZNode = getNodePath(appsZNode, appId.toString()); String appZNode = getNodePath(appsZNode, appId.toString());
SubClusterId subClusterId = null; ApplicationHomeSubCluster appHomeSubCluster = null;
byte[] data = get(appZNode); byte[] data = get(appZNode);
if (data != null) { if (data != null) {
try { try {
subClusterId = new SubClusterIdPBImpl( appHomeSubCluster = new ApplicationHomeSubClusterPBImpl(
SubClusterIdProto.parseFrom(data)); ApplicationHomeSubClusterProto.parseFrom(data));
} catch (InvalidProtocolBufferException e) { } catch (InvalidProtocolBufferException e) {
String errMsg = "Cannot parse application at " + appZNode; String errMsg = "Cannot parse application at " + appZNode;
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg); FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
} }
} }
return subClusterId; return appHomeSubCluster;
} }
/** /**
* Put an application. * We will store the data of ApplicationHomeSubCluster according to appId.
* @param appId Application identifier. *
* @param subClusterId Subcluster identifier. * @param applicationId ApplicationId.
* @param applicationHomeSubCluster ApplicationHomeSubCluster.
* @param update false, add records; true, update records.
* @throws Exception If it cannot contact ZooKeeper. * @throws Exception If it cannot contact ZooKeeper.
*/ */
private void putApp(final ApplicationId appId, private void storeOrUpdateApplicationHomeSubCluster(final ApplicationId applicationId,
final SubClusterId subClusterId, boolean update) final ApplicationHomeSubCluster applicationHomeSubCluster, boolean update)
throws YarnException { throws YarnException {
String appZNode = getNodePath(appsZNode, appId.toString()); String appZNode = getNodePath(appsZNode, applicationId.toString());
SubClusterIdProto proto = ApplicationHomeSubClusterProto proto =
((SubClusterIdPBImpl)subClusterId).getProto(); ((ApplicationHomeSubClusterPBImpl) applicationHomeSubCluster).getProto();
byte[] data = proto.toByteArray(); byte[] data = proto.toByteArray();
put(appZNode, data, update); put(appZNode, data, update);
} }

View File

@ -406,7 +406,7 @@ public abstract class FederationStateStoreBaseTest {
ApplicationId appId1 = ApplicationId.newInstance(1, 1); ApplicationId appId1 = ApplicationId.newInstance(1, 1);
SubClusterId subClusterId1 = SubClusterId.newInstance("SC1"); SubClusterId subClusterId1 = SubClusterId.newInstance("SC1");
ApplicationHomeSubCluster ahsc1 = ApplicationHomeSubCluster ahsc1 =
ApplicationHomeSubCluster.newInstance(appId1, subClusterId1); ApplicationHomeSubCluster.newInstance(appId1, subClusterId1);
ApplicationId appId2 = ApplicationId.newInstance(1, 2); ApplicationId appId2 = ApplicationId.newInstance(1, 2);
SubClusterId subClusterId2 = SubClusterId.newInstance("SC2"); SubClusterId subClusterId2 = SubClusterId.newInstance("SC2");
@ -470,6 +470,7 @@ public abstract class FederationStateStoreBaseTest {
Assert.assertEquals(10, items.size()); Assert.assertEquals(10, items.size());
for (ApplicationHomeSubCluster item : items) { for (ApplicationHomeSubCluster item : items) {
appHomeSubClusters.contains(item);
Assert.assertTrue(appHomeSubClusters.contains(item)); Assert.assertTrue(appHomeSubClusters.contains(item));
} }
} }

View File

@ -33,6 +33,10 @@ import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl;
import org.apache.hadoop.metrics2.impl.MetricsRecords; import org.apache.hadoop.metrics2.impl.MetricsRecords;
import org.apache.hadoop.security.token.delegation.DelegationKey; import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Time;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
@ -41,6 +45,10 @@ import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKey;
import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKeyRequest; import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKeyRequest;
import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKeyResponse; import org.apache.hadoop.yarn.server.federation.store.records.RouterMasterKeyResponse;
import org.apache.hadoop.yarn.server.federation.store.records.RouterStoreToken; import org.apache.hadoop.yarn.server.federation.store.records.RouterStoreToken;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
import org.apache.hadoop.yarn.server.federation.store.records.ApplicationHomeSubCluster;
import org.apache.hadoop.yarn.server.federation.store.records.GetApplicationHomeSubClusterRequest;
import org.apache.hadoop.yarn.server.federation.store.records.GetApplicationHomeSubClusterResponse;
import org.apache.hadoop.yarn.util.Records; import org.apache.hadoop.yarn.util.Records;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
@ -276,4 +284,29 @@ public class TestZookeeperFederationStateStore extends FederationStateStoreBaseT
assertNotNull(zkRouterStoreToken); assertNotNull(zkRouterStoreToken);
assertEquals(token, zkRouterStoreToken); assertEquals(token, zkRouterStoreToken);
} }
@Test
public void testGetApplicationHomeSubClusterWithContext() throws Exception {
ZookeeperFederationStateStore zkFederationStateStore =
(ZookeeperFederationStateStore) this.getStateStore();
ApplicationId appId = ApplicationId.newInstance(1, 3);
SubClusterId subClusterId = SubClusterId.newInstance("SC");
ApplicationSubmissionContext context =
ApplicationSubmissionContext.newInstance(appId, "test", "default",
Priority.newInstance(0), null, true, true,
2, Resource.newInstance(10, 2), "test");
addApplicationHomeSC(appId, subClusterId, context);
GetApplicationHomeSubClusterRequest getRequest =
GetApplicationHomeSubClusterRequest.newInstance(appId, true);
GetApplicationHomeSubClusterResponse result =
zkFederationStateStore.getApplicationHomeSubCluster(getRequest);
ApplicationHomeSubCluster applicationHomeSubCluster = result.getApplicationHomeSubCluster();
assertEquals(appId, applicationHomeSubCluster.getApplicationId());
assertEquals(subClusterId, applicationHomeSubCluster.getHomeSubCluster());
assertEquals(context, applicationHomeSubCluster.getApplicationSubmissionContext());
}
} }