From eda4bb5dcde6ab125b6886979ac846269d0a8e1e Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Tue, 23 Aug 2022 05:09:20 +0800 Subject: [PATCH] YARN-11250. Capture the Performance Metrics of ZookeeperFederationStateStore. (#4738) --- .../impl/MemoryFederationStateStore.java | 3 +- .../store/impl/SQLFederationStateStore.java | 3 +- .../ZKFederationStateStoreOpDurations.java | 155 ++++++++++++++++++ .../impl/ZookeeperFederationStateStore.java | 70 ++++++-- .../GetApplicationHomeSubClusterResponse.java | 5 +- .../TestZookeeperFederationStateStore.java | 69 ++++++++ 6 files changed, 286 insertions(+), 19 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZKFederationStateStoreOpDurations.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java index 0d8371bade5..920b8e8912d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/MemoryFederationStateStore.java @@ -249,8 +249,7 @@ public class MemoryFederationStateStore implements FederationStateStore { FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg); } - return GetApplicationHomeSubClusterResponse.newInstance( - ApplicationHomeSubCluster.newInstance(appId, applications.get(appId))); + return GetApplicationHomeSubClusterResponse.newInstance(appId, 
applications.get(appId)); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/SQLFederationStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/SQLFederationStateStore.java index 241224aa2f7..dffcfa6a10e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/SQLFederationStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/SQLFederationStateStore.java @@ -720,8 +720,7 @@ public class SQLFederationStateStore implements FederationStateStore { FederationStateStoreUtils.returnToPool(LOG, cstmt); } return GetApplicationHomeSubClusterResponse - .newInstance(ApplicationHomeSubCluster - .newInstance(request.getApplicationId(), homeRM)); + .newInstance(request.getApplicationId(), homeRM); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZKFederationStateStoreOpDurations.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZKFederationStateStoreOpDurations.java new file mode 100644 index 00000000000..6ce5e2ef461 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZKFederationStateStoreOpDurations.java @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.yarn.server.federation.store.impl;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.metrics2.MetricsCollector;
+import org.apache.hadoop.metrics2.MetricsInfo;
+import org.apache.hadoop.metrics2.MetricsSource;
+import org.apache.hadoop.metrics2.MetricsSystem;
+import org.apache.hadoop.metrics2.annotation.Metric;
+import org.apache.hadoop.metrics2.annotation.Metrics;
+import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
+import org.apache.hadoop.metrics2.lib.MetricsRegistry;
+import org.apache.hadoop.metrics2.lib.MutableRate;
+
+import static org.apache.hadoop.metrics2.lib.Interns.info;
+
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+@Metrics(context="ZKFederationStateStore-op-durations")
+public final class ZKFederationStateStoreOpDurations implements MetricsSource {
+ // Singleton metrics source: one MutableRate per ZookeeperFederationStateStore call type.
+ @Metric("Duration for a add application homeSubcluster call")
+ private MutableRate addAppHomeSubCluster;
+
+ @Metric("Duration for a update application homeSubcluster call")
+ private MutableRate updateAppHomeSubCluster;
+
+ @Metric("Duration for a get application homeSubcluster call")
+ private MutableRate getAppHomeSubCluster;
+
+ @Metric("Duration for a get applications homeSubcluster call")
+ private MutableRate getAppsHomeSubCluster;
+
+ @Metric("Duration for a delete applications homeSubcluster call")
+ private MutableRate deleteAppHomeSubCluster;
+
+ @Metric("Duration for a register subCluster call")
+ private MutableRate registerSubCluster;
+
+ @Metric("Duration for a deregister subCluster call")
+ private MutableRate deregisterSubCluster;
+
+ @Metric("Duration for a subCluster Heartbeat call")
+ private MutableRate subClusterHeartbeat;
+
+ @Metric("Duration for a get SubCluster call")
+ private MutableRate getSubCluster;
+
+ @Metric("Duration for a get SubClusters call")
+ private MutableRate getSubClusters;
+
+ @Metric("Duration for a get PolicyConfiguration call")
+ private MutableRate getPolicyConfiguration;
+
+ @Metric("Duration for a set PolicyConfiguration call")
+ private MutableRate setPolicyConfiguration;
+
+ @Metric("Duration for a get PolicyConfigurations call")
+ private MutableRate getPoliciesConfigurations;
+ // NOTE(review): no rate field above is assigned in this class; presumably @Metric-injected.
+ protected static final MetricsInfo RECORD_INFO =
+ info("ZKFederationStateStoreOpDurations", "Durations of ZKFederationStateStore calls");
+ // Registry snapshotted by getMetrics(); created and tagged in the constructor.
+ private final MetricsRegistry registry;
+ // Sole instance, created when the class is initialized (the constructor is private).
+ private static final ZKFederationStateStoreOpDurations INSTANCE =
+ new ZKFederationStateStoreOpDurations();
+ /** @return the shared instance. */
+ public static ZKFederationStateStoreOpDurations getInstance() {
+ return INSTANCE;
+ }
+ // Creates the registry and registers this source with the default metrics system, if any.
+ private ZKFederationStateStoreOpDurations() {
+ registry = new MetricsRegistry(RECORD_INFO);
+ registry.tag(RECORD_INFO, "ZKFederationStateStoreOpDurations");
+
+ MetricsSystem ms = DefaultMetricsSystem.instance();
+ if (ms != null) {
+ ms.register(RECORD_INFO.name(), RECORD_INFO.description(), this);
+ }
+ }
+ // Snapshots the registry into a new record on the collector; 'all' is passed through.
+ @Override
+ public synchronized void getMetrics(MetricsCollector collector, boolean all) {
+ registry.snapshot(collector.addRecord(registry.info()), all);
+ }
+ // add*Duration methods record (endTime - startTime); units presumably ms - TODO confirm.
+ public void addAppHomeSubClusterDuration(long startTime, long endTime) {
+ addAppHomeSubCluster.add(endTime - startTime);
+ }
+
+ public void addUpdateAppHomeSubClusterDuration(long startTime, long endTime) {
+ updateAppHomeSubCluster.add(endTime - startTime);
+ }
+
+ public void addGetAppHomeSubClusterDuration(long startTime, long endTime) {
+ getAppHomeSubCluster.add(endTime - startTime);
+ }
+
+ public void addGetAppsHomeSubClusterDuration(long startTime, long endTime) {
+ getAppsHomeSubCluster.add(endTime - startTime);
+ }
+
+ public void addDeleteAppHomeSubClusterDuration(long startTime, long endTime) {
+ deleteAppHomeSubCluster.add(endTime - startTime);
+ }
+
+ public void addRegisterSubClusterDuration(long startTime, long endTime) {
+ registerSubCluster.add(endTime - startTime);
+ }
+
+ public void addDeregisterSubClusterDuration(long startTime, long endTime) {
+ deregisterSubCluster.add(endTime - startTime);
+ }
+
+ public void addSubClusterHeartbeatDuration(long startTime, long endTime) {
+ subClusterHeartbeat.add(endTime - startTime);
+ }
+
+ public void addGetSubClusterDuration(long startTime, long endTime) {
+ getSubCluster.add(endTime - startTime);
+ }
+
+ public void addGetSubClustersDuration(long startTime, long endTime) {
+ getSubClusters.add(endTime - startTime);
+ }
+
+ public void addGetPolicyConfigurationDuration(long startTime, long endTime) {
+ getPolicyConfiguration.add(endTime - startTime);
+ }
+
+ public void addSetPolicyConfigurationDuration(long startTime, long endTime) {
+ setPolicyConfiguration.add(endTime - startTime);
+ }
+
+ public void addGetPoliciesConfigurationsDuration(long startTime, long endTime) {
+ getPoliciesConfigurations.add(endTime - startTime);
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java
index 888d7aa3d37..5d9b948e5e0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java
@@ -25,6 +25,7 @@ import java.util.Calendar;
import java.util.List;
import java.util.TimeZone;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.curator.ZKCuratorManager;
@@ -84,6 +85,8 @@ import org.apache.hadoop.yarn.server.federation.store.utils.FederationMembership
import org.apache.hadoop.yarn.server.federation.store.utils.FederationPolicyStoreInputValidator;
import org.apache.hadoop.yarn.server.federation.store.utils.FederationStateStoreUtils;
import org.apache.hadoop.yarn.server.records.Version;
+import org.apache.hadoop.yarn.util.Clock;
+import org.apache.hadoop.yarn.util.SystemClock;
import org.apache.zookeeper.data.ACL;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -124,6 +127,12 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
private String membershipZNode;
private String policiesZNode;
+ private volatile Clock clock = SystemClock.getInstance();
+
+ @VisibleForTesting
+ private ZKFederationStateStoreOpDurations opDurations =
+ ZKFederationStateStoreOpDurations.getInstance();
+
@Override
public void init(Configuration conf) throws YarnException {
LOG.info("Initializing ZooKeeper connection");
@@ -153,7 +162,6 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
String errMsg = "Cannot create base directories: " + e.getMessage();
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
}
-
}
@Override
@@ -167,6 +175,7 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
public AddApplicationHomeSubClusterResponse addApplicationHomeSubCluster(
AddApplicationHomeSubClusterRequest request) throws YarnException {
+ long start = clock.getTime();
FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationHomeSubCluster app = request.getApplicationHomeSubCluster();
ApplicationId appId = app.getApplicationId();
@@ -187,7 +196,8 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
String errMsg = "Cannot check app home subcluster for " + appId;
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
}
-
+ long end = clock.getTime();
+ opDurations.addAppHomeSubClusterDuration(start, end);
return AddApplicationHomeSubClusterResponse
.newInstance(homeSubCluster);
}
@@ -198,6 +208,7 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
UpdateApplicationHomeSubClusterRequest request)
throws YarnException {
+ long start = clock.getTime();
FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationHomeSubCluster app = request.getApplicationHomeSubCluster();
ApplicationId appId = app.getApplicationId();
@@ -209,6 +220,9 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
SubClusterId newSubClusterId =
request.getApplicationHomeSubCluster().getHomeSubCluster();
putApp(appId, newSubClusterId, true);
+
+ long end = clock.getTime();
+ opDurations.addUpdateAppHomeSubClusterDuration(start, end);
return UpdateApplicationHomeSubClusterResponse.newInstance();
}
@@ -216,6 +230,7 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
public GetApplicationHomeSubClusterResponse getApplicationHomeSubCluster(
GetApplicationHomeSubClusterRequest request) throws YarnException {
+ long start = clock.getTime();
FederationApplicationHomeSubClusterStoreInputValidator.validate(request);
ApplicationId appId = request.getApplicationId();
SubClusterId homeSubCluster = getApp(appId);
@@ -223,13 +238,15 @@ public class ZookeeperFederationStateStore implements FederationStateStore {
String errMsg = "Application " + appId + " does not exist";
FederationStateStoreUtils.logAndThrowStoreException(LOG, errMsg);
}
- return GetApplicationHomeSubClusterResponse.newInstance(
- ApplicationHomeSubCluster.newInstance(appId, homeSubCluster));
+ long end = clock.getTime();
+ opDurations.addGetAppHomeSubClusterDuration(start, end);
+ return GetApplicationHomeSubClusterResponse.newInstance(appId, homeSubCluster);
}
@Override
public GetApplicationsHomeSubClusterResponse getApplicationsHomeSubCluster(
GetApplicationsHomeSubClusterRequest request) throws YarnException {
+ long start = clock.getTime();
List