HDFS-14403. Cost-based extension to the RPC Fair Call Queue. Contributed by Christopher Gregorian.

2019-05-24 17:09:52 -07:00 · 2019-05-24 17:09:52 -07:00 · 129576f628
parent d023f1f864
commit 129576f628
10 changed files with 639 additions and 192 deletions
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
@ -106,6 +106,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
  public static final String IPC_CALLQUEUE_IMPL_KEY = "callqueue.impl";
  public static final String IPC_SCHEDULER_IMPL_KEY = "scheduler.impl";
  public static final String IPC_IDENTITY_PROVIDER_KEY = "identity-provider.impl";
  public static final String IPC_COST_PROVIDER_KEY = "cost-provider.impl";
  public static final String IPC_BACKOFF_ENABLE = "backoff.enable";
  public static final boolean IPC_BACKOFF_ENABLE_DEFAULT = false;
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallQueueManager.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CallQueueManager.java
@ -198,7 +198,6 @@ public class CallQueueManager<E extends Schedulable>
  }
  // Returns the priority level that the scheduler assigns to the given
  // schedulable. This should be only called once per call and cached in the
  // call object.
  // NOTE(review): whether a call here still increments a per-caller counter
  // depends on the scheduler implementation -- verify before relying on it.
  int getPriorityLevel(Schedulable e) {
    return scheduler.getPriorityLevel(e);
  }
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CostProvider.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/CostProvider.java
@ -0,0 +1,46 @@
 /**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.hadoop.ipc;
 import org.apache.hadoop.conf.Configuration;
 /**
 * Used by {@link DecayRpcScheduler} to get the cost of users' operations. This
 * is configurable using
 * {@link org.apache.hadoop.fs.CommonConfigurationKeys#IPC_COST_PROVIDER_KEY}.
 *
 * <p>{@link DecayRpcScheduler} adds the value returned by
 * {@link #getCost(ProcessingDetails)} to the cost it tracks for the identity
 * that issued the call, and falls back to {@link DefaultCostProvider} when no
 * provider is configured.
 */
 public interface CostProvider {
  /**
   * Initialize this provider using the given configuration, examining only
   * ones which fall within the provided namespace.
   * {@link DecayRpcScheduler} invokes this on a configured provider before
   * asking it for any cost.
   *
   * @param namespace The namespace to use when looking up configurations.
   * @param conf The configuration
   */
  void init(String namespace, Configuration conf);
  /**
   * Get cost from {@link ProcessingDetails} which will be used in scheduler.
   *
   * @param details Process details
   * @return The cost of the call
   */
  long getCost(ProcessingDetails details);
 }
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java
@ -58,8 +58,8 @@ import org.slf4j.LoggerFactory;
 import static org.apache.hadoop.ipc.ProcessingDetails.Timing;
 /**
- * The decay RPC scheduler counts incoming requests in a map, then
+ * The decay RPC scheduler tracks the cost of incoming requests in a map, then
- * decays the counts at a fixed time interval. The scheduler is optimized
+ * decays the costs at a fixed time interval. The scheduler is optimized
 * for large periods (on the order of seconds), as it offloads work to the
 * decay sweep.
 */
@ -77,7 +77,7 @@ public class DecayRpcScheduler implements RpcScheduler,
    "faircallqueue.decay-scheduler.period-ms";
  /**
-   * Decay factor controls how much each count is suppressed by on each sweep.
+   * Decay factor controls how much each cost is suppressed by on each sweep.
   * Valid numbers are &gt; 0 and &lt; 1. Decay factor works in tandem with
   * period
   * to control how long the scheduler remembers an identity.
@ -135,15 +135,15 @@ public class DecayRpcScheduler implements RpcScheduler,
  private static final ObjectWriter WRITER = new ObjectMapper().writer();
  // Track the decayed and raw (no decay) number of calls for each schedulable
-  // identity from all previous decay windows: idx 0 for decayed call count and
+  // identity from all previous decay windows: idx 0 for decayed call cost and
-  // idx 1 for the raw call count
+  // idx 1 for the raw call cost
-  private final ConcurrentHashMap<Object, List<AtomicLong>> callCounts =
+  private final ConcurrentHashMap<Object, List<AtomicLong>> callCosts =
      new ConcurrentHashMap<Object, List<AtomicLong>>();
-  // Should be the sum of all AtomicLongs in decayed callCounts
+  // Should be the sum of all AtomicLongs in decayed callCosts
-  private final AtomicLong totalDecayedCallCount = new AtomicLong();
+  private final AtomicLong totalDecayedCallCost = new AtomicLong();
-  // The sum of all AtomicLongs in raw callCounts
+  // The sum of all AtomicLongs in raw callCosts
-  private final AtomicLong totalRawCallCount = new AtomicLong();
+  private final AtomicLong totalRawCallCost = new AtomicLong();
  // Track total call count and response time in current decay window
@ -161,7 +161,7 @@ public class DecayRpcScheduler implements RpcScheduler,
  // Tune the behavior of the scheduler
  private final long decayPeriodMillis; // How long between each tick
-  private final double decayFactor; // nextCount = currentCount * decayFactor
+  private final double decayFactor; // nextCost = currentCost * decayFactor
  private final int numLevels;
  private final double[] thresholds;
  private final IdentityProvider identityProvider;
@ -171,9 +171,10 @@ public class DecayRpcScheduler implements RpcScheduler,
  private final int topUsersCount; // e.g., report top 10 users' metrics
  private static final double PRECISION = 0.0001;
  private MetricsProxy metricsProxy;
  private final CostProvider costProvider;
  /**
-   * This TimerTask will call decayCurrentCounts until
+   * This TimerTask will call decayCurrentCosts until
   * the scheduler has been garbage collected.
   */
  public static class DecayTask extends TimerTask {
@ -189,7 +190,7 @@ public class DecayRpcScheduler implements RpcScheduler,
    public void run() {
      DecayRpcScheduler sched = schedulerRef.get();
      if (sched != null) {
-        sched.decayCurrentCounts();
+        sched.decayCurrentCosts();
      } else {
        // Our scheduler was garbage collected since it is no longer in use,
        // so we should terminate the timer as well
@ -216,6 +217,7 @@ public class DecayRpcScheduler implements RpcScheduler,
    this.decayFactor = parseDecayFactor(ns, conf);
    this.decayPeriodMillis = parseDecayPeriodMillis(ns, conf);
    this.identityProvider = this.parseIdentityProvider(ns, conf);
    this.costProvider = this.parseCostProvider(ns, conf);
    this.thresholds = parseThresholds(ns, conf, numLevels);
    this.backOffByResponseTimeEnabled = parseBackOffByResponseTimeEnabled(ns,
        conf);
@ -243,6 +245,24 @@ public class DecayRpcScheduler implements RpcScheduler,
    recomputeScheduleCache();
  }
  /**
   * Resolve the {@link CostProvider} configured for the given namespace.
   * Falls back to a {@link DefaultCostProvider} when none is configured; when
   * several are configured, the first one wins and a warning is logged.
   *
   * @param ns the namespace prefix of the configuration key
   * @param conf the configuration to read the provider class(es) from
   * @return the default provider, or the first configured provider after it
   *         has been initialized with {@code ns} and {@code conf}
   */
  private CostProvider parseCostProvider(String ns, Configuration conf) {
    final String providerKey =
        ns + "." + CommonConfigurationKeys.IPC_COST_PROVIDER_KEY;
    final List<CostProvider> candidates =
        conf.getInstances(providerKey, CostProvider.class);
    if (candidates.isEmpty()) {
      LOG.info("CostProvider not specified, defaulting to DefaultCostProvider");
      return new DefaultCostProvider();
    }
    // Only the first configured provider is ever used.
    final CostProvider chosen = candidates.get(0);
    if (candidates.size() > 1) {
      LOG.warn("Found multiple CostProviders; using: {}", chosen.getClass());
    }
    chosen.init(ns, conf);
    return chosen;
  }
  // Load configs
  private IdentityProvider parseIdentityProvider(String ns,
      Configuration conf) {
@ -389,69 +409,69 @@ public class DecayRpcScheduler implements RpcScheduler,
  }
  /**
-   * Decay the stored counts for each user and clean as necessary.
+   * Decay the stored costs for each user and clean as necessary.
   * This method should be called periodically in order to keep
-   * counts current.
+   * costs current.
   */
-  private void decayCurrentCounts() {
+  private void decayCurrentCosts() {
-    LOG.debug("Start to decay current counts.");
+    LOG.debug("Start to decay current costs.");
    try {
-      long totalDecayedCount = 0;
+      long totalDecayedCost = 0;
-      long totalRawCount = 0;
+      long totalRawCost = 0;
      Iterator<Map.Entry<Object, List<AtomicLong>>> it =
-          callCounts.entrySet().iterator();
+          callCosts.entrySet().iterator();
      while (it.hasNext()) {
        Map.Entry<Object, List<AtomicLong>> entry = it.next();
-        AtomicLong decayedCount = entry.getValue().get(0);
+        AtomicLong decayedCost = entry.getValue().get(0);
-        AtomicLong rawCount = entry.getValue().get(1);
+        AtomicLong rawCost = entry.getValue().get(1);
        // Compute the next value by reducing it by the decayFactor
-        totalRawCount += rawCount.get();
+        totalRawCost += rawCost.get();
-        long currentValue = decayedCount.get();
+        long currentValue = decayedCost.get();
        long nextValue = (long) (currentValue * decayFactor);
-        totalDecayedCount += nextValue;
+        totalDecayedCost += nextValue;
-        decayedCount.set(nextValue);
+        decayedCost.set(nextValue);
-        LOG.debug("Decaying counts for the user: {}, " +
+        LOG.debug(
-            "its decayedCount: {}, rawCount: {}", entry.getKey(),
+            "Decaying costs for the user: {}, its decayedCost: {}, rawCost: {}",
-            nextValue, rawCount.get());
+            entry.getKey(), nextValue, rawCost.get());
        if (nextValue == 0) {
-          LOG.debug("The decayed count for the user {} is zero " +
+          LOG.debug("The decayed cost for the user {} is zero " +
              "and being cleaned.", entry.getKey());
          // We will clean up unused keys here. An interesting optimization
-          // might be to have an upper bound on keyspace in callCounts and only
+          // might be to have an upper bound on keyspace in callCosts and only
          // clean once we pass it.
          it.remove();
        }
      }
      // Update the total so that we remain in sync
-      totalDecayedCallCount.set(totalDecayedCount);
+      totalDecayedCallCost.set(totalDecayedCost);
-      totalRawCallCount.set(totalRawCount);
+      totalRawCallCost.set(totalRawCost);
-      LOG.debug("After decaying the stored counts, totalDecayedCount: {}, " +
+      LOG.debug("After decaying the stored costs, totalDecayedCost: {}, " +
-          "totalRawCallCount: {}.", totalDecayedCount, totalRawCount);
+          "totalRawCallCost: {}.", totalDecayedCost, totalRawCost);
      // Now refresh the cache of scheduling decisions
      recomputeScheduleCache();
      // Update average response time with decay
      updateAverageResponseTime(true);
    } catch (Exception ex) {
-      LOG.error("decayCurrentCounts exception: " +
+      LOG.error("decayCurrentCosts exception: " +
          ExceptionUtils.getStackTrace(ex));
      throw ex;
    }
  }
  /**
-   * Update the scheduleCache to match current conditions in callCounts.
+   * Update the scheduleCache to match current conditions in callCosts.
   */
  private void recomputeScheduleCache() {
    Map<Object, Integer> nextCache = new HashMap<Object, Integer>();
-    for (Map.Entry<Object, List<AtomicLong>> entry : callCounts.entrySet()) {
+    for (Map.Entry<Object, List<AtomicLong>> entry : callCosts.entrySet()) {
      Object id = entry.getKey();
      AtomicLong value = entry.getValue().get(0);
@ -466,51 +486,52 @@ public class DecayRpcScheduler implements RpcScheduler,
  }
  /**
-   * Get the number of occurrences and increment atomically.
+   * Adjust the stored cost for a given identity.
-   * @param identity the identity of the user to increment
+   *
-   * @return the value before incrementation
+   * @param identity the identity of the user whose cost should be adjusted
   * @param costDelta the cost to add for the given identity
   */
-  private long getAndIncrementCallCounts(Object identity)
+  private void addCost(Object identity, long costDelta) {
-      throws InterruptedException {
+    // We will increment the cost, or create it if no such cost exists
-    // We will increment the count, or create it if no such count exists
+    List<AtomicLong> cost = this.callCosts.get(identity);
-    List<AtomicLong> count = this.callCounts.get(identity);
+    if (cost == null) {
-    if (count == null) {
+      // Create the costs since no such cost exists.
-      // Create the counts since no such count exists.
+      // idx 0 for decayed call cost
-      // idx 0 for decayed call count
+      // idx 1 for the raw call cost
-      // idx 1 for the raw call count
+      cost = new ArrayList<AtomicLong>(2);
-      count = new ArrayList<AtomicLong>(2);
+      cost.add(new AtomicLong(0));
-      count.add(new AtomicLong(0));
+      cost.add(new AtomicLong(0));
      count.add(new AtomicLong(0));
      // Put it in, or get the AtomicInteger that was put in by another thread
-      List<AtomicLong> otherCount = callCounts.putIfAbsent(identity, count);
+      List<AtomicLong> otherCost = callCosts.putIfAbsent(identity, cost);
-      if (otherCount != null) {
+      if (otherCost != null) {
-        count = otherCount;
+        cost = otherCost;
      }
    }
    // Update the total
-    totalDecayedCallCount.getAndIncrement();
+    totalDecayedCallCost.getAndAdd(costDelta);
-    totalRawCallCount.getAndIncrement();
+    totalRawCallCost.getAndAdd(costDelta);
    // At this point value is guaranteed to be not null. It may however have
-    // been clobbered from callCounts. Nonetheless, we return what
+    // been clobbered from callCosts. Nonetheless, we return what
    // we have.
-    count.get(1).getAndIncrement();
+    cost.get(1).getAndAdd(costDelta);
-    return count.get(0).getAndIncrement();
+    cost.get(0).getAndAdd(costDelta);
  }
  /**
-   * Given the number of occurrences, compute a scheduling decision.
+   * Given the cost for an identity, compute a scheduling decision.
-   * @param occurrences how many occurrences
+   *
   * @param cost the cost for an identity
   * @return scheduling decision from 0 to numLevels - 1
   */
-  private int computePriorityLevel(long occurrences) {
+  private int computePriorityLevel(long cost) {
-    long totalCallSnapshot = totalDecayedCallCount.get();
+    long totalCallSnapshot = totalDecayedCallCost.get();
    double proportion = 0;
    if (totalCallSnapshot > 0) {
-      proportion = (double) occurrences / totalCallSnapshot;
+      proportion = (double) cost / totalCallSnapshot;
    }
    // Start with low priority levels, since they will be most common
@ -531,31 +552,23 @@ public class DecayRpcScheduler implements RpcScheduler,
   * @return integer scheduling decision from 0 to numLevels - 1
   */
  private int cachedOrComputedPriorityLevel(Object identity) {
-    try {
+    // Try the cache
-      long occurrences = this.getAndIncrementCallCounts(identity);
+    Map<Object, Integer> scheduleCache = scheduleCacheRef.get();
-
+    if (scheduleCache != null) {
-      // Try the cache
+      Integer priority = scheduleCache.get(identity);
-      Map<Object, Integer> scheduleCache = scheduleCacheRef.get();
+      if (priority != null) {
-      if (scheduleCache != null) {
+        LOG.debug("Cache priority for: {} with priority: {}", identity,
-        Integer priority = scheduleCache.get(identity);
+            priority);
-        if (priority != null) {
+        return priority;
          LOG.debug("Cache priority for: {} with priority: {}", identity,
              priority);
          return priority;
        }
      }
      // Cache was no good, compute it
      int priority = computePriorityLevel(occurrences);
      LOG.debug("compute priority for " + identity + " priority " + priority);
      return priority;
    } catch (InterruptedException ie) {
      LOG.warn("Caught InterruptedException, returning low priority level");
      LOG.debug("Fallback priority for: {} with priority: {}", identity,
          numLevels - 1);
      return numLevels - 1;
    }
    // Cache was no good, compute it
    List<AtomicLong> costList = callCosts.get(identity);
    long currentCost = costList == null ? 0 : costList.get(0).get();
    int priority = computePriorityLevel(currentCost);
    LOG.debug("compute priority for {} priority {}", identity, priority);
    return priority;
  }
  /**
@ -605,6 +618,10 @@ public class DecayRpcScheduler implements RpcScheduler,
  @Override
  public void addResponseTime(String callName, Schedulable schedulable,
      ProcessingDetails details) {
    String user = identityProvider.makeIdentity(schedulable);
    long processingCost = costProvider.getCost(details);
    addCost(user, processingCost);
    int priorityLevel = schedulable.getPriorityLevel();
    long queueTime = details.get(Timing.QUEUE, TimeUnit.MILLISECONDS);
    long processingTime = details.get(Timing.PROCESSING, TimeUnit.MILLISECONDS);
@ -652,22 +669,30 @@ public class DecayRpcScheduler implements RpcScheduler,
  // For testing
  @VisibleForTesting
-  public double getDecayFactor() { return decayFactor; }
+  double getDecayFactor() {
    return decayFactor;
  }
  @VisibleForTesting
-  public long getDecayPeriodMillis() { return decayPeriodMillis; }
+  long getDecayPeriodMillis() {
    return decayPeriodMillis;
  }
  @VisibleForTesting
-  public double[] getThresholds() { return thresholds; }
+  double[] getThresholds() {
    return thresholds;
  }
  @VisibleForTesting
-  public void forceDecay() { decayCurrentCounts(); }
+  void forceDecay() {
    decayCurrentCosts();
  }
  @VisibleForTesting
-  public Map<Object, Long> getCallCountSnapshot() {
+  Map<Object, Long> getCallCostSnapshot() {
    HashMap<Object, Long> snapshot = new HashMap<Object, Long>();
-    for (Map.Entry<Object, List<AtomicLong>> entry : callCounts.entrySet()) {
+    for (Map.Entry<Object, List<AtomicLong>> entry : callCosts.entrySet()) {
      snapshot.put(entry.getKey(), entry.getValue().get(0).get());
    }
@ -675,8 +700,8 @@ public class DecayRpcScheduler implements RpcScheduler,
  }
  @VisibleForTesting
-  public long getTotalCallSnapshot() {
+  long getTotalCallSnapshot() {
-    return totalDecayedCallCount.get();
+    return totalDecayedCallCost.get();
  }
  /**
@ -809,15 +834,15 @@ public class DecayRpcScheduler implements RpcScheduler,
  }
  public int getUniqueIdentityCount() {
-    return callCounts.size();
+    return callCosts.size();
  }
  public long getTotalCallVolume() {
-    return totalDecayedCallCount.get();
+    return totalDecayedCallCost.get();
  }
  public long getTotalRawCallVolume() {
-    return totalRawCallCount.get();
+    return totalRawCallCost.get();
  }
  public long[] getResponseTimeCountInLastWindow() {
@ -910,17 +935,17 @@ public class DecayRpcScheduler implements RpcScheduler,
    }
  }
-  // Get the top N callers' raw call count and scheduler decision
+  // Get the top N callers' raw call cost and scheduler decision
  private TopN getTopCallers(int n) {
    TopN topNCallers = new TopN(n);
    Iterator<Map.Entry<Object, List<AtomicLong>>> it =
-        callCounts.entrySet().iterator();
+        callCosts.entrySet().iterator();
    while (it.hasNext()) {
      Map.Entry<Object, List<AtomicLong>> entry = it.next();
      String caller = entry.getKey().toString();
-      Long count = entry.getValue().get(1).get();
+      Long cost = entry.getValue().get(1).get();
-      if (count > 0) {
+      if (cost > 0) {
-        topNCallers.offer(new NameValuePair(caller, count));
+        topNCallers.offer(new NameValuePair(caller, cost));
      }
    }
    return topNCallers;
@ -941,25 +966,25 @@ public class DecayRpcScheduler implements RpcScheduler,
  public String getCallVolumeSummary() {
    try {
-      return WRITER.writeValueAsString(getDecayedCallCounts());
+      return WRITER.writeValueAsString(getDecayedCallCosts());
    } catch (Exception e) {
      return "Error: " + e.getMessage();
    }
  }
-  private Map<Object, Long> getDecayedCallCounts() {
+  private Map<Object, Long> getDecayedCallCosts() {
-    Map<Object, Long> decayedCallCounts = new HashMap<>(callCounts.size());
+    Map<Object, Long> decayedCallCosts = new HashMap<>(callCosts.size());
    Iterator<Map.Entry<Object, List<AtomicLong>>> it =
-        callCounts.entrySet().iterator();
+        callCosts.entrySet().iterator();
    while (it.hasNext()) {
      Map.Entry<Object, List<AtomicLong>> entry = it.next();
      Object user = entry.getKey();
-      Long decayedCount = entry.getValue().get(0).get();
+      Long decayedCost = entry.getValue().get(0).get();
-      if (decayedCount > 0) {
+      if (decayedCost > 0) {
-        decayedCallCounts.put(user, decayedCount);
+        decayedCallCosts.put(user, decayedCost);
      }
    }
-    return decayedCallCounts;
+    return decayedCallCosts;
  }
  @Override
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DefaultCostProvider.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DefaultCostProvider.java
@ -0,0 +1,43 @@
 /**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.hadoop.ipc;
 import org.apache.hadoop.conf.Configuration;
 /**
 * A {@link CostProvider} that charges every call the same constant cost,
 * without looking at the call's processing details.
 */
 public class DefaultCostProvider implements CostProvider {
  /** The flat cost charged for each call. */
  private static final long COST_PER_CALL = 1L;

  /**
   * Does nothing; this provider has no configurable state.
   *
   * @param namespace The configuration namespace (ignored)
   * @param conf The configuration (ignored)
   */
  @Override
  public void init(String namespace, Configuration conf) {
    // Nothing to configure
  }

  /**
   * Charges a flat cost of 1 for every call.
   *
   * @param details Process details (ignored)
   * @return 1
   */
  @Override
  public long getCost(ProcessingDetails details) {
    return COST_PER_CALL;
  }
 }
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedTimeCostProvider.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WeightedTimeCostProvider.java
@ -0,0 +1,110 @@
 /**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.hadoop.ipc;
 import java.util.Locale;
 import org.apache.hadoop.conf.Configuration;
 import static org.apache.hadoop.ipc.ProcessingDetails.Timing;
 /**
 * A {@link CostProvider} that calculates the cost for an operation
 * as a weighted sum of its processing time values (see
 * {@link ProcessingDetails}). This can be used by specifying the
 * {@link org.apache.hadoop.fs.CommonConfigurationKeys#IPC_COST_PROVIDER_KEY}
 * configuration key.
 *
 * <p>This allows for configuration of how heavily each of the operations
 * within {@link ProcessingDetails} is weighted. By default,
 * {@link ProcessingDetails.Timing#LOCKFREE},
 * {@link ProcessingDetails.Timing#RESPONSE}, and
 * {@link ProcessingDetails.Timing#HANDLER} times have a weight of
 * {@value #DEFAULT_LOCKFREE_WEIGHT},
 * {@link ProcessingDetails.Timing#LOCKSHARED} has a weight of
 * {@value #DEFAULT_LOCKSHARED_WEIGHT},
 * {@link ProcessingDetails.Timing#LOCKEXCLUSIVE} has a weight of
 * {@value #DEFAULT_LOCKEXCLUSIVE_WEIGHT}, and others are ignored.
 * These values can all be configured using the {@link #WEIGHT_CONFIG_PREFIX}
 * key, prefixed with the IPC namespace, and suffixed with the name of the
 * timing measurement from {@link ProcessingDetails} (all lowercase).
 * For example, to set the lock exclusive weight to be 1000, set:
 * <pre>
 *   ipc.8020.cost-provider.impl=org.apache.hadoop.ipc.WeightedTimeCostProvider
 *   ipc.8020.weighted-cost.lockexclusive=1000
 * </pre>
 */
 public class WeightedTimeCostProvider implements CostProvider {
  /**
   * The prefix used in configuration values specifying the weight to use when
   * determining the cost of an operation. See the class Javadoc for more info.
   */
  public static final String WEIGHT_CONFIG_PREFIX = ".weighted-cost.";
  static final int DEFAULT_LOCKFREE_WEIGHT = 1;
  static final int DEFAULT_LOCKSHARED_WEIGHT = 10;
  static final int DEFAULT_LOCKEXCLUSIVE_WEIGHT = 100;

  // Enum.values() returns a fresh copy of the constants array on every call.
  // getCost() is invoked per call, so the array is fetched once and reused.
  private static final Timing[] TIMINGS = Timing.values();

  // Weight multiplier for each timing, indexed by Timing.ordinal().
  // Populated by init(); null until then.
  private long[] weights;

  /**
   * Reads the weight for every {@link ProcessingDetails.Timing} from the
   * configuration, using the key
   * {@code namespace + WEIGHT_CONFIG_PREFIX + <lowercase timing name>} and
   * falling back to the defaults described in the class Javadoc.
   *
   * @param namespace The namespace to use when looking up configurations.
   * @param conf The configuration
   */
  @Override
  public void init(String namespace, Configuration conf) {
    weights = new long[TIMINGS.length];
    for (Timing timing : TIMINGS) {
      final int defaultValue;
      switch (timing) {
      case LOCKFREE:
      case RESPONSE:
      case HANDLER:
        defaultValue = DEFAULT_LOCKFREE_WEIGHT;
        break;
      case LOCKSHARED:
        defaultValue = DEFAULT_LOCKSHARED_WEIGHT;
        break;
      case LOCKEXCLUSIVE:
        defaultValue = DEFAULT_LOCKEXCLUSIVE_WEIGHT;
        break;
      default:
        // by default don't bill for queueing or lock wait time
        defaultValue = 0;
      }
      String key = namespace + WEIGHT_CONFIG_PREFIX
          + timing.name().toLowerCase(Locale.ENGLISH);
      weights[timing.ordinal()] = conf.getInt(key, defaultValue);
    }
  }

  /**
   * Calculates a weighted sum of the times stored on the provided processing
   * details to be used as the cost in {@link DecayRpcScheduler}.
   *
   * <p>{@link #init(String, Configuration)} must have been called first. The
   * sum uses plain {@code long} arithmetic and is not overflow-checked.
   *
   * @param details Processing details
   * @return The weighted sum of the times. The returned unit is the same
   *         as the default unit used by the provided processing details.
   */
  @Override
  public long getCost(ProcessingDetails details) {
    assert weights != null : "Cost provider must be initialized before use";
    long cost = 0;
    // weights was initialized to the same length as Timing.values(), and
    // TIMINGS is in ordinal order, so each timing pairs with its own weight
    for (Timing timing : TIMINGS) {
      cost += details.get(timing) * weights[timing.ordinal()];
    }
    return cost;
  }
 }
--- a/hadoop-common-project/hadoop-common/src/site/markdown/FairCallQueue.md
+++ b/hadoop-common-project/hadoop-common/src/site/markdown/FairCallQueue.md
@ -91,6 +91,21 @@ This is configurable via the **identity provider**, which defaults to the **User
 provider simply uses the username of the client submitting the request. However, a custom identity provider can be used
 to perform throttling based on other groupings, or using an external identity provider.
 ### Cost-based Fair Call Queue
 Though the fair call queue itself does a good job of mitigating the impact from users who submit a very high _number_
 of requests, it does not take into account how expensive each request is to process. Thus, when considering the
 HDFS NameNode, a user who submits 1000 "getFileInfo" requests would be prioritized the same as a user who submits 1000
 "listStatus" requests on some very large directory, or a user who submits 1000 "mkdir" requests, which are more
 expensive as they require an exclusive lock on the namesystem. To account for the _cost_ of an operation when
 considering the prioritization of user requests, there is a "cost-based" extension to the Fair Call Queue which uses
 the aggregate processing time of a user's operations to determine how that user should be prioritized. By default,
 queue time (time spent waiting to be processed) and lock wait time (time spent waiting to acquire a lock) are not
 considered in the cost, time spent processing without a lock is neutrally (1x) weighted, time spent processing with a
 shared lock is weighted 10x higher, and time spent processing with an exclusive lock is weighted 100x higher.
 This attempts to prioritize users based on the actual load they place on the server. To enable this feature, set the
 `cost-provider.impl` configuration to `org.apache.hadoop.ipc.WeightedTimeCostProvider` as described below.
 Configuration
 -------------
@ -115,12 +130,16 @@ omitted.
 | scheduler.priority.levels | RpcScheduler, CallQueue | How many priority levels to use within the scheduler and call queue. | 4 |
 | faircallqueue.multiplexer.weights | WeightedRoundRobinMultiplexer | How much weight to give to each priority queue. This should be a comma-separated list of length equal to the number of priority levels. | Weights descend by a factor of 2 (e.g., for 4 levels: `8,4,2,1`) |
 | identity-provider.impl | DecayRpcScheduler | The identity provider mapping user requests to their identity. | org.apache.hadoop.ipc.UserIdentityProvider |
 | cost-provider.impl | DecayRpcScheduler | The cost provider mapping user requests to their cost. To enable determination of cost based on processing time, use `org.apache.hadoop.ipc.WeightedTimeCostProvider`. | org.apache.hadoop.ipc.DefaultCostProvider |
 | decay-scheduler.period-ms | DecayRpcScheduler | How frequently the decay factor should be applied to the operation counts of users. Higher values have less overhead, but respond less quickly to changes in client behavior. | 5000 |
 | decay-scheduler.decay-factor | DecayRpcScheduler | When decaying the operation counts of users, the multiplicative decay factor to apply. Higher values will weight older operations more strongly, essentially giving the scheduler a longer memory, and penalizing heavy clients for a longer period of time. | 0.5 |
 | decay-scheduler.thresholds | DecayRpcScheduler | The client load threshold, as an integer percentage, for each priority queue. Clients producing less load, as a percent of total operations, than specified at position _i_ will be given priority _i_. This should be a comma-separated list of length equal to the number of priority levels minus 1 (the last is implicitly 100). | Thresholds ascend by a factor of 2 (e.g., for 4 levels: `13,25,50`) |
 | decay-scheduler.backoff.responsetime.enable | DecayRpcScheduler | Whether or not to enable the backoff by response time feature. | false |
 | decay-scheduler.backoff.responsetime.thresholds | DecayRpcScheduler | The response time thresholds, as time durations, for each priority queue. If the average response time for a queue is above this threshold, backoff will occur in lower priority queues. This should be a comma-separated list of length equal to the number of priority levels. | Threshold increases by 10s per level (e.g., for 4 levels: `10s,20s,30s,40s`) |
 | decay-scheduler.metrics.top.user.count | DecayRpcScheduler | The number of top (i.e., heaviest) users to emit metric information about. | 10 |
 | weighted-cost.lockshared | WeightedTimeCostProvider | The weight multiplier to apply to the time spent in the processing phase which holds a shared (read) lock. | 10 |
 | weighted-cost.lockexclusive | WeightedTimeCostProvider | The weight multiplier to apply to the time spent in the processing phase which holds an exclusive (write) lock. | 100 |
 | weighted-cost.{handler,lockfree,response} | WeightedTimeCostProvider | The weight multiplier to apply to the time spent in the processing phases which do not involve holding a lock. See `org.apache.hadoop.ipc.ProcessingDetails.Timing` for more details on each phase. | 1 |
 ### Example Configuration
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestDecayRpcScheduler.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestDecayRpcScheduler.java
@ -26,6 +26,7 @@ import static org.junit.Assert.*;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 import org.apache.hadoop.fs.CommonConfigurationKeys;
 import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.conf.Configuration;
@ -36,6 +37,7 @@ import javax.management.ObjectName;
 import java.io.ByteArrayOutputStream;
 import java.io.PrintStream;
 import java.lang.management.ManagementFactory;
 import java.util.concurrent.TimeUnit;
 public class TestDecayRpcScheduler {
  private Schedulable mockCall(String id) {
@ -131,67 +133,69 @@ public class TestDecayRpcScheduler {
    conf.set("ns." + DecayRpcScheduler.IPC_FCQ_DECAYSCHEDULER_PERIOD_KEY, "99999999"); // Never flush
    scheduler = new DecayRpcScheduler(1, "ns", conf);
-    assertEquals(0, scheduler.getCallCountSnapshot().size()); // empty first
+    assertEquals(0, scheduler.getCallCostSnapshot().size()); // empty first
-    scheduler.getPriorityLevel(mockCall("A"));
+    getPriorityIncrementCallCount("A");
-    assertEquals(1, scheduler.getCallCountSnapshot().get("A").longValue());
+    assertEquals(1, scheduler.getCallCostSnapshot().get("A").longValue());
-    assertEquals(1, scheduler.getCallCountSnapshot().get("A").longValue());
+    assertEquals(1, scheduler.getCallCostSnapshot().get("A").longValue());
-    scheduler.getPriorityLevel(mockCall("A"));
+    getPriorityIncrementCallCount("A");
-    scheduler.getPriorityLevel(mockCall("B"));
+    getPriorityIncrementCallCount("B");
-    scheduler.getPriorityLevel(mockCall("A"));
+    getPriorityIncrementCallCount("A");
-    assertEquals(3, scheduler.getCallCountSnapshot().get("A").longValue());
+    assertEquals(3, scheduler.getCallCostSnapshot().get("A").longValue());
-    assertEquals(1, scheduler.getCallCountSnapshot().get("B").longValue());
+    assertEquals(1, scheduler.getCallCostSnapshot().get("B").longValue());
  }
  @Test
  @SuppressWarnings("deprecation")
  public void testDecay() throws Exception {
    Configuration conf = new Configuration();
-    conf.set("ns." + DecayRpcScheduler.IPC_FCQ_DECAYSCHEDULER_PERIOD_KEY, "999999999"); // Never
+    conf.setLong("ns." // Never decay
-    conf.set("ns." + DecayRpcScheduler.IPC_FCQ_DECAYSCHEDULER_FACTOR_KEY, "0.5");
+        + DecayRpcScheduler.IPC_SCHEDULER_DECAYSCHEDULER_PERIOD_KEY, 999999999);
    conf.setDouble("ns."
        + DecayRpcScheduler.IPC_SCHEDULER_DECAYSCHEDULER_FACTOR_KEY, 0.5);
    scheduler = new DecayRpcScheduler(1, "ns", conf);
    assertEquals(0, scheduler.getTotalCallSnapshot());
    for (int i = 0; i < 4; i++) {
-      scheduler.getPriorityLevel(mockCall("A"));
+      getPriorityIncrementCallCount("A");
    }
    sleep(1000);
    for (int i = 0; i < 8; i++) {
-      scheduler.getPriorityLevel(mockCall("B"));
+      getPriorityIncrementCallCount("B");
    }
    assertEquals(12, scheduler.getTotalCallSnapshot());
-    assertEquals(4, scheduler.getCallCountSnapshot().get("A").longValue());
+    assertEquals(4, scheduler.getCallCostSnapshot().get("A").longValue());
-    assertEquals(8, scheduler.getCallCountSnapshot().get("B").longValue());
+    assertEquals(8, scheduler.getCallCostSnapshot().get("B").longValue());
    scheduler.forceDecay();
    assertEquals(6, scheduler.getTotalCallSnapshot());
-    assertEquals(2, scheduler.getCallCountSnapshot().get("A").longValue());
+    assertEquals(2, scheduler.getCallCostSnapshot().get("A").longValue());
-    assertEquals(4, scheduler.getCallCountSnapshot().get("B").longValue());
+    assertEquals(4, scheduler.getCallCostSnapshot().get("B").longValue());
    scheduler.forceDecay();
    assertEquals(3, scheduler.getTotalCallSnapshot());
-    assertEquals(1, scheduler.getCallCountSnapshot().get("A").longValue());
+    assertEquals(1, scheduler.getCallCostSnapshot().get("A").longValue());
-    assertEquals(2, scheduler.getCallCountSnapshot().get("B").longValue());
+    assertEquals(2, scheduler.getCallCostSnapshot().get("B").longValue());
    scheduler.forceDecay();
    assertEquals(1, scheduler.getTotalCallSnapshot());
-    assertEquals(null, scheduler.getCallCountSnapshot().get("A"));
+    assertEquals(null, scheduler.getCallCostSnapshot().get("A"));
-    assertEquals(1, scheduler.getCallCountSnapshot().get("B").longValue());
+    assertEquals(1, scheduler.getCallCostSnapshot().get("B").longValue());
    scheduler.forceDecay();
    assertEquals(0, scheduler.getTotalCallSnapshot());
-    assertEquals(null, scheduler.getCallCountSnapshot().get("A"));
+    assertEquals(null, scheduler.getCallCostSnapshot().get("A"));
-    assertEquals(null, scheduler.getCallCountSnapshot().get("B"));
+    assertEquals(null, scheduler.getCallCostSnapshot().get("B"));
  }
  @Test
@ -205,16 +209,16 @@ public class TestDecayRpcScheduler {
        .IPC_FCQ_DECAYSCHEDULER_THRESHOLDS_KEY, "25, 50, 75");
    scheduler = new DecayRpcScheduler(4, namespace, conf);
-    assertEquals(0, scheduler.getPriorityLevel(mockCall("A")));
+    assertEquals(0, getPriorityIncrementCallCount("A")); // 0 out of 0 calls
-    assertEquals(2, scheduler.getPriorityLevel(mockCall("A")));
+    assertEquals(3, getPriorityIncrementCallCount("A")); // 1 out of 1 calls
-    assertEquals(0, scheduler.getPriorityLevel(mockCall("B")));
+    assertEquals(0, getPriorityIncrementCallCount("B")); // 0 out of 2 calls
-    assertEquals(1, scheduler.getPriorityLevel(mockCall("B")));
+    assertEquals(1, getPriorityIncrementCallCount("B")); // 1 out of 3 calls
-    assertEquals(0, scheduler.getPriorityLevel(mockCall("C")));
+    assertEquals(0, getPriorityIncrementCallCount("C")); // 0 out of 4 calls
-    assertEquals(0, scheduler.getPriorityLevel(mockCall("C")));
+    assertEquals(0, getPriorityIncrementCallCount("C")); // 1 out of 5 calls
-    assertEquals(1, scheduler.getPriorityLevel(mockCall("A")));
+    assertEquals(1, getPriorityIncrementCallCount("A")); // 2 out of 6 calls
-    assertEquals(1, scheduler.getPriorityLevel(mockCall("A")));
+    assertEquals(1, getPriorityIncrementCallCount("A")); // 3 out of 7 calls
-    assertEquals(1, scheduler.getPriorityLevel(mockCall("A")));
+    assertEquals(2, getPriorityIncrementCallCount("A")); // 4 out of 8 calls
-    assertEquals(2, scheduler.getPriorityLevel(mockCall("A")));
+    assertEquals(2, getPriorityIncrementCallCount("A")); // 5 out of 9 calls
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    ObjectName mxbeanName = new ObjectName(
@ -243,7 +247,7 @@ public class TestDecayRpcScheduler {
    assertEquals(0, scheduler.getTotalCallSnapshot());
    for (int i = 0; i < 64; i++) {
-      scheduler.getPriorityLevel(mockCall("A"));
+      getPriorityIncrementCallCount("A");
    }
    // It should eventually decay to zero
@ -272,6 +276,108 @@ public class TestDecayRpcScheduler {
      //set systout back
      System.setOut(output);
    }
  }
  /**
   * Checks that a scheduler built with {@link WeightedTimeCostProvider}
   * ranks callers by the weighted cost of their completed calls rather than
   * by how many calls they made, and that every caller returns to
   * priority 0 once the accumulated cost has been decayed away.
   */
  @Test
  public void testUsingWeightedTimeCostProvider() {
    scheduler = getSchedulerWithWeightedTimeCostProvider(3);
    // 3 details in increasing order of cost. Although medium has a longer
    // duration, the shared lock is weighted less than the exclusive lock
    ProcessingDetails callDetailsLow =
        new ProcessingDetails(TimeUnit.MILLISECONDS);
    callDetailsLow.set(ProcessingDetails.Timing.LOCKFREE, 1);
    ProcessingDetails callDetailsMedium =
        new ProcessingDetails(TimeUnit.MILLISECONDS);
    callDetailsMedium.set(ProcessingDetails.Timing.LOCKSHARED, 500);
    ProcessingDetails callDetailsHigh =
        new ProcessingDetails(TimeUnit.MILLISECONDS);
    callDetailsHigh.set(ProcessingDetails.Timing.LOCKEXCLUSIVE, 100);
    // LOW completes ten cheap calls; MED and HIGH complete one call each.
    // NOTE(review): assuming the default weights (lockfree 1, lockshared 10,
    // lockexclusive 100) the accumulated costs are roughly 10, 5000 and
    // 10000 respectively -- confirm against WeightedTimeCostProvider.
    for (int i = 0; i < 10; i++) {
      scheduler.addResponseTime("ignored", mockCall("LOW"), callDetailsLow);
    }
    scheduler.addResponseTime("ignored", mockCall("MED"), callDetailsMedium);
    scheduler.addResponseTime("ignored", mockCall("HIGH"), callDetailsHigh);
    // Despite making the most calls, LOW is expected at the best priority (0)
    assertEquals(0, scheduler.getPriorityLevel(mockCall("LOW")));
    assertEquals(1, scheduler.getPriorityLevel(mockCall("MED")));
    assertEquals(2, scheduler.getPriorityLevel(mockCall("HIGH")));
    assertEquals(3, scheduler.getUniqueIdentityCount());
    // Before any decay the raw and the decayed volume are expected to match
    long totalCallInitial = scheduler.getTotalRawCallVolume();
    assertEquals(totalCallInitial, scheduler.getTotalCallVolume());
    scheduler.forceDecay();
    // Relative priorities should stay the same after a single decay
    assertEquals(0, scheduler.getPriorityLevel(mockCall("LOW")));
    assertEquals(1, scheduler.getPriorityLevel(mockCall("MED")));
    assertEquals(2, scheduler.getPriorityLevel(mockCall("HIGH")));
    assertEquals(3, scheduler.getUniqueIdentityCount());
    // Decay must shrink only the decayed volume; the raw volume is unchanged
    assertEquals(totalCallInitial, scheduler.getTotalRawCallVolume());
    assertTrue(scheduler.getTotalCallVolume() < totalCallInitial);
    for (int i = 0; i < 100; i++) {
      scheduler.forceDecay();
    }
    // After enough decay cycles, all callers should be high priority again
    assertEquals(0, scheduler.getPriorityLevel(mockCall("LOW")));
    assertEquals(0, scheduler.getPriorityLevel(mockCall("MED")));
    assertEquals(0, scheduler.getPriorityLevel(mockCall("HIGH")));
  }
  /**
   * Records calls whose processing details carry no timings at all and
   * checks that a caller with 1000 such calls is ranked the same as a
   * caller with a single one.
   */
  @Test
  public void testUsingWeightedTimeCostProviderWithZeroCostCalls() {
    scheduler = getSchedulerWithWeightedTimeCostProvider(2);
    ProcessingDetails noTimings = new ProcessingDetails(TimeUnit.MILLISECONDS);
    for (int remaining = 1000; remaining > 0; remaining--) {
      scheduler.addResponseTime("ignored", mockCall("MANY"), noTimings);
    }
    scheduler.addResponseTime("ignored", mockCall("FEW"), noTimings);

    // Every recorded call was "free", so call volume alone must not
    // push either caller out of the top priority level
    int priorityOfMany = scheduler.getPriorityLevel(mockCall("MANY"));
    assertEquals(0, priorityOfMany);
    int priorityOfFew = scheduler.getPriorityLevel(mockCall("FEW"));
    assertEquals(0, priorityOfFew);
  }
  /**
   * Asks for a priority before any call has been recorded; the caller is
   * expected to land in priority level 0.
   */
  @Test
  public void testUsingWeightedTimeCostProviderNoRequests() {
    scheduler = getSchedulerWithWeightedTimeCostProvider(2);
    Schedulable firstEverCall = mockCall("A");
    int priority = scheduler.getPriorityLevel(firstEverCall);
    assertEquals(0, priority);
  }
  /**
   * Build a {@link DecayRpcScheduler} for the "ns" namespace whose cost
   * provider is {@link WeightedTimeCostProvider}. The decay period is set
   * to a very large value so that normal (periodic) decaying is effectively
   * disabled.
   *
   * @param priorityLevels number of priority levels passed to the scheduler
   * @return the newly constructed scheduler
   */
  private static DecayRpcScheduler getSchedulerWithWeightedTimeCostProvider(
      int priorityLevels) {
    final String namespace = "ns";
    final String keyPrefix = namespace + ".";
    Configuration schedulerConf = new Configuration();
    schedulerConf.setLong(
        keyPrefix + DecayRpcScheduler.IPC_SCHEDULER_DECAYSCHEDULER_PERIOD_KEY,
        999999);
    schedulerConf.setClass(
        keyPrefix + CommonConfigurationKeys.IPC_COST_PROVIDER_KEY,
        WeightedTimeCostProvider.class, CostProvider.class);
    return new DecayRpcScheduler(priorityLevels, namespace, schedulerConf);
  }
  /**
   * Look up the priority for a call from the given caller and then record
   * that call as completed, so that the caller's count grows by one. This
   * assumes that {@link DefaultCostProvider} is in use.
   *
   * @param callId identity of the mocked caller
   * @return the priority reported before the call was recorded
   */
  private int getPriorityIncrementCallCount(String callId) {
    final Schedulable call = mockCall(callId);
    final int priorityBeforeCompletion = scheduler.getPriorityLevel(call);
    // An empty ProcessingDetails is sufficient here: the DefaultCostProvider
    // charges a cost of 1 per call and ignores the processing details
    scheduler.addResponseTime("ignored", call,
        new ProcessingDetails(TimeUnit.MILLISECONDS));
    return priorityBeforeCompletion;
  }
 }
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java
@ -18,7 +18,6 @@
 package org.apache.hadoop.ipc;
 import com.google.common.base.Supplier;
 import com.google.protobuf.ServiceException;
 import org.apache.hadoop.HadoopIllegalArgumentException;
 import org.apache.hadoop.conf.Configuration;
@ -1195,15 +1194,6 @@ public class TestRPC extends TestRpcBase {
    Exception lastException = null;
    proxy = getClient(addr, conf);
    MetricsRecordBuilder rb1 =
        getMetrics("DecayRpcSchedulerMetrics2." + ns);
    final long beginDecayedCallVolume = MetricsAsserts.getLongCounter(
        "DecayedCallVolume", rb1);
    final long beginRawCallVolume = MetricsAsserts.getLongCounter(
        "CallVolume", rb1);
    final int beginUniqueCaller = MetricsAsserts.getIntCounter("UniqueCallers",
        rb1);
    try {
      // start a sleep RPC call that sleeps 3s.
      for (int i = 0; i < numClients; i++) {
@ -1231,41 +1221,6 @@ public class TestRPC extends TestRpcBase {
        } else {
          lastException = unwrapExeption;
        }
        // Lets Metric system update latest metrics
        GenericTestUtils.waitFor(new Supplier<Boolean>() {
          @Override
          public Boolean get() {
            MetricsRecordBuilder rb2 =
              getMetrics("DecayRpcSchedulerMetrics2." + ns);
            long decayedCallVolume1 = MetricsAsserts.getLongCounter(
                "DecayedCallVolume", rb2);
            long rawCallVolume1 = MetricsAsserts.getLongCounter(
                "CallVolume", rb2);
            int uniqueCaller1 = MetricsAsserts.getIntCounter(
                "UniqueCallers", rb2);
            long callVolumePriority0 = MetricsAsserts.getLongGauge(
                "Priority.0.CompletedCallVolume", rb2);
            long callVolumePriority1 = MetricsAsserts.getLongGauge(
                "Priority.1.CompletedCallVolume", rb2);
            double avgRespTimePriority0 = MetricsAsserts.getDoubleGauge(
                "Priority.0.AvgResponseTime", rb2);
            double avgRespTimePriority1 = MetricsAsserts.getDoubleGauge(
                "Priority.1.AvgResponseTime", rb2);
            LOG.info("DecayedCallVolume: " + decayedCallVolume1);
            LOG.info("CallVolume: " + rawCallVolume1);
            LOG.info("UniqueCaller: " + uniqueCaller1);
            LOG.info("Priority.0.CompletedCallVolume: " + callVolumePriority0);
            LOG.info("Priority.1.CompletedCallVolume: " + callVolumePriority1);
            LOG.info("Priority.0.AvgResponseTime: " + avgRespTimePriority0);
            LOG.info("Priority.1.AvgResponseTime: " + avgRespTimePriority1);
            return decayedCallVolume1 > beginDecayedCallVolume &&
                rawCallVolume1 > beginRawCallVolume &&
                uniqueCaller1 > beginUniqueCaller;
          }
        }, 30, 60000);
      }
    } finally {
      executorService.shutdown();
@ -1277,6 +1232,63 @@ public class TestRPC extends TestRpcBase {
    assertTrue("RetriableException not received", succeeded);
  }
  /**
   * Test that the metrics for DecayRpcScheduler are updated.
   *
   * Captures the DecayedCallVolume, CallVolume and UniqueCallers counters,
   * issues two short sleep RPCs, and then polls the metrics until all three
   * counters have grown past their initial values. The per-priority gauges
   * are read and logged for diagnostics only; they are not asserted on.
   */
  @Test (timeout=30000)
  public void testDecayRpcSchedulerMetrics() throws Exception {
    final String ns = CommonConfigurationKeys.IPC_NAMESPACE + ".0";
    Server server = setupDecayRpcSchedulerandTestServer(ns + ".");
    MetricsRecordBuilder rb1 =
        getMetrics("DecayRpcSchedulerMetrics2." + ns);
    final long beginDecayedCallVolume = MetricsAsserts.getLongCounter(
        "DecayedCallVolume", rb1);
    final long beginRawCallVolume = MetricsAsserts.getLongCounter(
        "CallVolume", rb1);
    final int beginUniqueCaller = MetricsAsserts.getIntCounter("UniqueCallers",
        rb1);
    TestRpcService proxy = getClient(addr, conf);
    try {
      for (int i = 0; i < 2; i++) {
        proxy.sleep(null, newSleepRequest(100));
      }
      // Lets Metric system update latest metrics.
      // The polling timeout is kept below the JUnit timeout declared on this
      // test so that a stalled metric update is reported by waitFor itself
      // instead of being cut short by the test-level timeout.
      GenericTestUtils.waitFor(() -> {
        MetricsRecordBuilder rb2 =
            getMetrics("DecayRpcSchedulerMetrics2." + ns);
        long decayedCallVolume1 = MetricsAsserts.getLongCounter(
            "DecayedCallVolume", rb2);
        long rawCallVolume1 = MetricsAsserts.getLongCounter(
            "CallVolume", rb2);
        int uniqueCaller1 = MetricsAsserts.getIntCounter(
            "UniqueCallers", rb2);
        long callVolumePriority0 = MetricsAsserts.getLongGauge(
            "Priority.0.CompletedCallVolume", rb2);
        long callVolumePriority1 = MetricsAsserts.getLongGauge(
            "Priority.1.CompletedCallVolume", rb2);
        double avgRespTimePriority0 = MetricsAsserts.getDoubleGauge(
            "Priority.0.AvgResponseTime", rb2);
        double avgRespTimePriority1 = MetricsAsserts.getDoubleGauge(
            "Priority.1.AvgResponseTime", rb2);
        LOG.info("DecayedCallVolume: {}", decayedCallVolume1);
        LOG.info("CallVolume: {}", rawCallVolume1);
        LOG.info("UniqueCaller: {}", uniqueCaller1);
        LOG.info("Priority.0.CompletedCallVolume: {}", callVolumePriority0);
        LOG.info("Priority.1.CompletedCallVolume: {}", callVolumePriority1);
        LOG.info("Priority.0.AvgResponseTime: {}", avgRespTimePriority0);
        LOG.info("Priority.1.AvgResponseTime: {}", avgRespTimePriority1);
        return decayedCallVolume1 > beginDecayedCallVolume &&
            rawCallVolume1 > beginRawCallVolume &&
            uniqueCaller1 > beginUniqueCaller;
      }, 30, 20000);
    } finally {
      // Always release the server and client proxy, even if polling failed
      stop(server, proxy);
    }
  }
  private Server setupDecayRpcSchedulerandTestServer(String ns)
      throws Exception {
    final int queueSizePerHandler = 3;
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestWeightedTimeCostProvider.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestWeightedTimeCostProvider.java
@ -0,0 +1,86 @@
 /**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.hadoop.ipc;
 import java.util.concurrent.TimeUnit;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.ipc.ProcessingDetails.Timing;
 import org.junit.Before;
 import org.junit.Test;
 import static org.apache.hadoop.ipc.WeightedTimeCostProvider.DEFAULT_LOCKEXCLUSIVE_WEIGHT;
 import static org.apache.hadoop.ipc.WeightedTimeCostProvider.DEFAULT_LOCKFREE_WEIGHT;
 import static org.apache.hadoop.ipc.WeightedTimeCostProvider.DEFAULT_LOCKSHARED_WEIGHT;
 import static org.junit.Assert.assertEquals;
 /** Exercises the cost arithmetic of {@link WeightedTimeCostProvider}. */
 public class TestWeightedTimeCostProvider {
  // Per-phase durations recorded in the fixture. NOTE(review): presumably
  // distinct primes so that a weight applied to the wrong phase changes the
  // total -- confirm intent.
  private static final int QUEUE_TIME = 3;
  private static final int LOCKFREE_TIME = 5;
  private static final int LOCKSHARED_TIME = 7;
  private static final int LOCKEXCLUSIVE_TIME = 11;
  private WeightedTimeCostProvider costProvider;
  private ProcessingDetails processingDetails;

  /**
   * Creates an un-initialized cost provider and a set of processing details
   * with a duration recorded for the queue, lock-free, shared-lock and
   * exclusive-lock phases.
   */
  @Before
  public void setup() {
    ProcessingDetails details = new ProcessingDetails(TimeUnit.MILLISECONDS);
    details.set(Timing.QUEUE, QUEUE_TIME);
    details.set(Timing.LOCKFREE, LOCKFREE_TIME);
    details.set(Timing.LOCKSHARED, LOCKSHARED_TIME);
    details.set(Timing.LOCKEXCLUSIVE, LOCKEXCLUSIVE_TIME);
    processingDetails = details;
    costProvider = new WeightedTimeCostProvider();
  }

  /**
   * Asking for a cost before {@code init} has been called is expected to
   * raise an {@link AssertionError}.
   */
  @Test(expected = AssertionError.class)
  public void testGetCostBeforeInit() {
    costProvider.getCost(null);
  }

  /**
   * With an empty configuration the cost is expected to be the sum of the
   * lock-free, shared-lock and exclusive-lock durations, each multiplied by
   * its default weight; the queue duration is not part of the expected sum.
   */
  @Test
  public void testGetCostDefaultWeights() {
    final long expectedCost = DEFAULT_LOCKFREE_WEIGHT * LOCKFREE_TIME
        + DEFAULT_LOCKSHARED_WEIGHT * LOCKSHARED_TIME
        + DEFAULT_LOCKEXCLUSIVE_WEIGHT * LOCKEXCLUSIVE_TIME;
    costProvider.init("foo", new Configuration());
    final long actualCost = costProvider.getCost(processingDetails);
    assertEquals(expectedCost, actualCost);
  }

  /**
   * Weights configured under the provider's own namespace ("foo") are
   * expected to be applied, while a weight configured under a different
   * namespace ("bar") must leave the default in effect.
   */
  @Test
  public void testGetCostConfiguredWeights() {
    final int weightForQueue = 1000;
    final int weightForLockfree = 10000;
    final int weightForLockshared = 100000;
    Configuration weightsConf = new Configuration();
    weightsConf.setInt("bar.weighted-cost.lockexclusive", 0); // should not apply
    weightsConf.setInt("foo.weighted-cost.lockshared", weightForLockshared);
    weightsConf.setInt("foo.weighted-cost.lockfree", weightForLockfree);
    weightsConf.setInt("foo.weighted-cost.queue", weightForQueue);
    costProvider.init("foo", weightsConf);
    final long expectedCost = weightForQueue * QUEUE_TIME
        + weightForLockfree * LOCKFREE_TIME
        + weightForLockshared * LOCKSHARED_TIME
        + DEFAULT_LOCKEXCLUSIVE_WEIGHT * LOCKEXCLUSIVE_TIME;
    final long actualCost = costProvider.getCost(processingDetails);
    assertEquals(expectedCost, actualCost);
  }
 }