From 344c335a920e6f32a35ebace0a118a9dc4a22fb7 Mon Sep 17 00:00:00 2001 From: Wangda Tan Date: Thu, 9 Aug 2018 11:03:00 -0700 Subject: [PATCH] YARN-8588. Logging improvements for better debuggability. (Suma Shivaprasad via wangda) Change-Id: I66aa4b0ec031ae5ce0fae558e2f8cbcbbfebc442 --- .../capacity/AutoCreatedLeafQueueConfig.java | 5 ++ .../capacity/QueueManagementChange.java | 2 +- .../QueueManagementDynamicEditPolicy.java | 36 ++++++------- ...uaranteedOrZeroCapacityOverTimePolicy.java | 50 ++++++++++++------- 4 files changed, 52 insertions(+), 41 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueueConfig.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueueConfig.java index 5952250a02a..87ef1c0e26f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueueConfig.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AutoCreatedLeafQueueConfig.java @@ -63,4 +63,9 @@ public QueueCapacities getQueueCapacities() { public CapacitySchedulerConfiguration getLeafQueueConfigs() { return leafQueueConfigs; } + + @Override public String toString() { + return "AutoCreatedLeafQueueConfig{" + "queueCapacities=" + queueCapacities + + ", leafQueueConfigs=" + leafQueueConfigs + '}'; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementChange.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementChange.java index 74d9b23c0b3..64ba578ff73 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementChange.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementChange.java @@ -124,7 +124,7 @@ public int hashCode() { @Override public String toString() { - return "QueueManagementChange{" + "queue=" + queue + return "QueueManagementChange{" + "queue=" + queue.getQueueName() + ", updatedEntitlementsByPartition=" + queueTemplateUpdate + ", queueAction=" + queueAction + ", transitionToQueueState=" + transitionToQueueState + '}'; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementDynamicEditPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementDynamicEditPolicy.java index 9b0cf7bc93b..ea43ac82f53 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementDynamicEditPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/QueueManagementDynamicEditPolicy.java @@ -19,8 +19,9 @@ import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -50,8 +51,8 @@ */ public class QueueManagementDynamicEditPolicy implements SchedulingEditPolicy { - private static final Log LOG = - LogFactory.getLog(QueueManagementDynamicEditPolicy.class); + private static final Logger LOG = + LoggerFactory.getLogger(QueueManagementDynamicEditPolicy.class); private Clock clock; @@ -90,7 +91,7 @@ public QueueManagementDynamicEditPolicy(RMContext context, @Override public void init(final Configuration config, final RMContext context, final ResourceScheduler sched) { - LOG.info("Queue Management Policy monitor:" + this. + LOG.info("Queue Management Policy monitor: {}" + this. getClass().getCanonicalName()); assert null == scheduler : "Unexpected duplicate call to init"; if (!(sched instanceof CapacityScheduler)) { @@ -189,13 +190,7 @@ List manageAutoCreatedLeafQueues() parentQueue.getAutoCreatedQueueManagementPolicy(); long startTime = 0; try { - if (LOG.isDebugEnabled()) { - LOG.debug(MessageFormat - .format("Trying to use {0} to compute preemption " - + "candidates", - policyClazz.getClass().getName())); - startTime = clock.getTime(); - } + startTime = clock.getTime(); queueManagementChanges = policyClazz.computeQueueManagementChanges(); @@ -209,15 +204,14 @@ List manageAutoCreatedLeafQueues() } if (LOG.isDebugEnabled()) { - LOG.debug(MessageFormat.format("{0} uses {1} millisecond" - + " to run", - policyClazz.getClass().getName(), clock.getTime() - - startTime)); + LOG.debug("{} uses {} millisecond" + " to run", + policyClazz.getClass().getName(), clock.getTime() - startTime); if (queueManagementChanges.size() > 0) { - LOG.debug(" Updated queue management updates for parent queue" - + " [" - + parentQueue.getQueueName() + ": [\n" + queueManagementChanges - .toString() + "\n]"); + LOG.debug(" Updated queue management changes for parent queue" + " " + + "{}: [{}]", parentQueue.getQueueName(), + queueManagementChanges.size() < 25 ? + queueManagementChanges.toString() : + queueManagementChanges.size()); } } } catch (YarnException e) { @@ -232,7 +226,7 @@ List manageAutoCreatedLeafQueues() "Skipping queue management updates for parent queue " + parentQueue .getQueuePath() + " " - + "since configuration for auto creating queue's beyond " + + "since configuration for auto creating queues beyond " + "parent's " + "guaranteed capacity is disabled"); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java index b2301fd93a0..faa6e6f54a9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/queuemanagement/GuaranteedOrZeroCapacityOverTimePolicy.java @@ -19,8 +19,10 @@ .queuemanagement; import com.google.common.annotations.VisibleForTesting; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .QueueManagementDynamicEditPolicy; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler .SchedulerDynamicEditException; @@ -81,8 +83,8 @@ public class GuaranteedOrZeroCapacityOverTimePolicy private CapacitySchedulerContext scheduler; private ManagedParentQueue managedParentQueue; - private static final Log LOG = LogFactory.getLog( - GuaranteedOrZeroCapacityOverTimePolicy.class); + private static final Logger LOG = + LoggerFactory.getLogger(GuaranteedOrZeroCapacityOverTimePolicy.class); private ReentrantReadWriteLock.WriteLock writeLock; @@ -380,6 +382,17 @@ public List computeQueueManagementChanges() deactivateLeafQueuesIfInActive(managedParentQueue, nodeLabel, leafQueueEntitlements); + if (LOG.isDebugEnabled()) { + if ( deactivatedLeafQueues.size() > 0) { + LOG.debug("Parent queue = {}, " + + ", nodeLabel = {}, deactivated leaf queues = [{}] ", + managedParentQueue.getQueueName(), nodeLabel, + deactivatedLeafQueues.size() > 25 ? deactivatedLeafQueues + .size() : deactivatedLeafQueues); + + } + } + float deactivatedCapacity = getTotalDeactivatedCapacity( deactivatedLeafQueues, nodeLabel); @@ -392,7 +405,7 @@ public List computeQueueManagementChanges() + deactivatedCapacity + EPSILON; if (LOG.isDebugEnabled()) { - LOG.debug("Parent queue : " + managedParentQueue.getQueueName() + LOG.debug("Parent queue = " + managedParentQueue.getQueueName() + ", nodeLabel = " + nodeLabel + ", absCapacity = " + parentAbsoluteCapacity + ", leafQueueAbsoluteCapacity = " + leafQueueTemplateAbsoluteCapacity + ", deactivatedCapacity = " @@ -409,7 +422,8 @@ public List computeQueueManagementChanges() pendingApps.size()); if (LOG.isDebugEnabled()) { - LOG.debug("Found " + maxLeafQueuesTobeActivated + " leaf queues" + LOG.debug("Parent queue = " + managedParentQueue.getQueueName() + + " : Found " + maxLeafQueuesTobeActivated + " leaf queues" + " to be activated with " + pendingApps.size() + " apps "); } @@ -424,8 +438,9 @@ public List computeQueueManagementChanges() if (LOG.isDebugEnabled()) { if (leafQueuesToBeActivated.size() > 0) { - LOG.debug("Activated leaf queues : [" + leafQueuesToBeActivated - + "]"); + LOG.debug("Activated leaf queues : [{}]", + leafQueuesToBeActivated.size() < 25 ? + leafQueuesToBeActivated : leafQueuesToBeActivated.size()); } } } @@ -492,8 +507,9 @@ void updateLeafQueueState() { String partition = e.getKey(); if (!newPartitions.contains(partition)) { itr.remove(); - LOG.info( - "Removed partition " + partition + " from leaf queue " + "state"); + LOG.info(managedParentQueue.getQueueName() + + " : Removed partition " + partition + " from leaf queue " + + "state"); } else{ Map queues = e.getValue(); for ( @@ -502,7 +518,9 @@ void updateLeafQueueState() { String queue = queueItr.next().getKey(); if (!newQueues.contains(queue)) { queueItr.remove(); - LOG.info("Removed queue " + queue + " from leaf queue " + LOG.info(managedParentQueue.getQueueName() + " : Removed queue" + + queue + " from " + + "leaf queue " + "state from partition " + partition); } } @@ -582,12 +600,6 @@ private Map deactivateLeafQueuesIfInActive( updateToZeroCapacity(capacities, nodeLabel); deactivatedQueues.put(leafQueue.getQueueName(), leafQueueTemplateCapacities); - } else{ - if (LOG.isDebugEnabled()) { - LOG.debug(" Leaf queue has pending applications or is " + "inactive" - + " : " + leafQueue.getNumApplications() - + ".Skipping deactivation for " + leafQueue); - } } } else{ LOG.warn("Could not find queue in scheduler while trying" + " to " @@ -659,7 +671,7 @@ public void commitQueueManagementChanges( if (isActive(leafQueue, nodeLabel)) { if (LOG.isDebugEnabled()) { LOG.debug("Queue is already active." + " Skipping activation : " - + queue.getQueuePath()); + + leafQueue.getQueueName()); } } else{ activate(leafQueue, nodeLabel); @@ -668,7 +680,7 @@ public void commitQueueManagementChanges( if (!isActive(leafQueue, nodeLabel)) { if (LOG.isDebugEnabled()) { LOG.debug("Queue is already de-activated. Skipping " - + "de-activation : " + leafQueue.getQueuePath()); + + "de-activation : " + leafQueue.getQueueName()); } } else{ deactivate(leafQueue, nodeLabel);