YARN-8588. Logging improvements for better debuggability. (Suma Shivaprasad via wangda)

Change-Id: I66aa4b0ec031ae5ce0fae558e2f8cbcbbfebc442
This commit is contained in:
Wangda Tan 2018-08-09 11:03:00 -07:00
parent 5326a7906d
commit 344c335a92
4 changed files with 52 additions and 41 deletions

View File

@ -63,4 +63,9 @@ public class AutoCreatedLeafQueueConfig {
public CapacitySchedulerConfiguration getLeafQueueConfigs() {
return leafQueueConfigs;
}
@Override public String toString() {
return "AutoCreatedLeafQueueConfig{" + "queueCapacities=" + queueCapacities
+ ", leafQueueConfigs=" + leafQueueConfigs + '}';
}
}

View File

@ -124,7 +124,7 @@ public abstract class QueueManagementChange {
@Override
public String toString() {
return "QueueManagementChange{" + "queue=" + queue
return "QueueManagementChange{" + "queue=" + queue.getQueueName()
+ ", updatedEntitlementsByPartition=" + queueTemplateUpdate
+ ", queueAction=" + queueAction + ", transitionToQueueState="
+ transitionToQueueState + '}';

View File

@ -19,8 +19,9 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
@ -50,8 +51,8 @@ import java.util.Set;
*/
public class QueueManagementDynamicEditPolicy implements SchedulingEditPolicy {
private static final Log LOG =
LogFactory.getLog(QueueManagementDynamicEditPolicy.class);
private static final Logger LOG =
LoggerFactory.getLogger(QueueManagementDynamicEditPolicy.class);
private Clock clock;
@ -90,7 +91,7 @@ public class QueueManagementDynamicEditPolicy implements SchedulingEditPolicy {
@Override
public void init(final Configuration config, final RMContext context,
final ResourceScheduler sched) {
LOG.info("Queue Management Policy monitor:" + this.
LOG.info("Queue Management Policy monitor: {}" + this.
getClass().getCanonicalName());
assert null == scheduler : "Unexpected duplicate call to init";
if (!(sched instanceof CapacityScheduler)) {
@ -189,13 +190,7 @@ public class QueueManagementDynamicEditPolicy implements SchedulingEditPolicy {
parentQueue.getAutoCreatedQueueManagementPolicy();
long startTime = 0;
try {
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat
.format("Trying to use {0} to compute preemption "
+ "candidates",
policyClazz.getClass().getName()));
startTime = clock.getTime();
}
startTime = clock.getTime();
queueManagementChanges = policyClazz.computeQueueManagementChanges();
@ -209,15 +204,14 @@ public class QueueManagementDynamicEditPolicy implements SchedulingEditPolicy {
}
if (LOG.isDebugEnabled()) {
LOG.debug(MessageFormat.format("{0} uses {1} millisecond"
+ " to run",
policyClazz.getClass().getName(), clock.getTime()
- startTime));
LOG.debug("{} uses {} millisecond" + " to run",
policyClazz.getClass().getName(), clock.getTime() - startTime);
if (queueManagementChanges.size() > 0) {
LOG.debug(" Updated queue management updates for parent queue"
+ " ["
+ parentQueue.getQueueName() + ": [\n" + queueManagementChanges
.toString() + "\n]");
LOG.debug(" Updated queue management changes for parent queue" + " "
+ "{}: [{}]", parentQueue.getQueueName(),
queueManagementChanges.size() < 25 ?
queueManagementChanges.toString() :
queueManagementChanges.size());
}
}
} catch (YarnException e) {
@ -232,7 +226,7 @@ public class QueueManagementDynamicEditPolicy implements SchedulingEditPolicy {
"Skipping queue management updates for parent queue "
+ parentQueue
.getQueuePath() + " "
+ "since configuration for auto creating queue's beyond "
+ "since configuration for auto creating queues beyond "
+ "parent's "
+ "guaranteed capacity is disabled");
}

View File

@ -19,8 +19,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity
.queuemanagement;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity
.QueueManagementDynamicEditPolicy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler
.SchedulerDynamicEditException;
@ -81,8 +83,8 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
private CapacitySchedulerContext scheduler;
private ManagedParentQueue managedParentQueue;
private static final Log LOG = LogFactory.getLog(
GuaranteedOrZeroCapacityOverTimePolicy.class);
private static final Logger LOG =
LoggerFactory.getLogger(GuaranteedOrZeroCapacityOverTimePolicy.class);
private ReentrantReadWriteLock.WriteLock writeLock;
@ -380,6 +382,17 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
deactivateLeafQueuesIfInActive(managedParentQueue, nodeLabel,
leafQueueEntitlements);
if (LOG.isDebugEnabled()) {
if ( deactivatedLeafQueues.size() > 0) {
LOG.debug("Parent queue = {}, " +
", nodeLabel = {}, deactivated leaf queues = [{}] ",
managedParentQueue.getQueueName(), nodeLabel,
deactivatedLeafQueues.size() > 25 ? deactivatedLeafQueues
.size() : deactivatedLeafQueues);
}
}
float deactivatedCapacity = getTotalDeactivatedCapacity(
deactivatedLeafQueues, nodeLabel);
@ -392,7 +405,7 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
+ deactivatedCapacity + EPSILON;
if (LOG.isDebugEnabled()) {
LOG.debug("Parent queue : " + managedParentQueue.getQueueName()
LOG.debug("Parent queue = " + managedParentQueue.getQueueName()
+ ", nodeLabel = " + nodeLabel + ", absCapacity = "
+ parentAbsoluteCapacity + ", leafQueueAbsoluteCapacity = "
+ leafQueueTemplateAbsoluteCapacity + ", deactivatedCapacity = "
@ -409,7 +422,8 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
pendingApps.size());
if (LOG.isDebugEnabled()) {
LOG.debug("Found " + maxLeafQueuesTobeActivated + " leaf queues"
LOG.debug("Parent queue = " + managedParentQueue.getQueueName()
+ " : Found " + maxLeafQueuesTobeActivated + " leaf queues"
+ " to be activated with " + pendingApps.size() + " apps ");
}
@ -424,8 +438,9 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
if (LOG.isDebugEnabled()) {
if (leafQueuesToBeActivated.size() > 0) {
LOG.debug("Activated leaf queues : [" + leafQueuesToBeActivated
+ "]");
LOG.debug("Activated leaf queues : [{}]",
leafQueuesToBeActivated.size() < 25 ?
leafQueuesToBeActivated : leafQueuesToBeActivated.size());
}
}
}
@ -492,8 +507,9 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
String partition = e.getKey();
if (!newPartitions.contains(partition)) {
itr.remove();
LOG.info(
"Removed partition " + partition + " from leaf queue " + "state");
LOG.info(managedParentQueue.getQueueName() +
" : Removed partition " + partition + " from leaf queue " +
"state");
} else{
Map<String, LeafQueueStatePerPartition> queues = e.getValue();
for (
@ -502,7 +518,9 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
String queue = queueItr.next().getKey();
if (!newQueues.contains(queue)) {
queueItr.remove();
LOG.info("Removed queue " + queue + " from leaf queue "
LOG.info(managedParentQueue.getQueueName() + " : Removed queue"
+ queue + " from "
+ "leaf queue "
+ "state from partition " + partition);
}
}
@ -582,12 +600,6 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
updateToZeroCapacity(capacities, nodeLabel);
deactivatedQueues.put(leafQueue.getQueueName(),
leafQueueTemplateCapacities);
} else{
if (LOG.isDebugEnabled()) {
LOG.debug(" Leaf queue has pending applications or is " + "inactive"
+ " : " + leafQueue.getNumApplications()
+ ".Skipping deactivation for " + leafQueue);
}
}
} else{
LOG.warn("Could not find queue in scheduler while trying" + " to "
@ -659,7 +671,7 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
if (isActive(leafQueue, nodeLabel)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Queue is already active." + " Skipping activation : "
+ queue.getQueuePath());
+ leafQueue.getQueueName());
}
} else{
activate(leafQueue, nodeLabel);
@ -668,7 +680,7 @@ public class GuaranteedOrZeroCapacityOverTimePolicy
if (!isActive(leafQueue, nodeLabel)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Queue is already de-activated. Skipping "
+ "de-activation : " + leafQueue.getQueuePath());
+ "de-activation : " + leafQueue.getQueueName());
}
} else{
deactivate(leafQueue, nodeLabel);