YARN-3294. Allow dumping of Capacity Scheduler debug logs via web UI for

a fixed time period. Contributed by Varun Vasudev
This commit is contained in:
Xuan 2015-04-07 09:52:36 -07:00
parent 0b5d7d23c4
commit d27e9241e8
5 changed files with 280 additions and 0 deletions

View File

@ -101,6 +101,9 @@ Release 2.8.0 - UNRELEASED
YARN-2901. Add errors and warning metrics page to RM, NM web UI.
(Varun Vasudev via wangda)
YARN-3294. Allow dumping of Capacity Scheduler debug logs via
web UI for a fixed time period. (Varun Vasudev via xgong)
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not

View File

@ -0,0 +1,131 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.util;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.log4j.*;
import java.io.File;
import java.io.IOException;
import java.util.*;
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class AdHocLogDumper {
private static final Log LOG = LogFactory.getLog(AdHocLogDumper.class);
private String name;
private String targetFilename;
private Map<String, Priority> appenderLevels;
private Level currentLogLevel;
public static final String AD_HOC_DUMPER_APPENDER = "ad-hoc-dumper-appender";
private static boolean logFlag = false;
private static final Object lock = new Object();
public AdHocLogDumper(String name, String targetFilename) {
this.name = name;
this.targetFilename = targetFilename;
appenderLevels = new HashMap<>();
}
public void dumpLogs(String level, int timePeriod)
throws YarnRuntimeException, IOException {
synchronized (lock) {
if (logFlag) {
LOG.info("Attempt to dump logs when appender is already running");
throw new YarnRuntimeException("Appender is already dumping logs");
}
Level targetLevel = Level.toLevel(level);
Log log = LogFactory.getLog(name);
appenderLevels.clear();
if (log instanceof Log4JLogger) {
Logger packageLogger = ((Log4JLogger) log).getLogger();
currentLogLevel = packageLogger.getLevel();
Level currentEffectiveLevel = packageLogger.getEffectiveLevel();
// make sure we can create the appender first
Layout layout = new PatternLayout("%d{ISO8601} %p %c: %m%n");
FileAppender fApp;
File file =
new File(System.getProperty("yarn.log.dir"), targetFilename);
try {
fApp = new FileAppender(layout, file.getAbsolutePath(), false);
} catch (IOException ie) {
LOG
.warn(
"Error creating file, can't dump logs to "
+ file.getAbsolutePath(), ie);
throw ie;
}
fApp.setName(AdHocLogDumper.AD_HOC_DUMPER_APPENDER);
fApp.setThreshold(targetLevel);
// get current threshold of all appenders and set it to the effective
// level
for (Enumeration appenders = Logger.getRootLogger().getAllAppenders(); appenders
.hasMoreElements();) {
Object obj = appenders.nextElement();
if (obj instanceof AppenderSkeleton) {
AppenderSkeleton appender = (AppenderSkeleton) obj;
appenderLevels.put(appender.getName(), appender.getThreshold());
appender.setThreshold(currentEffectiveLevel);
}
}
packageLogger.addAppender(fApp);
LOG.info("Dumping adhoc logs for " + name + " to "
+ file.getAbsolutePath() + " for " + timePeriod + " milliseconds");
packageLogger.setLevel(targetLevel);
logFlag = true;
TimerTask restoreLogLevel = new RestoreLogLevel();
Timer restoreLogLevelTimer = new Timer();
restoreLogLevelTimer.schedule(restoreLogLevel, timePeriod);
}
}
}
class RestoreLogLevel extends TimerTask {
@Override
public void run() {
Log log = LogFactory.getLog(name);
if (log instanceof Log4JLogger) {
Logger logger = ((Log4JLogger) log).getLogger();
logger.removeAppender(AD_HOC_DUMPER_APPENDER);
logger.setLevel(currentLogLevel);
for (Enumeration appenders = Logger.getRootLogger().getAllAppenders(); appenders
.hasMoreElements();) {
Object obj = appenders.nextElement();
if (obj instanceof AppenderSkeleton) {
AppenderSkeleton appender = (AppenderSkeleton) obj;
appender.setThreshold(appenderLevels.get(appender.getName()));
}
}
logFlag = false;
LOG.info("Done dumping adhoc logs for " + name);
}
}
}
}

View File

@ -0,0 +1,86 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.util;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.util.Time;
import org.apache.log4j.Appender;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.Logger;
import org.apache.log4j.Priority;
import org.junit.Assert;
import org.junit.Test;
import java.io.File;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
public class TestAdHocLogDumper {
private static final Log LOG = LogFactory.getLog(TestAdHocLogDumper.class);
@Test
public void testDumpingSchedulerLogs() throws Exception {
Map<Appender, Priority> levels = new HashMap<>();
String logHierarchy = TestAdHocLogDumper.class.getName();
String logFilename = "test.log";
Log log = LogFactory.getLog(logHierarchy);
if (log instanceof Log4JLogger) {
for (Enumeration appenders = Logger.getRootLogger().getAllAppenders(); appenders
.hasMoreElements();) {
Object obj = appenders.nextElement();
if (obj instanceof AppenderSkeleton) {
AppenderSkeleton appender = (AppenderSkeleton) obj;
levels.put(appender, appender.getThreshold());
}
}
}
AdHocLogDumper dumper = new AdHocLogDumper(logHierarchy, logFilename);
dumper.dumpLogs("DEBUG", 1000);
LOG.debug("test message 1");
LOG.info("test message 2");
File logFile = new File(logFilename);
Assert.assertTrue(logFile.exists());
Thread.sleep(2000);
long lastWrite = logFile.lastModified();
Assert.assertTrue(lastWrite < Time.now());
Assert.assertTrue(logFile.length() != 0);
// make sure levels are set back to their original values
if (log instanceof Log4JLogger) {
for (Enumeration appenders = Logger.getRootLogger().getAllAppenders(); appenders
.hasMoreElements();) {
Object obj = appenders.nextElement();
if (obj instanceof AppenderSkeleton) {
AppenderSkeleton appender = (AppenderSkeleton) obj;
Assert.assertEquals(levels.get(appender), appender.getThreshold());
}
}
}
boolean del = logFile.delete();
if(!del) {
LOG.info("Couldn't clean up after test");
}
}
}

View File

@ -196,6 +196,40 @@ class CapacitySchedulerPage extends RmView {
@Override
public void render(Block html) {
html._(MetricsOverviewTable.class);
// Dump CapacityScheduler debug logs
html.div()
.button()
.$onclick("confirmAction()").b("Dump scheduler logs")._()
.select().$id("time")
.option().$value("60")._("1 min")._()
.option().$value("300")._("5 min")._()
.option().$value("600")._("10 min")._()
._()._();
StringBuilder script = new StringBuilder();
script.append("function confirmAction() {")
.append(" b = confirm(\"Are you sure you wish to generate scheduler logs?\");")
.append(" if (b == true) {")
.append(" var timePeriod = $(\"#time\").val();")
.append(" $.ajax({")
.append(" type: 'POST',")
.append(" url: '/ws/v1/cluster/scheduler/logs',")
.append(" contentType: 'text/plain',")
.append(" data: 'time=' + timePeriod,")
.append(" dataType: 'text'")
.append(" }).done(function(data){")
.append(" setTimeout(function(){")
.append(" alert(\"Scheduler log is being generated.\");")
.append(" }, 1000);")
.append(" }).fail(function(data){")
.append(" alert(\"Scheduler log generation failed. Please check the ResourceManager log for more informtion.\");")
.append(" console.log(data);")
.append(" });")
.append(" }")
.append("}");
html.script().$type("text/javascript")._(script.toString())._();
UL<DIV<DIV<Hamlet>>> ul = html.
div("#cs-wrapper.ui-widget").
div(".ui-widget-header.ui-corner-top").

View File

@ -38,6 +38,7 @@ import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.ws.rs.Consumes;
import javax.ws.rs.DELETE;
import javax.ws.rs.FormParam;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.PUT;
@ -139,6 +140,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.StatisticsItemIn
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsInfo;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.util.AdHocLogDumper;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.webapp.BadRequestException;
import org.apache.hadoop.yarn.webapp.NotFoundException;
@ -238,6 +240,30 @@ public class RMWebServices {
return new SchedulerTypeInfo(sinfo);
}
@POST
@Path("/scheduler/logs")
@Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML })
public String dumpSchedulerLogs(@FormParam("time") String time) throws IOException {
init();
ResourceScheduler rs = rm.getResourceScheduler();
int period = Integer.parseInt(time);
if (period <= 0) {
throw new BadRequestException("Period must be greater than 0");
}
final String logHierarchy =
"org.apache.hadoop.yarn.server.resourcemanager.scheduler";
String logfile = "yarn-scheduler-debug.log";
if (rs instanceof CapacityScheduler) {
logfile = "yarn-capacity-scheduler-debug.log";
} else if (rs instanceof FairScheduler) {
logfile = "yarn-fair-scheduler-debug.log";
}
AdHocLogDumper dumper = new AdHocLogDumper(logHierarchy, logfile);
// time period is sent to us in seconds
dumper.dumpLogs("DEBUG", period * 1000);
return "Capacity scheduler logs are being created.";
}
/**
* Returns all nodes in the cluster. If the states param is given, returns
* all nodes that are in the comma-separated list of states.