From d27e9241e8676a0edb2d35453cac5f9495fcd605 Mon Sep 17 00:00:00 2001 From: Xuan Date: Tue, 7 Apr 2015 09:52:36 -0700 Subject: [PATCH] YARN-3294. Allow dumping of Capacity Scheduler debug logs via web UI for a fixed time period. Contributed by Varun Vasudev --- hadoop-yarn-project/CHANGES.txt | 3 + .../hadoop/yarn/util/AdHocLogDumper.java | 131 ++++++++++++++++++ .../hadoop/yarn/util/TestAdHocLogDumper.java | 86 ++++++++++++ .../webapp/CapacitySchedulerPage.java | 34 +++++ .../resourcemanager/webapp/RMWebServices.java | 26 ++++ 5 files changed, 280 insertions(+) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AdHocLogDumper.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestAdHocLogDumper.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 278636d2a8c..f2950bfb199 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -101,6 +101,9 @@ Release 2.8.0 - UNRELEASED YARN-2901. Add errors and warning metrics page to RM, NM web UI. (Varun Vasudev via wangda) + YARN-3294. Allow dumping of Capacity Scheduler debug logs via + web UI for a fixed time period. (Varun Vasudev via xgong) + OPTIMIZATIONS YARN-3339. TestDockerContainerExecutor should pull a single image and not diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AdHocLogDumper.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AdHocLogDumper.java new file mode 100644 index 00000000000..d2e4c74fd32 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/AdHocLogDumper.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; +import org.apache.log4j.*; + +import java.io.File; +import java.io.IOException; +import java.util.*; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class AdHocLogDumper { + + private static final Log LOG = LogFactory.getLog(AdHocLogDumper.class); + + private String name; + private String targetFilename; + private Map appenderLevels; + private Level currentLogLevel; + public static final String AD_HOC_DUMPER_APPENDER = "ad-hoc-dumper-appender"; + private static boolean logFlag = false; + private static final Object lock = new Object(); + + public AdHocLogDumper(String name, String targetFilename) { + this.name = name; + this.targetFilename = targetFilename; + appenderLevels = new HashMap<>(); + } + + public void dumpLogs(String level, int timePeriod) + throws YarnRuntimeException, IOException { + synchronized (lock) { + if (logFlag) { + LOG.info("Attempt to dump logs when appender is already running"); + throw new YarnRuntimeException("Appender is already dumping logs"); + } + Level targetLevel = Level.toLevel(level); + Log log = LogFactory.getLog(name); + appenderLevels.clear(); + if (log instanceof Log4JLogger) { + Logger packageLogger = ((Log4JLogger) log).getLogger(); + currentLogLevel = packageLogger.getLevel(); + Level currentEffectiveLevel = packageLogger.getEffectiveLevel(); + + // make sure we can create the appender first + Layout layout = new PatternLayout("%d{ISO8601} %p %c: %m%n"); + FileAppender fApp; + File file = + new File(System.getProperty("yarn.log.dir"), targetFilename); + try { + fApp = new FileAppender(layout, file.getAbsolutePath(), false); + } catch (IOException ie) { + LOG + .warn( + "Error creating file, can't dump logs to " + + file.getAbsolutePath(), ie); + throw ie; + } + fApp.setName(AdHocLogDumper.AD_HOC_DUMPER_APPENDER); + fApp.setThreshold(targetLevel); + + // get current threshold of all appenders and set it to the effective + // level + for (Enumeration appenders = Logger.getRootLogger().getAllAppenders(); appenders + .hasMoreElements();) { + Object obj = appenders.nextElement(); + if (obj instanceof AppenderSkeleton) { + AppenderSkeleton appender = (AppenderSkeleton) obj; + appenderLevels.put(appender.getName(), appender.getThreshold()); + appender.setThreshold(currentEffectiveLevel); + } + } + + packageLogger.addAppender(fApp); + LOG.info("Dumping adhoc logs for " + name + " to " + + file.getAbsolutePath() + " for " + timePeriod + " milliseconds"); + packageLogger.setLevel(targetLevel); + logFlag = true; + + TimerTask restoreLogLevel = new RestoreLogLevel(); + Timer restoreLogLevelTimer = new Timer(); + restoreLogLevelTimer.schedule(restoreLogLevel, timePeriod); + } + } + } + + class RestoreLogLevel extends TimerTask { + @Override + public void run() { + Log log = LogFactory.getLog(name); + if (log instanceof Log4JLogger) { + Logger logger = ((Log4JLogger) log).getLogger(); + logger.removeAppender(AD_HOC_DUMPER_APPENDER); + logger.setLevel(currentLogLevel); + for (Enumeration appenders = Logger.getRootLogger().getAllAppenders(); appenders + .hasMoreElements();) { + Object obj = appenders.nextElement(); + if (obj instanceof AppenderSkeleton) { + AppenderSkeleton appender = (AppenderSkeleton) obj; + appender.setThreshold(appenderLevels.get(appender.getName())); + } + } + logFlag = false; + LOG.info("Done dumping adhoc logs for " + name); + } + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestAdHocLogDumper.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestAdHocLogDumper.java new file mode 100644 index 00000000000..046c94e5d45 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/TestAdHocLogDumper.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.util; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.commons.logging.impl.Log4JLogger; +import org.apache.hadoop.util.Time; +import org.apache.log4j.Appender; +import org.apache.log4j.AppenderSkeleton; +import org.apache.log4j.Logger; +import org.apache.log4j.Priority; +import org.junit.Assert; +import org.junit.Test; + +import java.io.File; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.Map; + +public class TestAdHocLogDumper { + + private static final Log LOG = LogFactory.getLog(TestAdHocLogDumper.class); + + @Test + public void testDumpingSchedulerLogs() throws Exception { + + Map levels = new HashMap<>(); + String logHierarchy = TestAdHocLogDumper.class.getName(); + String logFilename = "test.log"; + Log log = LogFactory.getLog(logHierarchy); + if (log instanceof Log4JLogger) { + for (Enumeration appenders = Logger.getRootLogger().getAllAppenders(); appenders + .hasMoreElements();) { + Object obj = appenders.nextElement(); + if (obj instanceof AppenderSkeleton) { + AppenderSkeleton appender = (AppenderSkeleton) obj; + levels.put(appender, appender.getThreshold()); + } + } + } + + AdHocLogDumper dumper = new AdHocLogDumper(logHierarchy, logFilename); + dumper.dumpLogs("DEBUG", 1000); + LOG.debug("test message 1"); + LOG.info("test message 2"); + File logFile = new File(logFilename); + Assert.assertTrue(logFile.exists()); + Thread.sleep(2000); + long lastWrite = logFile.lastModified(); + Assert.assertTrue(lastWrite < Time.now()); + Assert.assertTrue(logFile.length() != 0); + + // make sure levels are set back to their original values + if (log instanceof Log4JLogger) { + for (Enumeration appenders = Logger.getRootLogger().getAllAppenders(); appenders + .hasMoreElements();) { + Object obj = appenders.nextElement(); + if (obj instanceof AppenderSkeleton) { + AppenderSkeleton appender = (AppenderSkeleton) obj; + Assert.assertEquals(levels.get(appender), appender.getThreshold()); + } + } + } + boolean del = logFile.delete(); + if(!del) { + LOG.info("Couldn't clean up after test"); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java index e62fd706788..f1e1e8caf81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/CapacitySchedulerPage.java @@ -196,6 +196,40 @@ class CapacitySchedulerPage extends RmView { @Override public void render(Block html) { html._(MetricsOverviewTable.class); + // Dump CapacityScheduler debug logs + html.div() + .button() + .$onclick("confirmAction()").b("Dump scheduler logs")._() + .select().$id("time") + .option().$value("60")._("1 min")._() + .option().$value("300")._("5 min")._() + .option().$value("600")._("10 min")._() + ._()._(); + + StringBuilder script = new StringBuilder(); + script.append("function confirmAction() {") + .append(" b = confirm(\"Are you sure you wish to generate scheduler logs?\");") + .append(" if (b == true) {") + .append(" var timePeriod = $(\"#time\").val();") + .append(" $.ajax({") + .append(" type: 'POST',") + .append(" url: '/ws/v1/cluster/scheduler/logs',") + .append(" contentType: 'text/plain',") + .append(" data: 'time=' + timePeriod,") + .append(" dataType: 'text'") + .append(" }).done(function(data){") + .append(" setTimeout(function(){") + .append(" alert(\"Scheduler log is being generated.\");") + .append(" }, 1000);") + .append(" }).fail(function(data){") + .append(" alert(\"Scheduler log generation failed. Please check the ResourceManager log for more informtion.\");") + .append(" console.log(data);") + .append(" });") + .append(" }") + .append("}"); + + html.script().$type("text/javascript")._(script.toString())._(); + UL>> ul = html. div("#cs-wrapper.ui-widget"). div(".ui-widget-header.ui-corner-top"). diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java index 36f2b1d5516..584da7d1b55 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java @@ -38,6 +38,7 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.Consumes; import javax.ws.rs.DELETE; +import javax.ws.rs.FormParam; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.PUT; @@ -139,6 +140,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.StatisticsItemIn import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelsInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsInfo; import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.apache.hadoop.yarn.util.AdHocLogDumper; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.webapp.BadRequestException; import org.apache.hadoop.yarn.webapp.NotFoundException; @@ -238,6 +240,30 @@ public class RMWebServices { return new SchedulerTypeInfo(sinfo); } + @POST + @Path("/scheduler/logs") + @Produces({ MediaType.APPLICATION_JSON, MediaType.APPLICATION_XML }) + public String dumpSchedulerLogs(@FormParam("time") String time) throws IOException { + init(); + ResourceScheduler rs = rm.getResourceScheduler(); + int period = Integer.parseInt(time); + if (period <= 0) { + throw new BadRequestException("Period must be greater than 0"); + } + final String logHierarchy = + "org.apache.hadoop.yarn.server.resourcemanager.scheduler"; + String logfile = "yarn-scheduler-debug.log"; + if (rs instanceof CapacityScheduler) { + logfile = "yarn-capacity-scheduler-debug.log"; + } else if (rs instanceof FairScheduler) { + logfile = "yarn-fair-scheduler-debug.log"; + } + AdHocLogDumper dumper = new AdHocLogDumper(logHierarchy, logfile); + // time period is sent to us in seconds + dumper.dumpLogs("DEBUG", period * 1000); + return "Capacity scheduler logs are being created."; + } + /** * Returns all nodes in the cluster. If the states param is given, returns * all nodes that are in the comma-separated list of states.