From f64fda0f00b22793a9c5ea10f9d73ef33fa2b563 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 6 Jan 2022 15:26:49 +0530 Subject: [PATCH] HADOOP-18055. Async Profiler endpoint for Hadoop daemons (#3824) Reviewed-by: Akira Ajisaka --- .../org/apache/hadoop/http/HttpServer2.java | 21 + .../hadoop/http/ProfileOutputServlet.java | 87 ++++ .../apache/hadoop/http/ProfileServlet.java | 394 ++++++++++++++++++ .../hadoop/http/ProfilerDisabledServlet.java | 44 ++ .../org/apache/hadoop/util/ProcessUtils.java | 74 ++++ .../src/main/resources/core-default.xml | 2 +- .../src/site/markdown/AsyncProfilerServlet.md | 145 +++++++ .../http/TestDisabledProfileServlet.java | 95 +++++ .../hadoop-kms/src/site/markdown/index.md.vm | 5 +- .../src/site/markdown/ServerSetup.md.vm | 5 +- hadoop-project/src/site/site.xml | 1 + 11 files changed, 868 insertions(+), 5 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java create mode 100644 hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java index 76e77560a58..fb090fe4385 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/HttpServer2.java @@ -27,6 +27,7 @@ import java.net.InetSocketAddress; import java.net.MalformedURLException; import java.net.URI; import java.net.URL; +import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.List; @@ -771,6 +772,26 @@ public final class HttpServer2 implements FilterContainer { addDefaultServlets(); addPrometheusServlet(conf); + addAsyncProfilerServlet(contexts); + } + + private void addAsyncProfilerServlet(ContextHandlerCollection contexts) throws IOException { + final String asyncProfilerHome = ProfileServlet.getAsyncProfilerHome(); + if (asyncProfilerHome != null && !asyncProfilerHome.trim().isEmpty()) { + addServlet("prof", "/prof", ProfileServlet.class); + Path tmpDir = Paths.get(ProfileServlet.OUTPUT_DIR); + if (Files.notExists(tmpDir)) { + Files.createDirectories(tmpDir); + } + ServletContextHandler genCtx = new ServletContextHandler(contexts, "/prof-output-hadoop"); + genCtx.addServlet(ProfileOutputServlet.class, "/*"); + genCtx.setResourceBase(tmpDir.toAbsolutePath().toString()); + genCtx.setDisplayName("prof-output-hadoop"); + } else { + addServlet("prof", "/prof", ProfilerDisabledServlet.class); + LOG.info("ASYNC_PROFILER_HOME environment variable and async.profiler.home system property " + + "not specified. 
Disabling /prof endpoint."); + } } private void addPrometheusServlet(Configuration conf) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java new file mode 100644 index 00000000000..1ecc21f3753 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileOutputServlet.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.http; + +import java.io.File; +import java.io.IOException; +import java.util.regex.Pattern; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.eclipse.jetty.servlet.DefaultServlet; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Servlet to serve files generated by {@link ProfileServlet}. + */ +@InterfaceAudience.Private +public class ProfileOutputServlet extends DefaultServlet { + + private static final long serialVersionUID = 1L; + + private static final Logger LOG = LoggerFactory.getLogger(ProfileOutputServlet.class); + // default refresh period 2 sec + private static final int REFRESH_PERIOD = 2; + // Alphanumeric characters, plus percent (url-encoding), equals, ampersand, dot and hyphen + private static final Pattern ALPHA_NUMERIC = Pattern.compile("[a-zA-Z0-9%=&.\\-]*"); + + @Override + protected void doGet(final HttpServletRequest req, final HttpServletResponse resp) + throws ServletException, IOException { + if (!HttpServer2.isInstrumentationAccessAllowed(getServletContext(), req, resp)) { + resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED); + ProfileServlet.setResponseHeader(resp); + resp.getWriter().write("Unauthorized: Instrumentation access is not allowed!"); + return; + } + + String absoluteDiskPath = getServletContext().getRealPath(req.getPathInfo()); + File requestedFile = new File(absoluteDiskPath); + // async-profiler version 1.4 writes 'Started [cpu] profiling' to output file when profiler is + // running which gets replaced by final output. If final output is not ready yet, the file size + // will be <100 bytes (in all modes). + if (requestedFile.length() < 100) { + LOG.info("{} is incomplete. Sending auto-refresh header.", requestedFile); + String refreshUrl = req.getRequestURI(); + // Rebuild the query string (if we have one) + if (req.getQueryString() != null) { + refreshUrl += "?" 
+ sanitize(req.getQueryString()); + } + ProfileServlet.setResponseHeader(resp); + resp.setHeader("Refresh", REFRESH_PERIOD + ";" + refreshUrl); + resp.getWriter().write("This page will be auto-refreshed every " + REFRESH_PERIOD + + " seconds until the output file is ready. Redirecting to " + refreshUrl); + } else { + super.doGet(req, resp); + } + } + + static String sanitize(String input) { + // Basic test to try to avoid any XSS attacks or HTML content showing up. + // Duplicates HtmlQuoting a little, but avoid destroying ampersand. + if (ALPHA_NUMERIC.matcher(input).matches()) { + return input; + } + throw new RuntimeException("Non-alphanumeric data found in input, aborting."); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java new file mode 100644 index 00000000000..fc0ec7736ed --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfileServlet.java @@ -0,0 +1,394 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.http; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.util.ProcessUtils; + +/** + * Servlet that runs async-profiler as web-endpoint. + *
+ * Following options from async-profiler can be specified as query parameters:
+ * //  -e event          profiling event: cpu|alloc|lock|cache-misses etc.
+ * //  -d duration       run profiling for 'duration' seconds (integer)
+ * //  -i interval       sampling interval in nanoseconds (long)
+ * //  -j jstackdepth    maximum Java stack depth (integer)
+ * //  -b bufsize        frame buffer size (long)
+ * //  -t                profile different threads separately
+ * //  -s                simple class names instead of FQN
+ * //  -o fmt[,fmt...]   output format: summary|traces|flat|collapsed|svg|tree|jfr|html
+ * //  --width px        SVG width pixels (integer)
+ * //  --height px       SVG frame height pixels (integer)
+ * //  --minwidth px     skip frames smaller than px (double)
+ * //  --reverse         generate stack-reversed FlameGraph / Call tree
+ *
+ * Example:
+ * If the Namenode HTTP address is localhost:9870, and the ResourceManager HTTP address is
+ * localhost:8088, the ProfileServlet running with an async-profiler setup can be accessed at
+ * http://localhost:9870/prof and http://localhost:8088/prof for the Namenode and ResourceManager
+ * processes respectively.
+ * Deep dive into some params:
+ * - To collect a 10 second CPU profile of the current process, i.e. the Namenode (returns FlameGraph svg)
+ *   curl "http://localhost:9870/prof"
+ * - To collect a 10 second CPU profile of pid 12345 (returns FlameGraph svg)
+ *   curl "http://localhost:9870/prof?pid=12345" (for instance, provide the pid of a Datanode)
+ * - To collect a 30 second CPU profile of pid 12345 (returns FlameGraph svg)
+ *   curl "http://localhost:9870/prof?pid=12345&duration=30"
+ * - To collect a 1 minute CPU profile of the current process and output in tree format (html)
+ *   curl "http://localhost:9870/prof?output=tree&duration=60"
+ * - To collect a 10 second heap allocation profile of the current process (returns FlameGraph svg)
+ *   curl "http://localhost:9870/prof?event=alloc"
+ * - To collect a lock contention profile of the current process (returns FlameGraph svg)
+ *   curl "http://localhost:9870/prof?event=lock"
+ *
+ * Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events) + * // Perf events: + * // cpu + * // page-faults + * // context-switches + * // cycles + * // instructions + * // cache-references + * // cache-misses + * // branches + * // branch-misses + * // bus-cycles + * // L1-dcache-load-misses + * // LLC-load-misses + * // dTLB-load-misses + * // mem:breakpoint + * // trace:tracepoint + * // Java events: + * // alloc + * // lock + */ +@InterfaceAudience.Private +public class ProfileServlet extends HttpServlet { + + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(ProfileServlet.class); + + static final String ACCESS_CONTROL_ALLOW_METHODS = "Access-Control-Allow-Methods"; + static final String ACCESS_CONTROL_ALLOW_ORIGIN = "Access-Control-Allow-Origin"; + private static final String ALLOWED_METHODS = "GET"; + private static final String CONTENT_TYPE_TEXT = "text/plain; charset=utf-8"; + private static final String ASYNC_PROFILER_HOME_ENV = "ASYNC_PROFILER_HOME"; + private static final String ASYNC_PROFILER_HOME_SYSTEM_PROPERTY = "async.profiler.home"; + private static final String PROFILER_SCRIPT = "/profiler.sh"; + private static final int DEFAULT_DURATION_SECONDS = 10; + private static final AtomicInteger ID_GEN = new AtomicInteger(0); + + static final String OUTPUT_DIR = System.getProperty("java.io.tmpdir") + "/prof-output-hadoop"; + + private enum Event { + + CPU("cpu"), + ALLOC("alloc"), + LOCK("lock"), + PAGE_FAULTS("page-faults"), + CONTEXT_SWITCHES("context-switches"), + CYCLES("cycles"), + INSTRUCTIONS("instructions"), + CACHE_REFERENCES("cache-references"), + CACHE_MISSES("cache-misses"), + BRANCHES("branches"), + BRANCH_MISSES("branch-misses"), + BUS_CYCLES("bus-cycles"), + L1_DCACHE_LOAD_MISSES("L1-dcache-load-misses"), + LLC_LOAD_MISSES("LLC-load-misses"), + DTLB_LOAD_MISSES("dTLB-load-misses"), + MEM_BREAKPOINT("mem:breakpoint"), + TRACE_TRACEPOINT("trace:tracepoint"); + + private final String internalName; + + Event(final String internalName) { + this.internalName = internalName; + } + + public String getInternalName() { + return internalName; + } + + public static Event fromInternalName(final String name) { + for (Event event : values()) { + if (event.getInternalName().equalsIgnoreCase(name)) { + return event; + } + } + + return null; + } + } + + private enum Output { + SUMMARY, + TRACES, + FLAT, + COLLAPSED, + // No SVG in 2.x asyncprofiler. + SVG, + TREE, + JFR, + // In 2.x asyncprofiler, this is how you get flamegraphs. 
+ HTML + } + + private final Lock profilerLock = new ReentrantLock(); + private transient volatile Process process; + private final String asyncProfilerHome; + private Integer pid; + + public ProfileServlet() { + this.asyncProfilerHome = getAsyncProfilerHome(); + this.pid = ProcessUtils.getPid(); + LOG.info("Servlet process PID: {} asyncProfilerHome: {}", pid, asyncProfilerHome); + } + + @Override + protected void doGet(final HttpServletRequest req, final HttpServletResponse resp) + throws IOException { + if (!HttpServer2.isInstrumentationAccessAllowed(getServletContext(), req, resp)) { + resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED); + setResponseHeader(resp); + resp.getWriter().write("Unauthorized: Instrumentation access is not allowed!"); + return; + } + + // make sure async profiler home is set + if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) { + resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + setResponseHeader(resp); + resp.getWriter().write("ASYNC_PROFILER_HOME env is not set.\n\n" + + "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n" + + "environment is properly configured."); + return; + } + + // if pid is explicitly specified, use it else default to current process + pid = getInteger(req, "pid", pid); + + // if pid is not specified in query param and if current process pid cannot be determined + if (pid == null) { + resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + setResponseHeader(resp); + resp.getWriter().write( + "'pid' query parameter unspecified or unable to determine PID of current process."); + return; + } + + final int duration = getInteger(req, "duration", DEFAULT_DURATION_SECONDS); + final Output output = getOutput(req); + final Event event = getEvent(req); + final Long interval = getLong(req, "interval"); + final Integer jstackDepth = getInteger(req, "jstackdepth", null); + final Long bufsize = getLong(req, "bufsize"); + final boolean thread = req.getParameterMap().containsKey("thread"); + final boolean simple = req.getParameterMap().containsKey("simple"); + final Integer width = getInteger(req, "width", null); + final Integer height = getInteger(req, "height", null); + final Double minwidth = getMinWidth(req); + final boolean reverse = req.getParameterMap().containsKey("reverse"); + + if (process == null || !process.isAlive()) { + try { + int lockTimeoutSecs = 3; + if (profilerLock.tryLock(lockTimeoutSecs, TimeUnit.SECONDS)) { + try { + File outputFile = new File(OUTPUT_DIR, + "async-prof-pid-" + pid + "-" + event.name().toLowerCase() + "-" + ID_GEN + .incrementAndGet() + "." 
+ output.name().toLowerCase()); + List cmd = new ArrayList<>(); + cmd.add(asyncProfilerHome + PROFILER_SCRIPT); + cmd.add("-e"); + cmd.add(event.getInternalName()); + cmd.add("-d"); + cmd.add("" + duration); + cmd.add("-o"); + cmd.add(output.name().toLowerCase()); + cmd.add("-f"); + cmd.add(outputFile.getAbsolutePath()); + if (interval != null) { + cmd.add("-i"); + cmd.add(interval.toString()); + } + if (jstackDepth != null) { + cmd.add("-j"); + cmd.add(jstackDepth.toString()); + } + if (bufsize != null) { + cmd.add("-b"); + cmd.add(bufsize.toString()); + } + if (thread) { + cmd.add("-t"); + } + if (simple) { + cmd.add("-s"); + } + if (width != null) { + cmd.add("--width"); + cmd.add(width.toString()); + } + if (height != null) { + cmd.add("--height"); + cmd.add(height.toString()); + } + if (minwidth != null) { + cmd.add("--minwidth"); + cmd.add(minwidth.toString()); + } + if (reverse) { + cmd.add("--reverse"); + } + cmd.add(pid.toString()); + process = ProcessUtils.runCmdAsync(cmd); + + // set response and set refresh header to output location + setResponseHeader(resp); + resp.setStatus(HttpServletResponse.SC_ACCEPTED); + String relativeUrl = "/prof-output-hadoop/" + outputFile.getName(); + resp.getWriter().write("Started [" + event.getInternalName() + + "] profiling. This page will automatically redirect to " + relativeUrl + " after " + + duration + " seconds. " + + "If empty diagram and Linux 4.6+, see 'Basic Usage' section on the Async " + + "Profiler Home Page, https://github.com/jvm-profiling-tools/async-profiler." + + "\n\nCommand:\n" + Joiner.on(" ").join(cmd)); + + // to avoid auto-refresh by ProfileOutputServlet, refreshDelay can be specified + // via url param + int refreshDelay = getInteger(req, "refreshDelay", 0); + + // instead of sending redirect, set auto-refresh so that browsers will refresh + // with redirected url + resp.setHeader("Refresh", (duration + refreshDelay) + ";" + relativeUrl); + resp.getWriter().flush(); + } finally { + profilerLock.unlock(); + } + } else { + setResponseHeader(resp); + resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + resp.getWriter() + .write("Unable to acquire lock. Another instance of profiler might be running."); + LOG.warn("Unable to acquire lock in {} seconds. 
Another instance of profiler might be" + + " running.", lockTimeoutSecs); + } + } catch (InterruptedException e) { + LOG.warn("Interrupted while acquiring profile lock.", e); + resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + } + } else { + setResponseHeader(resp); + resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + resp.getWriter().write("Another instance of profiler is already running."); + } + } + + private Integer getInteger(final HttpServletRequest req, final String param, + final Integer defaultValue) { + final String value = req.getParameter(param); + if (value != null) { + try { + return Integer.valueOf(value); + } catch (NumberFormatException e) { + return defaultValue; + } + } + return defaultValue; + } + + private Long getLong(final HttpServletRequest req, final String param) { + final String value = req.getParameter(param); + if (value != null) { + try { + return Long.valueOf(value); + } catch (NumberFormatException e) { + return null; + } + } + return null; + } + + private Double getMinWidth(final HttpServletRequest req) { + final String value = req.getParameter("minwidth"); + if (value != null) { + try { + return Double.valueOf(value); + } catch (NumberFormatException e) { + return null; + } + } + return null; + } + + private Event getEvent(final HttpServletRequest req) { + final String eventArg = req.getParameter("event"); + if (eventArg != null) { + Event event = Event.fromInternalName(eventArg); + return event == null ? Event.CPU : event; + } + return Event.CPU; + } + + private Output getOutput(final HttpServletRequest req) { + final String outputArg = req.getParameter("output"); + if (req.getParameter("output") != null) { + try { + return Output.valueOf(outputArg.trim().toUpperCase()); + } catch (IllegalArgumentException e) { + return Output.HTML; + } + } + return Output.HTML; + } + + static void setResponseHeader(final HttpServletResponse response) { + response.setHeader(ACCESS_CONTROL_ALLOW_METHODS, ALLOWED_METHODS); + response.setHeader(ACCESS_CONTROL_ALLOW_ORIGIN, "*"); + response.setContentType(CONTENT_TYPE_TEXT); + } + + static String getAsyncProfilerHome() { + String asyncProfilerHome = System.getenv(ASYNC_PROFILER_HOME_ENV); + // if ENV is not set, see if -Dasync.profiler.home=/path/to/async/profiler/home is set + if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) { + asyncProfilerHome = System.getProperty(ASYNC_PROFILER_HOME_SYSTEM_PROPERTY); + } + + return asyncProfilerHome; + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java new file mode 100644 index 00000000000..459485ffa5b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/http/ProfilerDisabledServlet.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.http; + +import java.io.IOException; +import javax.servlet.http.HttpServlet; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Servlet for disabled async-profiler. + */ +@InterfaceAudience.Private +public class ProfilerDisabledServlet extends HttpServlet { + + @Override + protected void doGet(final HttpServletRequest req, final HttpServletResponse resp) + throws IOException { + resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); + ProfileServlet.setResponseHeader(resp); + resp.getWriter().write("The profiler servlet was disabled at startup.\n\n" + + "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n" + + "environment is properly configured."); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java new file mode 100644 index 00000000000..cf653b9c912 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProcessUtils.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Process related utilities. + */ +@InterfaceAudience.Private +public final class ProcessUtils { + + private static final Logger LOG = LoggerFactory.getLogger(ProcessUtils.class); + + private ProcessUtils() { + // no-op + } + + public static Integer getPid() { + // JVM_PID can be exported in service start script + String pidStr = System.getenv("JVM_PID"); + + // In case if it is not set correctly, fallback to mxbean which is implementation specific. 
+ if (pidStr == null || pidStr.trim().isEmpty()) { + String name = ManagementFactory.getRuntimeMXBean().getName(); + if (name != null) { + int idx = name.indexOf("@"); + if (idx != -1) { + pidStr = name.substring(0, name.indexOf("@")); + } + } + } + try { + if (pidStr != null) { + return Integer.valueOf(pidStr); + } + } catch (NumberFormatException ignored) { + // ignore + } + return null; + } + + public static Process runCmdAsync(List cmd) { + try { + LOG.info("Running command async: {}", cmd); + return new ProcessBuilder(cmd).inheritIO().start(); + } catch (IOException e) { + throw new IllegalStateException(e); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 27c86bbc9ac..dc2a6ffd837 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -78,7 +78,7 @@ false Indicates if administrator ACLs are required to access - instrumentation servlets (JMX, METRICS, CONF, STACKS). + instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF). diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md b/hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md new file mode 100644 index 00000000000..4b93cc219a5 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/AsyncProfilerServlet.md @@ -0,0 +1,145 @@ + + +Async Profiler Servlet for Hadoop +======================================== + + + +Purpose +------- + +This document describes how to configure and use async profiler +with Hadoop applications. +Async profiler is a low overhead sampling profiler for Java that +does not suffer from Safepoint bias problem. It features +HotSpot-specific APIs to collect stack traces and to track memory +allocations. The profiler works with OpenJDK, Oracle JDK and other +Java runtimes based on the HotSpot JVM. + +Hadoop profiler servlet supports Async Profiler major versions +1.x and 2.x. + +Prerequisites +------------- + +Make sure Hadoop is installed, configured and setup correctly. +For more information see: + +* [Single Node Setup](./SingleCluster.html) for first-time users. +* [Cluster Setup](./ClusterSetup.html) for large, distributed clusters. + +Go to https://github.com/jvm-profiling-tools/async-profiler, +download a release appropriate for your platform, and install +on every cluster host. + +Set `ASYNC_PROFILER_HOME` in the environment (put it in hadoop-env.sh) +to the root directory of the async-profiler install location, or pass +it on the Hadoop daemon's command line as a system property as +`-Dasync.profiler.home=/path/to/async-profiler`. + + +Usage +-------- + +Once the prerequisites have been satisfied, access to the async-profiler +is available by using Namenode or ResourceManager UI. + +Following options from async-profiler can be specified as query paramater. +* `-e event` profiling event: cpu|alloc|lock|cache-misses etc. 
+* `-d duration` run profiling for 'duration' seconds (integer) +* `-i interval` sampling interval in nanoseconds (long) +* `-j jstackdepth` maximum Java stack depth (integer) +* `-b bufsize` frame buffer size (long) +* `-t` profile different threads separately +* `-s` simple class names instead of FQN +* `-o fmt[,fmt...]` output format: summary|traces|flat|collapsed|svg|tree|jfr|html +* `--width px` SVG width pixels (integer) +* `--height px` SVG frame height pixels (integer) +* `--minwidth px` skip frames smaller than px (double) +* `--reverse` generate stack-reversed FlameGraph / Call tree + + +Example: +If Namenode http address is localhost:9870, and ResourceManager http +address is localhost:8088, ProfileServlet running with async-profiler +setup can be accessed with http://localhost:9870/prof and +http://localhost:8088/prof for Namenode and ResourceManager processes +respectively. + +Diving deep into some params: + +* To collect 10 second CPU profile of current process + (returns FlameGraph svg) + * `curl http://localhost:9870/prof` (FlameGraph svg for Namenode) + * `curl http://localhost:8088/prof` (FlameGraph svg for ResourceManager) +* To collect 10 second CPU profile of pid 12345 (returns FlameGraph svg) + * `curl http://localhost:9870/prof?pid=12345` (For instance, provide + pid of Datanode here) +* To collect 30 second CPU profile of pid 12345 (returns FlameGraph svg) + * `curl http://localhost:9870/prof?pid=12345&duration=30` +* To collect 1 minute CPU profile of current process and output in tree + format (html) + * `curl http://localhost:9870/prof?output=tree&duration=60` +* To collect 10 second heap allocation profile of current process + (returns FlameGraph svg) + * `curl http://localhost:9870/prof?event=alloc` +* To collect lock contention profile of current process + (returns FlameGraph svg) + * `curl http://localhost:9870/prof?event=lock` + + +The following event types are supported by async-profiler. +Use the 'event' parameter to specify. Default is 'cpu'. +Not all operating systems will support all types. + +Perf events: + +* cpu +* page-faults +* context-switches +* cycles +* instructions +* cache-references +* cache-misses +* branches +* branch-misses +* bus-cycles +* L1-dcache-load-misses +* LLC-load-misses +* dTLB-load-misses + +Java events: + +* alloc +* lock + +The following output formats are supported. +Use the 'output' parameter to specify. Default is 'flamegraph'. + +Output formats: + +* summary: A dump of basic profiling statistics. +* traces: Call traces. +* flat: Flat profile (top N hot methods). +* collapsed: Collapsed call traces in the format used by FlameGraph + script. This is a collection of call stacks, where each line is a + semicolon separated list of frames followed by a counter. +* svg: FlameGraph in SVG format. +* tree: Call tree in HTML format. +* jfr: Call traces in Java Flight Recorder format. + +The 'duration' parameter specifies how long to collect trace data +before generating output, specified in seconds. The default is 10 seconds. 
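
For callers that drive the endpoint from code rather than `curl`, the sketch below
(the class name, hostname, port and query values are illustrative assumptions, not part
of this patch) triggers a CPU profile and prints the servlet response, which names the
generated file served under `/prof-output-hadoop/`:

```java
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

public class ProfTriggerSketch {
  public static void main(String[] args) throws Exception {
    // Assumed Namenode HTTP address; any daemon built on HttpServer2 exposes the same endpoint.
    URL url = new URL("http://localhost:9870/prof?event=cpu&duration=30&output=html");
    HttpURLConnection conn = (HttpURLConnection) url.openConnection();
    // ProfileServlet answers 202 Accepted as soon as profiling has been started.
    System.out.println("HTTP " + conn.getResponseCode());
    try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        // The body names the output file under /prof-output-hadoop/ and echoes the command;
        // in a browser, the Refresh header redirects there once the duration elapses.
        System.out.println(line);
      }
    }
    conn.disconnect();
  }
}
```

The generated file stays incomplete until async-profiler finishes, so fetch it only after
the requested duration (plus a small margin) has passed.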
+ diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java new file mode 100644 index 00000000000..ce068bb6f1c --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestDisabledProfileServlet.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.http; + +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import javax.servlet.http.HttpServletResponse; + +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Small test to cover default disabled prof endpoint. + */ +public class TestDisabledProfileServlet extends HttpServerFunctionalTest { + + private static HttpServer2 server; + private static URL baseUrl; + + @BeforeClass + public static void setup() throws Exception { + server = createTestServer(); + server.start(); + baseUrl = getServerURL(server); + } + + @AfterClass + public static void cleanup() throws Exception { + server.stop(); + } + + @Test + public void testQuery() throws Exception { + try { + readOutput(new URL(baseUrl, "/prof")); + throw new IllegalStateException("Should not reach here"); + } catch (IOException e) { + assertTrue(e.getMessage() + .contains(HttpServletResponse.SC_INTERNAL_SERVER_ERROR + " for URL: " + baseUrl)); + } + + // CORS headers + HttpURLConnection conn = + (HttpURLConnection) new URL(baseUrl, "/prof").openConnection(); + assertEquals("GET", conn.getHeaderField(ProfileServlet.ACCESS_CONTROL_ALLOW_METHODS)); + assertNotNull(conn.getHeaderField(ProfileServlet.ACCESS_CONTROL_ALLOW_ORIGIN)); + conn.disconnect(); + } + + @Test + public void testRequestMethods() throws IOException { + HttpURLConnection connection = getConnection("PUT"); + assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED, + connection.getResponseCode()); + connection.disconnect(); + connection = getConnection("POST"); + assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED, + connection.getResponseCode()); + connection.disconnect(); + connection = getConnection("DELETE"); + assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED, + connection.getResponseCode()); + connection.disconnect(); + connection = getConnection("GET"); + assertEquals("Unexpected response code", HttpServletResponse.SC_INTERNAL_SERVER_ERROR, + connection.getResponseCode()); + connection.disconnect(); + } + + private HttpURLConnection getConnection(final String method) throws IOException { + URL url = new URL(baseUrl, "/prof"); + HttpURLConnection conn = 
(HttpURLConnection) url.openConnection(); + conn.setRequestMethod(method); + return conn; + } + +} diff --git a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm index 6ea21d5cf40..09375d5aab5 100644 --- a/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm +++ b/hadoop-common-project/hadoop-kms/src/site/markdown/index.md.vm @@ -1208,9 +1208,10 @@ Name | Description /logs | Display log files /stacks | Display JVM stacks /static/index.html | The static home page +/prof | Async Profiler endpoint To control the access to servlet `/conf`, `/jmx`, `/logLevel`, `/logs`, -and `/stacks`, configure the following properties in `kms-site.xml`: +`/stacks` and `/prof`, configure the following properties in `kms-site.xml`: ```xml @@ -1224,7 +1225,7 @@ and `/stacks`, configure the following properties in `kms-site.xml`: true Indicates if administrator ACLs are required to access - instrumentation servlets (JMX, METRICS, CONF, STACKS). + instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF). diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm index 2d0a5b8cd2e..e97de0275ca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/site/markdown/ServerSetup.md.vm @@ -162,9 +162,10 @@ Name | Description /logs | Display log files /stacks | Display JVM stacks /static/index.html | The static home page +/prof | Async Profiler endpoint To control the access to servlet `/conf`, `/jmx`, `/logLevel`, `/logs`, -and `/stacks`, configure the following properties in `httpfs-site.xml`: +`/stacks` and `/prof`, configure the following properties in `httpfs-site.xml`: ```xml @@ -178,7 +179,7 @@ and `/stacks`, configure the following properties in `httpfs-site.xml`: true Indicates if administrator ACLs are required to access - instrumentation servlets (JMX, METRICS, CONF, STACKS). + instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF). diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index a150385048f..6c0233877b0 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -69,6 +69,7 @@ +
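
As a closing illustration, here is a minimal sketch of using the new `ProcessUtils` helper
added by this patch (the wrapper class and the echoed command are assumptions for
demonstration only; ProfileServlet itself uses the same calls to launch `profiler.sh`
against the target pid):

```java
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.util.ProcessUtils;

public class ProcessUtilsSketch {
  public static void main(String[] args) {
    // getPid() prefers the JVM_PID environment variable (exported by Hadoop start scripts)
    // and falls back to parsing the RuntimeMXBean name; it may return null.
    Integer pid = ProcessUtils.getPid();
    System.out.println("Current PID: " + pid);

    // runCmdAsync() starts the command without waiting for it and inherits stdio.
    // Assumes a Unix-like environment where 'echo' is on the PATH.
    List<String> cmd = Arrays.asList("echo", "hello from ProcessUtils");
    Process process = ProcessUtils.runCmdAsync(cmd);
    System.out.println("Started: " + process.isAlive());
  }
}
```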