HADOOP-18055. Async Profiler endpoint for Hadoop daemons (#3824)
Reviewed-by: Akira Ajisaka <aajisaka@apache.org>
This commit is contained in:
parent
da0a6ba1ce
commit
f64fda0f00
|
@ -27,6 +27,7 @@ import java.net.InetSocketAddress;
|
|||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.List;
|
||||
|
@ -771,6 +772,26 @@ public final class HttpServer2 implements FilterContainer {
|
|||
|
||||
addDefaultServlets();
|
||||
addPrometheusServlet(conf);
|
||||
addAsyncProfilerServlet(contexts);
|
||||
}
|
||||
|
||||
private void addAsyncProfilerServlet(ContextHandlerCollection contexts) throws IOException {
|
||||
final String asyncProfilerHome = ProfileServlet.getAsyncProfilerHome();
|
||||
if (asyncProfilerHome != null && !asyncProfilerHome.trim().isEmpty()) {
|
||||
addServlet("prof", "/prof", ProfileServlet.class);
|
||||
Path tmpDir = Paths.get(ProfileServlet.OUTPUT_DIR);
|
||||
if (Files.notExists(tmpDir)) {
|
||||
Files.createDirectories(tmpDir);
|
||||
}
|
||||
ServletContextHandler genCtx = new ServletContextHandler(contexts, "/prof-output-hadoop");
|
||||
genCtx.addServlet(ProfileOutputServlet.class, "/*");
|
||||
genCtx.setResourceBase(tmpDir.toAbsolutePath().toString());
|
||||
genCtx.setDisplayName("prof-output-hadoop");
|
||||
} else {
|
||||
addServlet("prof", "/prof", ProfilerDisabledServlet.class);
|
||||
LOG.info("ASYNC_PROFILER_HOME environment variable and async.profiler.home system property "
|
||||
+ "not specified. Disabling /prof endpoint.");
|
||||
}
|
||||
}
|
||||
|
||||
private void addPrometheusServlet(Configuration conf) {
|
||||
|
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.http;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.regex.Pattern;
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.eclipse.jetty.servlet.DefaultServlet;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* Servlet to serve files generated by {@link ProfileServlet}.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class ProfileOutputServlet extends DefaultServlet {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(ProfileOutputServlet.class);
|
||||
// default refresh period 2 sec
|
||||
private static final int REFRESH_PERIOD = 2;
|
||||
// Alphanumeric characters, plus percent (url-encoding), equals, ampersand, dot and hyphen
|
||||
private static final Pattern ALPHA_NUMERIC = Pattern.compile("[a-zA-Z0-9%=&.\\-]*");
|
||||
|
||||
@Override
|
||||
protected void doGet(final HttpServletRequest req, final HttpServletResponse resp)
|
||||
throws ServletException, IOException {
|
||||
if (!HttpServer2.isInstrumentationAccessAllowed(getServletContext(), req, resp)) {
|
||||
resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED);
|
||||
ProfileServlet.setResponseHeader(resp);
|
||||
resp.getWriter().write("Unauthorized: Instrumentation access is not allowed!");
|
||||
return;
|
||||
}
|
||||
|
||||
String absoluteDiskPath = getServletContext().getRealPath(req.getPathInfo());
|
||||
File requestedFile = new File(absoluteDiskPath);
|
||||
// async-profiler version 1.4 writes 'Started [cpu] profiling' to output file when profiler is
|
||||
// running which gets replaced by final output. If final output is not ready yet, the file size
|
||||
// will be <100 bytes (in all modes).
|
||||
if (requestedFile.length() < 100) {
|
||||
LOG.info("{} is incomplete. Sending auto-refresh header.", requestedFile);
|
||||
String refreshUrl = req.getRequestURI();
|
||||
// Rebuild the query string (if we have one)
|
||||
if (req.getQueryString() != null) {
|
||||
refreshUrl += "?" + sanitize(req.getQueryString());
|
||||
}
|
||||
ProfileServlet.setResponseHeader(resp);
|
||||
resp.setHeader("Refresh", REFRESH_PERIOD + ";" + refreshUrl);
|
||||
resp.getWriter().write("This page will be auto-refreshed every " + REFRESH_PERIOD
|
||||
+ " seconds until the output file is ready. Redirecting to " + refreshUrl);
|
||||
} else {
|
||||
super.doGet(req, resp);
|
||||
}
|
||||
}
|
||||
|
||||
static String sanitize(String input) {
|
||||
// Basic test to try to avoid any XSS attacks or HTML content showing up.
|
||||
// Duplicates HtmlQuoting a little, but avoid destroying ampersand.
|
||||
if (ALPHA_NUMERIC.matcher(input).matches()) {
|
||||
return input;
|
||||
}
|
||||
throw new RuntimeException("Non-alphanumeric data found in input, aborting.");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,394 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.http;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
import javax.servlet.http.HttpServlet;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.apache.hadoop.thirdparty.com.google.common.base.Joiner;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.util.ProcessUtils;
|
||||
|
||||
/**
|
||||
* Servlet that runs async-profiler as web-endpoint.
|
||||
* <p>
|
||||
* The following options from async-profiler can be specified as query parameters.
|
||||
* // -e event profiling event: cpu|alloc|lock|cache-misses etc.
|
||||
* // -d duration run profiling for 'duration' seconds (integer)
|
||||
* // -i interval sampling interval in nanoseconds (long)
|
||||
* // -j jstackdepth maximum Java stack depth (integer)
|
||||
* // -b bufsize frame buffer size (long)
|
||||
* // -t profile different threads separately
|
||||
* // -s simple class names instead of FQN
|
||||
* // -o fmt[,fmt...] output format: summary|traces|flat|collapsed|svg|tree|jfr|html
|
||||
* // --width px SVG width pixels (integer)
|
||||
* // --height px SVG frame height pixels (integer)
|
||||
* // --minwidth px skip frames smaller than px (double)
|
||||
* // --reverse generate stack-reversed FlameGraph / Call tree
|
||||
* <p>
|
||||
* Example:
|
||||
* If Namenode http address is localhost:9870, and ResourceManager http address is localhost:8088,
|
||||
* ProfileServlet running with async-profiler setup can be accessed with
|
||||
* http://localhost:9870/prof and http://localhost:8088/prof for Namenode and ResourceManager
|
||||
* processes respectively.
|
||||
* Deep dive into some params:
|
||||
* - To collect 10 second CPU profile of current process i.e. Namenode (returns FlameGraph svg)
|
||||
* curl "http://localhost:9870/prof"
|
||||
* - To collect 10 second CPU profile of pid 12345 (returns FlameGraph svg)
|
||||
* curl "http://localhost:9870/prof?pid=12345" (For instance, provide pid of Datanode)
|
||||
* - To collect 30 second CPU profile of pid 12345 (returns FlameGraph svg)
|
||||
* curl "http://localhost:9870/prof?pid=12345&duration=30"
|
||||
* - To collect 1 minute CPU profile of current process and output in tree format (html)
|
||||
* curl "http://localhost:9870/prof?output=tree&duration=60"
|
||||
* - To collect 10 second heap allocation profile of current process (returns FlameGraph svg)
|
||||
* curl "http://localhost:9870/prof?event=alloc"
|
||||
* - To collect lock contention profile of current process (returns FlameGraph svg)
|
||||
* curl "http://localhost:9870/prof?event=lock"
|
||||
* <p>
|
||||
* Following event types are supported (default is 'cpu') (NOTE: not all OS'es support all events)
|
||||
* // Perf events:
|
||||
* // cpu
|
||||
* // page-faults
|
||||
* // context-switches
|
||||
* // cycles
|
||||
* // instructions
|
||||
* // cache-references
|
||||
* // cache-misses
|
||||
* // branches
|
||||
* // branch-misses
|
||||
* // bus-cycles
|
||||
* // L1-dcache-load-misses
|
||||
* // LLC-load-misses
|
||||
* // dTLB-load-misses
|
||||
* // mem:breakpoint
|
||||
* // trace:tracepoint
|
||||
* // Java events:
|
||||
* // alloc
|
||||
* // lock
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class ProfileServlet extends HttpServlet {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
private static final Logger LOG = LoggerFactory.getLogger(ProfileServlet.class);
|
||||
|
||||
static final String ACCESS_CONTROL_ALLOW_METHODS = "Access-Control-Allow-Methods";
|
||||
static final String ACCESS_CONTROL_ALLOW_ORIGIN = "Access-Control-Allow-Origin";
|
||||
private static final String ALLOWED_METHODS = "GET";
|
||||
private static final String CONTENT_TYPE_TEXT = "text/plain; charset=utf-8";
|
||||
private static final String ASYNC_PROFILER_HOME_ENV = "ASYNC_PROFILER_HOME";
|
||||
private static final String ASYNC_PROFILER_HOME_SYSTEM_PROPERTY = "async.profiler.home";
|
||||
private static final String PROFILER_SCRIPT = "/profiler.sh";
|
||||
private static final int DEFAULT_DURATION_SECONDS = 10;
|
||||
private static final AtomicInteger ID_GEN = new AtomicInteger(0);
|
||||
|
||||
static final String OUTPUT_DIR = System.getProperty("java.io.tmpdir") + "/prof-output-hadoop";
|
||||
|
||||
private enum Event {
|
||||
|
||||
CPU("cpu"),
|
||||
ALLOC("alloc"),
|
||||
LOCK("lock"),
|
||||
PAGE_FAULTS("page-faults"),
|
||||
CONTEXT_SWITCHES("context-switches"),
|
||||
CYCLES("cycles"),
|
||||
INSTRUCTIONS("instructions"),
|
||||
CACHE_REFERENCES("cache-references"),
|
||||
CACHE_MISSES("cache-misses"),
|
||||
BRANCHES("branches"),
|
||||
BRANCH_MISSES("branch-misses"),
|
||||
BUS_CYCLES("bus-cycles"),
|
||||
L1_DCACHE_LOAD_MISSES("L1-dcache-load-misses"),
|
||||
LLC_LOAD_MISSES("LLC-load-misses"),
|
||||
DTLB_LOAD_MISSES("dTLB-load-misses"),
|
||||
MEM_BREAKPOINT("mem:breakpoint"),
|
||||
TRACE_TRACEPOINT("trace:tracepoint");
|
||||
|
||||
private final String internalName;
|
||||
|
||||
Event(final String internalName) {
|
||||
this.internalName = internalName;
|
||||
}
|
||||
|
||||
public String getInternalName() {
|
||||
return internalName;
|
||||
}
|
||||
|
||||
public static Event fromInternalName(final String name) {
|
||||
for (Event event : values()) {
|
||||
if (event.getInternalName().equalsIgnoreCase(name)) {
|
||||
return event;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private enum Output {
|
||||
SUMMARY,
|
||||
TRACES,
|
||||
FLAT,
|
||||
COLLAPSED,
|
||||
// No SVG in 2.x asyncprofiler.
|
||||
SVG,
|
||||
TREE,
|
||||
JFR,
|
||||
// In 2.x asyncprofiler, this is how you get flamegraphs.
|
||||
HTML
|
||||
}
|
||||
|
||||
private final Lock profilerLock = new ReentrantLock();
|
||||
private transient volatile Process process;
|
||||
private final String asyncProfilerHome;
|
||||
private Integer pid;
|
||||
|
||||
public ProfileServlet() {
|
||||
this.asyncProfilerHome = getAsyncProfilerHome();
|
||||
this.pid = ProcessUtils.getPid();
|
||||
LOG.info("Servlet process PID: {} asyncProfilerHome: {}", pid, asyncProfilerHome);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doGet(final HttpServletRequest req, final HttpServletResponse resp)
|
||||
throws IOException {
|
||||
if (!HttpServer2.isInstrumentationAccessAllowed(getServletContext(), req, resp)) {
|
||||
resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED);
|
||||
setResponseHeader(resp);
|
||||
resp.getWriter().write("Unauthorized: Instrumentation access is not allowed!");
|
||||
return;
|
||||
}
|
||||
|
||||
// make sure async profiler home is set
|
||||
if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) {
|
||||
resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
|
||||
setResponseHeader(resp);
|
||||
resp.getWriter().write("ASYNC_PROFILER_HOME env is not set.\n\n"
|
||||
+ "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n"
|
||||
+ "environment is properly configured.");
|
||||
return;
|
||||
}
|
||||
|
||||
// if pid is explicitly specified, use it else default to current process
|
||||
pid = getInteger(req, "pid", pid);
|
||||
|
||||
// if pid is not specified in query param and if current process pid cannot be determined
|
||||
if (pid == null) {
|
||||
resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
|
||||
setResponseHeader(resp);
|
||||
resp.getWriter().write(
|
||||
"'pid' query parameter unspecified or unable to determine PID of current process.");
|
||||
return;
|
||||
}
|
||||
|
||||
final int duration = getInteger(req, "duration", DEFAULT_DURATION_SECONDS);
|
||||
final Output output = getOutput(req);
|
||||
final Event event = getEvent(req);
|
||||
final Long interval = getLong(req, "interval");
|
||||
final Integer jstackDepth = getInteger(req, "jstackdepth", null);
|
||||
final Long bufsize = getLong(req, "bufsize");
|
||||
final boolean thread = req.getParameterMap().containsKey("thread");
|
||||
final boolean simple = req.getParameterMap().containsKey("simple");
|
||||
final Integer width = getInteger(req, "width", null);
|
||||
final Integer height = getInteger(req, "height", null);
|
||||
final Double minwidth = getMinWidth(req);
|
||||
final boolean reverse = req.getParameterMap().containsKey("reverse");
|
||||
|
||||
if (process == null || !process.isAlive()) {
|
||||
try {
|
||||
int lockTimeoutSecs = 3;
|
||||
if (profilerLock.tryLock(lockTimeoutSecs, TimeUnit.SECONDS)) {
|
||||
try {
|
||||
File outputFile = new File(OUTPUT_DIR,
|
||||
"async-prof-pid-" + pid + "-" + event.name().toLowerCase() + "-" + ID_GEN
|
||||
.incrementAndGet() + "." + output.name().toLowerCase());
|
||||
List<String> cmd = new ArrayList<>();
|
||||
cmd.add(asyncProfilerHome + PROFILER_SCRIPT);
|
||||
cmd.add("-e");
|
||||
cmd.add(event.getInternalName());
|
||||
cmd.add("-d");
|
||||
cmd.add("" + duration);
|
||||
cmd.add("-o");
|
||||
cmd.add(output.name().toLowerCase());
|
||||
cmd.add("-f");
|
||||
cmd.add(outputFile.getAbsolutePath());
|
||||
if (interval != null) {
|
||||
cmd.add("-i");
|
||||
cmd.add(interval.toString());
|
||||
}
|
||||
if (jstackDepth != null) {
|
||||
cmd.add("-j");
|
||||
cmd.add(jstackDepth.toString());
|
||||
}
|
||||
if (bufsize != null) {
|
||||
cmd.add("-b");
|
||||
cmd.add(bufsize.toString());
|
||||
}
|
||||
if (thread) {
|
||||
cmd.add("-t");
|
||||
}
|
||||
if (simple) {
|
||||
cmd.add("-s");
|
||||
}
|
||||
if (width != null) {
|
||||
cmd.add("--width");
|
||||
cmd.add(width.toString());
|
||||
}
|
||||
if (height != null) {
|
||||
cmd.add("--height");
|
||||
cmd.add(height.toString());
|
||||
}
|
||||
if (minwidth != null) {
|
||||
cmd.add("--minwidth");
|
||||
cmd.add(minwidth.toString());
|
||||
}
|
||||
if (reverse) {
|
||||
cmd.add("--reverse");
|
||||
}
|
||||
cmd.add(pid.toString());
|
||||
process = ProcessUtils.runCmdAsync(cmd);
|
||||
|
||||
// set response and set refresh header to output location
|
||||
setResponseHeader(resp);
|
||||
resp.setStatus(HttpServletResponse.SC_ACCEPTED);
|
||||
String relativeUrl = "/prof-output-hadoop/" + outputFile.getName();
|
||||
resp.getWriter().write("Started [" + event.getInternalName()
|
||||
+ "] profiling. This page will automatically redirect to " + relativeUrl + " after "
|
||||
+ duration + " seconds. "
|
||||
+ "If empty diagram and Linux 4.6+, see 'Basic Usage' section on the Async "
|
||||
+ "Profiler Home Page, https://github.com/jvm-profiling-tools/async-profiler."
|
||||
+ "\n\nCommand:\n" + Joiner.on(" ").join(cmd));
|
||||
|
||||
// to avoid auto-refresh by ProfileOutputServlet, refreshDelay can be specified
|
||||
// via url param
|
||||
int refreshDelay = getInteger(req, "refreshDelay", 0);
|
||||
|
||||
// instead of sending redirect, set auto-refresh so that browsers will refresh
|
||||
// with redirected url
|
||||
resp.setHeader("Refresh", (duration + refreshDelay) + ";" + relativeUrl);
|
||||
resp.getWriter().flush();
|
||||
} finally {
|
||||
profilerLock.unlock();
|
||||
}
|
||||
} else {
|
||||
setResponseHeader(resp);
|
||||
resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
|
||||
resp.getWriter()
|
||||
.write("Unable to acquire lock. Another instance of profiler might be running.");
|
||||
LOG.warn("Unable to acquire lock in {} seconds. Another instance of profiler might be"
|
||||
+ " running.", lockTimeoutSecs);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
LOG.warn("Interrupted while acquiring profile lock.", e);
|
||||
resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
|
||||
}
|
||||
} else {
|
||||
setResponseHeader(resp);
|
||||
resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
|
||||
resp.getWriter().write("Another instance of profiler is already running.");
|
||||
}
|
||||
}
|
||||
|
||||
private Integer getInteger(final HttpServletRequest req, final String param,
|
||||
final Integer defaultValue) {
|
||||
final String value = req.getParameter(param);
|
||||
if (value != null) {
|
||||
try {
|
||||
return Integer.valueOf(value);
|
||||
} catch (NumberFormatException e) {
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
private Long getLong(final HttpServletRequest req, final String param) {
|
||||
final String value = req.getParameter(param);
|
||||
if (value != null) {
|
||||
try {
|
||||
return Long.valueOf(value);
|
||||
} catch (NumberFormatException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private Double getMinWidth(final HttpServletRequest req) {
|
||||
final String value = req.getParameter("minwidth");
|
||||
if (value != null) {
|
||||
try {
|
||||
return Double.valueOf(value);
|
||||
} catch (NumberFormatException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private Event getEvent(final HttpServletRequest req) {
|
||||
final String eventArg = req.getParameter("event");
|
||||
if (eventArg != null) {
|
||||
Event event = Event.fromInternalName(eventArg);
|
||||
return event == null ? Event.CPU : event;
|
||||
}
|
||||
return Event.CPU;
|
||||
}
|
||||
|
||||
private Output getOutput(final HttpServletRequest req) {
|
||||
final String outputArg = req.getParameter("output");
|
||||
if (req.getParameter("output") != null) {
|
||||
try {
|
||||
return Output.valueOf(outputArg.trim().toUpperCase());
|
||||
} catch (IllegalArgumentException e) {
|
||||
return Output.HTML;
|
||||
}
|
||||
}
|
||||
return Output.HTML;
|
||||
}
|
||||
|
||||
static void setResponseHeader(final HttpServletResponse response) {
|
||||
response.setHeader(ACCESS_CONTROL_ALLOW_METHODS, ALLOWED_METHODS);
|
||||
response.setHeader(ACCESS_CONTROL_ALLOW_ORIGIN, "*");
|
||||
response.setContentType(CONTENT_TYPE_TEXT);
|
||||
}
|
||||
|
||||
static String getAsyncProfilerHome() {
|
||||
String asyncProfilerHome = System.getenv(ASYNC_PROFILER_HOME_ENV);
|
||||
// if ENV is not set, see if -Dasync.profiler.home=/path/to/async/profiler/home is set
|
||||
if (asyncProfilerHome == null || asyncProfilerHome.trim().isEmpty()) {
|
||||
asyncProfilerHome = System.getProperty(ASYNC_PROFILER_HOME_SYSTEM_PROPERTY);
|
||||
}
|
||||
|
||||
return asyncProfilerHome;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.http;
|
||||
|
||||
import java.io.IOException;
|
||||
import javax.servlet.http.HttpServlet;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* Servlet for disabled async-profiler.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class ProfilerDisabledServlet extends HttpServlet {
|
||||
|
||||
@Override
|
||||
protected void doGet(final HttpServletRequest req, final HttpServletResponse resp)
|
||||
throws IOException {
|
||||
resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
|
||||
ProfileServlet.setResponseHeader(resp);
|
||||
resp.getWriter().write("The profiler servlet was disabled at startup.\n\n"
|
||||
+ "Please ensure the prerequisites for the Profiler Servlet have been installed and the\n"
|
||||
+ "environment is properly configured.");
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.util.List;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* Process related utilities.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public final class ProcessUtils {
|
||||
|
||||
private static final Logger LOG = LoggerFactory.getLogger(ProcessUtils.class);
|
||||
|
||||
private ProcessUtils() {
|
||||
// no-op
|
||||
}
|
||||
|
||||
public static Integer getPid() {
|
||||
// JVM_PID can be exported in service start script
|
||||
String pidStr = System.getenv("JVM_PID");
|
||||
|
||||
// In case if it is not set correctly, fallback to mxbean which is implementation specific.
|
||||
if (pidStr == null || pidStr.trim().isEmpty()) {
|
||||
String name = ManagementFactory.getRuntimeMXBean().getName();
|
||||
if (name != null) {
|
||||
int idx = name.indexOf("@");
|
||||
if (idx != -1) {
|
||||
pidStr = name.substring(0, name.indexOf("@"));
|
||||
}
|
||||
}
|
||||
}
|
||||
try {
|
||||
if (pidStr != null) {
|
||||
return Integer.valueOf(pidStr);
|
||||
}
|
||||
} catch (NumberFormatException ignored) {
|
||||
// ignore
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public static Process runCmdAsync(List<String> cmd) {
|
||||
try {
|
||||
LOG.info("Running command async: {}", cmd);
|
||||
return new ProcessBuilder(cmd).inheritIO().start();
|
||||
} catch (IOException e) {
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -78,7 +78,7 @@
|
|||
<value>false</value>
|
||||
<description>
|
||||
Indicates if administrator ACLs are required to access
|
||||
instrumentation servlets (JMX, METRICS, CONF, STACKS).
|
||||
instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
|
|
@ -0,0 +1,145 @@
|
|||
<!---
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
Async Profiler Servlet for Hadoop
|
||||
========================================
|
||||
|
||||
<!-- MACRO{toc|fromDepth=0|toDepth=3} -->
|
||||
|
||||
Purpose
|
||||
-------
|
||||
|
||||
This document describes how to configure and use async profiler
|
||||
with Hadoop applications.
|
||||
Async profiler is a low overhead sampling profiler for Java that
|
||||
does not suffer from Safepoint bias problem. It features
|
||||
HotSpot-specific APIs to collect stack traces and to track memory
|
||||
allocations. The profiler works with OpenJDK, Oracle JDK and other
|
||||
Java runtimes based on the HotSpot JVM.
|
||||
|
||||
Hadoop profiler servlet supports Async Profiler major versions
|
||||
1.x and 2.x.
|
||||
|
||||
Prerequisites
|
||||
-------------
|
||||
|
||||
Make sure Hadoop is installed, configured and setup correctly.
|
||||
For more information see:
|
||||
|
||||
* [Single Node Setup](./SingleCluster.html) for first-time users.
|
||||
* [Cluster Setup](./ClusterSetup.html) for large, distributed clusters.
|
||||
|
||||
Go to https://github.com/jvm-profiling-tools/async-profiler,
|
||||
download a release appropriate for your platform, and install
|
||||
on every cluster host.
|
||||
|
||||
Set `ASYNC_PROFILER_HOME` in the environment (put it in hadoop-env.sh)
|
||||
to the root directory of the async-profiler install location, or pass
|
||||
it on the Hadoop daemon's command line as a system property as
|
||||
`-Dasync.profiler.home=/path/to/async-profiler`.
|
||||
|
||||
|
||||
Usage
|
||||
--------
|
||||
|
||||
Once the prerequisites have been satisfied, access to the async-profiler
|
||||
is available by using Namenode or ResourceManager UI.
|
||||
|
||||
The following options from async-profiler can be specified as query parameters.
|
||||
* `-e event` profiling event: cpu|alloc|lock|cache-misses etc.
|
||||
* `-d duration` run profiling for 'duration' seconds (integer)
|
||||
* `-i interval` sampling interval in nanoseconds (long)
|
||||
* `-j jstackdepth` maximum Java stack depth (integer)
|
||||
* `-b bufsize` frame buffer size (long)
|
||||
* `-t` profile different threads separately
|
||||
* `-s` simple class names instead of FQN
|
||||
* `-o fmt[,fmt...]` output format: summary|traces|flat|collapsed|svg|tree|jfr|html
|
||||
* `--width px` SVG width pixels (integer)
|
||||
* `--height px` SVG frame height pixels (integer)
|
||||
* `--minwidth px` skip frames smaller than px (double)
|
||||
* `--reverse` generate stack-reversed FlameGraph / Call tree
|
||||
|
||||
|
||||
Example:
|
||||
If Namenode http address is localhost:9870, and ResourceManager http
|
||||
address is localhost:8088, ProfileServlet running with async-profiler
|
||||
setup can be accessed with http://localhost:9870/prof and
|
||||
http://localhost:8088/prof for Namenode and ResourceManager processes
|
||||
respectively.
|
||||
|
||||
Diving deep into some params:
|
||||
|
||||
* To collect 10 second CPU profile of current process
|
||||
(returns FlameGraph svg)
|
||||
* `curl http://localhost:9870/prof` (FlameGraph svg for Namenode)
|
||||
* `curl http://localhost:8088/prof` (FlameGraph svg for ResourceManager)
|
||||
* To collect 10 second CPU profile of pid 12345 (returns FlameGraph svg)
|
||||
* `curl http://localhost:9870/prof?pid=12345` (For instance, provide
|
||||
pid of Datanode here)
|
||||
* To collect 30 second CPU profile of pid 12345 (returns FlameGraph svg)
|
||||
* `curl http://localhost:9870/prof?pid=12345&duration=30`
|
||||
* To collect 1 minute CPU profile of current process and output in tree
|
||||
format (html)
|
||||
* `curl http://localhost:9870/prof?output=tree&duration=60`
|
||||
* To collect 10 second heap allocation profile of current process
|
||||
(returns FlameGraph svg)
|
||||
* `curl http://localhost:9870/prof?event=alloc`
|
||||
* To collect lock contention profile of current process
|
||||
(returns FlameGraph svg)
|
||||
* `curl http://localhost:9870/prof?event=lock`
|
||||
|
||||
|
||||
The following event types are supported by async-profiler.
|
||||
Use the 'event' parameter to specify. Default is 'cpu'.
|
||||
Not all operating systems will support all types.
|
||||
|
||||
Perf events:
|
||||
|
||||
* cpu
|
||||
* page-faults
|
||||
* context-switches
|
||||
* cycles
|
||||
* instructions
|
||||
* cache-references
|
||||
* cache-misses
|
||||
* branches
|
||||
* branch-misses
|
||||
* bus-cycles
|
||||
* L1-dcache-load-misses
|
||||
* LLC-load-misses
|
||||
* dTLB-load-misses
|
||||
|
||||
Java events:
|
||||
|
||||
* alloc
|
||||
* lock
|
||||
|
||||
The following output formats are supported.
|
||||
Use the 'output' parameter to specify. Default is 'html' (the flame graph format in async-profiler 2.x).
|
||||
|
||||
Output formats:
|
||||
|
||||
* summary: A dump of basic profiling statistics.
|
||||
* traces: Call traces.
|
||||
* flat: Flat profile (top N hot methods).
|
||||
* collapsed: Collapsed call traces in the format used by FlameGraph
|
||||
script. This is a collection of call stacks, where each line is a
|
||||
semicolon separated list of frames followed by a counter.
|
||||
* svg: FlameGraph in SVG format.
|
||||
* tree: Call tree in HTML format.
|
||||
* jfr: Call traces in Java Flight Recorder format.
|
||||
|
||||
The 'duration' parameter specifies how long to collect trace data
|
||||
before generating output, specified in seconds. The default is 10 seconds.
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.http;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Small test to cover default disabled prof endpoint.
|
||||
*/
|
||||
public class TestDisabledProfileServlet extends HttpServerFunctionalTest {
|
||||
|
||||
private static HttpServer2 server;
|
||||
private static URL baseUrl;
|
||||
|
||||
@BeforeClass
|
||||
public static void setup() throws Exception {
|
||||
server = createTestServer();
|
||||
server.start();
|
||||
baseUrl = getServerURL(server);
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void cleanup() throws Exception {
|
||||
server.stop();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testQuery() throws Exception {
|
||||
try {
|
||||
readOutput(new URL(baseUrl, "/prof"));
|
||||
throw new IllegalStateException("Should not reach here");
|
||||
} catch (IOException e) {
|
||||
assertTrue(e.getMessage()
|
||||
.contains(HttpServletResponse.SC_INTERNAL_SERVER_ERROR + " for URL: " + baseUrl));
|
||||
}
|
||||
|
||||
// CORS headers
|
||||
HttpURLConnection conn =
|
||||
(HttpURLConnection) new URL(baseUrl, "/prof").openConnection();
|
||||
assertEquals("GET", conn.getHeaderField(ProfileServlet.ACCESS_CONTROL_ALLOW_METHODS));
|
||||
assertNotNull(conn.getHeaderField(ProfileServlet.ACCESS_CONTROL_ALLOW_ORIGIN));
|
||||
conn.disconnect();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRequestMethods() throws IOException {
|
||||
HttpURLConnection connection = getConnection("PUT");
|
||||
assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED,
|
||||
connection.getResponseCode());
|
||||
connection.disconnect();
|
||||
connection = getConnection("POST");
|
||||
assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED,
|
||||
connection.getResponseCode());
|
||||
connection.disconnect();
|
||||
connection = getConnection("DELETE");
|
||||
assertEquals("Unexpected response code", HttpServletResponse.SC_METHOD_NOT_ALLOWED,
|
||||
connection.getResponseCode());
|
||||
connection.disconnect();
|
||||
connection = getConnection("GET");
|
||||
assertEquals("Unexpected response code", HttpServletResponse.SC_INTERNAL_SERVER_ERROR,
|
||||
connection.getResponseCode());
|
||||
connection.disconnect();
|
||||
}
|
||||
|
||||
private HttpURLConnection getConnection(final String method) throws IOException {
|
||||
URL url = new URL(baseUrl, "/prof");
|
||||
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
|
||||
conn.setRequestMethod(method);
|
||||
return conn;
|
||||
}
|
||||
|
||||
}
|
|
@ -1208,9 +1208,10 @@ Name | Description
|
|||
/logs | Display log files
|
||||
/stacks | Display JVM stacks
|
||||
/static/index.html | The static home page
|
||||
/prof | Async Profiler endpoint
|
||||
|
||||
To control the access to servlet `/conf`, `/jmx`, `/logLevel`, `/logs`,
|
||||
and `/stacks`, configure the following properties in `kms-site.xml`:
|
||||
`/stacks` and `/prof`, configure the following properties in `kms-site.xml`:
|
||||
|
||||
```xml
|
||||
<property>
|
||||
|
@ -1224,7 +1225,7 @@ and `/stacks`, configure the following properties in `kms-site.xml`:
|
|||
<value>true</value>
|
||||
<description>
|
||||
Indicates if administrator ACLs are required to access
|
||||
instrumentation servlets (JMX, METRICS, CONF, STACKS).
|
||||
instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
|
|
@ -162,9 +162,10 @@ Name | Description
|
|||
/logs | Display log files
|
||||
/stacks | Display JVM stacks
|
||||
/static/index.html | The static home page
|
||||
/prof | Async Profiler endpoint
|
||||
|
||||
To control the access to servlet `/conf`, `/jmx`, `/logLevel`, `/logs`,
|
||||
and `/stacks`, configure the following properties in `httpfs-site.xml`:
|
||||
`/stacks` and `/prof`, configure the following properties in `httpfs-site.xml`:
|
||||
|
||||
```xml
|
||||
<property>
|
||||
|
@ -178,7 +179,7 @@ and `/stacks`, configure the following properties in `httpfs-site.xml`:
|
|||
<value>true</value>
|
||||
<description>
|
||||
Indicates if administrator ACLs are required to access
|
||||
instrumentation servlets (JMX, METRICS, CONF, STACKS).
|
||||
instrumentation servlets (JMX, METRICS, CONF, STACKS, PROF).
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
|
|
@ -69,6 +69,7 @@
|
|||
<item name="Tracing" href="hadoop-project-dist/hadoop-common/Tracing.html"/>
|
||||
<item name="Unix Shell Guide" href="hadoop-project-dist/hadoop-common/UnixShellGuide.html"/>
|
||||
<item name="Registry" href="hadoop-project-dist/hadoop-common/registry/index.html"/>
|
||||
<item name="Async Profiler" href="hadoop-project-dist/hadoop-common/AsyncProfilerServlet.html" />
|
||||
</menu>
|
||||
|
||||
<menu name="HDFS" inherit="top">
|
||||
|
|
Loading…
Reference in New Issue