HDFS-6879. Adding tracing to Hadoop RPC. Contributed by Masatake Iwasaki.

(cherry picked from commit 6962510f72)
This commit is contained in:
Colin Patrick Mccabe 2014-08-27 14:12:05 -07:00
parent 4afd04a09f
commit f1ee8cec61
16 changed files with 741 additions and 6 deletions

View File

@ -239,6 +239,10 @@
<scope>compile</scope> <scope>compile</scope>
</dependency> </dependency>
<dependency>
<groupId>org.htrace</groupId>
<artifactId>htrace-core</artifactId>
</dependency>
<dependency> <dependency>
<groupId>org.apache.zookeeper</groupId> <groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId> <artifactId>zookeeper</artifactId>

View File

@ -88,6 +88,7 @@ import org.apache.hadoop.util.ProtoUtil;
import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Time;
import org.htrace.Trace;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
@ -694,6 +695,9 @@ public class Client {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Connecting to "+server); LOG.debug("Connecting to "+server);
} }
if (Trace.isTracing()) {
Trace.addTimelineAnnotation("IPC client connecting to " + server);
}
short numRetries = 0; short numRetries = 0;
Random rand = null; Random rand = null;
while (true) { while (true) {
@ -758,6 +762,10 @@ public class Client {
// update last activity time // update last activity time
touch(); touch();
if (Trace.isTracing()) {
Trace.addTimelineAnnotation("IPC client connected to " + server);
}
// start the receiver thread after the socket connection has been set // start the receiver thread after the socket connection has been set
// up // up
start(); start();

View File

@ -48,6 +48,9 @@ import org.apache.hadoop.security.token.SecretManager;
import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.util.ProtoUtil; import org.apache.hadoop.util.ProtoUtil;
import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Time;
import org.htrace.Sampler;
import org.htrace.Trace;
import org.htrace.TraceScope;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.BlockingService; import com.google.protobuf.BlockingService;
@ -191,6 +194,16 @@ public class ProtobufRpcEngine implements RpcEngine {
+ method.getName() + "]"); + method.getName() + "]");
} }
TraceScope traceScope = null;
// if Tracing is on then start a new span for this rpc.
// guard it in the if statement to make sure there isn't
// any extra string manipulation.
if (Trace.isTracing()) {
traceScope = Trace.startSpan(
method.getDeclaringClass().getCanonicalName() +
"." + method.getName());
}
RequestHeaderProto rpcRequestHeader = constructRpcRequestHeader(method); RequestHeaderProto rpcRequestHeader = constructRpcRequestHeader(method);
if (LOG.isTraceEnabled()) { if (LOG.isTraceEnabled()) {
@ -212,8 +225,13 @@ public class ProtobufRpcEngine implements RpcEngine {
remoteId + ": " + method.getName() + remoteId + ": " + method.getName() +
" {" + e + "}"); " {" + e + "}");
} }
if (Trace.isTracing()) {
traceScope.getSpan().addTimelineAnnotation(
"Call got exception: " + e.getMessage());
}
throw new ServiceException(e); throw new ServiceException(e);
} finally {
if (traceScope != null) traceScope.close();
} }
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {

View File

@ -78,6 +78,7 @@ import org.apache.hadoop.conf.Configuration.IntegerRanges;
import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.ipc.ProtobufRpcEngine.RpcResponseMessageWrapper; import org.apache.hadoop.ipc.ProtobufRpcEngine.RpcResponseMessageWrapper;
@ -114,6 +115,10 @@ import org.apache.hadoop.util.ProtoUtil;
import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Time;
import org.htrace.Span;
import org.htrace.Trace;
import org.htrace.TraceInfo;
import org.htrace.TraceScope;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.ByteString; import com.google.protobuf.ByteString;
@ -503,6 +508,7 @@ public abstract class Server {
private ByteBuffer rpcResponse; // the response for this call private ByteBuffer rpcResponse; // the response for this call
private final RPC.RpcKind rpcKind; private final RPC.RpcKind rpcKind;
private final byte[] clientId; private final byte[] clientId;
private final Span traceSpan; // the tracing span on the server side
public Call(int id, int retryCount, Writable param, public Call(int id, int retryCount, Writable param,
Connection connection) { Connection connection) {
@ -512,6 +518,11 @@ public abstract class Server {
public Call(int id, int retryCount, Writable param, Connection connection, public Call(int id, int retryCount, Writable param, Connection connection,
RPC.RpcKind kind, byte[] clientId) { RPC.RpcKind kind, byte[] clientId) {
this(id, retryCount, param, connection, kind, clientId, null);
}
public Call(int id, int retryCount, Writable param, Connection connection,
RPC.RpcKind kind, byte[] clientId, Span span) {
this.callId = id; this.callId = id;
this.retryCount = retryCount; this.retryCount = retryCount;
this.rpcRequest = param; this.rpcRequest = param;
@ -520,6 +531,7 @@ public abstract class Server {
this.rpcResponse = null; this.rpcResponse = null;
this.rpcKind = kind; this.rpcKind = kind;
this.clientId = clientId; this.clientId = clientId;
this.traceSpan = span;
} }
@Override @Override
@ -1836,9 +1848,18 @@ public abstract class Server {
RpcErrorCodeProto.FATAL_DESERIALIZING_REQUEST, err); RpcErrorCodeProto.FATAL_DESERIALIZING_REQUEST, err);
} }
Span traceSpan = null;
if (header.hasTraceInfo()) {
// If the incoming RPC included tracing info, always continue the trace
TraceInfo parentSpan = new TraceInfo(header.getTraceInfo().getTraceId(),
header.getTraceInfo().getParentId());
traceSpan = Trace.startSpan(rpcRequest.toString(), parentSpan).detach();
}
Call call = new Call(header.getCallId(), header.getRetryCount(), Call call = new Call(header.getCallId(), header.getRetryCount(),
rpcRequest, this, ProtoUtil.convert(header.getRpcKind()), header rpcRequest, this, ProtoUtil.convert(header.getRpcKind()),
.getClientId().toByteArray()); header.getClientId().toByteArray(), traceSpan);
callQueue.put(call); // queue the call; maybe blocked here callQueue.put(call); // queue the call; maybe blocked here
incRpcCount(); // Increment the rpc count incRpcCount(); // Increment the rpc count
} }
@ -1981,6 +2002,7 @@ public abstract class Server {
ByteArrayOutputStream buf = ByteArrayOutputStream buf =
new ByteArrayOutputStream(INITIAL_RESP_BUF_SIZE); new ByteArrayOutputStream(INITIAL_RESP_BUF_SIZE);
while (running) { while (running) {
TraceScope traceScope = null;
try { try {
final Call call = callQueue.take(); // pop the queue; maybe blocked here final Call call = callQueue.take(); // pop the queue; maybe blocked here
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
@ -1997,6 +2019,10 @@ public abstract class Server {
Writable value = null; Writable value = null;
CurCall.set(call); CurCall.set(call);
if (call.traceSpan != null) {
traceScope = Trace.continueSpan(call.traceSpan);
}
try { try {
// Make the call as the user via Subject.doAs, thus associating // Make the call as the user via Subject.doAs, thus associating
// the call with the Subject // the call with the Subject
@ -2070,9 +2096,22 @@ public abstract class Server {
} catch (InterruptedException e) { } catch (InterruptedException e) {
if (running) { // unexpected -- log it if (running) { // unexpected -- log it
LOG.info(Thread.currentThread().getName() + " unexpectedly interrupted", e); LOG.info(Thread.currentThread().getName() + " unexpectedly interrupted", e);
if (Trace.isTracing()) {
traceScope.getSpan().addTimelineAnnotation("unexpectedly interrupted: " +
StringUtils.stringifyException(e));
}
} }
} catch (Exception e) { } catch (Exception e) {
LOG.info(Thread.currentThread().getName() + " caught an exception", e); LOG.info(Thread.currentThread().getName() + " caught an exception", e);
if (Trace.isTracing()) {
traceScope.getSpan().addTimelineAnnotation("Exception: " +
StringUtils.stringifyException(e));
}
} finally {
if (traceScope != null) {
traceScope.close();
}
IOUtils.cleanup(LOG, traceScope);
} }
} }
LOG.debug(Thread.currentThread().getName() + ": exiting"); LOG.debug(Thread.currentThread().getName() + ": exiting");

View File

@ -41,6 +41,8 @@ import org.apache.hadoop.util.Time;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.*; import org.apache.hadoop.conf.*;
import org.htrace.Trace;
import org.htrace.TraceScope;
/** An RpcEngine implementation for Writable data. */ /** An RpcEngine implementation for Writable data. */
@InterfaceStability.Evolving @InterfaceStability.Evolving
@ -227,9 +229,19 @@ public class WritableRpcEngine implements RpcEngine {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
startTime = Time.now(); startTime = Time.now();
} }
TraceScope traceScope = null;
ObjectWritable value = (ObjectWritable) if (Trace.isTracing()) {
client.call(RPC.RpcKind.RPC_WRITABLE, new Invocation(method, args), remoteId); traceScope = Trace.startSpan(
method.getDeclaringClass().getCanonicalName() +
"." + method.getName());
}
ObjectWritable value;
try {
value = (ObjectWritable)
client.call(RPC.RpcKind.RPC_WRITABLE, new Invocation(method, args), remoteId);
} finally {
if (traceScope != null) traceScope.close();
}
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
long callTime = Time.now() - startTime; long callTime = Time.now() - startTime;
LOG.debug("Call: " + method.getName() + " " + callTime); LOG.debug("Call: " + method.getName() + " " + callTime);

View File

@ -0,0 +1,153 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tracing;
import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.ShutdownHookManager;
import org.htrace.HTraceConfiguration;
import org.htrace.SpanReceiver;
import org.htrace.Trace;
/**
* This class provides functions for reading the names of SpanReceivers from
* the Hadoop configuration, adding those SpanReceivers to the Tracer,
* and closing those SpanReceivers when appropriate.
* This class does nothing If no SpanReceiver is configured.
*/
@InterfaceAudience.Private
public class SpanReceiverHost {
public static final String SPAN_RECEIVERS_CONF_KEY = "hadoop.trace.spanreceiver.classes";
private static final Log LOG = LogFactory.getLog(SpanReceiverHost.class);
private Collection<SpanReceiver> receivers = new HashSet<SpanReceiver>();
private boolean closed = false;
private static enum SingletonHolder {
INSTANCE;
Object lock = new Object();
SpanReceiverHost host = null;
}
public static SpanReceiverHost getInstance(Configuration conf) {
if (SingletonHolder.INSTANCE.host != null) {
return SingletonHolder.INSTANCE.host;
}
synchronized (SingletonHolder.INSTANCE.lock) {
if (SingletonHolder.INSTANCE.host != null) {
return SingletonHolder.INSTANCE.host;
}
SpanReceiverHost host = new SpanReceiverHost();
host.loadSpanReceivers(conf);
SingletonHolder.INSTANCE.host = host;
ShutdownHookManager.get().addShutdownHook(new Runnable() {
public void run() {
SingletonHolder.INSTANCE.host.closeReceivers();
}
}, 0);
return SingletonHolder.INSTANCE.host;
}
}
/**
* Reads the names of classes specified in the
* "hadoop.trace.spanreceiver.classes" property and instantiates and registers
* them with the Tracer as SpanReceiver's.
*
* The nullary constructor is called during construction, but if the classes
* specified implement the Configurable interface, setConfiguration() will be
* called on them. This allows SpanReceivers to use values from the Hadoop
* configuration.
*/
public void loadSpanReceivers(Configuration conf) {
Class<?> implClass = null;
String[] receiverNames = conf.getTrimmedStrings(SPAN_RECEIVERS_CONF_KEY);
if (receiverNames == null || receiverNames.length == 0) {
return;
}
for (String className : receiverNames) {
className = className.trim();
try {
implClass = Class.forName(className);
receivers.add(loadInstance(implClass, conf));
LOG.info("SpanReceiver " + className + " was loaded successfully.");
} catch (ClassNotFoundException e) {
LOG.warn("Class " + className + " cannot be found.", e);
} catch (IOException e) {
LOG.warn("Load SpanReceiver " + className + " failed.", e);
}
}
for (SpanReceiver rcvr : receivers) {
Trace.addReceiver(rcvr);
}
}
private SpanReceiver loadInstance(Class<?> implClass, Configuration conf)
throws IOException {
SpanReceiver impl;
try {
Object o = ReflectionUtils.newInstance(implClass, conf);
impl = (SpanReceiver)o;
impl.configure(wrapHadoopConf(conf));
} catch (SecurityException e) {
throw new IOException(e);
} catch (IllegalArgumentException e) {
throw new IOException(e);
} catch (RuntimeException e) {
throw new IOException(e);
}
return impl;
}
private static HTraceConfiguration wrapHadoopConf(final Configuration conf) {
return new HTraceConfiguration() {
public static final String HTRACE_CONF_PREFIX = "hadoop.";
@Override
public String get(String key) {
return conf.get(HTRACE_CONF_PREFIX + key);
}
@Override
public String get(String key, String defaultValue) {
return conf.get(HTRACE_CONF_PREFIX + key, defaultValue);
}
};
}
/**
* Calls close() on all SpanReceivers created by this SpanReceiverHost.
*/
public synchronized void closeReceivers() {
if (closed) return;
closed = true;
for (SpanReceiver rcvr : receivers) {
try {
rcvr.close();
} catch (IOException e) {
LOG.warn("Unable to close SpanReceiver correctly: " + e.getMessage(), e);
}
}
}
}

View File

@ -27,6 +27,8 @@ import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.UserInformation
import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.*; import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.*;
import org.apache.hadoop.security.SaslRpcServer.AuthMethod; import org.apache.hadoop.security.SaslRpcServer.AuthMethod;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.htrace.Span;
import org.htrace.Trace;
import com.google.protobuf.ByteString; import com.google.protobuf.ByteString;
@ -165,6 +167,15 @@ public abstract class ProtoUtil {
RpcRequestHeaderProto.Builder result = RpcRequestHeaderProto.newBuilder(); RpcRequestHeaderProto.Builder result = RpcRequestHeaderProto.newBuilder();
result.setRpcKind(convert(rpcKind)).setRpcOp(operation).setCallId(callId) result.setRpcKind(convert(rpcKind)).setRpcOp(operation).setCallId(callId)
.setRetryCount(retryCount).setClientId(ByteString.copyFrom(uuid)); .setRetryCount(retryCount).setClientId(ByteString.copyFrom(uuid));
// Add tracing info if we are currently tracing.
if (Trace.isTracing()) {
Span s = Trace.currentSpan();
result.setTraceInfo(RPCTraceInfoProto.newBuilder()
.setParentId(s.getSpanId())
.setTraceId(s.getTraceId()).build());
}
return result.build(); return result.build();
} }
} }

View File

@ -53,6 +53,18 @@ enum RpcKindProto {
/**
* Used to pass through the information necessary to continue
* a trace after an RPC is made. All we need is the traceid
* (so we know the overarching trace this message is a part of), and
* the id of the current span when this message was sent, so we know
* what span caused the new span we will create when this message is received.
*/
message RPCTraceInfoProto {
optional int64 traceId = 1;
optional int64 parentId = 2;
}
message RpcRequestHeaderProto { // the header for the RpcRequest message RpcRequestHeaderProto { // the header for the RpcRequest
enum OperationProto { enum OperationProto {
RPC_FINAL_PACKET = 0; // The final RPC Packet RPC_FINAL_PACKET = 0; // The final RPC Packet
@ -67,6 +79,7 @@ message RpcRequestHeaderProto { // the header for the RpcRequest
// clientId + callId uniquely identifies a request // clientId + callId uniquely identifies a request
// retry count, 1 means this is the first retry // retry count, 1 means this is the first retry
optional sint32 retryCount = 5 [default = -1]; optional sint32 retryCount = 5 [default = -1];
optional RPCTraceInfoProto traceInfo = 6; // tracing info
} }

View File

@ -0,0 +1,169 @@
~~ Licensed under the Apache License, Version 2.0 (the "License");
~~ you may not use this file except in compliance with the License.
~~ You may obtain a copy of the License at
~~
~~ http://www.apache.org/licenses/LICENSE-2.0
~~
~~ Unless required by applicable law or agreed to in writing, software
~~ distributed under the License is distributed on an "AS IS" BASIS,
~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
~~ See the License for the specific language governing permissions and
~~ limitations under the License. See accompanying LICENSE file.
---
Hadoop Distributed File System-${project.version} - Enabling Dapper-like Tracing
---
---
${maven.build.timestamp}
Enabling Dapper-like Tracing in HDFS
%{toc|section=1|fromDepth=0}
* {Dapper-like Tracing in HDFS}
** HTrace
{{{https://issues.apache.org/jira/browse/HDFS-5274}HDFS-5274}}
added support for tracing requests through HDFS,
using the open source tracing library, {{{https://github.com/cloudera/htrace}HTrace}}.
Setting up tracing is quite simple, however it requires some very minor changes to your client code.
** SpanReceivers
The tracing system works by collecting information in structs called 'Spans'.
It is up to you to choose how you want to receive this information
by implementing the SpanReceiver interface, which defines one method:
+----
public void receiveSpan(Span span);
+----
Configure what SpanReceivers you'd like to use
by putting a comma separated list of the fully-qualified class name of
classes implementing SpanReceiver
in <<<hdfs-site.xml>>> property: <<<hadoop.trace.spanreceiver.classes>>>.
+----
<property>
<name>hadoop.trace.spanreceiver.classes</name>
<value>org.htrace.impl.LocalFileSpanReceiver</value>
</property>
<property>
<name>hadoop.local-file-span-receiver.path</name>
<value>/var/log/hadoop/htrace.out</value>
</property>
+----
** Setting up ZipkinSpanReceiver
Instead of implementing SpanReceiver by yourself,
you can use <<<ZipkinSpanReceiver>>> which uses
{{{https://github.com/twitter/zipkin}Zipkin}}
for collecting and dispalying tracing data.
In order to use <<<ZipkinSpanReceiver>>>,
you need to download and setup {{{https://github.com/twitter/zipkin}Zipkin}} first.
you also need to add the jar of <<<htrace-zipkin>>> to the classpath of Hadoop on each node.
Here is example setup procedure.
+----
$ git clone https://github.com/cloudera/htrace
$ cd htrace/htrace-zipkin
$ mvn compile assembly:single
$ cp target/htrace-zipkin-*-jar-with-dependencies.jar $HADOOP_HOME/share/hadoop/hdfs/lib/
+----
The sample configuration for <<<ZipkinSpanReceiver>>> is shown below.
By adding these to <<<hdfs-site.xml>>> of NameNode and DataNodes,
<<<ZipkinSpanReceiver>>> is initialized on the startup.
You also need this configuration on the client node in addition to the servers.
+----
<property>
<name>hadoop.trace.spanreceiver.classes</name>
<value>org.htrace.impl.ZipkinSpanReceiver</value>
</property>
<property>
<name>hadoop.zipkin.collector-hostname</name>
<value>192.168.1.2</value>
</property>
<property>
<name>hadoop.zipkin.collector-port</name>
<value>9410</value>
</property>
+----
** Turning on tracing by HTrace API
In order to turn on Dapper-like tracing,
you will need to wrap the traced logic with <<tracing span>> as shown below.
When there is running tracing spans,
the tracing information is propagated to servers along with RPC requests.
In addition, you need to initialize <<<SpanReceiver>>> once per process.
+----
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.tracing.SpanReceiverHost;
import org.htrace.Sampler;
import org.htrace.Trace;
import org.htrace.TraceScope;
...
SpanReceiverHost.getInstance(new HdfsConfiguration());
...
TraceScope ts = Trace.startSpan("Gets", Sampler.ALWAYS);
try {
... // traced logic
} finally {
if (ts != null) ts.close();
}
+----
** Sample code for tracing
The <<<TracingFsShell.java>>> shown below is the wrapper of FsShell
which start tracing span before invoking HDFS shell command.
+----
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.tracing.SpanReceiverHost;
import org.apache.hadoop.util.ToolRunner;
import org.htrace.Sampler;
import org.htrace.Trace;
import org.htrace.TraceScope;
public class TracingFsShell {
public static void main(String argv[]) throws Exception {
Configuration conf = new Configuration();
FsShell shell = new FsShell();
conf.setQuietMode(false);
shell.setConf(conf);
int res = 0;
SpanReceiverHost.init(new HdfsConfiguration());
TraceScope ts = null;
try {
ts = Trace.startSpan("FsShell", Sampler.ALWAYS);
res = ToolRunner.run(shell, argv);
} finally {
shell.close();
if (ts != null) ts.close();
}
System.exit(res);
}
}
+----
You can compile and execute this code as shown below.
+----
$ javac -cp `hadoop classpath` TracingFsShell.java
$ HADOOP_CLASSPATH=. hdfs TracingFsShell -put sample.txt /tmp/
+----

View File

@ -165,6 +165,9 @@ Release 2.6.0 - UNRELEASED
HDFS-4486. Add log category for long-running DFSClient notices (Zhe Zhang HDFS-4486. Add log category for long-running DFSClient notices (Zhe Zhang
via Colin Patrick McCabe) via Colin Patrick McCabe)
HDFS-6879. Adding tracing to Hadoop RPC (Masatake Iwasaki via Colin Patrick
McCabe)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-6690. Deduplicate xattr names in memory. (wang) HDFS-6690. Deduplicate xattr names in memory. (wang)

View File

@ -190,6 +190,10 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
<artifactId>xercesImpl</artifactId> <artifactId>xercesImpl</artifactId>
<scope>compile</scope> <scope>compile</scope>
</dependency> </dependency>
<dependency>
<groupId>org.htrace</groupId>
<artifactId>htrace-core</artifactId>
</dependency>
</dependencies> </dependencies>
<build> <build>

View File

@ -183,6 +183,7 @@ import org.apache.hadoop.util.ServicePlugin;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Time;
import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.util.VersionInfo;
import org.apache.hadoop.tracing.SpanReceiverHost;
import org.mortbay.util.ajax.JSON; import org.mortbay.util.ajax.JSON;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
@ -329,6 +330,8 @@ public class DataNode extends Configured
private boolean isPermissionEnabled; private boolean isPermissionEnabled;
private String dnUserName = null; private String dnUserName = null;
private SpanReceiverHost spanReceiverHost;
/** /**
* Create the DataNode given a configuration, an array of dataDirs, * Create the DataNode given a configuration, an array of dataDirs,
* and a namenode proxy * and a namenode proxy
@ -830,6 +833,7 @@ public class DataNode extends Configured
this.dataDirs = dataDirs; this.dataDirs = dataDirs;
this.conf = conf; this.conf = conf;
this.dnConf = new DNConf(conf); this.dnConf = new DNConf(conf);
this.spanReceiverHost = SpanReceiverHost.getInstance(conf);
if (dnConf.maxLockedMemory > 0) { if (dnConf.maxLockedMemory > 0) {
if (!NativeIO.POSIX.getCacheManipulator().verifyCanMlock()) { if (!NativeIO.POSIX.getCacheManipulator().verifyCanMlock()) {
@ -1517,6 +1521,9 @@ public class DataNode extends Configured
MBeans.unregister(dataNodeInfoBeanName); MBeans.unregister(dataNodeInfoBeanName);
dataNodeInfoBeanName = null; dataNodeInfoBeanName = null;
} }
if (this.spanReceiverHost != null) {
this.spanReceiverHost.closeReceivers();
}
if (shortCircuitRegistry != null) shortCircuitRegistry.shutdown(); if (shortCircuitRegistry != null) shortCircuitRegistry.shutdown();
LOG.info("Shutdown complete."); LOG.info("Shutdown complete.");
synchronized(this) { synchronized(this) {

View File

@ -60,6 +60,7 @@ import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol;
import org.apache.hadoop.ipc.RefreshCallQueueProtocol; import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
import org.apache.hadoop.tools.GetUserMappingsProtocol; import org.apache.hadoop.tools.GetUserMappingsProtocol;
import org.apache.hadoop.tracing.SpanReceiverHost;
import org.apache.hadoop.util.ExitUtil.ExitException; import org.apache.hadoop.util.ExitUtil.ExitException;
import org.apache.hadoop.util.JvmPauseMonitor; import org.apache.hadoop.util.JvmPauseMonitor;
import org.apache.hadoop.util.ServicePlugin; import org.apache.hadoop.util.ServicePlugin;
@ -277,6 +278,7 @@ public class NameNode implements NameNodeStatusMXBean {
private JvmPauseMonitor pauseMonitor; private JvmPauseMonitor pauseMonitor;
private ObjectName nameNodeStatusBeanName; private ObjectName nameNodeStatusBeanName;
private SpanReceiverHost spanReceiverHost;
/** /**
* The namenode address that clients will use to access this namenode * The namenode address that clients will use to access this namenode
* or the name service. For HA configurations using logical URI, it * or the name service. For HA configurations using logical URI, it
@ -585,6 +587,9 @@ public class NameNode implements NameNodeStatusMXBean {
if (NamenodeRole.NAMENODE == role) { if (NamenodeRole.NAMENODE == role) {
startHttpServer(conf); startHttpServer(conf);
} }
this.spanReceiverHost = SpanReceiverHost.getInstance(conf);
loadNamesystem(conf); loadNamesystem(conf);
rpcServer = createRpcServer(conf); rpcServer = createRpcServer(conf);
@ -821,6 +826,9 @@ public class NameNode implements NameNodeStatusMXBean {
MBeans.unregister(nameNodeStatusBeanName); MBeans.unregister(nameNodeStatusBeanName);
nameNodeStatusBeanName = null; nameNodeStatusBeanName = null;
} }
if (this.spanReceiverHost != null) {
this.spanReceiverHost.closeReceivers();
}
} }
} }

View File

@ -0,0 +1,280 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tracing;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.htrace.HTraceConfiguration;
import org.htrace.Sampler;
import org.htrace.Span;
import org.htrace.SpanReceiver;
import org.htrace.Trace;
import org.htrace.TraceScope;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
public class TestTracing {
private static Configuration conf;
private static MiniDFSCluster cluster;
private static DistributedFileSystem dfs;
@Test
public void testSpanReceiverHost() throws Exception {
Configuration conf = new Configuration();
conf.set(SpanReceiverHost.SPAN_RECEIVERS_CONF_KEY,
SetSpanReceiver.class.getName());
SpanReceiverHost spanReceiverHost = SpanReceiverHost.getInstance(conf);
}
@Test
public void testWriteTraceHooks() throws Exception {
long startTime = System.currentTimeMillis();
TraceScope ts = Trace.startSpan("testWriteTraceHooks", Sampler.ALWAYS);
Path file = new Path("traceWriteTest.dat");
FSDataOutputStream stream = dfs.create(file);
for (int i = 0; i < 10; i++) {
byte[] data = RandomStringUtils.randomAlphabetic(102400).getBytes();
stream.write(data);
}
stream.hflush();
stream.close();
long endTime = System.currentTimeMillis();
ts.close();
String[] expectedSpanNames = {
"testWriteTraceHooks",
"org.apache.hadoop.hdfs.protocol.ClientProtocol.create",
"org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol.BlockingInterface.create",
"org.apache.hadoop.hdfs.protocol.ClientProtocol.fsync",
"org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol.BlockingInterface.fsync",
"org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol.BlockingInterface.complete"
};
assertSpanNamesFound(expectedSpanNames);
// The trace should last about the same amount of time as the test
Map<String, List<Span>> map = SetSpanReceiver.SetHolder.getMap();
Span s = map.get("testWriteTraceHooks").get(0);
Assert.assertNotNull(s);
long spanStart = s.getStartTimeMillis();
long spanEnd = s.getStopTimeMillis();
Assert.assertTrue(spanStart - startTime < 100);
Assert.assertTrue(spanEnd - endTime < 100);
// There should only be one trace id as it should all be homed in the
// top trace.
for (Span span : SetSpanReceiver.SetHolder.spans) {
Assert.assertEquals(ts.getSpan().getTraceId(), span.getTraceId());
}
}
@Test
public void testWriteWithoutTraceHooks() throws Exception {
Path file = new Path("withoutTraceWriteTest.dat");
FSDataOutputStream stream = dfs.create(file);
for (int i = 0; i < 10; i++) {
byte[] data = RandomStringUtils.randomAlphabetic(102400).getBytes();
stream.write(data);
}
stream.hflush();
stream.close();
Assert.assertTrue(SetSpanReceiver.SetHolder.size() == 0);
}
@Test
public void testReadTraceHooks() throws Exception {
String fileName = "traceReadTest.dat";
Path filePath = new Path(fileName);
// Create the file.
FSDataOutputStream ostream = dfs.create(filePath);
for (int i = 0; i < 50; i++) {
byte[] data = RandomStringUtils.randomAlphabetic(10240).getBytes();
ostream.write(data);
}
ostream.close();
long startTime = System.currentTimeMillis();
TraceScope ts = Trace.startSpan("testReadTraceHooks", Sampler.ALWAYS);
FSDataInputStream istream = dfs.open(filePath, 10240);
ByteBuffer buf = ByteBuffer.allocate(10240);
int count = 0;
try {
while (istream.read(buf) > 0) {
count += 1;
buf.clear();
istream.seek(istream.getPos() + 5);
}
} catch (IOException ioe) {
// Ignore this it's probably a seek after eof.
} finally {
istream.close();
}
ts.getSpan().addTimelineAnnotation("count: " + count);
long endTime = System.currentTimeMillis();
ts.close();
String[] expectedSpanNames = {
"testReadTraceHooks",
"org.apache.hadoop.hdfs.protocol.ClientProtocol.getBlockLocations",
"org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.ClientNamenodeProtocol.BlockingInterface.getBlockLocations"
};
assertSpanNamesFound(expectedSpanNames);
// The trace should last about the same amount of time as the test
Map<String, List<Span>> map = SetSpanReceiver.SetHolder.getMap();
Span s = map.get("testReadTraceHooks").get(0);
Assert.assertNotNull(s);
long spanStart = s.getStartTimeMillis();
long spanEnd = s.getStopTimeMillis();
Assert.assertTrue(spanStart - startTime < 100);
Assert.assertTrue(spanEnd - endTime < 100);
// There should only be one trace id as it should all be homed in the
// top trace.
for (Span span : SetSpanReceiver.SetHolder.spans) {
Assert.assertEquals(ts.getSpan().getTraceId(), span.getTraceId());
}
}
@Test
public void testReadWithoutTraceHooks() throws Exception {
String fileName = "withoutTraceReadTest.dat";
Path filePath = new Path(fileName);
// Create the file.
FSDataOutputStream ostream = dfs.create(filePath);
for (int i = 0; i < 50; i++) {
byte[] data = RandomStringUtils.randomAlphabetic(10240).getBytes();
ostream.write(data);
}
ostream.close();
FSDataInputStream istream = dfs.open(filePath, 10240);
ByteBuffer buf = ByteBuffer.allocate(10240);
int count = 0;
try {
while (istream.read(buf) > 0) {
count += 1;
buf.clear();
istream.seek(istream.getPos() + 5);
}
} catch (IOException ioe) {
// Ignore this it's probably a seek after eof.
} finally {
istream.close();
}
Assert.assertTrue(SetSpanReceiver.SetHolder.size() == 0);
}
@Before
public void cleanSet() {
SetSpanReceiver.SetHolder.spans.clear();
}
@BeforeClass
public static void setupCluster() throws IOException {
conf = new Configuration();
conf.setLong("dfs.blocksize", 100 * 1024);
conf.set(SpanReceiverHost.SPAN_RECEIVERS_CONF_KEY,
SetSpanReceiver.class.getName());
cluster = new MiniDFSCluster.Builder(conf)
.numDataNodes(3)
.build();
dfs = cluster.getFileSystem();
}
@AfterClass
public static void shutDown() throws IOException {
cluster.shutdown();
}
private void assertSpanNamesFound(String[] expectedSpanNames) {
Map<String, List<Span>> map = SetSpanReceiver.SetHolder.getMap();
for (String spanName : expectedSpanNames) {
Assert.assertTrue("Should find a span with name " + spanName, map.get(spanName) != null);
}
}
/**
* Span receiver that puts all spans into a single set.
* This is useful for testing.
* <p/>
* We're not using HTrace's POJOReceiver here so as that doesn't
* push all the metrics to a static place, and would make testing
* SpanReceiverHost harder.
*/
public static class SetSpanReceiver implements SpanReceiver {
public void configure(HTraceConfiguration conf) {
}
public void receiveSpan(Span span) {
SetHolder.spans.add(span);
}
public void close() {
}
public static class SetHolder {
public static Set<Span> spans = new HashSet<Span>();
public static int size() {
return spans.size();
}
public static Map<String, List<Span>> getMap() {
Map<String, List<Span>> map = new HashMap<String, List<Span>>();
for (Span s : spans) {
List<Span> l = map.get(s.getDescription());
if (l == null) {
l = new LinkedList<Span>();
map.put(s.getDescription(), l);
}
l.add(s);
}
return map;
}
}
}
}

View File

@ -695,6 +695,11 @@
<artifactId>jsch</artifactId> <artifactId>jsch</artifactId>
<version>0.1.42</version> <version>0.1.42</version>
</dependency> </dependency>
<dependency>
<groupId>org.htrace</groupId>
<artifactId>htrace-core</artifactId>
<version>3.0.4</version>
</dependency>
<dependency> <dependency>
<groupId>org.jdom</groupId> <groupId>org.jdom</groupId>
<artifactId>jdom</artifactId> <artifactId>jdom</artifactId>

View File

@ -65,6 +65,7 @@
<item name="Service Level Authorization" href="hadoop-project-dist/hadoop-common/ServiceLevelAuth.html"/> <item name="Service Level Authorization" href="hadoop-project-dist/hadoop-common/ServiceLevelAuth.html"/>
<item name="HTTP Authentication" href="hadoop-project-dist/hadoop-common/HttpAuthentication.html"/> <item name="HTTP Authentication" href="hadoop-project-dist/hadoop-common/HttpAuthentication.html"/>
<item name="Hadoop KMS" href="hadoop-kms/index.html"/> <item name="Hadoop KMS" href="hadoop-kms/index.html"/>
<item name="Tracing" href="hadoop-project-dist/hadoop-common/Tracing.html"/>
</menu> </menu>
<menu name="HDFS" inherit="top"> <menu name="HDFS" inherit="top">