From 2a8f824fb7db077dacabe62f78a0ddf50ee8a1fc Mon Sep 17 00:00:00 2001
From: Colin Patrick Mccabe
Date: Tue, 7 Oct 2014 11:55:49 -0700
Subject: [PATCH] HDFS-7186. Document the "hadoop trace" command. (Masatake
 Iwasaki via Colin P. McCabe)

(cherry picked from commit 9196db9a080b96b9022c424d70f59811e67570a6)
---
 .../hadoop-common/CHANGES.txt                 |  3 +
 .../hadoop-common/src/site/apt/Tracing.apt.vm | 75 ++++++++++++++++---
 2 files changed, 66 insertions(+), 12 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 4bcb877ed1e..2757342f913 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -11,6 +11,9 @@ Release 2.7.0 - UNRELEASED
     HADOOP-11156. DelegateToFileSystem should implement
     getFsStatus(final Path f). (Zhihai Xu via wang)
 
+    HDFS-7186. Document the "hadoop trace" command. (Masatake Iwasaki via Colin
+    P. McCabe)
+
   OPTIMIZATIONS
 
   BUG FIXES
diff --git a/hadoop-common-project/hadoop-common/src/site/apt/Tracing.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/Tracing.apt.vm
index f777dd23c16..31c25169a9e 100644
--- a/hadoop-common-project/hadoop-common/src/site/apt/Tracing.apt.vm
+++ b/hadoop-common-project/hadoop-common/src/site/apt/Tracing.apt.vm
@@ -46,21 +46,32 @@ public void receiveSpan(Span span);
 
 +----
   <property>
-    <name>hadoop.trace.spanreceiver.classes</name>
+    <name>hadoop.htrace.spanreceiver.classes</name>
     <value>org.htrace.impl.LocalFileSpanReceiver</value>
   </property>
   <property>
-    <name>hadoop.local-file-span-receiver.path</name>
+    <name>hadoop.htrace.local-file-span-receiver.path</name>
     <value>/var/log/hadoop/htrace.out</value>
   </property>
 +----
 
+  You can omit the package name prefix if you use a span receiver bundled with HTrace.
+
++----
+  <property>
+    <name>hadoop.htrace.spanreceiver.classes</name>
+    <value>LocalFileSpanReceiver</value>
+  </property>
++----
+
+
+
 ** Setting up ZipkinSpanReceiver
 
   Instead of implementing SpanReceiver by yourself,
   you can use <<<ZipkinSpanReceiver>>> which uses
   {{{https://github.com/twitter/zipkin}Zipkin}}
-  for collecting and dispalying tracing data.
+  for collecting and displaying tracing data.
 
   In order to use <<<ZipkinSpanReceiver>>>,
   you need to download and setup {{{https://github.com/twitter/zipkin}Zipkin}} first.
@@ -82,22 +93,63 @@ public void receiveSpan(Span span);
 
 +----
   <property>
-    <name>hadoop.trace.spanreceiver.classes</name>
-    <value>org.htrace.impl.ZipkinSpanReceiver</value>
+    <name>hadoop.htrace.spanreceiver.classes</name>
+    <value>ZipkinSpanReceiver</value>
   </property>
   <property>
-    <name>hadoop.zipkin.collector-hostname</name>
+    <name>hadoop.htrace.zipkin.collector-hostname</name>
     <value>192.168.1.2</value>
   </property>
   <property>
-    <name>hadoop.zipkin.collector-port</name>
+    <name>hadoop.htrace.zipkin.collector-port</name>
     <value>9410</value>
   </property>
 +----
 
-** Turning on tracing by HTrace API
 
-  In order to turn on Dapper-like tracing,
+** Dynamic update of tracing configuration
+
+  You can use the <<<hadoop trace>>> command to see and update the tracing configuration of each server.
+  You must specify the IPC server address of the namenode or datanode with the <<<-host>>> option.
+  You need to run the command against each server if you want to update the configuration of all servers.
+
+  <<<hadoop trace -list>>> shows the list of loaded span receivers and their ids.
+
++----
+  $ hadoop trace -list -host 192.168.56.2:9000
+  ID  CLASS
+  1   org.htrace.impl.LocalFileSpanReceiver
+
+  $ hadoop trace -list -host 192.168.56.2:50020
+  ID  CLASS
+  1   org.htrace.impl.LocalFileSpanReceiver
++----
+
+  <<<hadoop trace -remove>>> removes a span receiver from a server.
+  The <<<-remove>>> option takes the id of the span receiver as its argument.
+
++----
+  $ hadoop trace -remove 1 -host 192.168.56.2:9000
+  Removed trace span receiver 1
++----
+
+  <<<hadoop trace -add>>> adds a span receiver to a server.
+  You need to specify the class name of the span receiver as the argument of the <<<-class>>> option.
+  You can specify the configuration associated with the span receiver with <<<-Ckey=value>>> options.
+
++----
+  $ hadoop trace -add -class LocalFileSpanReceiver -Chadoop.htrace.local-file-span-receiver.path=/tmp/htrace.out -host 192.168.56.2:9000
+  Added trace span receiver 2 with configuration hadoop.htrace.local-file-span-receiver.path = /tmp/htrace.out
+
+  $ hadoop trace -list -host 192.168.56.2:9000
+  ID  CLASS
+  2   org.htrace.impl.LocalFileSpanReceiver
++----
+
+
+** Starting tracing spans by HTrace API
+
+  In order to trace,
   you will need to wrap the traced logic with <<tracing span>> as shown below.
   When there is running tracing spans,
   the tracing information is propagated to servers along with RPC requests.
@@ -133,7 +185,6 @@ import org.htrace.TraceScope;
 +----
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FsShell;
-import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.tracing.SpanReceiverHost;
 import org.apache.hadoop.util.ToolRunner;
 import org.htrace.Sampler;
@@ -146,8 +197,8 @@ public class TracingFsShell {
     FsShell shell = new FsShell();
     conf.setQuietMode(false);
     shell.setConf(conf);
+    SpanReceiverHost.getInstance(conf);
     int res = 0;
-    SpanReceiverHost.init(new HdfsConfiguration());
     TraceScope ts = null;
     try {
       ts = Trace.startSpan("FsShell", Sampler.ALWAYS);
@@ -165,5 +216,5 @@ public class TracingFsShell {
 
 +----
 $ javac -cp `hadoop classpath` TracingFsShell.java
-$ HADOOP_CLASSPATH=. hdfs TracingFsShell -put sample.txt /tmp/
+$ java -cp .:`hadoop classpath` TracingFsShell -ls /
 +----
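Note: the last three hunks only show fragments of the TracingFsShell sample that this patch edits in Tracing.apt.vm. For reference, a minimal sketch of how the complete sample reads once the patch is applied is given below; the lines outside the hunks (the class and main method skeleton, the try/finally block, and System.exit) are assumed from the surrounding documentation rather than taken verbatim from this diff.

+----
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.tracing.SpanReceiverHost;
import org.apache.hadoop.util.ToolRunner;
import org.htrace.Sampler;
import org.htrace.Trace;
import org.htrace.TraceScope;

public class TracingFsShell {
  public static void main(String argv[]) throws Exception {
    Configuration conf = new Configuration();
    FsShell shell = new FsShell();
    conf.setQuietMode(false);
    shell.setConf(conf);
    // Load the span receivers configured by hadoop.htrace.spanreceiver.classes
    // into this client process; this is the call the patch switches to.
    SpanReceiverHost.getInstance(conf);
    int res = 0;
    TraceScope ts = null;
    try {
      // Wrap the whole shell invocation in a tracing span.
      // Sampler.ALWAYS traces every invocation.
      ts = Trace.startSpan("FsShell", Sampler.ALWAYS);
      res = ToolRunner.run(shell, argv);
    } finally {
      if (ts != null) {
        ts.close();
      }
    }
    System.exit(res);
  }
}
+----

Compile and run it with the commands from the last hunk: javac against `hadoop classpath`, then java -cp .:`hadoop classpath` TracingFsShell -ls /.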