HDFS-7054. Make DFSOutputStream tracing more fine-grained (cmccabe)

(cherry picked from commit 8234fd0e10)
(cherry picked from commit 79c07bbaca)
This commit is contained in:
Colin Patrick Mccabe 2015-03-18 18:06:17 -07:00
parent 388696c089
commit 94976cb369
4 changed files with 300 additions and 116 deletions

View File

@ -437,6 +437,8 @@ Release 2.7.0 - UNRELEASED
HDFS-7940. Add tracing to DFSClient#setQuotaByStorageType (Rakesh R via HDFS-7940. Add tracing to DFSClient#setQuotaByStorageType (Rakesh R via
Colin P. McCabe) Colin P. McCabe)
HDFS-7054. Make DFSOutputStream tracing more fine-grained (cmccabe)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-7454. Reduce memory footprint for AclEntries in NameNode. HDFS-7454. Reduce memory footprint for AclEntries in NameNode.

View File

@ -95,8 +95,11 @@ import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.DataChecksum.Type; import org.apache.hadoop.util.DataChecksum.Type;
import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Time;
import org.apache.htrace.NullScope;
import org.apache.htrace.Sampler;
import org.apache.htrace.Span; import org.apache.htrace.Span;
import org.apache.htrace.Trace; import org.apache.htrace.Trace;
import org.apache.htrace.TraceInfo;
import org.apache.htrace.TraceScope; import org.apache.htrace.TraceScope;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
@ -270,17 +273,11 @@ public class DFSOutputStream extends FSOutputSummer
/** Append on an existing block? */ /** Append on an existing block? */
private final boolean isAppend; private final boolean isAppend;
private final Span traceSpan; private DataStreamer(HdfsFileStatus stat, ExtendedBlock block) {
/**
* construction with tracing info
*/
private DataStreamer(HdfsFileStatus stat, ExtendedBlock block, Span span) {
isAppend = false; isAppend = false;
isLazyPersistFile = isLazyPersist(stat); isLazyPersistFile = isLazyPersist(stat);
this.block = block; this.block = block;
stage = BlockConstructionStage.PIPELINE_SETUP_CREATE; stage = BlockConstructionStage.PIPELINE_SETUP_CREATE;
traceSpan = span;
} }
/** /**
@ -291,10 +288,9 @@ public class DFSOutputStream extends FSOutputSummer
* @throws IOException if error occurs * @throws IOException if error occurs
*/ */
private DataStreamer(LocatedBlock lastBlock, HdfsFileStatus stat, private DataStreamer(LocatedBlock lastBlock, HdfsFileStatus stat,
int bytesPerChecksum, Span span) throws IOException { int bytesPerChecksum) throws IOException {
isAppend = true; isAppend = true;
stage = BlockConstructionStage.PIPELINE_SETUP_APPEND; stage = BlockConstructionStage.PIPELINE_SETUP_APPEND;
traceSpan = span;
block = lastBlock.getBlock(); block = lastBlock.getBlock();
bytesSent = block.getNumBytes(); bytesSent = block.getNumBytes();
accessToken = lastBlock.getBlockToken(); accessToken = lastBlock.getBlockToken();
@ -385,12 +381,8 @@ public class DFSOutputStream extends FSOutputSummer
@Override @Override
public void run() { public void run() {
long lastPacket = Time.now(); long lastPacket = Time.now();
TraceScope traceScope = null; TraceScope scope = NullScope.INSTANCE;
if (traceSpan != null) {
traceScope = Trace.continueSpan(traceSpan);
}
while (!streamerClosed && dfsClient.clientRunning) { while (!streamerClosed && dfsClient.clientRunning) {
// if the Responder encountered an error, shutdown Responder // if the Responder encountered an error, shutdown Responder
if (hasError && response != null) { if (hasError && response != null) {
try { try {
@ -436,11 +428,18 @@ public class DFSOutputStream extends FSOutputSummer
// get packet to be sent. // get packet to be sent.
if (dataQueue.isEmpty()) { if (dataQueue.isEmpty()) {
one = createHeartbeatPacket(); one = createHeartbeatPacket();
assert one != null;
} else { } else {
one = dataQueue.getFirst(); // regular data packet one = dataQueue.getFirst(); // regular data packet
long parents[] = one.getTraceParents();
if (parents.length > 0) {
scope = Trace.startSpan("dataStreamer", new TraceInfo(0, parents[0]));
// TODO: use setParents API once it's available from HTrace 3.2
// scope = Trace.startSpan("dataStreamer", Sampler.ALWAYS);
// scope.getSpan().setParents(parents);
}
} }
} }
assert one != null;
// get new block from namenode. // get new block from namenode.
if (stage == BlockConstructionStage.PIPELINE_SETUP_CREATE) { if (stage == BlockConstructionStage.PIPELINE_SETUP_CREATE) {
@ -486,9 +485,12 @@ public class DFSOutputStream extends FSOutputSummer
} }
// send the packet // send the packet
Span span = null;
synchronized (dataQueue) { synchronized (dataQueue) {
// move packet from dataQueue to ackQueue // move packet from dataQueue to ackQueue
if (!one.isHeartbeatPacket()) { if (!one.isHeartbeatPacket()) {
span = scope.detach();
one.setTraceSpan(span);
dataQueue.removeFirst(); dataQueue.removeFirst();
ackQueue.addLast(one); ackQueue.addLast(one);
dataQueue.notifyAll(); dataQueue.notifyAll();
@ -501,6 +503,7 @@ public class DFSOutputStream extends FSOutputSummer
} }
// write out data to remote datanode // write out data to remote datanode
TraceScope writeScope = Trace.startSpan("writeTo", span);
try { try {
one.writeTo(blockStream); one.writeTo(blockStream);
blockStream.flush(); blockStream.flush();
@ -513,6 +516,8 @@ public class DFSOutputStream extends FSOutputSummer
// will be taken out then. // will be taken out then.
tryMarkPrimaryDatanodeFailed(); tryMarkPrimaryDatanodeFailed();
throw e; throw e;
} finally {
writeScope.close();
} }
lastPacket = Time.now(); lastPacket = Time.now();
@ -562,11 +567,10 @@ public class DFSOutputStream extends FSOutputSummer
// Not a datanode issue // Not a datanode issue
streamerClosed = true; streamerClosed = true;
} }
} finally {
scope.close();
} }
} }
if (traceScope != null) {
traceScope.close();
}
closeInternal(); closeInternal();
} }
@ -721,6 +725,7 @@ public class DFSOutputStream extends FSOutputSummer
setName("ResponseProcessor for block " + block); setName("ResponseProcessor for block " + block);
PipelineAck ack = new PipelineAck(); PipelineAck ack = new PipelineAck();
TraceScope scope = NullScope.INSTANCE;
while (!responderClosed && dfsClient.clientRunning && !isLastPacketInBlock) { while (!responderClosed && dfsClient.clientRunning && !isLastPacketInBlock) {
// process responses from datanodes. // process responses from datanodes.
try { try {
@ -795,6 +800,8 @@ public class DFSOutputStream extends FSOutputSummer
block.setNumBytes(one.getLastByteOffsetBlock()); block.setNumBytes(one.getLastByteOffsetBlock());
synchronized (dataQueue) { synchronized (dataQueue) {
scope = Trace.continueSpan(one.getTraceSpan());
one.setTraceSpan(null);
lastAckedSeqno = seqno; lastAckedSeqno = seqno;
ackQueue.removeFirst(); ackQueue.removeFirst();
dataQueue.notifyAll(); dataQueue.notifyAll();
@ -819,6 +826,8 @@ public class DFSOutputStream extends FSOutputSummer
} }
responderClosed = true; responderClosed = true;
} }
} finally {
scope.close();
} }
} }
} }
@ -879,6 +888,12 @@ public class DFSOutputStream extends FSOutputSummer
// a client waiting on close() will be aware that the flush finished. // a client waiting on close() will be aware that the flush finished.
synchronized (dataQueue) { synchronized (dataQueue) {
DFSPacket endOfBlockPacket = dataQueue.remove(); // remove the end of block packet DFSPacket endOfBlockPacket = dataQueue.remove(); // remove the end of block packet
Span span = endOfBlockPacket.getTraceSpan();
if (span != null) {
// Close any trace span associated with this Packet
TraceScope scope = Trace.continueSpan(span);
scope.close();
}
assert endOfBlockPacket.isLastPacketInBlock(); assert endOfBlockPacket.isLastPacketInBlock();
assert lastAckedSeqno == endOfBlockPacket.getSeqno() - 1; assert lastAckedSeqno == endOfBlockPacket.getSeqno() - 1;
lastAckedSeqno = endOfBlockPacket.getSeqno(); lastAckedSeqno = endOfBlockPacket.getSeqno();
@ -1586,11 +1601,7 @@ public class DFSOutputStream extends FSOutputSummer
computePacketChunkSize(dfsClient.getConf().writePacketSize, bytesPerChecksum); computePacketChunkSize(dfsClient.getConf().writePacketSize, bytesPerChecksum);
Span traceSpan = null; streamer = new DataStreamer(stat, null);
if (Trace.isTracing()) {
traceSpan = Trace.startSpan(this.getClass().getSimpleName()).detach();
}
streamer = new DataStreamer(stat, null, traceSpan);
if (favoredNodes != null && favoredNodes.length != 0) { if (favoredNodes != null && favoredNodes.length != 0) {
streamer.setFavoredNodes(favoredNodes); streamer.setFavoredNodes(favoredNodes);
} }
@ -1600,6 +1611,9 @@ public class DFSOutputStream extends FSOutputSummer
FsPermission masked, EnumSet<CreateFlag> flag, boolean createParent, FsPermission masked, EnumSet<CreateFlag> flag, boolean createParent,
short replication, long blockSize, Progressable progress, int buffersize, short replication, long blockSize, Progressable progress, int buffersize,
DataChecksum checksum, String[] favoredNodes) throws IOException { DataChecksum checksum, String[] favoredNodes) throws IOException {
TraceScope scope =
dfsClient.getPathTraceScope("newStreamForCreate", src);
try {
HdfsFileStatus stat = null; HdfsFileStatus stat = null;
// Retry the create if we get a RetryStartFileException up to a maximum // Retry the create if we get a RetryStartFileException up to a maximum
@ -1644,6 +1658,9 @@ public class DFSOutputStream extends FSOutputSummer
flag, progress, checksum, favoredNodes); flag, progress, checksum, favoredNodes);
out.start(); out.start();
return out; return out;
} finally {
scope.close();
}
} }
/** Construct a new output stream for append. */ /** Construct a new output stream for append. */
@ -1653,21 +1670,16 @@ public class DFSOutputStream extends FSOutputSummer
this(dfsClient, src, progress, stat, checksum); this(dfsClient, src, progress, stat, checksum);
initialFileSize = stat.getLen(); // length of file when opened initialFileSize = stat.getLen(); // length of file when opened
Span traceSpan = null;
if (Trace.isTracing()) {
traceSpan = Trace.startSpan(this.getClass().getSimpleName()).detach();
}
// The last partial block of the file has to be filled. // The last partial block of the file has to be filled.
if (!toNewBlock && lastBlock != null) { if (!toNewBlock && lastBlock != null) {
// indicate that we are appending to an existing block // indicate that we are appending to an existing block
bytesCurBlock = lastBlock.getBlockSize(); bytesCurBlock = lastBlock.getBlockSize();
streamer = new DataStreamer(lastBlock, stat, bytesPerChecksum, traceSpan); streamer = new DataStreamer(lastBlock, stat, bytesPerChecksum);
} else { } else {
computePacketChunkSize(dfsClient.getConf().writePacketSize, computePacketChunkSize(dfsClient.getConf().writePacketSize,
bytesPerChecksum); bytesPerChecksum);
streamer = new DataStreamer(stat, streamer = new DataStreamer(stat,
lastBlock != null ? lastBlock.getBlock() : null, traceSpan); lastBlock != null ? lastBlock.getBlock() : null);
} }
this.fileEncryptionInfo = stat.getFileEncryptionInfo(); this.fileEncryptionInfo = stat.getFileEncryptionInfo();
} }
@ -1676,6 +1688,9 @@ public class DFSOutputStream extends FSOutputSummer
boolean toNewBlock, int bufferSize, Progressable progress, boolean toNewBlock, int bufferSize, Progressable progress,
LocatedBlock lastBlock, HdfsFileStatus stat, DataChecksum checksum, LocatedBlock lastBlock, HdfsFileStatus stat, DataChecksum checksum,
String[] favoredNodes) throws IOException { String[] favoredNodes) throws IOException {
TraceScope scope =
dfsClient.getPathTraceScope("newStreamForAppend", src);
try {
final DFSOutputStream out = new DFSOutputStream(dfsClient, src, toNewBlock, final DFSOutputStream out = new DFSOutputStream(dfsClient, src, toNewBlock,
progress, lastBlock, stat, checksum); progress, lastBlock, stat, checksum);
if (favoredNodes != null && favoredNodes.length != 0) { if (favoredNodes != null && favoredNodes.length != 0) {
@ -1683,6 +1698,9 @@ public class DFSOutputStream extends FSOutputSummer
} }
out.start(); out.start();
return out; return out;
} finally {
scope.close();
}
} }
private static boolean isLazyPersist(HdfsFileStatus stat) { private static boolean isLazyPersist(HdfsFileStatus stat) {
@ -1707,6 +1725,7 @@ public class DFSOutputStream extends FSOutputSummer
private void queueCurrentPacket() { private void queueCurrentPacket() {
synchronized (dataQueue) { synchronized (dataQueue) {
if (currentPacket == null) return; if (currentPacket == null) return;
currentPacket.addTraceParent(Trace.currentSpan());
dataQueue.addLast(currentPacket); dataQueue.addLast(currentPacket);
lastQueuedSeqno = currentPacket.getSeqno(); lastQueuedSeqno = currentPacket.getSeqno();
if (DFSClient.LOG.isDebugEnabled()) { if (DFSClient.LOG.isDebugEnabled()) {
@ -1721,7 +1740,17 @@ public class DFSOutputStream extends FSOutputSummer
synchronized (dataQueue) { synchronized (dataQueue) {
try { try {
// If queue is full, then wait till we have enough space // If queue is full, then wait till we have enough space
while (!isClosed() && dataQueue.size() + ackQueue.size() > dfsClient.getConf().writeMaxPackets) { boolean firstWait = true;
try {
while (!isClosed() && dataQueue.size() + ackQueue.size() >
dfsClient.getConf().writeMaxPackets) {
if (firstWait) {
Span span = Trace.currentSpan();
if (span != null) {
span.addTimelineAnnotation("dataQueue.wait");
}
firstWait = false;
}
try { try {
dataQueue.wait(); dataQueue.wait();
} catch (InterruptedException e) { } catch (InterruptedException e) {
@ -1736,6 +1765,12 @@ public class DFSOutputStream extends FSOutputSummer
break; break;
} }
} }
} finally {
Span span = Trace.currentSpan();
if ((span != null) && (!firstWait)) {
span.addTimelineAnnotation("end.wait");
}
}
checkClosed(); checkClosed();
queueCurrentPacket(); queueCurrentPacket();
} catch (ClosedChannelException e) { } catch (ClosedChannelException e) {
@ -1747,6 +1782,17 @@ public class DFSOutputStream extends FSOutputSummer
@Override @Override
protected synchronized void writeChunk(byte[] b, int offset, int len, protected synchronized void writeChunk(byte[] b, int offset, int len,
byte[] checksum, int ckoff, int cklen) throws IOException { byte[] checksum, int ckoff, int cklen) throws IOException {
TraceScope scope =
dfsClient.getPathTraceScope("DFSOutputStream#writeChunk", src);
try {
writeChunkImpl(b, offset, len, checksum, ckoff, cklen);
} finally {
scope.close();
}
}
private synchronized void writeChunkImpl(byte[] b, int offset, int len,
byte[] checksum, int ckoff, int cklen) throws IOException {
dfsClient.checkOpen(); dfsClient.checkOpen();
checkClosed(); checkClosed();
@ -1835,12 +1881,24 @@ public class DFSOutputStream extends FSOutputSummer
*/ */
@Override @Override
public void hflush() throws IOException { public void hflush() throws IOException {
TraceScope scope =
dfsClient.getPathTraceScope("hflush", src);
try {
flushOrSync(false, EnumSet.noneOf(SyncFlag.class)); flushOrSync(false, EnumSet.noneOf(SyncFlag.class));
} finally {
scope.close();
}
} }
@Override @Override
public void hsync() throws IOException { public void hsync() throws IOException {
hsync(EnumSet.noneOf(SyncFlag.class)); TraceScope scope =
dfsClient.getPathTraceScope("hsync", src);
try {
flushOrSync(true, EnumSet.noneOf(SyncFlag.class));
} finally {
scope.close();
}
} }
/** /**
@ -1857,7 +1915,13 @@ public class DFSOutputStream extends FSOutputSummer
* whether or not to update the block length in NameNode. * whether or not to update the block length in NameNode.
*/ */
public void hsync(EnumSet<SyncFlag> syncFlags) throws IOException { public void hsync(EnumSet<SyncFlag> syncFlags) throws IOException {
TraceScope scope =
dfsClient.getPathTraceScope("hsync", src);
try {
flushOrSync(true, syncFlags); flushOrSync(true, syncFlags);
} finally {
scope.close();
}
} }
/** /**
@ -2038,6 +2102,8 @@ public class DFSOutputStream extends FSOutputSummer
} }
private void waitForAckedSeqno(long seqno) throws IOException { private void waitForAckedSeqno(long seqno) throws IOException {
TraceScope scope = Trace.startSpan("waitForAckedSeqno", Sampler.NEVER);
try {
if (DFSClient.LOG.isDebugEnabled()) { if (DFSClient.LOG.isDebugEnabled()) {
DFSClient.LOG.debug("Waiting for ack for: " + seqno); DFSClient.LOG.debug("Waiting for ack for: " + seqno);
} }
@ -2066,6 +2132,9 @@ public class DFSOutputStream extends FSOutputSummer
DFSClient.LOG.warn("Slow waitForAckedSeqno took " + duration DFSClient.LOG.warn("Slow waitForAckedSeqno took " + duration
+ "ms (threshold=" + dfsclientSlowLogThresholdMs + "ms)"); + "ms (threshold=" + dfsclientSlowLogThresholdMs + "ms)");
} }
} finally {
scope.close();
}
} }
private synchronized void start() { private synchronized void start() {
@ -2129,6 +2198,16 @@ public class DFSOutputStream extends FSOutputSummer
*/ */
@Override @Override
public synchronized void close() throws IOException { public synchronized void close() throws IOException {
TraceScope scope =
dfsClient.getPathTraceScope("DFSOutputStream#close", src);
try {
closeImpl();
} finally {
scope.close();
}
}
private synchronized void closeImpl() throws IOException {
if (isClosed()) { if (isClosed()) {
IOException e = lastException.getAndSet(null); IOException e = lastException.getAndSet(null);
if (e == null) if (e == null)
@ -2154,7 +2233,12 @@ public class DFSOutputStream extends FSOutputSummer
// get last block before destroying the streamer // get last block before destroying the streamer
ExtendedBlock lastBlock = streamer.getBlock(); ExtendedBlock lastBlock = streamer.getBlock();
closeThreads(false); closeThreads(false);
TraceScope scope = Trace.startSpan("completeFile", Sampler.NEVER);
try {
completeFile(lastBlock); completeFile(lastBlock);
} finally {
scope.close();
}
dfsClient.endFileLease(fileId); dfsClient.endFileLease(fileId);
} catch (ClosedChannelException e) { } catch (ClosedChannelException e) {
} finally { } finally {

View File

@ -21,9 +21,12 @@ import java.io.DataOutputStream;
import java.io.IOException; import java.io.IOException;
import java.nio.BufferOverflowException; import java.nio.BufferOverflowException;
import java.nio.channels.ClosedChannelException; import java.nio.channels.ClosedChannelException;
import java.util.Arrays;
import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader; import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
import org.apache.hadoop.hdfs.util.ByteArrayManager; import org.apache.hadoop.hdfs.util.ByteArrayManager;
import org.apache.htrace.Span;
/**************************************************************** /****************************************************************
* DFSPacket is used by DataStreamer and DFSOutputStream. * DFSPacket is used by DataStreamer and DFSOutputStream.
@ -33,6 +36,7 @@ import org.apache.hadoop.hdfs.util.ByteArrayManager;
class DFSPacket { class DFSPacket {
public static final long HEART_BEAT_SEQNO = -1L; public static final long HEART_BEAT_SEQNO = -1L;
private static long[] EMPTY = new long[0];
private final long seqno; // sequence number of buffer in block private final long seqno; // sequence number of buffer in block
private final long offsetInBlock; // offset in block private final long offsetInBlock; // offset in block
private boolean syncBlock; // this packet forces the current block to disk private boolean syncBlock; // this packet forces the current block to disk
@ -59,6 +63,9 @@ class DFSPacket {
private int checksumPos; private int checksumPos;
private final int dataStart; private final int dataStart;
private int dataPos; private int dataPos;
private long[] traceParents = EMPTY;
private int traceParentsUsed;
private Span span;
/** /**
* Create a new packet. * Create a new packet.
@ -267,4 +274,70 @@ class DFSPacket {
" lastPacketInBlock: " + this.lastPacketInBlock + " lastPacketInBlock: " + this.lastPacketInBlock +
" lastByteOffsetInBlock: " + this.getLastByteOffsetBlock(); " lastByteOffsetInBlock: " + this.getLastByteOffsetBlock();
} }
/**
 * Record a span as one of this packet's trace parents.
 * <p>
 * The trace parents of a packet are the trace spans responsible for adding
 * data to that packet. Only their ids are kept, as an array of longs, for
 * efficiency.
 * <p>
 * Protected by the DFSOutputStream dataQueue lock.
 *
 * @param span the span to record; a null span is silently ignored
 */
public void addTraceParent(Span span) {
  if (span != null) {
    addTraceParent(span.getSpanId());
  }
}
/**
 * Append a span id to this packet's trace parents.
 * <p>
 * The id is stored as given, with no duplicate check at insertion time.
 * The backing array is allocated with 8 slots on first use and doubled
 * each time it fills up.
 *
 * @param id the span id to record
 */
public void addTraceParent(long id) {
  final int capacity = traceParents.length;
  if (traceParentsUsed == capacity) {
    // Out of room: start at 8 slots, then double.
    final int grown = (capacity == 0) ? 8 : capacity * 2;
    traceParents = Arrays.copyOf(traceParents, grown);
  }
  traceParents[traceParentsUsed++] = id;
}
/**
 * Get the distinct trace parent span ids for this packet, in ascending
 * order.
 * <p>
 * Will always be non-null. The ids are sorted and de-duplicated in place and
 * the backing array is trimmed to the number of distinct ids, so the
 * returned array is this packet's own storage rather than a copy.
 * <p>
 * Protected by the DFSOutputStream dataQueue lock.
 *
 * @return the sorted, duplicate-free span ids; empty if none were added
 */
public long[] getTraceParents() {
  final int used = traceParentsUsed;
  Arrays.sort(traceParents, 0, used);

  // Compact the sorted prefix, keeping an id only when it differs from the
  // last id kept. Comparing against the kept slot (instead of a sentinel
  // value) makes the result independent of which id values are present, so
  // an id of 0 is treated like any other.
  int distinct = 0;
  for (int i = 0; i < used; i++) {
    final long id = traceParents[i];
    if (distinct == 0 || id != traceParents[distinct - 1]) {
      traceParents[distinct++] = id;
    }
  }

  // Trim so that the array length equals the number of ids in use.
  if (distinct < traceParents.length) {
    traceParents = Arrays.copyOf(traceParents, distinct);
  }
  traceParentsUsed = distinct;
  return traceParents;
}
/**
 * Set the trace span associated with this packet, replacing any span
 * previously stored.
 *
 * @param span the span to associate with this packet; may be null to clear
 *             the association
 */
public void setTraceSpan(Span span) {
this.span = span;
}
/**
 * Get the trace span associated with this packet.
 *
 * @return the span last passed to {@code setTraceSpan}, or null if none is
 *         currently set
 */
public Span getTraceSpan() {
return span;
}
} }

View File

@ -65,4 +65,29 @@ public class TestDFSPacket {
} }
} }
} }
/**
 * Verify that trace parent ids added to a packet come back sorted and
 * without duplicates, and that reading them repeatedly is stable.
 */
@Test
public void testAddParentsGetParents() throws Exception {
  final DFSPacket packet = new DFSPacket(null, maxChunksPerPacket,
      0, 0, checksumSize, false);

  // A fresh packet has no trace parents.
  long[] ids = packet.getTraceParents();
  Assert.assertEquals(0, ids.length);

  // Adding the same id twice yields a single entry.
  packet.addTraceParent(123);
  packet.addTraceParent(123);
  ids = packet.getTraceParents();
  Assert.assertEquals(1, ids.length);
  Assert.assertEquals(123, ids[0]);

  // Calling 'get' again must return the same result.
  ids = packet.getTraceParents();
  Assert.assertEquals(1, ids.length);
  Assert.assertEquals(123, ids[0]);

  // Further ids are merged in and returned in ascending order.
  packet.addTraceParent(1);
  packet.addTraceParent(456);
  packet.addTraceParent(789);
  ids = packet.getTraceParents();
  final long[] expected = {1, 123, 456, 789};
  Assert.assertEquals(expected.length, ids.length);
  for (int i = 0; i < expected.length; i++) {
    Assert.assertEquals(expected[i], ids[i]);
  }
}
} }