HDFS-7054. Make DFSOutputStream tracing more fine-grained (cmccabe)

(cherry picked from commit 8234fd0e1087e0e49aa1d6f286f292b7f70b368e)
(cherry picked from commit 79c07bbacae94bb0aae476dfbc17ae38564e2028)
Colin Patrick Mccabe 2015-03-18 18:06:17 -07:00
parent 388696c089
commit 94976cb369
4 changed files with 300 additions and 116 deletions
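
In short, the patch replaces the single long-lived span that used to be attached to each DataStreamer with per-operation tracing: writeChunk, hflush, hsync and close each run in their own TraceScope, queueCurrentPacket() records the caller's span id on the DFSPacket via addTraceParent(), and the DataStreamer later starts a "dataStreamer" span parented on that id. Below is a minimal, self-contained sketch of that parent-linking pattern using only HTrace 3 calls that appear in the diff; the class name and the standalone main() are illustrative and not part of the patch, the patch itself obtains its scopes from DFSClient#getPathTraceScope rather than Sampler.ALWAYS, and it carries the parent id inside DFSPacket rather than in a local variable.

import org.apache.htrace.Sampler;
import org.apache.htrace.Span;
import org.apache.htrace.Trace;
import org.apache.htrace.TraceInfo;
import org.apache.htrace.TraceScope;

public class TraceParentSketch {            // illustrative name, not in the patch
  public static void main(String[] args) {
    long parentId = 0;                      // 0 means "no parent"; the real code keeps this in DFSPacket

    // Writer side: each write-path call gets its own short-lived scope.
    TraceScope writeScope = Trace.startSpan("DFSOutputStream#writeChunk", Sampler.ALWAYS);
    try {
      Span current = Trace.currentSpan();
      if (current != null) {
        parentId = current.getSpanId();     // queueCurrentPacket() stores this via addTraceParent()
      }
    } finally {
      writeScope.close();
    }

    // Streamer side: when the packet is dequeued, a "dataStreamer" span is
    // started as a child of the recorded parent id (cf. getTraceParents()).
    if (parentId != 0) {
      TraceScope scope = Trace.startSpan("dataStreamer", new TraceInfo(0, parentId));
      try {
        // ... write the packet to the datanode pipeline ...
      } finally {
        scope.close();
      }
    }
  }
}

On the ack path, ResponseProcessor re-attaches to the span the streamer detached onto the packet (Trace.continueSpan(one.getTraceSpan())) and closes it once the packet is acknowledged, which bounds each "dataStreamer" span to the lifetime of a single packet.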

CHANGES.txt

@@ -437,6 +437,8 @@ Release 2.7.0 - UNRELEASED
     HDFS-7940. Add tracing to DFSClient#setQuotaByStorageType (Rakesh R via
     Colin P. McCabe)
 
+    HDFS-7054. Make DFSOutputStream tracing more fine-grained (cmccabe)
+
   OPTIMIZATIONS
 
     HDFS-7454. Reduce memory footprint for AclEntries in NameNode.

DFSOutputStream.java

@@ -95,8 +95,11 @@
 import org.apache.hadoop.util.DataChecksum.Type;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.Time;
+import org.apache.htrace.NullScope;
+import org.apache.htrace.Sampler;
 import org.apache.htrace.Span;
 import org.apache.htrace.Trace;
+import org.apache.htrace.TraceInfo;
 import org.apache.htrace.TraceScope;
 
 import com.google.common.annotations.VisibleForTesting;
@@ -270,17 +273,11 @@ public DatanodeInfo load(DatanodeInfo key) throws Exception {
     /** Append on an existing block? */
     private final boolean isAppend;
-    private final Span traceSpan;
 
-    /**
-     * construction with tracing info
-     */
-    private DataStreamer(HdfsFileStatus stat, ExtendedBlock block, Span span) {
+    private DataStreamer(HdfsFileStatus stat, ExtendedBlock block) {
       isAppend = false;
       isLazyPersistFile = isLazyPersist(stat);
       this.block = block;
       stage = BlockConstructionStage.PIPELINE_SETUP_CREATE;
-      traceSpan = span;
     }
 
     /**
@@ -291,10 +288,9 @@ private DataStreamer(HdfsFileStatus stat, ExtendedBlock block, Span span) {
      * @throws IOException if error occurs
      */
     private DataStreamer(LocatedBlock lastBlock, HdfsFileStatus stat,
-        int bytesPerChecksum, Span span) throws IOException {
+        int bytesPerChecksum) throws IOException {
       isAppend = true;
       stage = BlockConstructionStage.PIPELINE_SETUP_APPEND;
-      traceSpan = span;
       block = lastBlock.getBlock();
       bytesSent = block.getNumBytes();
       accessToken = lastBlock.getBlockToken();
@@ -385,12 +381,8 @@ private void endBlock() {
     @Override
     public void run() {
       long lastPacket = Time.now();
-      TraceScope traceScope = null;
-      if (traceSpan != null) {
-        traceScope = Trace.continueSpan(traceSpan);
-      }
+      TraceScope scope = NullScope.INSTANCE;
       while (!streamerClosed && dfsClient.clientRunning) {
         // if the Responder encountered an error, shutdown Responder
         if (hasError && response != null) {
           try {
@@ -436,11 +428,18 @@ public void run() {
             // get packet to be sent.
             if (dataQueue.isEmpty()) {
               one = createHeartbeatPacket();
+              assert one != null;
             } else {
               one = dataQueue.getFirst(); // regular data packet
+              long parents[] = one.getTraceParents();
+              if (parents.length > 0) {
+                scope = Trace.startSpan("dataStreamer", new TraceInfo(0, parents[0]));
+                // TODO: use setParents API once it's available from HTrace 3.2
+                // scope = Trace.startSpan("dataStreamer", Sampler.ALWAYS);
+                // scope.getSpan().setParents(parents);
+              }
             }
           }
-          assert one != null;
 
           // get new block from namenode.
           if (stage == BlockConstructionStage.PIPELINE_SETUP_CREATE) {
@@ -486,9 +485,12 @@ public void run() {
           }
 
           // send the packet
+          Span span = null;
           synchronized (dataQueue) {
             // move packet from dataQueue to ackQueue
             if (!one.isHeartbeatPacket()) {
+              span = scope.detach();
+              one.setTraceSpan(span);
               dataQueue.removeFirst();
               ackQueue.addLast(one);
               dataQueue.notifyAll();
@@ -501,6 +503,7 @@ public void run() {
           }
 
           // write out data to remote datanode
+          TraceScope writeScope = Trace.startSpan("writeTo", span);
           try {
             one.writeTo(blockStream);
             blockStream.flush();
@@ -513,6 +516,8 @@ public void run() {
             // will be taken out then.
             tryMarkPrimaryDatanodeFailed();
             throw e;
+          } finally {
+            writeScope.close();
           }
 
           lastPacket = Time.now();
@@ -562,11 +567,10 @@ public void run() {
             // Not a datanode issue
             streamerClosed = true;
           }
+        } finally {
+          scope.close();
         }
       }
-      if (traceScope != null) {
-        traceScope.close();
-      }
       closeInternal();
     }
@@ -721,6 +725,7 @@ public void run() {
       setName("ResponseProcessor for block " + block);
       PipelineAck ack = new PipelineAck();
 
+      TraceScope scope = NullScope.INSTANCE;
       while (!responderClosed && dfsClient.clientRunning && !isLastPacketInBlock) {
         // process responses from datanodes.
         try {
@@ -795,6 +800,8 @@ public void run() {
           block.setNumBytes(one.getLastByteOffsetBlock());
 
           synchronized (dataQueue) {
+            scope = Trace.continueSpan(one.getTraceSpan());
+            one.setTraceSpan(null);
             lastAckedSeqno = seqno;
             ackQueue.removeFirst();
             dataQueue.notifyAll();
@@ -819,6 +826,8 @@ public void run() {
             }
             responderClosed = true;
           }
+        } finally {
+          scope.close();
         }
       }
     }
@@ -879,6 +888,12 @@ private boolean processDatanodeError() throws IOException {
       // a client waiting on close() will be aware that the flush finished.
       synchronized (dataQueue) {
         DFSPacket endOfBlockPacket = dataQueue.remove(); // remove the end of block packet
+        Span span = endOfBlockPacket.getTraceSpan();
+        if (span != null) {
+          // Close any trace span associated with this Packet
+          TraceScope scope = Trace.continueSpan(span);
+          scope.close();
+        }
         assert endOfBlockPacket.isLastPacketInBlock();
         assert lastAckedSeqno == endOfBlockPacket.getSeqno() - 1;
         lastAckedSeqno = endOfBlockPacket.getSeqno();
@@ -1586,11 +1601,7 @@ private DFSOutputStream(DFSClient dfsClient, String src, HdfsFileStatus stat,
     computePacketChunkSize(dfsClient.getConf().writePacketSize, bytesPerChecksum);
 
-    Span traceSpan = null;
-    if (Trace.isTracing()) {
-      traceSpan = Trace.startSpan(this.getClass().getSimpleName()).detach();
-    }
-    streamer = new DataStreamer(stat, null, traceSpan);
+    streamer = new DataStreamer(stat, null);
     if (favoredNodes != null && favoredNodes.length != 0) {
       streamer.setFavoredNodes(favoredNodes);
     }
@@ -1600,6 +1611,9 @@ static DFSOutputStream newStreamForCreate(DFSClient dfsClient, String src,
       FsPermission masked, EnumSet<CreateFlag> flag, boolean createParent,
       short replication, long blockSize, Progressable progress, int buffersize,
       DataChecksum checksum, String[] favoredNodes) throws IOException {
+    TraceScope scope =
+        dfsClient.getPathTraceScope("newStreamForCreate", src);
+    try {
     HdfsFileStatus stat = null;
 
     // Retry the create if we get a RetryStartFileException up to a maximum
@@ -1644,6 +1658,9 @@ static DFSOutputStream newStreamForCreate(DFSClient dfsClient, String src,
           flag, progress, checksum, favoredNodes);
       out.start();
       return out;
+    } finally {
+      scope.close();
+    }
   }
 
   /** Construct a new output stream for append. */
@@ -1653,21 +1670,16 @@ private DFSOutputStream(DFSClient dfsClient, String src, boolean toNewBlock,
     this(dfsClient, src, progress, stat, checksum);
     initialFileSize = stat.getLen(); // length of file when opened
 
-    Span traceSpan = null;
-    if (Trace.isTracing()) {
-      traceSpan = Trace.startSpan(this.getClass().getSimpleName()).detach();
-    }
-
     // The last partial block of the file has to be filled.
     if (!toNewBlock && lastBlock != null) {
       // indicate that we are appending to an existing block
       bytesCurBlock = lastBlock.getBlockSize();
-      streamer = new DataStreamer(lastBlock, stat, bytesPerChecksum, traceSpan);
+      streamer = new DataStreamer(lastBlock, stat, bytesPerChecksum);
     } else {
       computePacketChunkSize(dfsClient.getConf().writePacketSize,
           bytesPerChecksum);
       streamer = new DataStreamer(stat,
-          lastBlock != null ? lastBlock.getBlock() : null, traceSpan);
+          lastBlock != null ? lastBlock.getBlock() : null);
     }
     this.fileEncryptionInfo = stat.getFileEncryptionInfo();
   }
@@ -1676,6 +1688,9 @@ static DFSOutputStream newStreamForAppend(DFSClient dfsClient, String src,
       boolean toNewBlock, int bufferSize, Progressable progress,
       LocatedBlock lastBlock, HdfsFileStatus stat, DataChecksum checksum,
       String[] favoredNodes) throws IOException {
+    TraceScope scope =
+        dfsClient.getPathTraceScope("newStreamForAppend", src);
+    try {
     final DFSOutputStream out = new DFSOutputStream(dfsClient, src, toNewBlock,
         progress, lastBlock, stat, checksum);
     if (favoredNodes != null && favoredNodes.length != 0) {
@@ -1683,6 +1698,9 @@ static DFSOutputStream newStreamForAppend(DFSClient dfsClient, String src,
     }
     out.start();
     return out;
+    } finally {
+      scope.close();
+    }
   }
 
   private static boolean isLazyPersist(HdfsFileStatus stat) {
@@ -1707,6 +1725,7 @@ private void computePacketChunkSize(int psize, int csize) {
   private void queueCurrentPacket() {
     synchronized (dataQueue) {
       if (currentPacket == null) return;
+      currentPacket.addTraceParent(Trace.currentSpan());
       dataQueue.addLast(currentPacket);
       lastQueuedSeqno = currentPacket.getSeqno();
       if (DFSClient.LOG.isDebugEnabled()) {
@@ -1721,7 +1740,17 @@ private void waitAndQueueCurrentPacket() throws IOException {
     synchronized (dataQueue) {
       try {
         // If queue is full, then wait till we have enough space
-        while (!isClosed() && dataQueue.size() + ackQueue.size() > dfsClient.getConf().writeMaxPackets) {
+        boolean firstWait = true;
+        try {
+          while (!isClosed() && dataQueue.size() + ackQueue.size() >
+              dfsClient.getConf().writeMaxPackets) {
+            if (firstWait) {
+              Span span = Trace.currentSpan();
+              if (span != null) {
+                span.addTimelineAnnotation("dataQueue.wait");
+              }
+              firstWait = false;
+            }
             try {
               dataQueue.wait();
             } catch (InterruptedException e) {
@@ -1736,6 +1765,12 @@ private void waitAndQueueCurrentPacket() throws IOException {
               break;
             }
           }
+        } finally {
+          Span span = Trace.currentSpan();
+          if ((span != null) && (!firstWait)) {
+            span.addTimelineAnnotation("end.wait");
+          }
+        }
         checkClosed();
         queueCurrentPacket();
       } catch (ClosedChannelException e) {
@@ -1747,6 +1782,17 @@ private void waitAndQueueCurrentPacket() throws IOException {
   @Override
   protected synchronized void writeChunk(byte[] b, int offset, int len,
       byte[] checksum, int ckoff, int cklen) throws IOException {
+    TraceScope scope =
+        dfsClient.getPathTraceScope("DFSOutputStream#writeChunk", src);
+    try {
+      writeChunkImpl(b, offset, len, checksum, ckoff, cklen);
+    } finally {
+      scope.close();
+    }
+  }
+
+  private synchronized void writeChunkImpl(byte[] b, int offset, int len,
+      byte[] checksum, int ckoff, int cklen) throws IOException {
     dfsClient.checkOpen();
     checkClosed();
@@ -1835,12 +1881,24 @@ public void sync() throws IOException {
    */
   @Override
   public void hflush() throws IOException {
+    TraceScope scope =
+        dfsClient.getPathTraceScope("hflush", src);
+    try {
       flushOrSync(false, EnumSet.noneOf(SyncFlag.class));
+    } finally {
+      scope.close();
+    }
   }
 
   @Override
   public void hsync() throws IOException {
-    hsync(EnumSet.noneOf(SyncFlag.class));
+    TraceScope scope =
+        dfsClient.getPathTraceScope("hsync", src);
+    try {
+      flushOrSync(true, EnumSet.noneOf(SyncFlag.class));
+    } finally {
+      scope.close();
+    }
   }
 
   /**
@@ -1857,7 +1915,13 @@ public void hsync() throws IOException {
    * whether or not to update the block length in NameNode.
    */
   public void hsync(EnumSet<SyncFlag> syncFlags) throws IOException {
+    TraceScope scope =
+        dfsClient.getPathTraceScope("hsync", src);
+    try {
       flushOrSync(true, syncFlags);
+    } finally {
+      scope.close();
+    }
   }
 
   /**
@@ -2038,6 +2102,8 @@ private void flushInternal() throws IOException {
   }
 
   private void waitForAckedSeqno(long seqno) throws IOException {
+    TraceScope scope = Trace.startSpan("waitForAckedSeqno", Sampler.NEVER);
+    try {
     if (DFSClient.LOG.isDebugEnabled()) {
       DFSClient.LOG.debug("Waiting for ack for: " + seqno);
     }
@@ -2066,6 +2132,9 @@ private void waitForAckedSeqno(long seqno) throws IOException {
         DFSClient.LOG.warn("Slow waitForAckedSeqno took " + duration
             + "ms (threshold=" + dfsclientSlowLogThresholdMs + "ms)");
       }
+    } finally {
+      scope.close();
+    }
   }
 
   private synchronized void start() {
@@ -2129,6 +2198,16 @@ private void closeThreads(boolean force) throws IOException {
    */
   @Override
   public synchronized void close() throws IOException {
+    TraceScope scope =
+        dfsClient.getPathTraceScope("DFSOutputStream#close", src);
+    try {
+      closeImpl();
+    } finally {
+      scope.close();
+    }
+  }
+
+  private synchronized void closeImpl() throws IOException {
     if (isClosed()) {
       IOException e = lastException.getAndSet(null);
       if (e == null)
@@ -2154,7 +2233,12 @@ public synchronized void close() throws IOException {
       // get last block before destroying the streamer
       ExtendedBlock lastBlock = streamer.getBlock();
       closeThreads(false);
+      TraceScope scope = Trace.startSpan("completeFile", Sampler.NEVER);
+      try {
         completeFile(lastBlock);
+      } finally {
+        scope.close();
+      }
       dfsClient.endFileLease(fileId);
     } catch (ClosedChannelException e) {
     } finally {

DFSPacket.java

@@ -21,9 +21,12 @@
 import java.io.IOException;
 import java.nio.BufferOverflowException;
 import java.nio.channels.ClosedChannelException;
+import java.util.Arrays;
 
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
 import org.apache.hadoop.hdfs.util.ByteArrayManager;
+
+import org.apache.htrace.Span;
 
 /****************************************************************
  * DFSPacket is used by DataStreamer and DFSOutputStream.
@@ -33,6 +36,7 @@
 class DFSPacket {
   public static final long HEART_BEAT_SEQNO = -1L;
+  private static long[] EMPTY = new long[0];
   private final long seqno; // sequence number of buffer in block
   private final long offsetInBlock; // offset in block
   private boolean syncBlock; // this packet forces the current block to disk
@@ -59,6 +63,9 @@ class DFSPacket {
   private int checksumPos;
   private final int dataStart;
   private int dataPos;
+  private long[] traceParents = EMPTY;
+  private int traceParentsUsed;
+  private Span span;
 
   /**
    * Create a new packet.
@@ -267,4 +274,70 @@ public String toString() {
            " lastPacketInBlock: " + this.lastPacketInBlock +
            " lastByteOffsetInBlock: " + this.getLastByteOffsetBlock();
   }
+
+  /**
+   * Add a trace parent span for this packet.<p/>
+   *
+   * Trace parent spans for a packet are the trace spans responsible for
+   * adding data to that packet.  We store them as an array of longs for
+   * efficiency.<p/>
+   *
+   * Protected by the DFSOutputStream dataQueue lock.
+   */
+  public void addTraceParent(Span span) {
+    if (span == null) {
+      return;
+    }
+    addTraceParent(span.getSpanId());
+  }
+
+  public void addTraceParent(long id) {
+    if (traceParentsUsed == traceParents.length) {
+      int newLength = (traceParents.length == 0) ? 8 :
+          traceParents.length * 2;
+      traceParents = Arrays.copyOf(traceParents, newLength);
+    }
+    traceParents[traceParentsUsed] = id;
+    traceParentsUsed++;
+  }
+
+  /**
+   * Get the trace parent spans for this packet.<p/>
+   *
+   * Will always be non-null.<p/>
+   *
+   * Protected by the DFSOutputStream dataQueue lock.
+   */
+  public long[] getTraceParents() {
+    // Remove duplicates from the array.
+    int len = traceParentsUsed;
+    Arrays.sort(traceParents, 0, len);
+    int i = 0, j = 0;
+    long prevVal = 0; // 0 is not a valid span id
+    while (true) {
+      if (i == len) {
+        break;
+      }
+      long val = traceParents[i];
+      if (val != prevVal) {
+        traceParents[j] = val;
+        j++;
+        prevVal = val;
+      }
+      i++;
+    }
+    if (j < traceParents.length) {
+      traceParents = Arrays.copyOf(traceParents, j);
+      traceParentsUsed = traceParents.length;
+    }
+    return traceParents;
+  }
+
+  public void setTraceSpan(Span span) {
+    this.span = span;
+  }
+
+  public Span getTraceSpan() {
+    return span;
+  }
 }

TestDFSPacket.java

@@ -65,4 +65,29 @@ public static void assertArrayRegionsEqual(byte []buf1, int off1, byte []buf2,
       }
     }
   }
+
+  @Test
+  public void testAddParentsGetParents() throws Exception {
+    DFSPacket p = new DFSPacket(null, maxChunksPerPacket,
+                                0, 0, checksumSize, false);
+    long parents[] = p.getTraceParents();
+    Assert.assertEquals(0, parents.length);
+    p.addTraceParent(123);
+    p.addTraceParent(123);
+    parents = p.getTraceParents();
+    Assert.assertEquals(1, parents.length);
+    Assert.assertEquals(123, parents[0]);
+    parents = p.getTraceParents(); // test calling 'get' again.
+    Assert.assertEquals(1, parents.length);
+    Assert.assertEquals(123, parents[0]);
+    p.addTraceParent(1);
+    p.addTraceParent(456);
+    p.addTraceParent(789);
+    parents = p.getTraceParents();
+    Assert.assertEquals(4, parents.length);
+    Assert.assertEquals(1, parents[0]);
+    Assert.assertEquals(123, parents[1]);
+    Assert.assertEquals(456, parents[2]);
+    Assert.assertEquals(789, parents[3]);
+  }
 }