HDFS-6085. Improve CacheReplicationMonitor log messages a bit (cmccabe)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1576195 13f79535-47bb-0310-9956-ffa450edef68
Colin McCabe 2014-03-11 06:25:27 +00:00
parent 243db1c57e
commit 1be546ff2b
2 changed files with 70 additions and 29 deletions


@@ -152,6 +152,8 @@ Release 2.4.0 - UNRELEASED
     HDFS-3405. Checkpointing should use HTTP POST or PUT instead of GET-GET
     to send merged fsimages. (Vinayakumar B via wang)
 
+    HDFS-6085. Improve CacheReplicationMonitor log messages a bit (cmccabe)
+
   OPTIMIZATIONS
 
     HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery


@@ -313,30 +313,32 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
     FSDirectory fsDir = namesystem.getFSDirectory();
     final long now = new Date().getTime();
     for (CacheDirective directive : cacheManager.getCacheDirectives()) {
+      scannedDirectives++;
       // Skip processing this entry if it has expired
-      if (LOG.isTraceEnabled()) {
-        LOG.trace("Directive expiry is at " + directive.getExpiryTime());
-      }
       if (directive.getExpiryTime() > 0 && directive.getExpiryTime() <= now) {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Skipping directive id " + directive.getId()
-              + " because it has expired (" + directive.getExpiryTime() + "<="
-              + now + ")");
+          LOG.debug("Directive " + directive.getId() + ": the directive " +
+              "expired at " + directive.getExpiryTime() + " (now = " +
+              now + ")");
         }
         continue;
       }
-      scannedDirectives++;
       String path = directive.getPath();
       INode node;
       try {
         node = fsDir.getINode(path);
       } catch (UnresolvedLinkException e) {
         // We don't cache through symlinks
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Directive " + directive.getId() +
+              ": got UnresolvedLinkException while resolving path " + path);
+        }
         continue;
       }
       if (node == null) {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("No inode found at " + path);
+          LOG.debug("Directive " + directive.getId() +
+              ": No inode found at " + path);
         }
       } else if (node.isDirectory()) {
         INodeDirectory dir = node.asDirectory();
@@ -351,8 +353,8 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
         rescanFile(directive, node.asFile());
       } else {
         if (LOG.isDebugEnabled()) {
-          LOG.debug("Ignoring non-directory, non-file inode " + node +
-              " found at " + path);
+          LOG.debug("Directive " + directive.getId() +
+              ": ignoring non-directive, non-file inode " + node);
         }
       }
     }
@@ -380,8 +382,8 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
     CachePool pool = directive.getPool();
     if (pool.getBytesNeeded() > pool.getLimit()) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug(String.format("Skipping directive id %d file %s because "
-            + "limit of pool %s would be exceeded (%d > %d)",
+        LOG.debug(String.format("Directive %d: not scanning file %s because " +
+            "bytesNeeded for pool %s is %d, but the pool's limit is %d",
             directive.getId(),
             file.getFullPathName(),
             pool.getPoolName(),
@@ -395,6 +397,11 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
     for (BlockInfo blockInfo : blockInfos) {
       if (!blockInfo.getBlockUCState().equals(BlockUCState.COMPLETE)) {
         // We don't try to cache blocks that are under construction.
+        if (LOG.isTraceEnabled()) {
+          LOG.trace("Directive " + directive.getId() + ": can't cache " +
+              "block " + blockInfo + " because it is in state " +
+              blockInfo.getBlockUCState() + ", not COMPLETE.");
+        }
         continue;
       }
       Block block = new Block(blockInfo.getBlockId());
@@ -403,6 +410,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
       CachedBlock ocblock = cachedBlocks.get(ncblock);
       if (ocblock == null) {
         cachedBlocks.put(ncblock);
+        ocblock = ncblock;
       } else {
         // Update bytesUsed using the current replication levels.
         // Assumptions: we assume that all the blocks are the same length
@@ -433,14 +441,18 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
           ocblock.setReplicationAndMark(directive.getReplication(), mark);
         }
       }
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Directive " + directive.getId() + ": setting replication " +
+            "for block " + blockInfo + " to " + ocblock.getReplication());
+      }
     }
     // Increment the "cached" statistics
     directive.addBytesCached(cachedTotal);
     if (cachedTotal == neededTotal) {
       directive.addFilesCached(1);
     }
-    if (LOG.isTraceEnabled()) {
-      LOG.trace("Directive " + directive.getId() + " is caching " +
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Directive " + directive.getId() + ": caching " +
           file.getFullPathName() + ": " + cachedTotal + "/" + neededTotal +
           " bytes");
     }
@@ -500,6 +512,11 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
           iter.hasNext(); ) {
       DatanodeDescriptor datanode = iter.next();
       if (!cblock.isInList(datanode.getCached())) {
+        if (LOG.isTraceEnabled()) {
+          LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
+              "PENDING_UNCACHED for node " + datanode.getDatanodeUuid() +
+              "because the DataNode uncached it.");
+        }
         datanode.getPendingUncached().remove(cblock);
         iter.remove();
       }
@@ -509,8 +526,9 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
       String reason = findReasonForNotCaching(cblock, blockInfo);
       int neededCached = 0;
       if (reason != null) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("not caching " + cblock + " because it is " + reason);
+        if (LOG.isTraceEnabled()) {
+          LOG.trace("Block " + cblock.getBlockId() + ": can't cache " +
+              "block because it is " + reason);
         }
       } else {
         neededCached = cblock.getReplication();
@@ -523,6 +541,12 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
           DatanodeDescriptor datanode = iter.next();
           datanode.getPendingCached().remove(cblock);
           iter.remove();
+          if (LOG.isTraceEnabled()) {
+            LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
+                "PENDING_CACHED for node " + datanode.getDatanodeUuid() +
+                "because we already have " + numCached + " cached " +
+                "replicas and we only need " + neededCached);
+          }
         }
       }
       if (numCached < neededCached) {
@@ -532,6 +556,12 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
           DatanodeDescriptor datanode = iter.next();
           datanode.getPendingUncached().remove(cblock);
           iter.remove();
+          if (LOG.isTraceEnabled()) {
+            LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
+                "PENDING_UNCACHED for node " + datanode.getDatanodeUuid() +
+                "because we only have " + numCached + " cached replicas " +
+                "and we need " + neededCached);
+          }
         }
       }
       int neededUncached = numCached -
@@ -551,6 +581,11 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
           pendingUncached.isEmpty() &&
           pendingCached.isEmpty()) {
         // we have nothing more to do with this block.
+        if (LOG.isTraceEnabled()) {
+          LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
+              "cachedBlocks, since neededCached == 0, and " +
+              "pendingUncached and pendingCached are empty.");
+        }
         cbIter.remove();
       }
     }
@@ -609,15 +644,16 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
         getStoredBlock(new Block(cachedBlock.getBlockId()));
     if (blockInfo == null) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Not caching block " + cachedBlock + " because there " +
-            "is no record of it on the NameNode.");
+        LOG.debug("Block " + cachedBlock.getBlockId() + ": can't add new " +
+            "cached replicas, because there is no record of this block " +
+            "on the NameNode.");
       }
       return;
     }
     if (!blockInfo.isComplete()) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug("Not caching block " + cachedBlock + " because it " +
-            "is not yet complete.");
+        LOG.debug("Block " + cachedBlock.getBlockId() + ": can't cache this " +
+            "block, because it is not yet complete.");
       }
       return;
     }
@@ -665,10 +701,11 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
       }
       if (pendingCapacity < blockInfo.getNumBytes()) {
         if (LOG.isTraceEnabled()) {
-          LOG.trace("Datanode " + datanode + " is not a valid possibility for"
-              + " block " + blockInfo.getBlockId() + " of size "
-              + blockInfo.getNumBytes() + " bytes, only has "
-              + datanode.getCacheRemaining() + " bytes of cache remaining.");
+          LOG.trace("Block " + blockInfo.getBlockId() + ": DataNode " +
+              datanode.getDatanodeUuid() + " is not a valid possibility " +
+              "because the block has size " + blockInfo.getNumBytes() + ", but " +
+              "the DataNode only has " + datanode.getCacheRemaining() + " " +
+              "bytes of cache remaining.");
         }
         outOfCapacity++;
         continue;
@@ -678,6 +715,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
     List<DatanodeDescriptor> chosen = chooseDatanodesForCaching(possibilities,
         neededCached, blockManager.getDatanodeManager().getStaleInterval());
     for (DatanodeDescriptor datanode : chosen) {
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Block " + blockInfo.getBlockId() + ": added to " +
+            "PENDING_CACHED on DataNode " + datanode.getDatanodeUuid());
+      }
       pendingCached.add(datanode);
       boolean added = datanode.getPendingCached().add(cachedBlock);
       assert added;
@@ -685,12 +726,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
     // We were unable to satisfy the requested replication factor
     if (neededCached > chosen.size()) {
       if (LOG.isDebugEnabled()) {
-        LOG.debug(
-            "Only have " +
+        LOG.debug("Block " + blockInfo.getBlockId() + ": we only have " +
             (cachedBlock.getReplication() - neededCached + chosen.size()) +
-            " of " + cachedBlock.getReplication() + " cached replicas for " +
-            cachedBlock + " (" + outOfCapacity + " nodes have insufficient " +
-            "capacity).");
+            " of " + cachedBlock.getReplication() + " cached replicas. " +
+            outOfCapacity + " DataNodes have insufficient cache capacity.");
       }
     }
   }
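The logging convention this change settles on is simple: prefix each message with the directive or block id so related lines can be grepped together, and wrap the string concatenation in an isDebugEnabled()/isTraceEnabled() check so disabled levels cost nothing. A minimal standalone sketch of that pattern using commons-logging follows; the LogGuardSketch class and the directiveId/blockId values are illustrative stand-ins, not part of the HDFS source.

```java
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

// Illustrative stand-in class, not the actual HDFS CacheReplicationMonitor.
public class LogGuardSketch {
  private static final Log LOG = LogFactory.getLog(LogGuardSketch.class);

  public static void main(String[] args) {
    // Hypothetical ids standing in for a cache directive and a block.
    long directiveId = 42;
    long blockId = 1073741825L;

    // Guard the concatenation so the message string is only built
    // when TRACE is actually enabled.
    if (LOG.isTraceEnabled()) {
      LOG.trace("Directive " + directiveId + ": can't cache block " +
          blockId + " because it is not COMPLETE.");
    }

    // Same pattern at DEBUG level, with the "Block <id>:" prefix used
    // in the diff above to keep messages grep-able by block id.
    if (LOG.isDebugEnabled()) {
      LOG.debug("Block " + blockId + ": can't add new cached replicas, " +
          "because there is no record of this block on the NameNode.");
    }
  }
}
```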