HDFS-6085. Improve CacheReplicationMonitor log messages a bit (cmccabe)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1576195 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
243db1c57e
commit
1be546ff2b
|
@ -152,6 +152,8 @@ Release 2.4.0 - UNRELEASED
|
||||||
HDFS-3405. Checkpointing should use HTTP POST or PUT instead of GET-GET
|
HDFS-3405. Checkpointing should use HTTP POST or PUT instead of GET-GET
|
||||||
to send merged fsimages. (Vinayakumar B via wang)
|
to send merged fsimages. (Vinayakumar B via wang)
|
||||||
|
|
||||||
|
HDFS-6085. Improve CacheReplicationMonitor log messages a bit (cmccabe)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
|
HDFS-5790. LeaseManager.findPath is very slow when many leases need recovery
|
||||||
|
|
|
@ -313,30 +313,32 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
FSDirectory fsDir = namesystem.getFSDirectory();
|
FSDirectory fsDir = namesystem.getFSDirectory();
|
||||||
final long now = new Date().getTime();
|
final long now = new Date().getTime();
|
||||||
for (CacheDirective directive : cacheManager.getCacheDirectives()) {
|
for (CacheDirective directive : cacheManager.getCacheDirectives()) {
|
||||||
|
scannedDirectives++;
|
||||||
// Skip processing this entry if it has expired
|
// Skip processing this entry if it has expired
|
||||||
if (LOG.isTraceEnabled()) {
|
|
||||||
LOG.trace("Directive expiry is at " + directive.getExpiryTime());
|
|
||||||
}
|
|
||||||
if (directive.getExpiryTime() > 0 && directive.getExpiryTime() <= now) {
|
if (directive.getExpiryTime() > 0 && directive.getExpiryTime() <= now) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Skipping directive id " + directive.getId()
|
LOG.debug("Directive " + directive.getId() + ": the directive " +
|
||||||
+ " because it has expired (" + directive.getExpiryTime() + "<="
|
"expired at " + directive.getExpiryTime() + " (now = " +
|
||||||
+ now + ")");
|
now + ")");
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
scannedDirectives++;
|
|
||||||
String path = directive.getPath();
|
String path = directive.getPath();
|
||||||
INode node;
|
INode node;
|
||||||
try {
|
try {
|
||||||
node = fsDir.getINode(path);
|
node = fsDir.getINode(path);
|
||||||
} catch (UnresolvedLinkException e) {
|
} catch (UnresolvedLinkException e) {
|
||||||
// We don't cache through symlinks
|
// We don't cache through symlinks
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Directive " + directive.getId() +
|
||||||
|
": got UnresolvedLinkException while resolving path " + path);
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (node == null) {
|
if (node == null) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("No inode found at " + path);
|
LOG.debug("Directive " + directive.getId() +
|
||||||
|
": No inode found at " + path);
|
||||||
}
|
}
|
||||||
} else if (node.isDirectory()) {
|
} else if (node.isDirectory()) {
|
||||||
INodeDirectory dir = node.asDirectory();
|
INodeDirectory dir = node.asDirectory();
|
||||||
|
@ -351,8 +353,8 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
rescanFile(directive, node.asFile());
|
rescanFile(directive, node.asFile());
|
||||||
} else {
|
} else {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Ignoring non-directory, non-file inode " + node +
|
LOG.debug("Directive " + directive.getId() +
|
||||||
" found at " + path);
|
": ignoring non-directive, non-file inode " + node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -380,8 +382,8 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
CachePool pool = directive.getPool();
|
CachePool pool = directive.getPool();
|
||||||
if (pool.getBytesNeeded() > pool.getLimit()) {
|
if (pool.getBytesNeeded() > pool.getLimit()) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug(String.format("Skipping directive id %d file %s because "
|
LOG.debug(String.format("Directive %d: not scanning file %s because " +
|
||||||
+ "limit of pool %s would be exceeded (%d > %d)",
|
"bytesNeeded for pool %s is %d, but the pool's limit is %d",
|
||||||
directive.getId(),
|
directive.getId(),
|
||||||
file.getFullPathName(),
|
file.getFullPathName(),
|
||||||
pool.getPoolName(),
|
pool.getPoolName(),
|
||||||
|
@ -395,6 +397,11 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
for (BlockInfo blockInfo : blockInfos) {
|
for (BlockInfo blockInfo : blockInfos) {
|
||||||
if (!blockInfo.getBlockUCState().equals(BlockUCState.COMPLETE)) {
|
if (!blockInfo.getBlockUCState().equals(BlockUCState.COMPLETE)) {
|
||||||
// We don't try to cache blocks that are under construction.
|
// We don't try to cache blocks that are under construction.
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Directive " + directive.getId() + ": can't cache " +
|
||||||
|
"block " + blockInfo + " because it is in state " +
|
||||||
|
blockInfo.getBlockUCState() + ", not COMPLETE.");
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Block block = new Block(blockInfo.getBlockId());
|
Block block = new Block(blockInfo.getBlockId());
|
||||||
|
@ -403,6 +410,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
CachedBlock ocblock = cachedBlocks.get(ncblock);
|
CachedBlock ocblock = cachedBlocks.get(ncblock);
|
||||||
if (ocblock == null) {
|
if (ocblock == null) {
|
||||||
cachedBlocks.put(ncblock);
|
cachedBlocks.put(ncblock);
|
||||||
|
ocblock = ncblock;
|
||||||
} else {
|
} else {
|
||||||
// Update bytesUsed using the current replication levels.
|
// Update bytesUsed using the current replication levels.
|
||||||
// Assumptions: we assume that all the blocks are the same length
|
// Assumptions: we assume that all the blocks are the same length
|
||||||
|
@ -433,14 +441,18 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
ocblock.setReplicationAndMark(directive.getReplication(), mark);
|
ocblock.setReplicationAndMark(directive.getReplication(), mark);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Directive " + directive.getId() + ": setting replication " +
|
||||||
|
"for block " + blockInfo + " to " + ocblock.getReplication());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Increment the "cached" statistics
|
// Increment the "cached" statistics
|
||||||
directive.addBytesCached(cachedTotal);
|
directive.addBytesCached(cachedTotal);
|
||||||
if (cachedTotal == neededTotal) {
|
if (cachedTotal == neededTotal) {
|
||||||
directive.addFilesCached(1);
|
directive.addFilesCached(1);
|
||||||
}
|
}
|
||||||
if (LOG.isTraceEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.trace("Directive " + directive.getId() + " is caching " +
|
LOG.debug("Directive " + directive.getId() + ": caching " +
|
||||||
file.getFullPathName() + ": " + cachedTotal + "/" + neededTotal +
|
file.getFullPathName() + ": " + cachedTotal + "/" + neededTotal +
|
||||||
" bytes");
|
" bytes");
|
||||||
}
|
}
|
||||||
|
@ -500,6 +512,11 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
iter.hasNext(); ) {
|
iter.hasNext(); ) {
|
||||||
DatanodeDescriptor datanode = iter.next();
|
DatanodeDescriptor datanode = iter.next();
|
||||||
if (!cblock.isInList(datanode.getCached())) {
|
if (!cblock.isInList(datanode.getCached())) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
|
||||||
|
"PENDING_UNCACHED for node " + datanode.getDatanodeUuid() +
|
||||||
|
"because the DataNode uncached it.");
|
||||||
|
}
|
||||||
datanode.getPendingUncached().remove(cblock);
|
datanode.getPendingUncached().remove(cblock);
|
||||||
iter.remove();
|
iter.remove();
|
||||||
}
|
}
|
||||||
|
@ -509,8 +526,9 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
String reason = findReasonForNotCaching(cblock, blockInfo);
|
String reason = findReasonForNotCaching(cblock, blockInfo);
|
||||||
int neededCached = 0;
|
int neededCached = 0;
|
||||||
if (reason != null) {
|
if (reason != null) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isTraceEnabled()) {
|
||||||
LOG.debug("not caching " + cblock + " because it is " + reason);
|
LOG.trace("Block " + cblock.getBlockId() + ": can't cache " +
|
||||||
|
"block because it is " + reason);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
neededCached = cblock.getReplication();
|
neededCached = cblock.getReplication();
|
||||||
|
@ -523,6 +541,12 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
DatanodeDescriptor datanode = iter.next();
|
DatanodeDescriptor datanode = iter.next();
|
||||||
datanode.getPendingCached().remove(cblock);
|
datanode.getPendingCached().remove(cblock);
|
||||||
iter.remove();
|
iter.remove();
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
|
||||||
|
"PENDING_CACHED for node " + datanode.getDatanodeUuid() +
|
||||||
|
"because we already have " + numCached + " cached " +
|
||||||
|
"replicas and we only need " + neededCached);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (numCached < neededCached) {
|
if (numCached < neededCached) {
|
||||||
|
@ -532,6 +556,12 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
DatanodeDescriptor datanode = iter.next();
|
DatanodeDescriptor datanode = iter.next();
|
||||||
datanode.getPendingUncached().remove(cblock);
|
datanode.getPendingUncached().remove(cblock);
|
||||||
iter.remove();
|
iter.remove();
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
|
||||||
|
"PENDING_UNCACHED for node " + datanode.getDatanodeUuid() +
|
||||||
|
"because we only have " + numCached + " cached replicas " +
|
||||||
|
"and we need " + neededCached);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int neededUncached = numCached -
|
int neededUncached = numCached -
|
||||||
|
@ -551,6 +581,11 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
pendingUncached.isEmpty() &&
|
pendingUncached.isEmpty() &&
|
||||||
pendingCached.isEmpty()) {
|
pendingCached.isEmpty()) {
|
||||||
// we have nothing more to do with this block.
|
// we have nothing more to do with this block.
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Block " + cblock.getBlockId() + ": removing from " +
|
||||||
|
"cachedBlocks, since neededCached == 0, and " +
|
||||||
|
"pendingUncached and pendingCached are empty.");
|
||||||
|
}
|
||||||
cbIter.remove();
|
cbIter.remove();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -609,15 +644,16 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
getStoredBlock(new Block(cachedBlock.getBlockId()));
|
getStoredBlock(new Block(cachedBlock.getBlockId()));
|
||||||
if (blockInfo == null) {
|
if (blockInfo == null) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Not caching block " + cachedBlock + " because there " +
|
LOG.debug("Block " + cachedBlock.getBlockId() + ": can't add new " +
|
||||||
"is no record of it on the NameNode.");
|
"cached replicas, because there is no record of this block " +
|
||||||
|
"on the NameNode.");
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!blockInfo.isComplete()) {
|
if (!blockInfo.isComplete()) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Not caching block " + cachedBlock + " because it " +
|
LOG.debug("Block " + cachedBlock.getBlockId() + ": can't cache this " +
|
||||||
"is not yet complete.");
|
"block, because it is not yet complete.");
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -665,10 +701,11 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
}
|
}
|
||||||
if (pendingCapacity < blockInfo.getNumBytes()) {
|
if (pendingCapacity < blockInfo.getNumBytes()) {
|
||||||
if (LOG.isTraceEnabled()) {
|
if (LOG.isTraceEnabled()) {
|
||||||
LOG.trace("Datanode " + datanode + " is not a valid possibility for"
|
LOG.trace("Block " + blockInfo.getBlockId() + ": DataNode " +
|
||||||
+ " block " + blockInfo.getBlockId() + " of size "
|
datanode.getDatanodeUuid() + " is not a valid possibility " +
|
||||||
+ blockInfo.getNumBytes() + " bytes, only has "
|
"because the block has size " + blockInfo.getNumBytes() + ", but " +
|
||||||
+ datanode.getCacheRemaining() + " bytes of cache remaining.");
|
"the DataNode only has " + datanode.getCacheRemaining() + " " +
|
||||||
|
"bytes of cache remaining.");
|
||||||
}
|
}
|
||||||
outOfCapacity++;
|
outOfCapacity++;
|
||||||
continue;
|
continue;
|
||||||
|
@ -678,6 +715,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
List<DatanodeDescriptor> chosen = chooseDatanodesForCaching(possibilities,
|
List<DatanodeDescriptor> chosen = chooseDatanodesForCaching(possibilities,
|
||||||
neededCached, blockManager.getDatanodeManager().getStaleInterval());
|
neededCached, blockManager.getDatanodeManager().getStaleInterval());
|
||||||
for (DatanodeDescriptor datanode : chosen) {
|
for (DatanodeDescriptor datanode : chosen) {
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Block " + blockInfo.getBlockId() + ": added to " +
|
||||||
|
"PENDING_CACHED on DataNode " + datanode.getDatanodeUuid());
|
||||||
|
}
|
||||||
pendingCached.add(datanode);
|
pendingCached.add(datanode);
|
||||||
boolean added = datanode.getPendingCached().add(cachedBlock);
|
boolean added = datanode.getPendingCached().add(cachedBlock);
|
||||||
assert added;
|
assert added;
|
||||||
|
@ -685,12 +726,10 @@ public class CacheReplicationMonitor extends Thread implements Closeable {
|
||||||
// We were unable to satisfy the requested replication factor
|
// We were unable to satisfy the requested replication factor
|
||||||
if (neededCached > chosen.size()) {
|
if (neededCached > chosen.size()) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug(
|
LOG.debug("Block " + blockInfo.getBlockId() + ": we only have " +
|
||||||
"Only have " +
|
|
||||||
(cachedBlock.getReplication() - neededCached + chosen.size()) +
|
(cachedBlock.getReplication() - neededCached + chosen.size()) +
|
||||||
" of " + cachedBlock.getReplication() + " cached replicas for " +
|
" of " + cachedBlock.getReplication() + " cached replicas. " +
|
||||||
cachedBlock + " (" + outOfCapacity + " nodes have insufficient " +
|
outOfCapacity + " DataNodes have insufficient cache capacity.");
|
||||||
"capacity).");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue