HDFS-4937. ReplicationMonitor can infinite-loop in BlockPlacementPolicyDefault#chooseRandom(). Contributed by Kihwal Lee.

(cherry picked from commit ff47f35dee)
This commit is contained in:
Kihwal Lee 2015-11-05 09:26:53 -06:00
parent 6edd41ac69
commit 37abd0e33b
2 changed files with 12 additions and 0 deletions

View File

@ -1410,6 +1410,9 @@ Release 2.7.3 - UNRELEASED
HDFS-9289. Make DataStreamer#block thread safe and verify genStamp in
commitBlock. (Chang Li via zhz)
HDFS-4937. ReplicationMonitor can infinite-loop in
BlockPlacementPolicyDefault#chooseRandom(). (kihwal)
Release 2.7.2 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -659,6 +659,7 @@ protected DatanodeStorageInfo chooseRandom(int numOfReplicas,
int numOfAvailableNodes = clusterMap.countNumOfAvailableNodes( int numOfAvailableNodes = clusterMap.countNumOfAvailableNodes(
scope, excludedNodes); scope, excludedNodes);
int refreshCounter = numOfAvailableNodes;
StringBuilder builder = null; StringBuilder builder = null;
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
builder = debugLoggingBuilder.get(); builder = debugLoggingBuilder.get();
@ -708,6 +709,14 @@ protected DatanodeStorageInfo chooseRandom(int numOfReplicas,
// If no candidate storage was found on this DN then set badTarget. // If no candidate storage was found on this DN then set badTarget.
badTarget = (storage == null); badTarget = (storage == null);
} }
// Refresh the node count. If the live node count became smaller,
// but it is not reflected in this loop, it may loop forever in case
// the replicas/rack cannot be satisfied.
if (--refreshCounter == 0) {
numOfAvailableNodes = clusterMap.countNumOfAvailableNodes(scope,
excludedNodes);
refreshCounter = numOfAvailableNodes;
}
} }
if (numOfReplicas>0) { if (numOfReplicas>0) {