HDFS-4937. ReplicationMonitor can infinite-loop in BlockPlacementPolicyDefault#chooseRandom(). Contributed by Kihwal Lee.

This commit is contained in:
Kihwal Lee 2015-10-30 09:27:21 -05:00
parent ce31b22739
commit 43539b5ff4
2 changed files with 15 additions and 0 deletions

View File

@@ -2201,6 +2201,9 @@ Release 2.8.0 - UNRELEASED
HDFS-9332. Fix Precondition failures from NameNodeEditLogRoller while
saving namespace. (wang)
HDFS-4937. ReplicationMonitor can infinite-loop in
BlockPlacementPolicyDefault#chooseRandom() (kihwal)
Release 2.7.2 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@@ -659,6 +659,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
int numOfAvailableNodes = clusterMap.countNumOfAvailableNodes( int numOfAvailableNodes = clusterMap.countNumOfAvailableNodes(
scope, excludedNodes); scope, excludedNodes);
int refreshCounter = numOfAvailableNodes;
StringBuilder builder = null; StringBuilder builder = null;
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
builder = debugLoggingBuilder.get(); builder = debugLoggingBuilder.get();
@@ -708,6 +709,17 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
// If no candidate storage was found on this DN then set badTarget. // If no candidate storage was found on this DN then set badTarget.
badTarget = (storage == null); badTarget = (storage == null);
} }
// Refresh the node count. If the live node count became smaller,
// but it is not reflected in this loop, it may loop forever in case
// the replicas/rack cannot be satisfied.
if (--refreshCounter == 0) {
refreshCounter = clusterMap.countNumOfAvailableNodes(scope,
excludedNodes);
// It has already gone through enough number of nodes.
if (refreshCounter <= excludedNodes.size()) {
break;
}
}
} }
if (numOfReplicas>0) { if (numOfReplicas>0) {