HDFS-4937. ReplicationMonitor can infinite-loop in BlockPlacementPolicyDefault#chooseRandom(). Contributed by Kihwal Lee.

(cherry picked from commit 43539b5ff4)
This commit is contained in:
Kihwal Lee 2015-10-30 09:39:49 -05:00
parent c84ec0657f
commit c250b21c23
2 changed files with 15 additions and 0 deletions

View File

@ -89,6 +89,9 @@ Release 2.7.2 - UNRELEASED
HDFS-9317. Document fsck -blockId and -storagepolicy options in branch-2.7.
(aajisaka)
HDFS-4937. ReplicationMonitor can infinite-loop in
BlockPlacementPolicyDefault#chooseRandom() (kihwal)
Release 2.7.1 - 2015-07-06
INCOMPATIBLE CHANGES

View File

@ -622,6 +622,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
int numOfAvailableNodes = clusterMap.countNumOfAvailableNodes( int numOfAvailableNodes = clusterMap.countNumOfAvailableNodes(
scope, excludedNodes); scope, excludedNodes);
int refreshCounter = numOfAvailableNodes;
StringBuilder builder = null; StringBuilder builder = null;
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
builder = debugLoggingBuilder.get(); builder = debugLoggingBuilder.get();
@ -675,6 +676,17 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
// If no candidate storage was found on this DN then set badTarget. // If no candidate storage was found on this DN then set badTarget.
badTarget = (i == storages.length); badTarget = (i == storages.length);
} }
// Refresh the node count. If the number of live nodes shrinks while this
// loop is running and the change is not picked up here, the loop may
// never terminate when the replica/rack constraints cannot be satisfied.
if (--refreshCounter == 0) {
refreshCounter = clusterMap.countNumOfAvailableNodes(scope,
excludedNodes);
// It has already gone through a sufficient number of nodes.
if (refreshCounter <= excludedNodes.size()) {
break;
}
}
} }
if (numOfReplicas>0) { if (numOfReplicas>0) {