HDFS-7409. Allow dead nodes to finish decommissioning if all files are fully replicated.

This commit is contained in:
Andrew Wang 2014-11-19 17:53:00 -08:00
parent 72c141ba96
commit 765aecb4e1
3 changed files with 26 additions and 8 deletions

View File

@ -375,6 +375,9 @@ Release 2.7.0 - UNRELEASED
HDFS-7398. Reset cached thread-local FSEditLogOp's on every
FSEditLog#logEdit. (Gera Shegalov via cnauroth)
HDFS-7409. Allow dead nodes to finish decommissioning if all files are
fully replicated. (wang)
OPTIMIZATIONS
BUG FIXES

View File

@ -3272,12 +3272,20 @@ public class BlockManager {
}
// Handles a source node that is no longer alive while status is still false.
if (!status && !srcNode.isAlive) {
  // NOTE(review): presumably refreshes the two counters read just below;
  // confirm against updateState().
  updateState();
  if (pendingReplicationBlocksCount == 0 &&
      underReplicatedBlocksCount == 0) {
    // Nothing is under-replicated or pending, so status stays false here.
    // srcNode is concatenated into the message (same style as the warning
    // below) so the node is actually identified in the log line.
    LOG.info("srcNode " + srcNode + " is dead and there are no " +
        "under-replicated blocks or blocks pending replication. " +
        "Marking as decommissioned.");
  } else {
    // NOTE(review): this message appears to contain two overlapping
    // wordings of the same sentence; confirm which one is intended.
    LOG.warn("srcNode " + srcNode + " is dead " +
        "when decommission is in progress. Continue to mark " +
        "it as decommission in progress. In that way, when it rejoins the " +
        "while decommission is in progress. Continuing to mark " +
        "it as decommission in progress so when it rejoins the " +
        "cluster it can continue the decommission process.");
    // Per the warning above, a true status keeps the node marked as
    // decommission in progress.
    status = true;
  }
}
srcNode.decommissioningStatus.set(underReplicatedBlocks,
decommissionOnlyReplicas,

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.namenode;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
@ -347,8 +348,15 @@ public class TestDecommissioningStatus {
BlockManagerTestUtil.checkDecommissionState(dm, dead.get(0));
// Verify that the DN remains in DECOMMISSION_INPROGRESS state.
assertTrue("the node is in decommissioned state ",
!dead.get(0).isDecommissioned());
// The dead node is expected to still report decommission-in-progress here.
assertTrue("the node should be DECOMMISSION_IN_PROGRESS",
    dead.get(0).isDecommissionInProgress());
// Delete the under-replicated file, which should let the
// DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED
cleanupFile(fileSys, f);
BlockManagerTestUtil.checkDecommissionState(dm, dead.get(0));
assertTrue("the node should be decommissioned",
dead.get(0).isDecommissioned());
// Add the node back
cluster.restartDataNode(dataNodeProperties, true);
@ -359,7 +367,6 @@ public class TestDecommissioningStatus {
// make them available again.
writeConfigFile(localFileSys, excludeFile, null);
dm.refreshNodes(conf);
cleanupFile(fileSys, f);
}
/**