HDFS-7409. Allow dead nodes to finish decommissioning if all files are fully replicated.
This commit is contained in:
parent
72c141ba96
commit
765aecb4e1
|
@ -375,6 +375,9 @@ Release 2.7.0 - UNRELEASED
|
|||
HDFS-7398. Reset cached thread-local FSEditLogOp's on every
|
||||
FSEditLog#logEdit. (Gera Shegalov via cnauroth)
|
||||
|
||||
HDFS-7409. Allow dead nodes to finish decommissioning if all files are
|
||||
fully replicated. (wang)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
|
|
@ -3272,11 +3272,19 @@ public class BlockManager {
|
|||
}
|
||||
|
||||
if (!status && !srcNode.isAlive) {
|
||||
LOG.warn("srcNode " + srcNode + " is dead " +
|
||||
"when decommission is in progress. Continue to mark " +
|
||||
"it as decommission in progress. In that way, when it rejoins the " +
|
||||
"cluster it can continue the decommission process.");
|
||||
status = true;
|
||||
updateState();
|
||||
if (pendingReplicationBlocksCount == 0 &&
|
||||
underReplicatedBlocksCount == 0) {
|
||||
LOG.info("srcNode {} is dead and there are no under-replicated" +
|
||||
" blocks or blocks pending replication. Marking as " +
|
||||
"decommissioned.");
|
||||
} else {
|
||||
LOG.warn("srcNode " + srcNode + " is dead " +
|
||||
"while decommission is in progress. Continuing to mark " +
|
||||
"it as decommission in progress so when it rejoins the " +
|
||||
"cluster it can continue the decommission process.");
|
||||
status = true;
|
||||
}
|
||||
}
|
||||
|
||||
srcNode.decommissioningStatus.set(underReplicatedBlocks,
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -347,8 +348,15 @@ public class TestDecommissioningStatus {
|
|||
BlockManagerTestUtil.checkDecommissionState(dm, dead.get(0));
|
||||
|
||||
// Verify that the DN remains in DECOMMISSION_INPROGRESS state.
|
||||
assertTrue("the node is in decommissioned state ",
|
||||
!dead.get(0).isDecommissioned());
|
||||
assertTrue("the node should be DECOMMISSION_IN_PROGRESSS",
|
||||
dead.get(0).isDecommissionInProgress());
|
||||
|
||||
// Delete the under-replicated file, which should let the
|
||||
// DECOMMISSION_INPROGRESS node become DECOMMISSIONED
|
||||
cleanupFile(fileSys, f);
|
||||
BlockManagerTestUtil.checkDecommissionState(dm, dead.get(0));
|
||||
assertTrue("the node should be decommissioned",
|
||||
dead.get(0).isDecommissioned());
|
||||
|
||||
// Add the node back
|
||||
cluster.restartDataNode(dataNodeProperties, true);
|
||||
|
@ -359,7 +367,6 @@ public class TestDecommissioningStatus {
|
|||
// make them available again.
|
||||
writeConfigFile(localFileSys, excludeFile, null);
|
||||
dm.refreshNodes(conf);
|
||||
cleanupFile(fileSys, f);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue