HDFS-7409. Allow dead nodes to finish decommissioning if all files are fully replicated.
commit 765aecb4e1
parent 72c141ba96
CHANGES.txt
@@ -375,6 +375,9 @@ Release 2.7.0 - UNRELEASED
     HDFS-7398. Reset cached thread-local FSEditLogOp's on every
     FSEditLog#logEdit. (Gera Shegalov via cnauroth)
 
+    HDFS-7409. Allow dead nodes to finish decommissioning if all files are
+    fully replicated. (wang)
+
   OPTIMIZATIONS
 
   BUG FIXES
BlockManager.java
@@ -3272,11 +3272,19 @@ public class BlockManager {
     }
 
     if (!status && !srcNode.isAlive) {
-      LOG.warn("srcNode " + srcNode + " is dead " +
-          "when decommission is in progress. Continue to mark " +
-          "it as decommission in progress. In that way, when it rejoins the " +
-          "cluster it can continue the decommission process.");
-      status = true;
+      updateState();
+      if (pendingReplicationBlocksCount == 0 &&
+          underReplicatedBlocksCount == 0) {
+        LOG.info("srcNode {} is dead and there are no under-replicated" +
+            " blocks or blocks pending replication. Marking as " +
+            "decommissioned.");
+      } else {
+        LOG.warn("srcNode " + srcNode + " is dead " +
+            "while decommission is in progress. Continuing to mark " +
+            "it as decommission in progress so when it rejoins the " +
+            "cluster it can continue the decommission process.");
+        status = true;
+      }
     }
 
     srcNode.decommissioningStatus.set(underReplicatedBlocks,
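A minimal, self-contained sketch of the behavior this BlockManager hunk introduces: a dead DataNode that is decommissioning is only marked DECOMMISSIONED once nothing remains pending replication or under-replicated, i.e. all files are fully replicated. This is not the actual BlockManager code; the class, method, and parameter names below are hypothetical.

// Sketch only: illustrates the decision logic, not the real BlockManager API.
public class DeadNodeDecommissionSketch {

  static boolean canCompleteDecommission(boolean nodeAlive,
                                         boolean nodeBlocksSufficientlyReplicated,
                                         long pendingReplicationBlocks,
                                         long underReplicatedBlocks) {
    if (nodeBlocksSufficientlyReplicated) {
      // Normal case: every block that lived on the node already has enough replicas.
      return true;
    }
    if (!nodeAlive) {
      // Dead node: it cannot stream its own blocks, so decommission can only
      // finish if the cluster as a whole has nothing left to replicate.
      return pendingReplicationBlocks == 0 && underReplicatedBlocks == 0;
    }
    // Live node still replicating: stay in DECOMMISSION_IN_PROGRESS.
    return false;
  }

  public static void main(String[] args) {
    // Dead node, nothing pending or under-replicated -> decommission completes.
    System.out.println(canCompleteDecommission(false, false, 0, 0)); // true
    // Dead node, three blocks still under-replicated -> stays in progress.
    System.out.println(canCompleteDecommission(false, false, 0, 3)); // false
  }
}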
TestDecommissioningStatus.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hdfs.server.namenode;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
 import java.io.IOException;
@@ -347,8 +348,15 @@ public class TestDecommissioningStatus {
     BlockManagerTestUtil.checkDecommissionState(dm, dead.get(0));
 
     // Verify that the DN remains in DECOMMISSION_INPROGRESS state.
-    assertTrue("the node is in decommissioned state ",
-        !dead.get(0).isDecommissioned());
+    assertTrue("the node should be DECOMMISSION_IN_PROGRESSS",
+        dead.get(0).isDecommissionInProgress());
 
+    // Delete the under-replicated file, which should let the
+    // DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED
+    cleanupFile(fileSys, f);
+    BlockManagerTestUtil.checkDecommissionState(dm, dead.get(0));
+    assertTrue("the node should be decommissioned",
+        dead.get(0).isDecommissioned());
+
     // Add the node back
     cluster.restartDataNode(dataNodeProperties, true);
@@ -359,7 +367,6 @@ public class TestDecommissioningStatus {
     // make them available again.
     writeConfigFile(localFileSys, excludeFile, null);
     dm.refreshNodes(conf);
-    cleanupFile(fileSys, f);
   }
 
   /**