From 8053085388fec17a40856f36821c142be32aa364 Mon Sep 17 00:00:00 2001 From: Wei-Chiu Chuang Date: Mon, 29 Jul 2019 14:30:29 -0700 Subject: [PATCH] HDFS-14429. Block remain in COMMITTED but not COMPLETE caused by Decommission. Contributed by Yicong Cai. --- .../server/blockmanagement/BlockManager.java | 7 +- .../apache/hadoop/hdfs/TestDecommission.java | 90 +++++++++++++++++++ .../server/datanode/DataNodeTestUtils.java | 5 ++ 3 files changed, 100 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 74e3853c3f7..8561cfd5485 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -3342,7 +3342,8 @@ public class BlockManager implements BlockStatsMXBean { int curReplicaDelta; if (result == AddBlockResult.ADDED) { - curReplicaDelta = (node.isDecommissioned()) ? 0 : 1; + curReplicaDelta = + (node.isDecommissioned() || node.isDecommissionInProgress()) ? 0 : 1; if (logEveryBlock) { blockLog.debug("BLOCK* addStoredBlock: {} is added to {} (size={})", node, storedBlock, storedBlock.getNumBytes()); @@ -3368,9 +3369,11 @@ public class BlockManager implements BlockStatsMXBean { int numLiveReplicas = num.liveReplicas(); int pendingNum = pendingReconstruction.getNumReplicas(storedBlock); int numCurrentReplica = numLiveReplicas + pendingNum; + int numUsableReplicas = num.liveReplicas() + + num.decommissioning() + num.liveEnteringMaintenanceReplicas(); if(storedBlock.getBlockUCState() == BlockUCState.COMMITTED && - hasMinStorage(storedBlock, numLiveReplicas)) { + hasMinStorage(storedBlock, numUsableReplicas)) { addExpectedReplicasToPending(storedBlock); completeBlock(storedBlock, null, false); } else if (storedBlock.isComplete() && result == AddBlockResult.ADDED) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java index f6529e024b1..d23191eadef 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java @@ -61,6 +61,7 @@ import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeAdminManager; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; @@ -962,6 +963,95 @@ public class TestDecommission extends AdminStatesBaseTest { BlockManagerTestUtil.recheckDecommissionState(dm); assertTrackedAndPending(dm.getDatanodeAdminManager(), 2, 0); } + + /** + * Simulate the following scene: + * Client writes Block(bk1) to three data nodes (dn1/dn2/dn3). bk1 has + * been completely written to three data nodes, and the data node succeeds + * FinalizeBlock, joins IBR and waits to report to NameNode. The client + * commits bk1 after receiving the ACK. When the DN has not been reported + * to the IBR, all three nodes dn1/dn2/dn3 enter Decommissioning and then the + * DN reports the IBR. + */ + @Test(timeout=120000) + public void testAllocAndIBRWhileDecommission() throws IOException { + LOG.info("Starting test testAllocAndIBRWhileDecommission"); + startCluster(1, 6); + getCluster().waitActive(); + FSNamesystem ns = getCluster().getNamesystem(0); + DatanodeManager dm = ns.getBlockManager().getDatanodeManager(); + + Path file = new Path("/testAllocAndIBRWhileDecommission"); + DistributedFileSystem dfs = getCluster().getFileSystem(); + FSDataOutputStream out = dfs.create(file, true, + getConf().getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, + 4096), (short) 3, blockSize); + + // Write first block data to the file, write one more long number will + // commit first block and allocate second block. + long writtenBytes = 0; + while (writtenBytes + 8 < blockSize) { + out.writeLong(writtenBytes); + writtenBytes += 8; + } + out.hsync(); + + // Get fist block information + LocatedBlock firstLocatedBlock = NameNodeAdapter.getBlockLocations( + getCluster().getNameNode(), "/testAllocAndIBRWhileDecommission", 0, + fileSize).getLastLocatedBlock(); + DatanodeInfo[] firstBlockLocations = firstLocatedBlock.getLocations(); + + // Close first block's datanode IBR. + ArrayList toDecom = new ArrayList<>(); + ArrayList decomDNInfos = new ArrayList<>(); + for (DatanodeInfo datanodeInfo : firstBlockLocations) { + toDecom.add(datanodeInfo.getXferAddr()); + decomDNInfos.add(dm.getDatanode(datanodeInfo)); + DataNode dn = getDataNode(datanodeInfo); + DataNodeTestUtils.triggerHeartbeat(dn); + DataNodeTestUtils.pauseIBR(dn); + } + + // Write more than one block, then commit first block, allocate second + // block. + while (writtenBytes <= blockSize) { + out.writeLong(writtenBytes); + writtenBytes += 8; + } + out.hsync(); + + // IBR closed, so the first block UCState is COMMITTED, not COMPLETE. + assertEquals(BlockUCState.COMMITTED, + ((BlockInfo) firstLocatedBlock.getBlock().getLocalBlock()) + .getBlockUCState()); + + // Decommission all nodes of the first block + initExcludeHosts(toDecom); + refreshNodes(0); + + // Waiting nodes at DECOMMISSION_INPROGRESS state and then resume IBR. + for (DatanodeInfo dnDecom : decomDNInfos) { + waitNodeState(dnDecom, AdminStates.DECOMMISSION_INPROGRESS); + DataNodeTestUtils.resumeIBR(getDataNode(dnDecom)); + } + + // Recover first block's datanode hertbeat, will report the first block + // state to NN. + for (DataNode dn : getCluster().getDataNodes()) { + DataNodeTestUtils.triggerHeartbeat(dn); + } + + // NN receive first block report, transfer block state from COMMITTED to + // COMPLETE. + assertEquals(BlockUCState.COMPLETE, + ((BlockInfo) firstLocatedBlock.getBlock().getLocalBlock()) + .getBlockUCState()); + + out.close(); + + shutdownCluster(); + } /** * Tests restart of namenode while datanode hosts are added to exclude file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java index 04b99e65f19..453e04f8f1e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/DataNodeTestUtils.java @@ -101,6 +101,11 @@ public class DataNodeTestUtils { public static void pauseIBR(DataNode dn) { dn.setIBRDisabledForTest(true); } + + public static void resumeIBR(DataNode dn) { + dn.setIBRDisabledForTest(false); + } + public static InterDatanodeProtocol createInterDatanodeProtocolProxy( DataNode dn, DatanodeID datanodeid, final Configuration conf, boolean connectToDnViaHostname) throws IOException {