From 1c5095b3cb76fa5328e7bc34d76c3ca102c54181 Mon Sep 17 00:00:00 2001
From: Brahma Reddy Battula
Date: Thu, 27 Jul 2017 12:02:57 -0700
Subject: [PATCH] HDFS-11896. Non-dfsUsed will be doubled on dead node
 re-registration. Contributed by Brahma Reddy Battula.

(cherry picked from commit c4a85c694fae3f814ab4e7f3c172da1df0e0e353)
---
 .../blockmanagement/DatanodeDescriptor.java   | 19 ++++---
 .../hadoop/hdfs/server/datanode/DataNode.java |  2 +-
 .../server/namenode/TestDeadDatanode.java     | 53 +++++++++++++++++++
 3 files changed, 65 insertions(+), 9 deletions(-)

diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
index 8fdaa757d77..310b1a4f731 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
@@ -315,11 +315,7 @@ public class DatanodeDescriptor extends DatanodeInfo {
   }
 
   public void resetBlocks() {
-    setCapacity(0);
-    setRemaining(0);
-    setBlockPoolUsed(0);
-    setDfsUsed(0);
-    setXceiverCount(0);
+    updateStorageStats(this.getStorageReports(), 0L, 0L, 0, 0, null);
     this.invalidateBlocks.clear();
     this.volumeFailures = 0;
     // pendingCached, cached, and pendingUncached are protected by the
@@ -367,6 +363,16 @@ public class DatanodeDescriptor extends DatanodeInfo {
   public void updateHeartbeatState(StorageReport[] reports, long cacheCapacity,
       long cacheUsed, int xceiverCount, int volFailures,
       VolumeFailureSummary volumeFailureSummary) {
+    updateStorageStats(reports, cacheCapacity, cacheUsed, xceiverCount,
+        volFailures, volumeFailureSummary);
+    setLastUpdate(Time.now());
+    setLastUpdateMonotonic(Time.monotonicNow());
+    rollBlocksScheduled(getLastUpdateMonotonic());
+  }
+
+  private void updateStorageStats(StorageReport[] reports, long cacheCapacity,
+      long cacheUsed, int xceiverCount, int volFailures,
+      VolumeFailureSummary volumeFailureSummary) {
     long totalCapacity = 0;
     long totalRemaining = 0;
     long totalBlockPoolUsed = 0;
@@ -417,8 +423,6 @@ public class DatanodeDescriptor extends DatanodeInfo {
     setCacheCapacity(cacheCapacity);
     setCacheUsed(cacheUsed);
     setXceiverCount(xceiverCount);
-    setLastUpdate(Time.now());
-    setLastUpdateMonotonic(Time.monotonicNow());
     this.volumeFailures = volFailures;
     this.volumeFailureSummary = volumeFailureSummary;
     for (StorageReport report : reports) {
@@ -434,7 +438,6 @@ public class DatanodeDescriptor extends DatanodeInfo {
       totalDfsUsed += report.getDfsUsed();
       totalNonDfsUsed += report.getNonDfsUsed();
     }
-    rollBlocksScheduled(getLastUpdateMonotonic());
 
     // Update total metrics for the node.
     setCapacity(totalCapacity);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index 5cfafa0c165..40accd6f50c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -1287,7 +1287,7 @@ public class DataNode extends ReconfigurableBase
 
   // used only for testing
   @VisibleForTesting
-  void setHeartbeatsDisabledForTests(
+  public void setHeartbeatsDisabledForTests(
       boolean heartbeatsDisabledForTests) {
     this.heartbeatsDisabledForTests = heartbeatsDisabledForTests;
   }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
index 033acf29af8..92c71a7d438 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestDeadDatanode.java
@@ -17,9 +17,11 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import com.google.common.base.Supplier;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
 
 import java.io.IOException;
@@ -35,6 +37,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
 import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
+import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager;
 import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
@@ -51,6 +54,7 @@ import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport;
 import org.apache.hadoop.hdfs.server.protocol.StorageReceivedDeletedBlocks;
 import org.apache.hadoop.hdfs.server.protocol.StorageReport;
 import org.apache.hadoop.net.Node;
+import org.apache.hadoop.test.GenericTestUtils;
 import org.junit.After;
 import org.junit.Test;
 
@@ -177,4 +181,53 @@ public class TestDeadDatanode {
           .getDatanodeDescriptor().equals(clientNode));
     }
   }
+
+  @Test
+  public void testNonDFSUsedONDeadNodeReReg() throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1);
+    conf.setInt(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY,
+        6 * 1000);
+    long CAPACITY = 5000L;
+    long[] capacities = new long[] { 4 * CAPACITY, 4 * CAPACITY };
+    try {
+      cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
+          .simulatedCapacities(capacities).build();
+      long initialCapacity = cluster.getNamesystem(0).getCapacityTotal();
+      assertTrue(initialCapacity > 0);
+      DataNode dn1 = cluster.getDataNodes().get(0);
+      DataNode dn2 = cluster.getDataNodes().get(1);
+      final DatanodeDescriptor dn2Desc = cluster.getNamesystem(0)
+          .getBlockManager().getDatanodeManager()
+          .getDatanode(dn2.getDatanodeId());
+      dn1.setHeartbeatsDisabledForTests(true);
+      cluster.setDataNodeDead(dn1.getDatanodeId());
+      assertEquals("Capacity shouldn't include DeadNode",
+          dn2Desc.getCapacity(),
+          cluster.getNamesystem(0).getCapacityTotal());
+      assertEquals("NonDFS-used shouldn't include DeadNode",
+          dn2Desc.getNonDfsUsed(),
+          cluster.getNamesystem(0).getNonDfsUsedSpace());
+      // Wait for re-registration and heartbeat
+      dn1.setHeartbeatsDisabledForTests(false);
+      final DatanodeDescriptor dn1Desc = cluster.getNamesystem(0)
+          .getBlockManager().getDatanodeManager()
+          .getDatanode(dn1.getDatanodeId());
+      GenericTestUtils.waitFor(new Supplier<Boolean>() {
+
+        @Override public Boolean get() {
+          return dn1Desc.isAlive() && dn1Desc.isHeartbeatedSinceRegistration();
+        }
+      }, 100, 5000);
+      assertEquals("Capacity should be restored after DN re-registration",
+          initialCapacity, cluster.getNamesystem(0).getCapacityTotal());
+      long nonDfsAfterReg = cluster.getNamesystem(0).getNonDfsUsedSpace();
+      assertEquals("NonDFS should include actual DN NonDFSUsed",
+          dn1Desc.getNonDfsUsed() + dn2Desc.getNonDfsUsed(), nonDfsAfterReg);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
 }
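
In isolation, the problem the DatanodeDescriptor hunks address is that the old resetBlocks() zeroed capacity, remaining, block-pool-used, DFS-used and xceiver count one setter at a time but never cleared the non-DFS-used counter, so a dead node could carry a stale non-DFS-used value into its next registration and that space ended up being counted twice, which is what the JIRA title describes. Routing resetBlocks() through the same updateStorageStats() aggregation used by updateHeartbeatState() recomputes every total, including non-DFS-used, from the storage reports. The sketch below is a minimal toy model of that before/after difference, not Hadoop code; the class and field names are hypothetical, and the real HeartbeatManager/DatanodeStats bookkeeping is more involved.

// Toy model of the reset problem; all names here are hypothetical and do not
// exist in Hadoop.
public class StaleNonDfsDemo {

  static class NodeStats {
    long capacity;
    long dfsUsed;
    long nonDfsUsed;

    // Pre-patch style reset: counters cleared one by one, non-DFS-used missed.
    void resetOld() {
      capacity = 0;
      dfsUsed = 0;
      // nonDfsUsed is left stale here
    }

    // Post-patch style reset: everything funnels through the one shared
    // aggregation routine, so no counter can be forgotten.
    void resetNew() {
      aggregate(0L, 0L, 0L);
    }

    void aggregate(long cap, long dfs, long nonDfs) {
      capacity = cap;
      dfsUsed = dfs;
      nonDfsUsed = nonDfs;
    }
  }

  public static void main(String[] args) {
    NodeStats node = new NodeStats();
    node.aggregate(20_000L, 1_000L, 500L); // last heartbeat before the node "died"

    node.resetOld();
    // Prints 500: the stale value a re-registering node would still report.
    System.out.println("old reset, nonDfsUsed = " + node.nonDfsUsed);

    node.resetNew();
    // Prints 0: all totals recomputed through the shared path.
    System.out.println("new reset, nonDfsUsed = " + node.nonDfsUsed);
  }
}

The new regression test above then checks the cluster-level consequence: once dn1 re-registers and heartbeats, getNonDfsUsedSpace() must equal dn1's plus dn2's non-DFS usage rather than counting dn1's contribution twice.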