diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 7c098967c48..9b28d3b723b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -47,6 +47,9 @@ Release 2.7.2 - UNRELEASED
     HDFS-8879. Quota by storage type usage incorrectly initialized upon namenode
     restart. (xyao)
 
+    HDFS-8950. NameNode refresh doesn't remove DataNodes that are no longer in
+    the allowed list (Daniel Templeton)
+
     HDFS-8995. Flaw in registration bookeeping can make DN die on reconnect.
     (Kihwal Lee via yliu)
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index d7e0721a910..0cf1eeea7ad 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -1272,11 +1272,14 @@ public class DatanodeManager {
       for (DatanodeDescriptor dn : datanodeMap.values()) {
         final boolean isDead = isDatanodeDead(dn);
         final boolean isDecommissioning = dn.isDecommissionInProgress();
-        if ((listLiveNodes && !isDead) ||
+
+        if (((listLiveNodes && !isDead) ||
             (listDeadNodes && isDead) ||
-            (listDecommissioningNodes && isDecommissioning)) {
-          nodes.add(dn);
+            (listDecommissioningNodes && isDecommissioning)) &&
+            hostFileManager.isIncluded(dn)) {
+          nodes.add(dn);
         }
+
         foundNodes.add(HostFileManager.resolvedAddressFromDatanodeID(dn));
       }
     }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java
index 0b8d6c5bc16..e05ef9a4047 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HostFileManager.java
@@ -126,9 +126,28 @@ class HostFileManager {
     return !includes.isEmpty();
   }
 
+  /**
+   * Read the includes and excludes lists from the named files. Any previous
+   * includes and excludes lists are discarded.
+   * @param includeFile the path to the new includes list
+   * @param excludeFile the path to the new excludes list
+   * @throws IOException thrown if there is a problem reading one of the files
+   */
   void refresh(String includeFile, String excludeFile) throws IOException {
     HostSet newIncludes = readFile("included", includeFile);
     HostSet newExcludes = readFile("excluded", excludeFile);
+
+    refresh(newIncludes, newExcludes);
+  }
+
+  /**
+   * Set the includes and excludes lists to the new HostSet instances. The
+   * old instances are discarded.
+   * @param newIncludes the new includes list
+   * @param newExcludes the new excludes list
+   */
+  @VisibleForTesting
+  void refresh(HostSet newIncludes, HostSet newExcludes) {
     synchronized (this) {
       includes = newIncludes;
       excludes = newExcludes;
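The hostFileManager.isIncluded(dn) guard above relies on a convention worth spelling out: an empty includes list means no dfs.hosts file is configured, so every host counts as included. That is also why the new test below must register two DataNodes before shrinking the list. A minimal sketch of the convention, using a hypothetical stand-in class rather than the real HostFileManager/HostSet API:

```java
import java.net.InetSocketAddress;
import java.util.HashSet;
import java.util.Set;

// IncludeList is a stand-in for HostFileManager's include semantics,
// not a class from the patch.
class IncludeList {
  private final Set<InetSocketAddress> hosts = new HashSet<>();

  void add(InetSocketAddress addr) {
    hosts.add(addr);
  }

  boolean isIncluded(InetSocketAddress addr) {
    // Empty set == no include file configured == every host is allowed.
    return hosts.isEmpty() || hosts.contains(addr);
  }
}
```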
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
index 1ab7427f97a..5cec657a49e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
@@ -810,16 +810,13 @@ public class TestDecommission {
     }
     assertEquals("Number of live nodes should be 0", 0, info.length);
 
-    // Test that non-live and bogus hostnames are considered "dead".
-    // The dead report should have an entry for (1) the DN that is
-    // now considered dead because it is no longer allowed to connect
-    // and (2) the bogus entry in the hosts file (these entries are
-    // always added last)
+    // Test that bogus hostnames are considered "dead".
+    // The dead report should have an entry for the bogus entry in the hosts
+    // file. The original datanode is excluded from the report because it
+    // is no longer in the included list.
     info = client.datanodeReport(DatanodeReportType.DEAD);
-    assertEquals("There should be 2 dead nodes", 2, info.length);
-    DatanodeID id = cluster.getDataNodes().get(0).getDatanodeId();
-    assertEquals(id.getHostName(), info[0].getHostName());
-    assertEquals(bogusIp, info[1].getHostName());
+    assertEquals("There should be 1 dead node", 1, info.length);
+    assertEquals(bogusIp, info[0].getHostName());
   }
 }
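One design point in the HostFileManager change above: the new two-argument refresh assigns both fields inside a single synchronized block, so a concurrent reader can never observe a fresh includes list paired with a stale excludes list. A sketch of the same publish pattern, with java.util.Set standing in for HostSet:

```java
import java.util.Collections;
import java.util.Set;

// Illustration of the swap performed by refresh(HostSet, HostSet): build
// the new state off-lock, then publish both references under one lock.
class HostLists {
  private Set<String> includes = Collections.emptySet();
  private Set<String> excludes = Collections.emptySet();

  synchronized void refresh(Set<String> newIncludes, Set<String> newExcludes) {
    includes = newIncludes; // both fields change under the same lock,
    excludes = newExcludes; // so readers always see a consistent pair
  }

  synchronized boolean isIncluded(String host) {
    return includes.isEmpty() || includes.contains(host);
  }
}
```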
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java
index a4a6263b3de..4530ef8a66e 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java
@@ -19,7 +19,9 @@
 package org.apache.hadoop.hdfs.server.blockmanagement;
 
 import java.io.IOException;
+import java.net.InetSocketAddress;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
@@ -33,17 +35,22 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
 import org.apache.hadoop.fs.StorageType;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.hdfs.protocol.DatanodeID;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocol.LocatedBlock;
 import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfoWithStorage;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys;
+import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
+import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration;
 import org.apache.hadoop.net.DNSToSwitchMapping;
 import org.junit.Assert;
 import org.junit.Test;
 import org.mockito.Mockito;
-
+import org.mockito.internal.util.reflection.Whitebox;
 import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.*;
@@ -54,6 +61,22 @@ public class TestDatanodeManager {
   //The number of times the registration / removal of nodes should happen
   final int NUM_ITERATIONS = 500;
 
+  private static DatanodeManager mockDatanodeManager(
+      FSNamesystem fsn, Configuration conf) throws IOException {
+    BlockManager bm = Mockito.mock(BlockManager.class);
+    DatanodeManager dm = new DatanodeManager(bm, fsn, conf);
+    return dm;
+  }
+
+  /**
+   * Create an InetSocketAddress for a host:port string
+   * @param host a host identifier in host:port format
+   * @return a corresponding InetSocketAddress object
+   */
+  private static InetSocketAddress entry(String host) {
+    return HostFileManager.parseEntry("dummy", "dummy", host);
+  }
+
   /**
    * This test sends a random sequence of node registrations and node removals
    * to the DatanodeManager (of nodes with different IDs and versions), and
@@ -294,4 +317,89 @@ public class TestDatanodeManager {
     assertThat(sortedLocs[sortedLocs.length-2].getAdminState(),
         is(DatanodeInfo.AdminStates.DECOMMISSIONED));
   }
+
+  /**
+   * Test whether removing a host from the includes list without adding it to
+   * the excludes list will exclude it from data node reports.
+   */
+  @Test
+  public void testRemoveIncludedNode() throws IOException {
+    FSNamesystem fsn = Mockito.mock(FSNamesystem.class);
+
+    // Set the write lock so that the DatanodeManager can start
+    Mockito.when(fsn.hasWriteLock()).thenReturn(true);
+
+    DatanodeManager dm = mockDatanodeManager(fsn, new Configuration());
+    HostFileManager hm = new HostFileManager();
+    HostFileManager.HostSet noNodes = new HostFileManager.HostSet();
+    HostFileManager.HostSet oneNode = new HostFileManager.HostSet();
+    HostFileManager.HostSet twoNodes = new HostFileManager.HostSet();
+    DatanodeRegistration dr1 = new DatanodeRegistration(
+        new DatanodeID("127.0.0.1", "127.0.0.1", "someStorageID-123",
+            12345, 12345, 12345, 12345),
+        new StorageInfo(HdfsServerConstants.NodeType.DATA_NODE),
+        new ExportedBlockKeys(), "test");
+    DatanodeRegistration dr2 = new DatanodeRegistration(
+        new DatanodeID("127.0.0.1", "127.0.0.1", "someStorageID-234",
+            23456, 23456, 23456, 23456),
+        new StorageInfo(HdfsServerConstants.NodeType.DATA_NODE),
+        new ExportedBlockKeys(), "test");
+
+    twoNodes.add(entry("127.0.0.1:12345"));
+    twoNodes.add(entry("127.0.0.1:23456"));
+    oneNode.add(entry("127.0.0.1:23456"));
+
+    hm.refresh(twoNodes, noNodes);
+    Whitebox.setInternalState(dm, "hostFileManager", hm);
+
+    // Register two data nodes to simulate them coming up.
+    // We need to add two nodes, because if we have only one node, removing it
+    // will cause the includes list to be empty, which means all hosts will be
+    // allowed.
+    dm.registerDatanode(dr1);
+    dm.registerDatanode(dr2);
+
+    // Make sure that both nodes are reported
+    List<DatanodeDescriptor> both =
+        dm.getDatanodeListForReport(HdfsConstants.DatanodeReportType.ALL);
+
+    // Sort the list so that we know which one is which
+    Collections.sort(both);
+
+    Assert.assertEquals("Incorrect number of hosts reported",
+        2, both.size());
+    Assert.assertEquals("Unexpected host or host in unexpected position",
+        "127.0.0.1:12345", both.get(0).getInfoAddr());
+    Assert.assertEquals("Unexpected host or host in unexpected position",
+        "127.0.0.1:23456", both.get(1).getInfoAddr());
+
+    // Remove one node from includes, but do not add it to excludes.
+    hm.refresh(oneNode, noNodes);
+
+    // Make sure that only one node is still reported
+    List<DatanodeDescriptor> onlyOne =
+        dm.getDatanodeListForReport(HdfsConstants.DatanodeReportType.ALL);
+
+    Assert.assertEquals("Incorrect number of hosts reported",
+        1, onlyOne.size());
+    Assert.assertEquals("Unexpected host reported",
+        "127.0.0.1:23456", onlyOne.get(0).getInfoAddr());
+
+    // Remove all nodes from includes
+    hm.refresh(noNodes, noNodes);
+
+    // Check that both nodes are reported again
+    List<DatanodeDescriptor> bothAgain =
+        dm.getDatanodeListForReport(HdfsConstants.DatanodeReportType.ALL);
+
+    // Sort the list so that we know which one is which
+    Collections.sort(bothAgain);
+
+    Assert.assertEquals("Incorrect number of hosts reported",
+        2, bothAgain.size());
+    Assert.assertEquals("Unexpected host or host in unexpected position",
+        "127.0.0.1:12345", bothAgain.get(0).getInfoAddr());
+    Assert.assertEquals("Unexpected host or host in unexpected position",
+        "127.0.0.1:23456", bothAgain.get(1).getInfoAddr());
+  }
 }
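Both tests plant the prepared HostFileManager through Whitebox.setInternalState, an internal Mockito helper that simply overwrites a private field by reflection. For readers unfamiliar with it, roughly equivalent logic looks like this (a sketch; the real helper also handles a few edge cases):

```java
import java.lang.reflect.Field;

// Approximation of Whitebox.setInternalState(dm, "hostFileManager", hm):
// find the named field anywhere in the class hierarchy and overwrite it.
final class FieldInjector {
  static void setInternalState(Object target, String name, Object value)
      throws ReflectiveOperationException {
    for (Class<?> c = target.getClass(); c != null; c = c.getSuperclass()) {
      try {
        Field f = c.getDeclaredField(name);
        f.setAccessible(true); // bypass the private modifier
        f.set(target, value);
        return;
      } catch (NoSuchFieldException e) {
        // not declared here; keep walking up the hierarchy
      }
    }
    throw new NoSuchFieldException(name);
  }

  private FieldInjector() {
  }
}
```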
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHostFileManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHostFileManager.java
index 733446ceb17..c65b5804396 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHostFileManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHostFileManager.java
@@ -104,23 +104,22 @@ public class TestHostFileManager {
     BlockManager bm = mock(BlockManager.class);
     FSNamesystem fsn = mock(FSNamesystem.class);
     Configuration conf = new Configuration();
-    HostFileManager hm = mock(HostFileManager.class);
+    HostFileManager hm = new HostFileManager();
     HostFileManager.HostSet includedNodes = new HostFileManager.HostSet();
     HostFileManager.HostSet excludedNodes = new HostFileManager.HostSet();
 
     includedNodes.add(entry("127.0.0.1:12345"));
     includedNodes.add(entry("localhost:12345"));
     includedNodes.add(entry("127.0.0.1:12345"));
-    includedNodes.add(entry("127.0.0.2"));
 
+    excludedNodes.add(entry("127.0.0.1:12346"));
     excludedNodes.add(entry("127.0.30.1:12346"));
 
     Assert.assertEquals(2, includedNodes.size());
     Assert.assertEquals(2, excludedNodes.size());
 
-    doReturn(includedNodes).when(hm).getIncludes();
-    doReturn(excludedNodes).when(hm).getExcludes();
+    hm.refresh(includedNodes, excludedNodes);
 
     DatanodeManager dm = new DatanodeManager(bm, fsn, conf);
     Whitebox.setInternalState(dm, "hostFileManager", hm);
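End to end, the behavior pinned down by these tests is what an operator sees after deleting a host from the dfs.hosts file and running `hdfs dfsadmin -refreshNodes`: with this patch the removed DataNode drops out of the reports entirely instead of lingering as "dead". A hedged sketch of checking the report from client code (getDataNodeStats() is the public DistributedFileSystem API; an HDFS cluster configuration is assumed to be on the classpath):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

// Prints every DataNode the NameNode currently reports. After this patch,
// a node removed from the include file no longer appears here once the
// node lists have been refreshed.
public class ReportCheck {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);
    for (DatanodeInfo dn : dfs.getDataNodeStats()) {
      System.out.println(dn.getHostName() + " " + dn.getAdminState());
    }
  }
}
```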