From 071733dc69a6f83c0cdca046b31ffd4f13304e93 Mon Sep 17 00:00:00 2001 From: "Aaron T. Myers" Date: Tue, 29 Sep 2015 18:19:31 -0700 Subject: [PATCH] HDFS-9001. DFSUtil.getNsServiceRpcUris() can return too many entries in a non-HA, non-federated cluster. Contributed by Daniel Templeton. --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 37 +++-- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 144 +++++++++++++----- 3 files changed, 130 insertions(+), 54 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index dfd0b575d85..cedf1a741ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1468,6 +1468,9 @@ Release 2.8.0 - UNRELEASED HDFS-9174. Fix findbugs warnings in FSOutputSummer.tracer and DirectoryScanner$ReportCompiler.currentThread. (Yi Liu via wheat9) + HDFS-9001. DFSUtil.getNsServiceRpcUris() can return too many entries in a + non-HA, non-federated cluster. (Daniel Templeton via atm) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 5b11ac277f6..5d405ab2f72 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -732,20 +732,29 @@ public class DFSUtil { } } } - - // Add the default URI if it is an HDFS URI. - URI defaultUri = FileSystem.getDefaultUri(conf); - // checks if defaultUri is ip:port format - // and convert it to hostname:port format - if (defaultUri != null && (defaultUri.getPort() != -1)) { - defaultUri = createUri(defaultUri.getScheme(), - NetUtils.createSocketAddr(defaultUri.getHost(), - defaultUri.getPort())); - } - if (defaultUri != null && - HdfsConstants.HDFS_URI_SCHEME.equals(defaultUri.getScheme()) && - !nonPreferredUris.contains(defaultUri)) { - ret.add(defaultUri); + + // Add the default URI if it is an HDFS URI and we haven't come up with a + // valid non-nameservice NN address yet. Consider the servicerpc-address + // and rpc-address to be the "unnamed" nameservice. defaultFS is our + // fallback when rpc-address isn't given. We therefore only want to add + // the defaultFS when neither the servicerpc-address (which is preferred) + // nor the rpc-address (which overrides defaultFS) is given. + if (!uriFound) { + URI defaultUri = FileSystem.getDefaultUri(conf); + + // checks if defaultUri is ip:port format + // and convert it to hostname:port format + if (defaultUri != null && (defaultUri.getPort() != -1)) { + defaultUri = createUri(defaultUri.getScheme(), + NetUtils.createSocketAddr(defaultUri.getHost(), + defaultUri.getPort())); + } + + if (defaultUri != null && + HdfsConstants.HDFS_URI_SCHEME.equals(defaultUri.getScheme()) && + !nonPreferredUris.contains(defaultUri)) { + ret.add(defaultUri); + } } return ret; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index 3435b7f7808..f22deafa857 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -616,78 +616,142 @@ public class TestDFSUtil { DFSUtil.substituteForWildcardAddress("127.0.0.1:12345", "foo")); } + /** + * Test how name service URIs are handled with a variety of configuration + * settings + * @throws Exception + */ @Test public void testGetNNUris() throws Exception { HdfsConfiguration conf = new HdfsConfiguration(); - + final String NS1_NN1_ADDR = "ns1-nn1.example.com:8020"; final String NS1_NN2_ADDR = "ns1-nn2.example.com:8020"; final String NS2_NN_ADDR = "ns2-nn.example.com:8020"; final String NN1_ADDR = "nn.example.com:8020"; final String NN1_SRVC_ADDR = "nn.example.com:8021"; final String NN2_ADDR = "nn2.example.com:8020"; - + + conf.set(DFS_NAMESERVICES, "ns1"); + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, "ns1"), NS1_NN1_ADDR); + + conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, "hdfs://" + NN2_ADDR); + conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "hdfs://" + NN1_ADDR); + + Collection uris = DFSUtil.getNameServiceUris(conf, + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); + + assertEquals("Incorrect number of URIs returned", 2, uris.size()); + assertTrue("Missing URI for name service ns1", + uris.contains(new URI("hdfs://" + NS1_NN1_ADDR))); + assertTrue("Missing URI for service address", + uris.contains(new URI("hdfs://" + NN2_ADDR))); + + conf = new HdfsConfiguration(); conf.set(DFS_NAMESERVICES, "ns1,ns2"); - conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns1"),"nn1,nn2"); + conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns1"), + "nn1,nn2"); conf.set(DFSUtil.addKeySuffixes( DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn1"), NS1_NN1_ADDR); conf.set(DFSUtil.addKeySuffixes( DFS_NAMENODE_RPC_ADDRESS_KEY, "ns1", "nn2"), NS1_NN2_ADDR); - - conf.set(DFSUtil.addKeySuffixes(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, "ns2"), - NS2_NN_ADDR); - + + conf.set(DFSUtil.addKeySuffixes( + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, "ns2"), NS2_NN_ADDR); + conf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, "hdfs://" + NN1_ADDR); - + conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "hdfs://" + NN2_ADDR); - - Collection uris = DFSUtil.getNameServiceUris(conf, + + uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); - - assertEquals(4, uris.size()); - assertTrue(uris.contains(new URI("hdfs://ns1"))); - assertTrue(uris.contains(new URI("hdfs://" + NS2_NN_ADDR))); - assertTrue(uris.contains(new URI("hdfs://" + NN1_ADDR))); - assertTrue(uris.contains(new URI("hdfs://" + NN2_ADDR))); - + + assertEquals("Incorrect number of URIs returned", 3, uris.size()); + assertTrue("Missing URI for name service ns1", + uris.contains(new URI("hdfs://ns1"))); + assertTrue("Missing URI for name service ns2", + uris.contains(new URI("hdfs://" + NS2_NN_ADDR))); + assertTrue("Missing URI for RPC address", + uris.contains(new URI("hdfs://" + NN1_ADDR))); + // Make sure that non-HDFS URIs in fs.defaultFS don't get included. conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "viewfs://vfs-name.example.com"); - - uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, - DFS_NAMENODE_RPC_ADDRESS_KEY); - - assertEquals(3, uris.size()); - assertTrue(uris.contains(new URI("hdfs://ns1"))); - assertTrue(uris.contains(new URI("hdfs://" + NS2_NN_ADDR))); - assertTrue(uris.contains(new URI("hdfs://" + NN1_ADDR))); - + + uris = DFSUtil.getNameServiceUris(conf, + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); + + assertEquals("Incorrect number of URIs returned", 3, uris.size()); + assertTrue("Missing URI for name service ns1", + uris.contains(new URI("hdfs://ns1"))); + assertTrue("Missing URI for name service ns2", + uris.contains(new URI("hdfs://" + NS2_NN_ADDR))); + assertTrue("Missing URI for RPC address", + uris.contains(new URI("hdfs://" + NN1_ADDR))); + // Make sure that an HA URI being the default URI doesn't result in multiple // entries being returned. conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "hdfs://ns1"); - uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, - DFS_NAMENODE_RPC_ADDRESS_KEY); - - assertEquals(3, uris.size()); - assertTrue(uris.contains(new URI("hdfs://ns1"))); - assertTrue(uris.contains(new URI("hdfs://" + NS2_NN_ADDR))); - assertTrue(uris.contains(new URI("hdfs://" + NN1_ADDR))); - + uris = DFSUtil.getNameServiceUris(conf, + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); + + assertEquals("Incorrect number of URIs returned", 3, uris.size()); + assertTrue("Missing URI for name service ns1", + uris.contains(new URI("hdfs://ns1"))); + assertTrue("Missing URI for name service ns2", + uris.contains(new URI("hdfs://" + NS2_NN_ADDR))); + assertTrue("Missing URI for RPC address", + uris.contains(new URI("hdfs://" + NN1_ADDR))); + + // Check that the default URI is returned if there's nothing else to return. + conf = new HdfsConfiguration(); + conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "hdfs://" + NN1_ADDR); + + uris = DFSUtil.getNameServiceUris(conf, + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); + + assertEquals("Incorrect number of URIs returned", 1, uris.size()); + assertTrue("Missing URI for RPC address (defaultFS)", + uris.contains(new URI("hdfs://" + NN1_ADDR))); + + // Check that the RPC address is the only address returned when the RPC + // and the default FS is given. + conf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, NN2_ADDR); + + uris = DFSUtil.getNameServiceUris(conf, + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); + + assertEquals("Incorrect number of URIs returned", 1, uris.size()); + assertTrue("Missing URI for RPC address", + uris.contains(new URI("hdfs://" + NN2_ADDR))); + // Make sure that when a service RPC address is used that is distinct from // the client RPC address, and that client RPC address is also used as the // default URI, that the client URI does not end up in the set of URIs // returned. + conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, NN1_ADDR); + + uris = DFSUtil.getNameServiceUris(conf, + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); + + assertEquals("Incorrect number of URIs returned", 1, uris.size()); + assertTrue("Missing URI for service ns1", + uris.contains(new URI("hdfs://" + NN1_ADDR))); + + // Check that when the default FS and service address are given, but + // the RPC address isn't, that only the service address is returned. conf = new HdfsConfiguration(); conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, "hdfs://" + NN1_ADDR); - conf.set(DFS_NAMENODE_RPC_ADDRESS_KEY, NN1_ADDR); conf.set(DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, NN1_SRVC_ADDR); - uris = DFSUtil.getNameServiceUris(conf, DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, - DFS_NAMENODE_RPC_ADDRESS_KEY); - - assertEquals(1, uris.size()); - assertTrue(uris.contains(new URI("hdfs://" + NN1_SRVC_ADDR))); + uris = DFSUtil.getNameServiceUris(conf, + DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY, DFS_NAMENODE_RPC_ADDRESS_KEY); + + assertEquals("Incorrect number of URIs returned", 1, uris.size()); + assertTrue("Missing URI for service address", + uris.contains(new URI("hdfs://" + NN1_SRVC_ADDR))); } @Test (timeout=15000)