From 7084b273aca575292ac6834ff2a5f4d7c1b41ba9 Mon Sep 17 00:00:00 2001 From: Uma Maheswara Rao G Date: Mon, 6 Jul 2020 18:50:03 -0700 Subject: [PATCH] HDFS-15449. Optionally ignore port number in mount-table name when picking from initialized uri. Contributed by Uma Maheswara Rao G. (cherry picked from commit dc0626b5f2f2ba0bd3919650ea231cedd424f77a) --- .../apache/hadoop/fs/viewfs/Constants.java | 13 ++++++ .../hadoop/fs/viewfs/ViewFileSystem.java | 10 ++++- .../viewfs/ViewFileSystemOverloadScheme.java | 13 +++++- .../src/site/markdown/ViewFsOverloadScheme.md | 8 +++- ...mOverloadSchemeHdfsFileSystemContract.java | 4 ++ ...ileSystemOverloadSchemeWithHdfsScheme.java | 45 ++++++++++++++++++- ...wFileSystemOverloadSchemeWithDFSAdmin.java | 17 ++++--- ...ileSystemOverloadSchemeWithFSCommands.java | 2 +- 8 files changed, 97 insertions(+), 15 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/Constants.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/Constants.java index 28ebf73cf55..492cb87ee02 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/Constants.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/Constants.java @@ -104,4 +104,17 @@ public interface Constants { "fs.viewfs.mount.links.as.symlinks"; boolean CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS_DEFAULT = true; + + /** + * When initializing the viewfs, authority will be used as the mount table + * name to find the mount link configurations. To make the mount table name + * unique, we may want to ignore port if initialized uri authority contains + * port number. By default, we will consider port number also in + * ViewFileSystem (the default value is false, so that existing + * deployments continue with the current behavior). 
+ */ + String CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME = + "fs.viewfs.ignore.port.in.mount.table.name"; + + boolean CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT = false; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java index e192bfce7c0..1ca1759cf6b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java @@ -19,6 +19,8 @@ package org.apache.hadoop.fs.viewfs; import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_ENABLE_INNER_CACHE; import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_ENABLE_INNER_CACHE_DEFAULT; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT; import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS; import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS_DEFAULT; import static org.apache.hadoop.fs.viewfs.Constants.PERMISSION_555; @@ -272,9 +274,15 @@ public class ViewFileSystem extends FileSystem { final InnerCache innerCache = new InnerCache(fsGetter); // Now build client side view (i.e. client side mount table) from config. 
final String authority = theUri.getAuthority(); + String tableName = authority; + if (theUri.getPort() != -1 && config + .getBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME, + CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT)) { + tableName = theUri.getHost(); + } try { myUri = new URI(getScheme(), authority, "/", null, null); - fsState = new InodeTree(conf, authority) { + fsState = new InodeTree(conf, tableName) { @Override protected FileSystem getTargetFileSystem(final URI uri) throws URISyntaxException, IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemOverloadScheme.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemOverloadScheme.java index 672022be824..2f3359d32e9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemOverloadScheme.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemOverloadScheme.java @@ -31,6 +31,8 @@ import org.apache.hadoop.fs.FsConstants; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME; + /****************************************************************************** * This class is extended from the ViewFileSystem for the overloaded scheme * file system. Mount link configurations and in-memory mount table @@ -85,9 +87,14 @@ import org.apache.hadoop.fs.UnsupportedFileSystemException; * Op3: Create file s3a://bucketA/salesDB/dbfile will go to * s3a://bucketA/salesDB/dbfile * - * Note: In ViewFileSystemOverloadScheme, by default the mount links will be + * Note: + * (1) In ViewFileSystemOverloadScheme, by default the mount links will be * represented as non-symlinks. 
If you want to change this behavior, please see * {@link ViewFileSystem#listStatus(Path)} + * (2) In ViewFileSystemOverloadScheme, by default only the initialized uri's + * hostname will be considered as the mount table name. When the passed uri has + * hostname:port, the port number is ignored and only the hostname is used as + * the mount table name. *****************************************************************************/ @InterfaceAudience.LimitedPrivate({ "MapReduce", "HBase", "Hive" }) @InterfaceStability.Evolving @@ -115,6 +122,10 @@ public class ViewFileSystemOverloadScheme extends ViewFileSystem { conf.setBoolean(Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS, conf.getBoolean(Constants.CONFIG_VIEWFS_MOUNT_LINKS_AS_SYMLINKS, false)); + /* the default value is true in ViewFileSystemOverloadScheme */ + conf.setBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME, + conf.getBoolean(Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME, + true)); if (null != mountTableConfigPath) { MountTableConfigLoader loader = new HCFSMountTableConfigLoader(); loader.load(mountTableConfigPath, conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ViewFsOverloadScheme.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ViewFsOverloadScheme.md index feb0ba27183..38113cbbb0f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ViewFsOverloadScheme.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ViewFsOverloadScheme.md @@ -28,7 +28,11 @@ View File System Overload Scheme ### Details -The View File System Overload Scheme is an extension to the View File System. This will allow users to continue to use their existing fs.defaultFS configured scheme or any new scheme name instead of using scheme `viewfs`. Mount link configurations key, value formats are same as in [ViewFS Guide](./ViewFs.html). 
If a user wants to continue use the same fs.defaultFS and wants to have more mount points, then mount link configurations should have the current fs.defaultFS authority name as mount table name. Example if fs.defaultFS is `hdfs://mycluster`, then the mount link configuration key name should be like in the following format `fs.viewfs.mounttable.*mycluster*.link.`. We will discuss more example configurations in following sections. +The View File System Overload Scheme is an extension to the View File System. This will allow users to continue to use their existing fs.defaultFS configured scheme or any new scheme name instead of using scheme `viewfs`. +Mount link configuration key and value formats are the same as in [ViewFS Guide](./ViewFs.html). +If a user wants to continue to use the same fs.defaultFS and wants to have more mount points, then mount link configurations should have the ViewFileSystemOverloadScheme initialized uri's hostname as the mount table name. +For example, if fs.defaultFS is `hdfs://mycluster`, then the mount link configuration key name should be in the following format `fs.viewfs.mounttable.*mycluster*.link.`. +Even if the initialized fs uri has hostname:port, it will simply ignore the port number and only consider the hostname as the mount table name. We will discuss more example configurations in the following sections. Another important improvement with the ViewFileSystemOverloadScheme is, administrators need not copy the `mount-table.xml` configuration file to 1000s of client nodes. Instead they can keep the mount-table configuration file in a Hadoop compatible file system. So, keeping the configuration file in a central place makes administrators life easier as they can update mount-table in single place. 
@@ -171,7 +175,7 @@ For example, when the following configuration is used but a path like `viewfs:/f ``` -#### Solution +### Solution To avoid the above problem, the configuration `fs.viewfs.mounttable.default.name.key` has to be set to the name of the cluster, i.e, the following should be added to `core-site.xml` ```xml diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemOverloadSchemeHdfsFileSystemContract.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemOverloadSchemeHdfsFileSystemContract.java index e7e74d13763..dcfa051c390 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemOverloadSchemeHdfsFileSystemContract.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemOverloadSchemeHdfsFileSystemContract.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.fs.viewfs; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT; import static org.junit.Assume.assumeTrue; import java.io.File; @@ -73,6 +75,8 @@ public class TestViewFileSystemOverloadSchemeHdfsFileSystemContract FsConstants.FS_VIEWFS_OVERLOAD_SCHEME_TARGET_FS_IMPL_PATTERN, "hdfs"), DistributedFileSystem.class.getName()); + conf.setBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME, + CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT); URI defaultFSURI = URI.create(conf.get(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY)); ConfigUtil.addLink(conf, defaultFSURI.getAuthority(), "/user", diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemOverloadSchemeWithHdfsScheme.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemOverloadSchemeWithHdfsScheme.java index 
a44af768bdc..8b7eb88404a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemOverloadSchemeWithHdfsScheme.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/viewfs/TestViewFileSystemOverloadSchemeWithHdfsScheme.java @@ -45,6 +45,8 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME; +import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT; import static org.junit.Assert.*; @@ -79,6 +81,8 @@ public class TestViewFileSystemOverloadSchemeWithHdfsScheme { conf.set(String.format( FsConstants.FS_VIEWFS_OVERLOAD_SCHEME_TARGET_FS_IMPL_PATTERN, HDFS_SCHEME), DistributedFileSystem.class.getName()); + conf.setBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME, + CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME_DEFAULT); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); cluster.waitClusterUp(); defaultFSURI = @@ -365,7 +369,7 @@ public class TestViewFileSystemOverloadSchemeWithHdfsScheme { if (mountTableIfSet != null) { conf.set(Constants.CONFIG_VIEWFS_MOUNTTABLE_PATH, mountTableIfSet); } - addMountLinks(defaultFSURI.getAuthority(), + addMountLinks(defaultFSURI.getHost(), new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER, Constants.CONFIG_VIEWFS_LINK_FALLBACK }, new String[] {hdfsTargetPath.toUri().toString(), @@ -593,6 +597,45 @@ public class TestViewFileSystemOverloadSchemeWithHdfsScheme { } } + /** + * Tests that the fs initialization should ignore the port number when it's + * extracting the mount table name from uri. 
+ */ + @Test(timeout = 30000) + public void testMountTableNameShouldIgnorePortFromURI() throws Exception { + final Path hdfsTargetPath = new Path(defaultFSURI + HDFS_USER_FOLDER); + conf = new Configuration(getConf()); + addMountLinks(defaultFSURI.getHost(), + new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER, + Constants.CONFIG_VIEWFS_LINK_FALLBACK}, + new String[] {hdfsTargetPath.toUri().toString(), + localTargetDir.toURI().toString(), + hdfsTargetPath.toUri().toString()}, conf); + conf.set(CommonConfigurationKeys.FS_DEFAULT_NAME_KEY, + defaultFSURI.toString()); + conf.set(String.format(FS_IMPL_PATTERN_KEY, HDFS_SCHEME), + ViewFileSystemOverloadScheme.class.getName()); + conf.set(String + .format(FsConstants.FS_VIEWFS_OVERLOAD_SCHEME_TARGET_FS_IMPL_PATTERN, + HDFS_SCHEME), DistributedFileSystem.class.getName()); + conf.setBoolean(CONFIG_VIEWFS_IGNORE_PORT_IN_MOUNT_TABLE_NAME, true); + + Path testDirOnRoot = new Path("/test"); + URI uriWithoutPort = new URI("hdfs://" + defaultFSURI.getHost()); + //Initialize without port + try (FileSystem fs = FileSystem + .get(uriWithoutPort, conf)) { + fs.mkdirs(testDirOnRoot); + fs.delete(testDirOnRoot, true); + } + + //Initialize with port + try (FileSystem fs = FileSystem.get(defaultFSURI, conf)) { + fs.mkdirs(testDirOnRoot); + fs.delete(testDirOnRoot, true); + } + } + private void writeString(final FileSystem nfly, final String testString, final Path testFile) throws IOException { try (FSDataOutputStream fsDos = nfly.create(testFile)) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestViewFileSystemOverloadSchemeWithDFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestViewFileSystemOverloadSchemeWithDFSAdmin.java index a9475ddc8d0..aea4704711c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestViewFileSystemOverloadSchemeWithDFSAdmin.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestViewFileSystemOverloadSchemeWithDFSAdmin.java @@ -151,7 +151,7 @@ public class TestViewFileSystemOverloadSchemeWithDFSAdmin { @Test public void testSaveNameSpace() throws Exception { final Path hdfsTargetPath = new Path(defaultFSURI + HDFS_USER_FOLDER); - addMountLinks(defaultFSURI.getAuthority(), + addMountLinks(defaultFSURI.getHost(), new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER }, new String[] {hdfsTargetPath.toUri().toString(), localTargetDir.toURI().toString() }, @@ -177,7 +177,7 @@ public class TestViewFileSystemOverloadSchemeWithDFSAdmin { @Test public void testSaveNamespaceWithoutSpecifyingFS() throws Exception { final Path hdfsTargetPath = new Path(defaultFSURI + HDFS_USER_FOLDER); - addMountLinks(defaultFSURI.getAuthority(), + addMountLinks(defaultFSURI.getHost(), new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER }, new String[] {hdfsTargetPath.toUri().toString(), localTargetDir.toURI().toString() }, @@ -200,9 +200,8 @@ public class TestViewFileSystemOverloadSchemeWithDFSAdmin { public void testSafeModeWithWrongFS() throws Exception { final Path hdfsTargetPath = new Path("hdfs://nonExistent" + HDFS_USER_FOLDER); - addMountLinks(defaultFSURI.getAuthority(), - new String[] {HDFS_USER_FOLDER }, - new String[] {hdfsTargetPath.toUri().toString(), }, conf); + addMountLinks(defaultFSURI.getHost(), new String[] {HDFS_USER_FOLDER}, + new String[] {hdfsTargetPath.toUri().toString()}, conf); final DFSAdmin dfsAdmin = new DFSAdmin(conf); redirectStream(); int ret = ToolRunner.run(dfsAdmin, new String[] {"-safemode", "enter" }); @@ -215,7 +214,7 @@ public class TestViewFileSystemOverloadSchemeWithDFSAdmin { */ @Test public void testSafeModeShouldFailOnLocalTargetFS() throws Exception { - addMountLinks(defaultFSURI.getAuthority(), new String[] {LOCAL_FOLDER }, + addMountLinks(defaultFSURI.getHost(), new String[] {LOCAL_FOLDER }, new String[] {localTargetDir.toURI().toString() }, conf); final 
DFSAdmin dfsAdmin = new DFSAdmin(conf); // ViewFSOveloadScheme uri with localfs mount point @@ -247,8 +246,8 @@ public class TestViewFileSystemOverloadSchemeWithDFSAdmin { @Test public void testAllowAndDisalllowSnapShot() throws Exception { final Path hdfsTargetPath = new Path(defaultFSURI + HDFS_USER_FOLDER); - addMountLinks(defaultFSURI.getAuthority(), - new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER }, + addMountLinks(defaultFSURI.getHost(), + new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER}, new String[] {hdfsTargetPath.toUri().toString(), localTargetDir.toURI().toString() }, conf); @@ -270,7 +269,7 @@ public class TestViewFileSystemOverloadSchemeWithDFSAdmin { @Test public void testSetBalancerBandwidth() throws Exception { final Path hdfsTargetPath = new Path(defaultFSURI + HDFS_USER_FOLDER); - addMountLinks(defaultFSURI.getAuthority(), + addMountLinks(defaultFSURI.getHost(), new String[] {HDFS_USER_FOLDER, LOCAL_FOLDER }, new String[] {hdfsTargetPath.toUri().toString(), localTargetDir.toURI().toString() }, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestViewFileSystemOverloadSchemeWithFSCommands.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestViewFileSystemOverloadSchemeWithFSCommands.java index a974377fac0..099c967e26e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestViewFileSystemOverloadSchemeWithFSCommands.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestViewFileSystemOverloadSchemeWithFSCommands.java @@ -142,7 +142,7 @@ public class TestViewFileSystemOverloadSchemeWithFSCommands { List mounts = Lists.newArrayList(); mounts.add(HDFS_USER_FOLDER); mounts.add(LOCAL_FOLDER); - addMountLinks(defaultFSURI.getAuthority(), + addMountLinks(defaultFSURI.getHost(), mounts.toArray(new String[mounts.size()]), new String[] {hdfsTargetPath.toUri().toString(), localTargetDir.toURI().toString() },