diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java index 3f682abb1b5..1ee06e02aab 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java @@ -529,10 +529,18 @@ public class ViewFileSystem extends FileSystem { * the target path FileStatus object. The target path will be available via * getSymlink on that children's FileStatus object. Since it represents as * symlink, isDirectory on that children's FileStatus will return false. + * This behavior can be changed by setting an advanced configuration + * fs.viewfs.mount.links.as.symlinks to false. In this case, mount points will + * be represented as non-symlinks and all the file/directory attributes like + * permissions, isDirectory etc will be assigned from it's resolved target + * directory/file. * * If you want to get the FileStatus of target path for that children, you may * want to use GetFileStatus API with that children's symlink path. Please see * {@link ViewFileSystem#getFileStatus(Path f)} + * + * Note: In ViewFileSystem, by default the mount links are represented as + * symlinks. */ @Override public FileStatus[] listStatus(final Path f) throws AccessControlException, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemOverloadScheme.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemOverloadScheme.java index af69c92f1e6..672022be824 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemOverloadScheme.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystemOverloadScheme.java @@ -59,9 +59,9 @@ import org.apache.hadoop.fs.UnsupportedFileSystemException; * data to mount with other hdfs and object store clusters(hdfs://NN1, * o3fs://bucket1.volume1/, s3a://bucket1/) * - * fs.viewfs.mounttable.Cluster./user = hdfs://NN1/user - * fs.viewfs.mounttable.Cluster./data = o3fs://bucket1.volume1/data - * fs.viewfs.mounttable.Cluster./backup = s3a://bucket1/backup/ + * fs.viewfs.mounttable.Cluster.link./user = hdfs://NN1/user + * fs.viewfs.mounttable.Cluster.link./data = o3fs://bucket1.volume1/data + * fs.viewfs.mounttable.Cluster.link./backup = s3a://bucket1/backup/ * * Op1: Create file hdfs://Cluster/user/fileA will go to hdfs://NN1/user/fileA * Op2: Create file hdfs://Cluster/data/datafile will go to @@ -75,15 +75,19 @@ import org.apache.hadoop.fs.UnsupportedFileSystemException; * data to mount with other hdfs and object store clusters * (hdfs://NN1, o3fs://bucket1.volume1/) * - * fs.viewfs.mounttable.bucketA./user = hdfs://NN1/user - * fs.viewfs.mounttable.bucketA./data = o3fs://bucket1.volume1/data - * fs.viewfs.mounttable.bucketA./salesDB = s3a://bucketA/salesDB/ + * fs.viewfs.mounttable.bucketA.link./user = hdfs://NN1/user + * fs.viewfs.mounttable.bucketA.link./data = o3fs://bucket1.volume1/data + * fs.viewfs.mounttable.bucketA.link./salesDB = s3a://bucketA/salesDB/ * * Op1: Create file s3a://bucketA/user/fileA will go to hdfs://NN1/user/fileA * Op2: Create file s3a://bucketA/data/datafile will go to * o3fs://bucket1.volume1/data/datafile * Op3: Create file s3a://bucketA/salesDB/dbfile will go to * s3a://bucketA/salesDB/dbfile + * + * Note: In ViewFileSystemOverloadScheme, by default the mount links will be + * represented as non-symlinks. If you want to change this behavior, please see + * {@link ViewFileSystem#listStatus(Path)} *****************************************************************************/ @InterfaceAudience.LimitedPrivate({ "MapReduce", "HBase", "Hive" }) @InterfaceStability.Evolving diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java index a95df56bdb1..fae5d1b5f62 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java @@ -461,12 +461,17 @@ public class ViewFs extends AbstractFileSystem { * the target path FileStatus object. The target path will be available via * getSymlink on that children's FileStatus object. Since it represents as * symlink, isDirectory on that children's FileStatus will return false. + * This behavior can be changed by setting an advanced configuration + * fs.viewfs.mount.links.as.symlinks to false. In this case, mount points will + * be represented as non-symlinks and all the file/directory attributes like + * permissions, isDirectory etc will be assigned from it's resolved target + * directory/file. * * If you want to get the FileStatus of target path for that children, you may * want to use GetFileStatus API with that children's symlink path. Please see * {@link ViewFs#getFileStatus(Path f)} * - * Note: In ViewFs, the mount links are represented as symlinks. + * Note: In ViewFs, by default the mount links are represented as symlinks. */ @Override public FileStatus[] listStatus(final Path f) throws AccessControlException, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ViewFsOverloadScheme.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ViewFsOverloadScheme.md index 87607d80607..e65c5458676 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ViewFsOverloadScheme.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/ViewFsOverloadScheme.md @@ -28,7 +28,7 @@ View File System Overload Scheme ### Details -The View File System Overload Scheme is an extension to the View File System. This will allow users to continue to use their existing fs.defaultFS configured scheme or any new scheme name instead of using scheme `viewfs`. Mount link configurations key, value formats are same as in [ViewFS Guide](./ViewFs.html). If a user wants to continue use the same fs.defaultFS and wants to have more mount points, then mount link configurations should have the current fs.defaultFS authority name as mount table name. Example if fs.defaultFS is `hdfs://mycluster`, then the mount link configuration key name should be like in the following format `fs.viewfs.mounttable.*mycluster*.`. We will discuss more example configurations in following sections. +The View File System Overload Scheme is an extension to the View File System. This will allow users to continue to use their existing fs.defaultFS configured scheme or any new scheme name instead of using scheme `viewfs`. Mount link configurations key, value formats are same as in [ViewFS Guide](./ViewFs.html). If a user wants to continue use the same fs.defaultFS and wants to have more mount points, then mount link configurations should have the current fs.defaultFS authority name as mount table name. Example if fs.defaultFS is `hdfs://mycluster`, then the mount link configuration key name should be like in the following format `fs.viewfs.mounttable.*mycluster*.link.`. We will discuss more example configurations in following sections. Another important improvement with the ViewFileSystemOverloadScheme is, administrators need not copy the `mount-table.xml` configuration file to 1000s of client nodes. Instead they can keep the mount-table configuration file in a Hadoop compatible file system. So, keeping the configuration file in a central place makes administrators life easier as they can update mount-table in single place. @@ -60,17 +60,17 @@ If users want some of their existing cluster (`hdfs://cluster`) data to mount wi ```xml - fs.viewfs.mounttable.cluster./user + fs.viewfs.mounttable.cluster.link./user hdfs://cluster/user - fs.viewfs.mounttable.cluster./data + fs.viewfs.mounttable.cluster.link./data o3fs://bucket1.volume1/data - fs.viewfs.mounttable.cluster./backup + fs.viewfs.mounttable.cluster.link./backup s3a://bucket1/backup/ ``` @@ -91,17 +91,17 @@ If users want some of their existing cluster (`s3a://bucketA/`) data to mount wi ```xml - fs.viewfs.mounttable.bucketA./user + fs.viewfs.mounttable.bucketA.link./user hdfs://cluster/user - fs.viewfs.mounttable.bucketA./data + fs.viewfs.mounttable.bucketA.link./data o3fs://bucket1.volume1.omhost/data - fs.viewfs.mounttable.bucketA./salesDB + fs.viewfs.mounttable.bucketA.link./salesDB s3a://bucketA/salesDB/ ``` @@ -123,9 +123,9 @@ Note: In ViewFsOverloadScheme, by default the mount links will not be represente ### Central Mount Table Configurations -To enable central mount table configuration, we need to configure `fs.viewfs.mounttable.path` in `core-site.xml` with the value as the Hadoop compatible file system directory/file path, where the `mount-table-.xml` file copied. Here versionNumber is an integer number and need to increase the version number and upload new file in same directory. +To enable central mount table configuration, we need to configure `fs.viewfs.mounttable.path` in `core-site.xml` with the value as the Hadoop compatible file system directory/file path, where the `mount-table..xml` file copied. Here versionNumber is an integer number and need to increase the version number and upload new file in same directory. -The ViewFileSystemOverloadScheme always loads the highest version number `mount-table-.xml`. Please don't replace the file with same name. Always increment the version number to take new file picked by newly initializing clients. Why we don't recommend to replace the files is that, some client might have already opened the connections to old mount-table files already and in middle of loading configuration files, and replacing files can make them fail. +The ViewFileSystemOverloadScheme always loads the highest version number `mount-table..xml`. Please don't replace the file with same name. Always increment the version number to take new file picked by newly initializing clients. Why we don't recommend to replace the files is that, some client might have already opened the connections to old mount-table files already and in middle of loading configuration files, and replacing files can make them fail. ```xml @@ -138,12 +138,12 @@ The ViewFileSystemOverloadScheme always loads the highest version number `mount- ```xml fs.viewfs.mounttable.path - hdfs://cluster/config/mount-table-dir/mount-table-.xml + hdfs://cluster/config/mount-table-dir/mount-table..xml ``` Note: we recommend not to configure mount-links in `core-site.xml` if you configure above valid path. Otherwise both mount links will be mixed and can lead to a confused behavior. -If you copy the `mount-table-.xml`, you may consider having big replication factor depending on your cluster size. So, that file will be available locally to majority of clients as applications(MR/YARN/HBASE..etc) use locality on HDFS when reading `mount-table-.xml`. +If you copy the `mount-table..xml`, you may consider having big replication factor depending on your cluster size. So, that file will be available locally to majority of clients as applications(MR/YARN/HBASE..etc) use locality on HDFS when reading `mount-table..xml`. DFSAdmin commands with View File System Overload Scheme -------------------------------------------------------