From 93b121a9717bb4ef5240fda877ebb5275f6446b4 Mon Sep 17 00:00:00 2001 From: Uma Maheswara Rao G Date: Wed, 10 Jun 2020 15:00:02 -0700 Subject: [PATCH] HADOOP-17060. Clarify listStatus and getFileStatus behaviors inconsistent in the case of ViewFs implementation for isDirectory. Contributed by Uma Maheswara Rao G. --- .../hadoop/fs/viewfs/ViewFileSystem.java | 36 ++++++++++++++----- .../org/apache/hadoop/fs/viewfs/ViewFs.java | 24 +++++++++++++ .../main/java/org/apache/hadoop/fs/Hdfs.java | 22 ++++++++++++ .../hadoop/hdfs/DistributedFileSystem.java | 25 ++++++++++--- 4 files changed, 94 insertions(+), 13 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java index 56d0fc59e90..895edc01397 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFileSystem.java @@ -488,6 +488,14 @@ public class ViewFileSystem extends FileSystem { : new ViewFsFileStatus(orig, qualified); } + /** + * {@inheritDoc} + * + * If the given path is a symlink(mount link), the path will be resolved to a + * target path and it will get the resolved path's FileStatus object. It will + * not be represented as a symlink and isDirectory API returns true if the + * resolved path is a directory, false otherwise. + */ @Override public FileStatus getFileStatus(final Path f) throws AccessControlException, FileNotFoundException, IOException { @@ -505,6 +513,25 @@ public class ViewFileSystem extends FileSystem { res.targetFileSystem.access(res.remainingPath, mode); } + /** + * {@inheritDoc} + * + * Note: listStatus on root("/") considers listing from fallbackLink if + * available. 
If the same directory name is present in configured mount path + * as well as in fallback link, then only the configured mount path will be + * listed in the returned result. + * + * If any of the immediate children of the given path f is a symlink(mount + * link), the returned FileStatus object of that child would be represented + * as a symlink. It will not be resolved to the target path and will not get + * the target path FileStatus object. The target path will be available via + * getSymlink on that child's FileStatus object. Since it is represented as + * a symlink, isDirectory on that child's FileStatus will return false. + * + * If you want to get the FileStatus of target path for that child, you may + * want to use getFileStatus API with that child's symlink path. Please see + * {@link ViewFileSystem#getFileStatus(Path f)} + */ @Override public FileStatus[] listStatus(final Path f) throws AccessControlException, FileNotFoundException, IOException { @@ -1174,20 +1201,11 @@ public class ViewFileSystem extends FileSystem { checkPathIsSlash(f); return new FileStatus(0, true, 0, 0, creationTime, creationTime, PERMISSION_555, ugi.getShortUserName(), ugi.getPrimaryGroupName(), - new Path(theInternalDir.fullPath).makeQualified( myUri, ROOT_PATH)); } - /** - * {@inheritDoc} - * - * Note: listStatus on root("/") considers listing from fallbackLink if - * available. If the same directory name is present in configured mount - * path as well as in fallback link, then only the configured mount path - * will be listed in the returned result.
- */ @Override public FileStatus[] listStatus(Path f) throws AccessControlException, FileNotFoundException, IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java index df10dce50b7..4578a4c353e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ViewFs.java @@ -351,6 +351,14 @@ public class ViewFs extends AbstractFileSystem { return res.targetFileSystem.getFileChecksum(res.remainingPath); } + /** + * {@inheritDoc} + * + * If the given path is a symlink(mount link), the path will be resolved to a + * target path and it will get the resolved path's FileStatus object. It will + * not be represented as a symlink and isDirectory API returns true if the + * resolved path is a directory, false otherwise. + */ @Override public FileStatus getFileStatus(final Path f) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException { @@ -436,6 +444,22 @@ public class ViewFs extends AbstractFileSystem { }; } + /** + * {@inheritDoc} + * + * If any of the immediate children of the given path f is a symlink(mount + * link), the returned FileStatus object of that child would be represented + * as a symlink. It will not be resolved to the target path and will not get + * the target path FileStatus object. The target path will be available via + * getSymlink on that child's FileStatus object. Since it is represented as + * a symlink, isDirectory on that child's FileStatus will return false. + * + * If you want to get the FileStatus of target path for that child, you may + * want to use getFileStatus API with that child's symlink path. Please see + * {@link ViewFs#getFileStatus(Path f)} + * + * Note: In ViewFs, the mount links are represented as symlinks.
+ */ @Override public FileStatus[] listStatus(final Path f) throws AccessControlException, FileNotFoundException, UnresolvedLinkException, IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java index 290f2c0e676..4162b198fb1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/Hdfs.java @@ -135,6 +135,14 @@ public class Hdfs extends AbstractFileSystem { return dfs.getFileChecksumWithCombineMode(getUriPath(f), Long.MAX_VALUE); } + /** + * {@inheritDoc} + * + * If the given path is a symlink, the path will be resolved to a target path + * and it will get the resolved path's FileStatus object. It will not be + * represented as a symlink and isDirectory API returns true if the resolved + * path is a directory, false otherwise. + */ @Override public FileStatus getFileStatus(Path f) throws IOException, UnresolvedLinkException { @@ -269,6 +277,20 @@ public class Hdfs extends AbstractFileSystem { } } + /** + * {@inheritDoc} + * + * If any of the immediate children of the given path f is a symlink, the + * returned FileStatus object of that child would be represented as a + * symlink. It will not be resolved to the target path and will not get the + * target path FileStatus object. The target path will be available via + * getSymlink on that child's FileStatus object. Since it is represented as + * a symlink, isDirectory on that child's FileStatus will return false. + * + * If you want to get the FileStatus of target path for that child, you may + * want to use getFileStatus API with that child's symlink path.
Please see + * {@link Hdfs#getFileStatus(Path f)} + */ @Override public FileStatus[] listStatus(Path f) throws IOException, UnresolvedLinkException { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index b4a932ef142..55e228d34eb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -1143,10 +1143,21 @@ public class DistributedFileSystem extends FileSystem /** * List all the entries of a directory * - * Note that this operation is not atomic for a large directory. - * The entries of a directory may be fetched from NameNode multiple times. - * It only guarantees that each name occurs once if a directory - * undergoes changes between the calls. + * Note that this operation is not atomic for a large directory. The entries + * of a directory may be fetched from NameNode multiple times. It only + * guarantees that each name occurs once if a directory undergoes changes + * between the calls. + * + * If any of the immediate children of the given path p is a symlink, the + * returned FileStatus object of that child would be represented as a + * symlink. It will not be resolved to the target path and will not get the + * target path FileStatus object. The target path will be available via + * getSymlink on that child's FileStatus object. Since it is represented as + * a symlink, isDirectory on that child's FileStatus will return false. + * + * If you want to get the FileStatus of target path for that child, you may + * want to use getFileStatus API with that child's symlink path.
Please see + * {@link DistributedFileSystem#getFileStatus(Path f)} */ @Override public FileStatus[] listStatus(Path p) throws IOException { @@ -1712,6 +1723,12 @@ public class DistributedFileSystem extends FileSystem /** * Returns the stat information about the file. + * + * If the given path is a symlink, the path will be resolved to a target path + * and it will get the resolved path's FileStatus object. It will not be + * represented as a symlink and isDirectory API returns true if the resolved + * path is a directory, false otherwise. + * * @throws FileNotFoundException if the file does not exist. */ @Override