HADOOP-17060. Clarify listStatus and getFileStatus behaviors inconsistent in the case of ViewFs implementation for isDirectory. Contributed by Uma Maheswara Rao G.

(cherry picked from commit 93b121a971)
This commit is contained in:
Uma Maheswara Rao G 2020-06-10 15:00:02 -07:00
parent 7b29019eea
commit 3cddd0be29
4 changed files with 94 additions and 13 deletions

View File

@ -488,6 +488,14 @@ public class ViewFileSystem extends FileSystem {
: new ViewFsFileStatus(orig, qualified); : new ViewFsFileStatus(orig, qualified);
} }
/**
* {@inheritDoc}
*
* If the given path is a symlink (mount link), the path is resolved to its
* target path and the FileStatus of that resolved path is returned. The
* result is not represented as a symlink, and isDirectory returns true if
* the resolved path is a directory, false otherwise.
*/
@Override @Override
public FileStatus getFileStatus(final Path f) throws AccessControlException, public FileStatus getFileStatus(final Path f) throws AccessControlException,
FileNotFoundException, IOException { FileNotFoundException, IOException {
@ -505,6 +513,25 @@ public class ViewFileSystem extends FileSystem {
res.targetFileSystem.access(res.remainingPath, mode); res.targetFileSystem.access(res.remainingPath, mode);
} }
/**
* {@inheritDoc}
*
* Note: listStatus on root("/") considers listing from fallbackLink if
* available. If the same directory name is present in configured mount path
* as well as in fallback link, then only the configured mount path will be
* listed in the returned result.
*
* If any of the immediate children of the given path f is a symlink (mount
* link), the returned FileStatus object of that child will be represented
* as a symlink. It will not be resolved to the target path and will not get
* the target path FileStatus object. The target path will be available via
* getSymlink on that child's FileStatus object. Since it is represented as
* a symlink, isDirectory on that child's FileStatus will return false.
*
* If you want to get the FileStatus of the target path for that child, you
* may want to use the getFileStatus API with that child's symlink path.
* Please see {@link ViewFileSystem#getFileStatus(Path f)}
*/
@Override @Override
public FileStatus[] listStatus(final Path f) throws AccessControlException, public FileStatus[] listStatus(final Path f) throws AccessControlException,
FileNotFoundException, IOException { FileNotFoundException, IOException {
@ -1174,20 +1201,11 @@ public class ViewFileSystem extends FileSystem {
checkPathIsSlash(f); checkPathIsSlash(f);
return new FileStatus(0, true, 0, 0, creationTime, creationTime, return new FileStatus(0, true, 0, 0, creationTime, creationTime,
PERMISSION_555, ugi.getShortUserName(), ugi.getPrimaryGroupName(), PERMISSION_555, ugi.getShortUserName(), ugi.getPrimaryGroupName(),
new Path(theInternalDir.fullPath).makeQualified( new Path(theInternalDir.fullPath).makeQualified(
myUri, ROOT_PATH)); myUri, ROOT_PATH));
} }
/**
* {@inheritDoc}
*
* Note: listStatus on root("/") considers listing from fallbackLink if
* available. If the same directory name is present in configured mount
* path as well as in fallback link, then only the configured mount path
* will be listed in the returned result.
*/
@Override @Override
public FileStatus[] listStatus(Path f) throws AccessControlException, public FileStatus[] listStatus(Path f) throws AccessControlException,
FileNotFoundException, IOException { FileNotFoundException, IOException {

View File

@ -351,6 +351,14 @@ public class ViewFs extends AbstractFileSystem {
return res.targetFileSystem.getFileChecksum(res.remainingPath); return res.targetFileSystem.getFileChecksum(res.remainingPath);
} }
/**
* {@inheritDoc}
*
* If the given path is a symlink (mount link), the path is resolved to its
* target path and the FileStatus of that resolved path is returned. The
* result is not represented as a symlink, and isDirectory returns true if
* the resolved path is a directory, false otherwise.
*/
@Override @Override
public FileStatus getFileStatus(final Path f) throws AccessControlException, public FileStatus getFileStatus(final Path f) throws AccessControlException,
FileNotFoundException, UnresolvedLinkException, IOException { FileNotFoundException, UnresolvedLinkException, IOException {
@ -436,6 +444,22 @@ public class ViewFs extends AbstractFileSystem {
}; };
} }
/**
* {@inheritDoc}
*
* If any of the immediate children of the given path f is a symlink (mount
* link), the returned FileStatus object of that child will be represented
* as a symlink. It will not be resolved to the target path and will not get
* the target path FileStatus object. The target path will be available via
* getSymlink on that child's FileStatus object. Since it is represented as
* a symlink, isDirectory on that child's FileStatus will return false.
*
* If you want to get the FileStatus of the target path for that child, you
* may want to use the getFileStatus API with that child's symlink path.
* Please see {@link ViewFs#getFileStatus(Path f)}
*
* Note: In ViewFs, the mount links are represented as symlinks.
*/
@Override @Override
public FileStatus[] listStatus(final Path f) throws AccessControlException, public FileStatus[] listStatus(final Path f) throws AccessControlException,
FileNotFoundException, UnresolvedLinkException, IOException { FileNotFoundException, UnresolvedLinkException, IOException {

View File

@ -135,6 +135,14 @@ public class Hdfs extends AbstractFileSystem {
return dfs.getFileChecksumWithCombineMode(getUriPath(f), Long.MAX_VALUE); return dfs.getFileChecksumWithCombineMode(getUriPath(f), Long.MAX_VALUE);
} }
/**
* {@inheritDoc}
*
* If the given path is a symlink, the path is resolved to its target path
* and the FileStatus of that resolved path is returned. The result is not
* represented as a symlink, and isDirectory returns true if the resolved
* path is a directory, false otherwise.
*/
@Override @Override
public FileStatus getFileStatus(Path f) public FileStatus getFileStatus(Path f)
throws IOException, UnresolvedLinkException { throws IOException, UnresolvedLinkException {
@ -269,6 +277,20 @@ public class Hdfs extends AbstractFileSystem {
} }
} }
/**
* {@inheritDoc}
*
* If any of the immediate children of the given path f is a symlink, the
* returned FileStatus object of that child will be represented as a
* symlink. It will not be resolved to the target path and will not get the
* target path FileStatus object. The target path will be available via
* getSymlink on that child's FileStatus object. Since it is represented as
* a symlink, isDirectory on that child's FileStatus will return false.
*
* If you want to get the FileStatus of the target path for that child, you
* may want to use the getFileStatus API with that child's symlink path.
* Please see {@link Hdfs#getFileStatus(Path f)}
*/
@Override @Override
public FileStatus[] listStatus(Path f) public FileStatus[] listStatus(Path f)
throws IOException, UnresolvedLinkException { throws IOException, UnresolvedLinkException {

View File

@ -1143,10 +1143,21 @@ public class DistributedFileSystem extends FileSystem
/** /**
* List all the entries of a directory * List all the entries of a directory
* *
* Note that this operation is not atomic for a large directory. * Note that this operation is not atomic for a large directory. The entries
* The entries of a directory may be fetched from NameNode multiple times. * of a directory may be fetched from NameNode multiple times. It only
* It only guarantees that each name occurs once if a directory * guarantees that each name occurs once if a directory undergoes changes
* undergoes changes between the calls. * between the calls.
*
* If any of the immediate children of the given path f is a symlink, the
* returned FileStatus object of that child will be represented as a
* symlink. It will not be resolved to the target path and will not get the
* target path FileStatus object. The target path will be available via
* getSymlink on that child's FileStatus object. Since it is represented as
* a symlink, isDirectory on that child's FileStatus will return false.
*
* If you want to get the FileStatus of the target path for that child, you
* may want to use the getFileStatus API with that child's symlink path.
* Please see {@link DistributedFileSystem#getFileStatus(Path f)}
*/ */
@Override @Override
public FileStatus[] listStatus(Path p) throws IOException { public FileStatus[] listStatus(Path p) throws IOException {
@ -1712,6 +1723,12 @@ public class DistributedFileSystem extends FileSystem
/** /**
* Returns the stat information about the file. * Returns the stat information about the file.
*
* If the given path is a symlink, the path is resolved to its target path
* and the FileStatus of that resolved path is returned. The result is not
* represented as a symlink, and isDirectory returns true if the resolved
* path is a directory, false otherwise.
*
* @throws FileNotFoundException if the file does not exist. * @throws FileNotFoundException if the file does not exist.
*/ */
@Override @Override