HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. Contributed by Hairong Kuang.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@980953 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Hairong Kuang 2010-07-30 20:52:08 +00:00
parent c15ae29a90
commit d0ba178800
5 changed files with 168 additions and 101 deletions

View File

@ -101,6 +101,7 @@ Trunk (unreleased changes)
HADOOP-6656. Adds a thread in the UserGroupInformation to renew TGTs HADOOP-6656. Adds a thread in the UserGroupInformation to renew TGTs
periodically. (Owen O'Malley and ddas via ddas) periodically. (Owen O'Malley and ddas via ddas)
HADOOP-6890. Improve listFiles API introduced by HADOOP-6870. (hairong)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES

View File

@ -786,8 +786,8 @@ public abstract class AbstractFileSystem {
/** /**
* The specification of this method matches that of * The specification of this method matches that of
* {@link FileContext#listLocatedStatus(Path)} except that Path f must be for this * {@link FileContext#listLocatedStatus(Path)} except that Path f
* file system. * must be for this file system.
*/ */
protected Iterator<LocatedFileStatus> listLocatedStatus(final Path f) protected Iterator<LocatedFileStatus> listLocatedStatus(final Path f)
throws AccessControlException, FileNotFoundException, throws AccessControlException, FileNotFoundException,
@ -795,15 +795,28 @@ public abstract class AbstractFileSystem {
return new Iterator<LocatedFileStatus>() { return new Iterator<LocatedFileStatus>() {
private Iterator<FileStatus> itor = listStatusIterator(f); private Iterator<FileStatus> itor = listStatusIterator(f);
/**
* {@inheritDoc}
* @return {@inheritDoc}
* @throws RuntimeException if any IOException occurs during traversal;
* the IOException is set as the cause of the RuntimeException
*/
@Override @Override
public boolean hasNext() { public boolean hasNext() {
return itor.hasNext(); return itor.hasNext();
} }
/**
* {@inheritDoc}
* @return {@inheritDoc}
* @throws RuntimeException if any IOException occurs during traversal;
* the IOException is set as the cause of the RuntimeException
* @throws NoSuchElementException if the iteration has no more elements
*/
@Override @Override
public LocatedFileStatus next() { public LocatedFileStatus next() {
if (!hasNext()) { if (!hasNext()) {
throw new NoSuchElementException(); throw new NoSuchElementException("No more entry in " + f);
} }
FileStatus result = itor.next(); FileStatus result = itor.next();
try { try {

View File

@ -1287,102 +1287,20 @@ public final class FileContext {
}.resolve(this, absF); }.resolve(this, absF);
} }
/**
* List the statuses and block locations of the files in the given path
* if the path is a directory.
* If the given path is a file, return the file's status and block locations.
* If recursive is true, list all file statuses and block locations in
* the subtree rooted at the given path.
* Files across symbolic links are also returned.
*
* @param f is the path
* @param recursive if the subdirectories need to be traversed recursively
*
* @return an iterator that traverses statuses of the files; if an
* IOException occurs during traversal, hasNext() and next() of the
* returned iterator throw a RuntimeException with that IOException
* as its cause
*
* @throws AccessControlException If access is denied
* @throws FileNotFoundException If <code>f</code> does not exist
* @throws UnsupportedFileSystemException If file system for <code>f</code> is
* not supported
* @throws IOException If an I/O error occurred
*
* Exceptions applicable to file systems accessed over RPC:
* @throws RpcClientException If an exception occurred in the RPC client
* @throws RpcServerException If an exception occurred in the RPC server
* @throws UnexpectedServerException If server implementation throws
* undeclared exception to RPC server
*/
public Iterator<LocatedFileStatus> listFiles(
final Path f, final boolean recursive) throws AccessControlException,
FileNotFoundException, UnsupportedFileSystemException,
IOException {
return new Iterator<LocatedFileStatus>() {
// directories seen so far whose contents still have to be listed
private Stack<Path> dirs = new Stack<Path>();
// symbolic link targets seen so far that still have to be resolved
private Stack<Path> symLinks = new Stack<Path>();
// listing currently being consumed
Iterator<LocatedFileStatus> itor = listLocatedStatus(f);
// next file to hand out; null until hasNext() has located one
LocatedFileStatus curFile;
/**
* Advances the traversal until a file is found or nothing is left.
* @return true if a file is available for next()
* @throws RuntimeException if any IOException occurs during traversal;
* the IOException is set as the cause of the RuntimeException
*/
@Override
public boolean hasNext() {
try {
while (curFile == null) {
if (itor.hasNext()) {
// consume the current listing first
handleFileStat(itor.next());
} else if (!dirs.isEmpty()) {
// current listing exhausted: continue with a pending directory
Path dirPath = dirs.pop();
itor = listLocatedStatus(dirPath);
} else if (!symLinks.isEmpty()) {
// no pending directory: resolve a pending symbolic link
Path symLink = symLinks.pop();
FileStatus stat = getFileStatus(symLink);
// a link to a directory is only followed when recursive is set;
// otherwise the link is dropped and the loop moves on
if (stat.isFile() || (recursive && stat.isDirectory())) {
itor = listLocatedStatus(stat.getPath());
}
} else {
return false;
}
}
return true;
} catch (IOException ioe) {
throw (RuntimeException)new RuntimeException().initCause(ioe);
}
}
/**
* Classify one listing entry: a file becomes the next element to return,
* a symbolic link is queued for later resolution, and a directory is
* queued for later listing only when recursive is true.
* @param stat the entry to classify
*/
private void handleFileStat(LocatedFileStatus stat) throws IOException {
if (stat.isFile()) { // file
curFile = stat;
} else if (stat.isSymlink()) { // symbolic link
symLinks.push(stat.getSymlink());
} else if (recursive) { // directory
dirs.push(stat.getPath());
}
}
/**
* @return the file located by hasNext()
* @throws RuntimeException if any IOException occurs during traversal;
* the IOException is set as the cause of the RuntimeException
* @throws java.util.NoSuchElementException if no more file is available
*/
@Override
public LocatedFileStatus next() {
if (hasNext()) {
LocatedFileStatus result = curFile;
// clear the slot so that the following hasNext() searches again
curFile = null;
return result;
}
throw new java.util.NoSuchElementException("No more entry in " + f);
}
/** Removal is not supported by this iterator. */
@Override
public void remove() {
throw new UnsupportedOperationException("Remove is not supported");
}
};
}
/** /**
* List the statuses of the files/directories in the given path if the path is * List the statuses of the files/directories in the given path if the path is
* a directory. Each returned status contains a file's block locations. * a directory.
* Return the file's status and block locations if the path is a file.
*
* If a returned status is a file, it contains the file's block locations.
* *
* @param f is the path * @param f is the path
* *
* @return an iterator that traverses statuses of the files/directories * @return an iterator that traverses statuses of the files/directories
* in the given path * in the given path
* If any IO exception occurs (for example the input directory gets deleted
* while listing is being executed), next() or hasNext() of the returned
* iterator may throw a RuntimeException with the IO exception as the cause.
* *
* @throws AccessControlException If access is denied * @throws AccessControlException If access is denied
* @throws FileNotFoundException If <code>f</code> does not exist * @throws FileNotFoundException If <code>f</code> does not exist
@ -1678,6 +1596,123 @@ public final class FileContext {
}.resolve(FileContext.this, absF); }.resolve(FileContext.this, absF);
} }
/**
* List the statuses and block locations of the files in the given path.
*
* If the path is a directory,
* if recursive is false, return files in the directory;
* if recursive is true, return files in the subtree rooted at the path.
* The subtree is traversed in the depth-first order.
* If the path is a file, return the file's status and block locations.
* Files across symbolic links are also returned.
*
* @param f is the path
* @param recursive if the subdirectories need to be traversed recursively
*
* @return an iterator that traverses statuses of the files.
* If any IO exception occurs (for example a sub-directory gets deleted
* while listing is being executed), next() or hasNext() of the returned
* iterator may throw a RuntimeException with the IO exception as the cause.
*
* @throws AccessControlException If access is denied
* @throws FileNotFoundException If <code>f</code> does not exist
* @throws UnsupportedFileSystemException If file system for <code>f</code>
* is not supported
* @throws IOException If an I/O error occurred
*
* Exceptions applicable to file systems accessed over RPC:
* @throws RpcClientException If an exception occurred in the RPC client
* @throws RpcServerException If an exception occurred in the RPC server
* @throws UnexpectedServerException If server implementation throws
* undeclared exception to RPC server
*/
public Iterator<LocatedFileStatus> listFiles(
final Path f, final boolean recursive) throws AccessControlException,
FileNotFoundException, UnsupportedFileSystemException,
IOException {
return new Iterator<LocatedFileStatus>() {
// listings that were suspended when the traversal descended into a
// directory or followed a symbolic link; resumed in LIFO order
private Stack<Iterator<LocatedFileStatus>> itors =
new Stack<Iterator<LocatedFileStatus>>();
// listing currently being consumed
Iterator<LocatedFileStatus> curItor = listLocatedStatus(f);
// next file to hand out; null until hasNext() has located one
LocatedFileStatus curFile;
/**
* {@inheritDoc}
* @return {@inheritDoc}
* @throws RuntimeException if any IOException occurs during traversal;
* the IOException is set as the cause of the RuntimeException
*/
@Override
public boolean hasNext() {
while (curFile == null) {
if (curItor.hasNext()) {
handleFileStat(curItor.next());
} else if (!itors.empty()) {
// current listing exhausted: resume the most recently suspended one
curItor = itors.pop();
} else {
return false;
}
}
return true;
}
/**
* Process the input stat.
* If it is a file, record it as the next file to return.
* If it is a directory, traverse the directory if recursive is true;
* ignore it if recursive is false.
* If it is a symlink, resolve the symlink first; the link is followed
* if its target is a file, or if its target is a directory and
* recursive is true, and ignored otherwise.
* @param stat input status
* @throws RuntimeException if any io error occurs; the io exception
* is set as the cause of RuntimeException
*/
private void handleFileStat(LocatedFileStatus stat) {
try {
if (stat.isFile()) { // file
curFile = stat;
} else if (stat.isSymlink()) { // symbolic link
// resolve symbolic link
FileStatus symstat = FileContext.this.getFileStatus(
stat.getSymlink());
if (symstat.isFile() || (recursive && symstat.isDirectory())) {
// suspend the current listing and continue with the link's listing
itors.push(curItor);
curItor = listLocatedStatus(stat.getPath());
}
} else if (recursive) { // directory
// suspend the current listing and descend into the directory
itors.push(curItor);
curItor = listLocatedStatus(stat.getPath());
}
} catch (IOException ioe) {
throw (RuntimeException)new RuntimeException().initCause(ioe);
}
}
/**
* {@inheritDoc}
* @return {@inheritDoc}
* @throws RuntimeException if any IOException occurs during traversal;
* the IOException is set as the cause of the RuntimeException
* @throws java.util.NoSuchElementException if no more file is available
*/
@Override
public LocatedFileStatus next() {
if (hasNext()) {
LocatedFileStatus result = curFile;
// clear the slot so that the following hasNext() searches again
curFile = null;
return result;
}
throw new java.util.NoSuchElementException("No more entry in " + f);
}
/** Removal is not supported by this iterator. */
@Override
public void remove() {
throw new UnsupportedOperationException("Remove is not supported");
}
};
}
/** /**
* <p>Return all the files that match filePattern and are not checksum * <p>Return all the files that match filePattern and are not checksum
* files. Results are sorted by their names. * files. Results are sorted by their names.

View File

@ -1320,16 +1320,22 @@ public abstract class FileSystem extends Configured implements Closeable {
} }
/** /**
* List the statuses and block locations of the files in the given path * List the statuses and block locations of the files in the given path.
* if the path is a directory. *
* If the given path is a file, return the file's status and block locations. * If the path is a directory,
* if recursive is true, list all file statuses and block locations in * if recursive is false, returns files in the directory;
* the subtree rooted at the given path. * if recursive is true, return files in the subtree rooted at the path.
* If the path is a file, return the file's status and block locations.
* Files across symbolic links are also returned.
* *
* @param f is the path * @param f is the path
* @param recursive if the subdirectories need to be traversed recursively * @param recursive if the subdirectories need to be traversed recursively
* *
* @return an iterator that traverses statuses of the files * @return an iterator that traverses statuses of the files
* If any IO exception occurs (for example a sub-directory gets deleted
* while listing is being executed), next() or hasNext() of the returned
* iterator may throw a RuntimeException with the IO exception as the cause.
*
* @throws FileNotFoundException when the path does not exist; * @throws FileNotFoundException when the path does not exist;
* IOException see specific implementation * IOException see specific implementation
*/ */
@ -1344,6 +1350,12 @@ public abstract class FileSystem extends Configured implements Closeable {
list(f); list(f);
} }
/**
* {@inheritDoc}
* @return {@inheritDoc}
* @throws RuntimeException if any IOException occurs during traversal;
* the IOException is set as the cause of the RuntimeException
*/
@Override @Override
public boolean hasNext() { public boolean hasNext() {
if (fileStats.isEmpty()) { if (fileStats.isEmpty()) {
@ -1382,6 +1394,13 @@ public abstract class FileSystem extends Configured implements Closeable {
} }
} }
/**
* {@inheritDoc}
* @return {@inheritDoc}
* @throws RuntimeException if any IOException occurs during traversal;
* the IOException is set as the cause of the RuntimeException
* @exception {@inheritDoc}
*/
@Override @Override
public LocatedFileStatus next() { public LocatedFileStatus next() {
if (!hasNext()) { if (!hasNext()) {

View File

@ -106,15 +106,15 @@ public class TestListFiles {
public void testDirectory() throws IOException { public void testDirectory() throws IOException {
fs.mkdirs(DIR1); fs.mkdirs(DIR1);
// test empty directory
Iterator<LocatedFileStatus> itor = fs.listFiles( Iterator<LocatedFileStatus> itor = fs.listFiles(
DIR1, true); DIR1, true);
assertFalse(itor.hasNext()); assertFalse(itor.hasNext());
itor = fs.listFiles(DIR1, false); itor = fs.listFiles(DIR1, false);
assertFalse(itor.hasNext()); assertFalse(itor.hasNext());
// testing directory with 1 file
writeFile(fs, FILE2, FILE_LEN); writeFile(fs, FILE2, FILE_LEN);
// test empty directory
itor = fs.listFiles(DIR1, true); itor = fs.listFiles(DIR1, true);
LocatedFileStatus stat = itor.next(); LocatedFileStatus stat = itor.next();
assertFalse(itor.hasNext()); assertFalse(itor.hasNext());
@ -123,7 +123,6 @@ public class TestListFiles {
assertEquals(fs.makeQualified(FILE2), stat.getPath()); assertEquals(fs.makeQualified(FILE2), stat.getPath());
assertEquals(1, stat.getBlockLocations().length); assertEquals(1, stat.getBlockLocations().length);
// testing directory with 1 file
itor = fs.listFiles(DIR1, false); itor = fs.listFiles(DIR1, false);
stat = itor.next(); stat = itor.next();
assertFalse(itor.hasNext()); assertFalse(itor.hasNext());