From 5fda66e4a38c7597bc0bc9c94748c8491b7f3750 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Fri, 22 Jul 2016 15:38:18 -0700 Subject: [PATCH] HADOOP-13207. Specify FileSystem listStatus, listFiles and RemoteIterator. Contributed by Steve Loughran. --- .../org/apache/hadoop/fs/GlobExpander.java | 2 +- .../java/org/apache/hadoop/fs/GlobFilter.java | 2 +- .../site/markdown/filesystem/filesystem.md | 397 ++++++++++-- .../AbstractContractGetFileStatusTest.java | 586 +++++++++++++++++- .../AbstractContractRootDirectoryTest.java | 44 +- .../contract/AbstractFSContractTestBase.java | 63 +- .../hadoop/fs/contract/ContractTestUtils.java | 195 ++++-- 7 files changed, 1162 insertions(+), 127 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java index dff8b70ca95..cb430ed3f62 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java @@ -26,7 +26,7 @@ import org.apache.hadoop.classification.InterfaceStability; @InterfaceAudience.Private @InterfaceStability.Unstable -class GlobExpander { +public class GlobExpander { static class StringWithOffset { String string; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobFilter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobFilter.java index e649bd2196a..f8cb89e0ab6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobFilter.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobFilter.java @@ -72,7 +72,7 @@ public class GlobFilter implements PathFilter { } } - boolean hasPattern() { + public boolean hasPattern() { return pattern.hasWildcard(); } diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md index abbcf6dcdfa..72dbc3cb7b9 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md @@ -34,7 +34,9 @@ state are not reflected in the filesystem itself: they are unique to the instanc of the client. **Implementation Note**: the static `FileSystem get(URI uri, Configuration conf) ` method MAY return -a pre-existing instance of a filesystem client class—a class that may also be in use in other threads. The implementations of `FileSystem` which ship with Apache Hadoop *do not make any attempt to synchronize access to the working directory field*. +a pre-existing instance of a filesystem client class—a class that may also be in use in other threads. +The implementations of `FileSystem` which ship with Apache Hadoop +*do not make any attempt to synchronize access to the working directory field*. ## Invariants @@ -117,39 +119,50 @@ code may fail. #### Implementation Notes -* The FTPFileSystem queries this value from the remote filesystem and may -fail with a RuntimeException or subclass thereof if there is a connectivity +* The `FTPFileSystem` queries this value from the remote filesystem and may +fail with a `RuntimeException` or subclass thereof if there is a connectivity problem. The time to execute the operation is not bounded. -### `FileStatus[] listStatus(Path p, PathFilter filter)` +### `FileStatus[] listStatus(Path path, PathFilter filter)` -A `PathFilter` `f` is a predicate function that returns true iff the path `p` -meets the filter's conditions. +Lists entries under a path, `path`. + +If `path` refers to a file and the filter accepts it, +then that file's `FileStatus` entry is returned in a single-element array. + +If the path refers to a directory, the call returns a list of all its immediate +child paths which are accepted by the filter —and does not include the directory +itself. + +A `PathFilter` `filter` is a class whose `accept(path)` returns true iff the path +`path` meets the filter's conditions. #### Preconditions -Path must exist: +Path `path` must exist: - if not exists(FS, p) : raise FileNotFoundException + if not exists(FS, path) : raise FileNotFoundException #### Postconditions - if isFile(FS, p) and f(p) : - result = [getFileStatus(p)] + if isFile(FS, path) and filter.accept(path) : + result = [ getFileStatus(path) ] - elif isFile(FS, p) and not f(P) : - result = [] + elif isFile(FS, path) and not filter.accept(P) : + result = [] - elif isDir(FS, p): - result [getFileStatus(c) for c in children(FS, p) where f(c) == True] + elif isDir(FS, path): + result = [ + getFileStatus(c) for c in children(FS, path) if filter.accepts(c) + ] **Implicit invariant**: the contents of a `FileStatus` of a child retrieved via `listStatus()` are equal to those from a call of `getFileStatus()` to the same path: - forall fs in listStatus(Path) : + forall fs in listStatus(path) : fs == getFileStatus(fs.path) **Ordering of results**: there is no guarantee of ordering of the listed entries. @@ -165,29 +178,28 @@ The details MAY be out of date, including the contents of any directory, the attributes of any files, and the existence of the path supplied. The state of a directory MAY change during the evaluation -process. This may be reflected in a listing that is split between the pre- -and post-update FileSystem states. - +process. * After an entry at path `P` is created, and before any other - changes are made to the FileSystem, `listStatus(P)` MUST + changes are made to the filesystem, `listStatus(P)` MUST find the file and return its status. -* After an entry at path `P` is deleted, `listStatus(P)` MUST +* After an entry at path `P` is deleted, and before any other + changes are made to the filesystem, `listStatus(P)` MUST raise a `FileNotFoundException`. * After an entry at path `P` is created, and before any other - changes are made to the FileSystem, the result of `listStatus(parent(P))` SHOULD + changes are made to the filesystem, the result of `listStatus(parent(P))` SHOULD include the value of `getFileStatus(P)`. * After an entry at path `P` is created, and before any other - changes are made to the FileSystem, the result of `listStatus(parent(P))` SHOULD + changes are made to the filesystem, the result of `listStatus(parent(P))` SHOULD NOT include the value of `getFileStatus(P)`. This is not a theoretical possibility, it is observable in HDFS when a directory contains many thousands of files. -Consider a directory "d" with the contents: +Consider a directory `"/d"` with the contents: a part-0000001 @@ -197,8 +209,8 @@ Consider a directory "d" with the contents: If the number of files is such that HDFS returns a partial listing in each -response, then, if a listing `listStatus("d")` takes place concurrently with the operation -`rename("d/a","d/z"))`, the result may be one of: +response, then, if a listing `listStatus("/d")` takes place concurrently with the operation +`rename("/d/a","/d/z"))`, the result may be one of: [a, part-0000001, ... , part-9999999] [part-0000001, ... , part-9999999, z] @@ -212,6 +224,160 @@ these inconsistent views are only likely when listing a directory with many chil Other filesystems may have stronger consistency guarantees, or return inconsistent data more readily. +### `FileStatus[] listStatus(Path path)` + +This is exactly equivalent to `listStatus(Path, DEFAULT_FILTER)` where +`DEFAULT_FILTER.accept(path) = True` for all paths. + +The atomicity and consistency constraints are as for +`listStatus(Path, DEFAULT_FILTER)`. + +### `FileStatus[] listStatus(Path[] paths, PathFilter filter)` + +Enumerate all files found in the list of directories passed in, +calling `listStatus(path, filter)` on each one. + +As with `listStatus(path, filter)`, the results may be inconsistent. +That is: the state of the filesystem changed during the operation. + +There are no guarantees as to whether paths are listed in a specific order, only +that they must all be listed, and, at the time of listing, exist. + +#### Preconditions + +All paths must exist. There is no requirement for uniqueness. + + forall p in paths : + exists(fs, p) else raise FileNotFoundException + +#### Postconditions + +The result is an array whose entries contain every status element +found in the path listings, and no others. + + result = [listStatus(p, filter) for p in paths] + +Implementations MAY merge duplicate entries; and/or optimize the +operation by recoginizing duplicate paths and only listing the entries +once. + +The default implementation iterates through the list; it does not perform +any optimizations. + +The atomicity and consistency constraints are as for +`listStatus(Path, PathFilter)`. + + +### `FileStatus[] listStatus(Path[] paths)` + +Enumerate all files found in the list of directories passed in, +calling `listStatus(path, DEFAULT_FILTER)` on each one, where +the `DEFAULT_FILTER` accepts all path names. + +### `RemoteIterator[LocatedFileStatus] listLocatedStatus(Path path, PathFilter filter)` + +Return an iterator enumerating the `LocatedFileStatus` entries under +a path. This is similar to `listStatus(Path)` except that the return +value is an instance of the `LocatedFileStatus` subclass of a `FileStatus`, +and that rather than return an entire list, an iterator is returned. + +This is actually a `protected` method, directly invoked by +`listLocatedStatus(Path path):`. Calls to it may be delegated through +layered filesystems, such as `FilterFileSystem`, so its implementation MUST +be considered mandatory, even if `listLocatedStatus(Path path)` has been +implemented in a different manner. There are open JIRAs proposing +making this method public; it may happen in future. + +There is no requirement for the iterator to provide a consistent view +of the child entries of a path. The default implementation does use +`listStatus(Path)` to list its children, with its consistency constraints +already documented. Other implementations may perform the enumeration even +more dynamically. For example fetching a windowed subset of child entries, +so avoiding building up large data structures and the +transmission of large messages. +In such situations, changes to the filesystem are more likely to become +visible. + +Callers MUST assume that the iteration operation MAY fail if changes +to the filesystem take place between this call returning and the iteration +being completely performed. + +#### Preconditions + +Path `path` must exist: + + exists(FS, path) : raise FileNotFoundException + +#### Postconditions + +The operation generates a set of results, `resultset`, equal to the result of +`listStatus(path, filter)`: + + if isFile(FS, path) and filter.accept(path) : + resultset = [ getLocatedFileStatus(FS, path) ] + + elif isFile(FS, path) and not filter.accept(path) : + resultset = [] + + elif isDir(FS, path) : + resultset = [ + getLocatedFileStatus(FS, c) + for c in children(FS, path) where filter.accept(c) + ] + +The operation `getLocatedFileStatus(FS, path: Path): LocatedFileStatus` +is defined as a generator of a `LocatedFileStatus` instance `ls` +where: + + fileStatus = getFileStatus(FS, path) + + bl = getFileBlockLocations(FS, path, 0, fileStatus.len) + + locatedFileStatus = new LocatedFileStatus(fileStatus, bl) + +The ordering in which the elements of `resultset` are returned in the iterator +is undefined. + +The atomicity and consistency constraints are as for +`listStatus(Path, PathFilter)`. + +### `RemoteIterator[LocatedFileStatus] listLocatedStatus(Path path)` + +The equivalent to `listLocatedStatus(path, DEFAULT_FILTER)`, +where `DEFAULT_FILTER` accepts all path names. + +### `RemoteIterator[LocatedFileStatus] listFiles(Path path, boolean recursive)` + +Create an iterator over all files in/under a directory, potentially +recursing into child directories. + +The goal of this operation is to permit large recursive directory scans +to be handled more efficiently by filesystems, by reducing the amount +of data which must be collected in a single RPC call. + +#### Preconditions + + exists(FS, path) else raise FileNotFoundException + +### Postconditions + +The outcome is an iterator, whose output from the sequence of +`iterator.next()` calls can be defined as the set `iteratorset`: + + if not recursive: + iteratorset == listStatus(path) + else: + iteratorset = [ + getLocatedFileStatus(FS, d) + for d in descendants(FS, path) + ] + +The function `getLocatedFileStatus(FS, d)` is as defined in +`listLocatedStatus(Path, PathFilter)`. + +The atomicity and consistency constraints are as for +`listStatus(Path, PathFilter)`. + ### `BlockLocation[] getFileBlockLocations(FileStatus f, int s, int l)` #### Preconditions @@ -229,19 +395,19 @@ of block locations where the data in the range `[s:s+l]` can be found. if f == null : result = null - elif f.getLen()) <= s + elif f.getLen() <= s: result = [] - else result = [ locations(FS, b) for all b in blocks(FS, p, s, s+l)] + else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)] -where +Where def locations(FS, b) = a list of all locations of a block in the filesystem - def blocks(FS, p, s, s + l) = a list of the blocks containing data(FS, path)[s:s+l] + def blocks(FS, p, s, s + l) = a list of the blocks containing data(FS, path)[s:s+l] -Note that that as `length(FS, f) ` is defined as 0 if `isDir(FS, f)`, the result -of `getFileBlockLocations()` on a directory is [] +Note that that as `length(FS, f) ` is defined as `0` if `isDir(FS, f)`, the result +of `getFileBlockLocations()` on a directory is `[]` If the filesystem is not location aware, it SHOULD return @@ -256,7 +422,8 @@ If the filesystem is not location aware, it SHOULD return *A bug in Hadoop 1.0.3 means that a topology path of the same number of elements as the cluster topology MUST be provided, hence Filesystems SHOULD -return that `"/default/localhost"` path +return that `"/default/localhost"` path. While this is no longer an issue, +the convention is generally retained. ### `BlockLocation[] getFileBlockLocations(Path P, int S, int L)` @@ -270,11 +437,14 @@ return that `"/default/localhost"` path #### Postconditions - result = getFileBlockLocations(getStatus(P), S, L) + result = getFileBlockLocations(getFileStatus(FS, P), S, L) ### `long getDefaultBlockSize()` +Get the "default" block size for a filesystem. This often used during +split calculations to divide work optimally across a set of worker processes. + #### Preconditions #### Postconditions @@ -293,6 +463,9 @@ A FileSystem MAY make this user-configurable (the S3 and Swift filesystem client ### `long getDefaultBlockSize(Path p)` +Get the "default" block size for a path —that is, the block size to be used +when writing objects to a path in the filesystem. + #### Preconditions @@ -308,9 +481,19 @@ Filesystems that support mount points may have different default values for different paths, in which case the specific default value for the destination path SHOULD be returned. +It is not an error if the path does not exist: the default/recommended value +for that part of the filesystem MUST be returned. ### `long getBlockSize(Path p)` +This method is exactly equivalent to querying the block size +of the `FileStatus` structure returned in `getFileStatus(p)`. +It is deprecated in order to encourage users to make a single call to +`getFileStatus(p)` and then use the result to examine multiple attributes +of the file (e.g. length, type, block size). If more than one attribute is queried, +This can become a significant performance optimization —and reduce load +on the filesystem. + #### Preconditions if not exists(FS, p) : raise FileNotFoundException @@ -321,7 +504,7 @@ SHOULD be returned. result == getFileStatus(P).getBlockSize() -The outcome of this operation MUST be identical to that contained in +The outcome of this operation MUST be identical to that contained in the `FileStatus` returned from `getFileStatus(P)`. @@ -415,7 +598,7 @@ The result is `FSDataOutputStream`, which through its operations may generate ne clients creating files with `overwrite==true` to fail if the file is created by another client between the two tests. -* S3N, Swift and potentially other Object Stores do not currently change the FS state +* S3N, S3A, Swift and potentially other Object Stores do not currently change the FS state until the output stream `close()` operation is completed. This MAY be a bug, as it allows >1 client to create a file with `overwrite==false`, and potentially confuse file/directory logic @@ -827,7 +1010,7 @@ Truncate cannot be performed on a file, which is open for writing or appending. Return: `true`, if truncation is finished and the file can be immediately opened for appending, or `false` otherwise. -HDFS: HDFS reutrns `false` to indicate that a background process of adjusting +HDFS: HDFS returns `false` to indicate that a background process of adjusting the length of the last block has been started, and clients should wait for it to complete before they can proceed with further file updates. @@ -835,3 +1018,145 @@ to complete before they can proceed with further file updates. If an input stream is open when truncate() occurs, the outcome of read operations related to the part of the file being truncated is undefined. + + + +## interface `RemoteIterator` + +The `RemoteIterator` interface is used as a remote-access equivalent +to `java.util.Iterator`, allowing the caller to iterate through a finite sequence +of remote data elements. + +The core differences are + +1. `Iterator`'s optional `void remove()` method is not supported. +2. For those methods which are supported, `IOException` exceptions +may be raised. + +```java +public interface RemoteIterator { + boolean hasNext() throws IOException; + E next() throws IOException; +} +``` + +The basic view of the interface is that `hasNext()` being true implies +that `next()` will successfully return the next entry in the list: + +``` +while hasNext(): next() +``` + +Equally, a successful call to `next()` implies that had `hasNext()` been invoked +prior to the call to `next()`, it would have been true. + +```java +boolean elementAvailable = hasNext(); +try { + next(); + assert elementAvailable; +} catch (NoSuchElementException e) { + assert !elementAvailable +} +``` + +The `next()` operator MUST iterate through the list of available +results, *even if no calls to `hasNext()` are made*. + +That is, it is possible to enumerate the results through a loop which +only terminates when a `NoSuchElementException` exception is raised. + +```java +try { + while (true) { + process(iterator.next()); + } +} catch (NoSuchElementException ignored) { + // the end of the list has been reached +} +``` + +The output of the iteration is equivalent to the loop + +```java +while (iterator.hasNext()) { + process(iterator.next()); +} +``` + +As raising exceptions is an expensive operation in JVMs, the `while(hasNext())` +loop option is more efficient. (see also [Concurrency and the Remote Iterator](#RemoteIteratorConcurrency) +for a dicussion on this topic). + +Implementors of the interface MUST support both forms of iterations; authors +of tests SHOULD verify that both iteration mechanisms work. + +The iteration is required to return a finite sequence; both forms +of loop MUST ultimately terminate. All implementations of the interface in the +Hadoop codebase meet this requirement; all consumers assume that it holds. + +### `boolean hasNext()` + +Returns true if-and-only-if a subsequent single call to `next()` would +return an element rather than raise an exception. + +#### Preconditions + +#### Postconditions + + result = True ==> next() will succeed. + result = False ==> next() will raise an exception + +Multiple calls to `hasNext()`, without any intervening `next()` calls, MUST +return the same value. + +```java +boolean has1 = iterator.hasNext(); +boolean has2 = iterator.hasNext(); +assert has1 == has2; +``` + +### `E next()` + +Return the next element in the iteration. + +#### Preconditions + + hasNext() else raise java.util.NoSuchElementException + +#### Postconditions + + result = the next element in the iteration + +Repeated calls to `next()` return +subsequent elements in the sequence, until the entire sequence has been returned. + + +### Concurrency and the Remote Iterator + +The primary use of `RemoteIterator` in the filesystem APIs is to list files +on (possibly remote) filesystems. These filesystems are invariably accessed +concurrently; the state of the filesystem MAY change between a `hasNext()` +probe and the invocation of the `next()` call. + +Accordingly, a robust iteration through a `RemoteIterator` would catch and +discard `NoSuchElementException` exceptions raised during the process, which +could be done through the `while(true)` iteration example above, or +through a `hasNext()/next()` sequence with an outer `try/catch` clause to +catch a `NoSuchElementException` alongside other exceptions which may be +raised during a failure (for example, a `FileNotFoundException`) + + +```java +try { + while (iterator.hasNext()) { + process(iterator.next()); + } +} catch (NoSuchElementException ignored) { + // the end of the list has been reached +} +``` + +It is notable that this is *not* done in the Hadoop codebase. This does not imply +that robust loops are not recommended —more that the concurrency +problems were not considered during the implementation of these loops. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractGetFileStatusTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractGetFileStatusTest.java index 3e5bb12f7ad..c68c1930e78 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractGetFileStatusTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractGetFileStatusTest.java @@ -20,27 +20,36 @@ package org.apache.hadoop.fs.contract; import java.io.FileNotFoundException; import java.io.IOException; +import java.util.List; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FilterFileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.RemoteIterator; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.*; /** - * Test getFileStatus -if supported + * Test getFileStatus and related listing operations. */ public abstract class AbstractContractGetFileStatusTest extends AbstractFSContractTestBase { - private static final Logger LOG = - LoggerFactory.getLogger(AbstractContractGetFileStatusTest.class); private Path testPath; private Path target; + // the tree parameters. Kept small to avoid killing object store test + // runs too much. + + private static final int TREE_DEPTH = 2; + private static final int TREE_WIDTH = 3; + private static final int TREE_FILES = 4; + private static final int TREE_FILESIZE = 512; + @Override public void setup() throws Exception { super.setup(); @@ -56,7 +65,7 @@ public abstract class AbstractContractGetFileStatusTest extends try { FileStatus status = getFileSystem().getFileStatus(target); //got here: trouble - fail("expected a failure"); + fail("expected a failure, got " + status); } catch (FileNotFoundException e) { //expected handleExpectedException(e); @@ -65,20 +74,561 @@ public abstract class AbstractContractGetFileStatusTest extends @Test public void testListStatusEmptyDirectory() throws IOException { + describe("List status on an empty directory"); + Path subfolder = createDirWithEmptySubFolder(); + FileSystem fs = getFileSystem(); + Path path = getContract().getTestPath(); + new TreeScanResults(fs.listStatus(path)) + .assertSizeEquals("listStatus(" + path + ")", 0, 1, 0); + describe("Test on empty subdirectory"); + new TreeScanResults(fs.listStatus(subfolder)) + .assertSizeEquals("listStatus(empty subfolder)", 0, 0, 0); + } + + @Test + public void testListFilesEmptyDirectoryNonrecursive() throws IOException { + listFilesOnEmptyDir(false); + } + + @Test + public void testListFilesEmptyDirectoryRecursive() throws IOException { + listFilesOnEmptyDir(true); + } + + /** + * Call listFiles on an directory with an empty subdir. + * @param recursive should the list be recursive? + * @throws IOException IO Problems + */ + private void listFilesOnEmptyDir(boolean recursive) throws IOException { + describe("Invoke listFiles(recursive=" + recursive + ")" + + " on empty directories, expect nothing found"); + Path subfolder = createDirWithEmptySubFolder(); + FileSystem fs = getFileSystem(); + new TreeScanResults(fs.listFiles(getContract().getTestPath(), recursive)) + .assertSizeEquals("listFiles(test dir, " + recursive + ")", 0, 0, 0); + describe("Test on empty subdirectory"); + new TreeScanResults(fs.listFiles(subfolder, recursive)) + .assertSizeEquals("listFiles(empty subfolder, " + recursive + ")", + 0, 0, 0); + } + + @Test + public void testListLocatedStatusEmptyDirectory() throws IOException { + describe("Invoke listLocatedStatus() on empty directories;" + + " expect directories to be found"); + Path subfolder = createDirWithEmptySubFolder(); + FileSystem fs = getFileSystem(); + new TreeScanResults(fs.listLocatedStatus(getContract().getTestPath())) + .assertSizeEquals("listLocatedStatus(test dir)", 0, 1, 0); + describe("Test on empty subdirectory"); + new TreeScanResults(fs.listLocatedStatus(subfolder)) + .assertSizeEquals("listLocatedStatus(empty subfolder)", 0, 0, 0); + } + + /** + * All tests cases against complex directories are aggregated into one, so + * that the setup and teardown costs against object stores can be shared. + * @throws Throwable + */ + @Test + public void testComplexDirActions() throws Throwable { + TreeScanResults tree = createTestTree(); + checkListStatusStatusComplexDir(tree); + checkListLocatedStatusStatusComplexDir(tree); + checkListFilesComplexDirNonRecursive(tree); + checkListFilesComplexDirRecursive(tree); + } + + /** + * Test {@link FileSystem#listStatus(Path)} on a complex + * directory tree. + * @param tree directory tree to list. + * @throws Throwable + */ + protected void checkListStatusStatusComplexDir(TreeScanResults tree) + throws Throwable { + describe("Expect listStatus to list all entries in top dir only"); + + FileSystem fs = getFileSystem(); + TreeScanResults listing = new TreeScanResults( + fs.listStatus(tree.getBasePath())); + listing.assertSizeEquals("listStatus()", TREE_FILES, TREE_WIDTH, 0); + } + + /** + * Test {@link FileSystem#listLocatedStatus(Path)} on a complex + * directory tree. + * @param tree directory tree to list. + * @throws Throwable + */ + protected void checkListLocatedStatusStatusComplexDir(TreeScanResults tree) + throws Throwable { + describe("Expect listLocatedStatus to list all entries in top dir only"); + FileSystem fs = getFileSystem(); + TreeScanResults listing = new TreeScanResults( + fs.listLocatedStatus(tree.getBasePath())); + listing.assertSizeEquals("listLocatedStatus()", TREE_FILES, TREE_WIDTH, 0); + verifyFileStats(fs.listLocatedStatus(tree.getBasePath())); + + // listLocatedStatus and listStatus must return the same files. + TreeScanResults listStatus = new TreeScanResults( + fs.listStatus(tree.getBasePath())); + listing.assertEquivalent(listStatus); + + // now check without using + List statusThroughNext = toListThroughNextCallsAlone( + fs.listLocatedStatus(tree.getBasePath()) + ); + TreeScanResults resultsThroughNext = new TreeScanResults(statusThroughNext); + listStatus.assertFieldsEquivalent("files", listing, + listStatus.getFiles(), + resultsThroughNext.getFiles()); + } + + /** + * Test {@link FileSystem#listFiles(Path, boolean)} on a complex + * directory tree and the recursive flag set to false. + * @param tree directory tree to list. + * @throws Throwable + */ + protected void checkListFilesComplexDirNonRecursive(TreeScanResults tree) + throws Throwable { + describe("Expect non-recursive listFiles(false) to list all entries" + + " in top dir only"); + FileSystem fs = getFileSystem(); + TreeScanResults listing = new TreeScanResults( + fs.listFiles(tree.getBasePath(), false)); + listing.assertSizeEquals("listFiles(false)", TREE_FILES, 0, 0); + verifyFileStats(fs.listFiles(tree.getBasePath(), false)); + + // the files listed should match the set of files in a listStatus() call. + // the directories are not checked + TreeScanResults listStatus = new TreeScanResults( + fs.listStatus(tree.getBasePath())); + listStatus.assertFieldsEquivalent("files", listing, + listStatus.getFiles(), + listing.getFiles()); + List statusThroughNext = toListThroughNextCallsAlone( + fs.listFiles(tree.getBasePath(), false)); + TreeScanResults resultsThroughNext = new TreeScanResults(statusThroughNext); + listStatus.assertFieldsEquivalent("files", listing, + listStatus.getFiles(), + resultsThroughNext.getFiles()); + } + + /** + * Test {@link FileSystem#listFiles(Path, boolean)} on a complex + * directory tree and the recursive flag set to true. + * @param tree directory tree to list. + * @throws Throwable + */ + protected void checkListFilesComplexDirRecursive(TreeScanResults tree) + throws Throwable { + describe("Expect recursive listFiles(true) to" + + " list all files down the tree"); + FileSystem fs = getFileSystem(); + TreeScanResults listing = new TreeScanResults( + fs.listFiles(tree.getBasePath(), true)); + // files are checked, but not the directories. + tree.assertFieldsEquivalent("files", listing, tree.getFiles(), + listing.getFiles()); + int count = verifyFileStats(fs.listFiles(tree.getBasePath(), true)); + // assert that the content matches that of a tree walk + describe("verifying consistency with treewalk's files"); + TreeScanResults treeWalk = treeWalk(fs, tree.getBasePath()); + treeWalk.assertFieldsEquivalent("files", listing, + treeWalk.getFiles(), + listing.getFiles()); + assertEquals("Size of status list through next() calls", + count, + toListThroughNextCallsAlone( + fs.listFiles(tree.getBasePath(), true)).size()); + } + + @Test + public void testListFilesNoDir() throws Throwable { + describe("test the listFiles calls on a path which is not present"); + Path path = path("missing"); + try { + RemoteIterator iterator + = getFileSystem().listFiles(path, false); + fail("Expected an exception, got an iterator: " + iterator); + } catch (FileNotFoundException expected) { + // expected + } + try { + RemoteIterator iterator + = getFileSystem().listFiles(path, true); + fail("Expected an exception, got an iterator: " + iterator); + } catch (FileNotFoundException expected) { + // expected + } + } + + @Test + public void testLocatedStatusNoDir() throws Throwable { + describe("test the LocatedStatus call on a path which is not present"); + try { + RemoteIterator iterator + = getFileSystem().listLocatedStatus(path("missing")); + fail("Expected an exception, got an iterator: " + iterator); + } catch (FileNotFoundException expected) { + // expected + } + } + + @Test + public void testListStatusNoDir() throws Throwable { + describe("test the listStatus(path) call on a path which is not present"); + try { + getFileSystem().listStatus(path("missing")); + fail("Expected an exception"); + } catch (FileNotFoundException expected) { + // expected + } + } + + @Test + public void testListStatusFilteredNoDir() throws Throwable { + describe("test the listStatus(path, filter) call on a missing path"); + try { + getFileSystem().listStatus(path("missing"), ALL_PATHS); + fail("Expected an exception"); + } catch (FileNotFoundException expected) { + // expected + } + } + + @Test + public void testListStatusFilteredFile() throws Throwable { + describe("test the listStatus(path, filter) on a file"); + Path f = touchf("liststatus"); + assertEquals(0, getFileSystem().listStatus(f, NO_PATHS).length); + } + + @Test + public void testListStatusFile() throws Throwable { + describe("test the listStatus(path) on a file"); + Path f = touchf("liststatusfile"); + verifyStatusArrayMatchesFile(f, getFileSystem().listStatus(f)); + } + + @Test + public void testListFilesFile() throws Throwable { + describe("test the listStatus(path) on a file"); + Path f = touchf("listfilesfile"); + List statusList = toList( + getFileSystem().listFiles(f, false)); + assertEquals("size of file list returned", 1, statusList.size()); + assertIsNamedFile(f, statusList.get(0)); + List statusList2 = toListThroughNextCallsAlone( + getFileSystem().listFiles(f, false)); + assertEquals("size of file list returned through next() calls", + 1, statusList2.size()); + assertIsNamedFile(f, statusList2.get(0)); + } + + @Test + public void testListFilesFileRecursive() throws Throwable { + describe("test the listFiles(path, true) on a file"); + Path f = touchf("listfilesRecursive"); + List statusList = toList( + getFileSystem().listFiles(f, true)); + assertEquals("size of file list returned", 1, statusList.size()); + assertIsNamedFile(f, statusList.get(0)); + List statusList2 = toListThroughNextCallsAlone( + getFileSystem().listFiles(f, true)); + assertEquals("size of file list returned", 1, statusList2.size()); + } + + @Test + public void testListLocatedStatusFile() throws Throwable { + describe("test the listLocatedStatus(path) on a file"); + Path f = touchf("listLocatedStatus"); + List statusList = toList( + getFileSystem().listLocatedStatus(f)); + assertEquals("size of file list returned", 1, statusList.size()); + assertIsNamedFile(f, statusList.get(0)); + List statusList2 = toListThroughNextCallsAlone( + getFileSystem().listLocatedStatus(f)); + assertEquals("size of file list returned through next() calls", + 1, statusList2.size()); + } + + /** + * Verify a returned status array matches a single named file. + * @param f filename + * @param status status array + */ + private void verifyStatusArrayMatchesFile(Path f, FileStatus[] status) { + assertEquals(1, status.length); + FileStatus fileStatus = status[0]; + assertIsNamedFile(f, fileStatus); + } + + /** + * Verify that a file status refers to a file at the given path. + * @param f filename + * @param fileStatus status to validate + */ + private void assertIsNamedFile(Path f, FileStatus fileStatus) { + assertEquals("Wrong pathname in " + fileStatus, f, fileStatus.getPath()); + assertTrue("Not a file: " + fileStatus, fileStatus.isFile()); + } + + /** + * Touch a file with a given name; return the path. + * @param name name + * @return the full name + * @throws IOException IO Problems + */ + Path touchf(String name) throws IOException { + Path path = path(name); + ContractTestUtils.touch(getFileSystem(), path); + return path; + } + + /** + * Clear the test directory and add an empty subfolder. + * @return the path to the subdirectory + * @throws IOException + */ + private Path createDirWithEmptySubFolder() throws IOException { // remove the test directory FileSystem fs = getFileSystem(); - assertTrue(fs.delete(getContract().getTestPath(), true)); - + Path path = getContract().getTestPath(); + fs.delete(path, true); // create a - non-qualified - Path for a subdir - Path subfolder = getContract().getTestPath().suffix("/"+testPath.getName()); - assertTrue(fs.mkdirs(subfolder)); + Path subfolder = path.suffix('/' + this.methodName.getMethodName()); + mkdirs(subfolder); + return subfolder; + } - // assert empty ls on the empty dir - assertEquals("ls on an empty directory not of length 0", 0, - fs.listStatus(subfolder).length); + /** + * Create a test tree. + * @return the details about the created tree. The files and directories + * are those created under the path, not the base directory created. + * @throws IOException + */ + private TreeScanResults createTestTree() throws IOException { + return createSubdirs(getFileSystem(), path(methodName.getMethodName()), + TREE_DEPTH, TREE_WIDTH, TREE_FILES, TREE_FILESIZE); + } - // assert non-empty ls on parent dir - assertTrue("ls on a non-empty directory of length 0", - fs.listStatus(getContract().getTestPath()).length > 0); + /** + * Scan through a filestatus iterator, get the status of every element and + * verify core attributes. This should identify a situation where the + * attributes of a file/dir retrieved in a listing operation do not + * match the values individually retrieved. That is: the metadata returned + * in a directory listing is different from the explicitly retrieved data. + * + * Timestamps are not compared. + * @param results iterator to scan + * @return the number of entries in the result set + * @throws IOException any IO problem + */ + private int verifyFileStats(RemoteIterator results) + throws IOException { + describe("verifying file statuses"); + int count = 0; + while (results.hasNext()) { + count++; + LocatedFileStatus next = results.next(); + FileStatus fileStatus = getFileSystem().getFileStatus(next.getPath()); + assertEquals("isDirectory", fileStatus.isDirectory(), next.isDirectory()); + assertEquals("isFile", fileStatus.isFile(), next.isFile()); + assertEquals("getLen", fileStatus.getLen(), next.getLen()); + assertEquals("getOwner", fileStatus.getOwner(), next.getOwner()); + } + return count; + } + + + @Test + public void testListStatusFiltering() throws Throwable { + describe("Call listStatus() against paths and directories with filtering"); + Path file1 = touchf("file-1.txt"); + touchf("file-2.txt"); + Path parent = file1.getParent(); + FileStatus[] result; + + verifyListStatus(0, parent, NO_PATHS); + verifyListStatus(2, parent, ALL_PATHS); + + MatchesNameFilter file1Filter = new MatchesNameFilter("file-1.txt"); + result = verifyListStatus(1, parent, file1Filter); + assertEquals(file1, result[0].getPath()); + + verifyListStatus(0, file1, NO_PATHS); + result = verifyListStatus(1, file1, ALL_PATHS); + assertEquals(file1, result[0].getPath()); + result = verifyListStatus(1, file1, file1Filter); + assertEquals(file1, result[0].getPath()); + + // empty subdirectory + Path subdir = path("subdir"); + mkdirs(subdir); + verifyListStatus(0, subdir, NO_PATHS); + verifyListStatus(0, subdir, ALL_PATHS); + verifyListStatus(0, subdir, new MatchesNameFilter("subdir")); + } + + @Test + public void testListLocatedStatusFiltering() throws Throwable { + describe("Call listLocatedStatus() with filtering"); + describe("Call listStatus() against paths and directories with filtering"); + Path file1 = touchf("file-1.txt"); + Path file2 = touchf("file-2.txt"); + Path parent = file1.getParent(); + FileSystem fs = getFileSystem(); + + touch(fs, file1); + touch(fs, file2); + // this is not closed: ignore any IDE warnings. + ExtendedFilterFS xfs = new ExtendedFilterFS(fs); + List result; + + verifyListStatus(0, parent, NO_PATHS); + verifyListStatus(2, parent, ALL_PATHS); + + MatchesNameFilter file1Filter = new MatchesNameFilter("file-1.txt"); + result = verifyListLocatedStatus(xfs, 1, parent, file1Filter); + assertEquals(file1, result.get(0).getPath()); + + verifyListLocatedStatus(xfs, 0, file1, NO_PATHS); + verifyListLocatedStatus(xfs, 1, file1, ALL_PATHS); + assertEquals(file1, result.get(0).getPath()); + verifyListLocatedStatus(xfs, 1, file1, file1Filter); + assertEquals(file1, result.get(0).getPath()); + verifyListLocatedStatusNextCalls(xfs, 1, file1, file1Filter); + + // empty subdirectory + Path subdir = path("subdir"); + mkdirs(subdir); + verifyListLocatedStatus(xfs, 0, subdir, NO_PATHS); + verifyListLocatedStatus(xfs, 0, subdir, ALL_PATHS); + verifyListLocatedStatusNextCalls(xfs, 0, subdir, ALL_PATHS); + verifyListLocatedStatus(xfs, 0, subdir, new MatchesNameFilter("subdir")); + } + + /** + * Execute {@link FileSystem#listStatus(Path, PathFilter)}, + * verify the length of the result, then return the listing. + * @param expected expected length + * @param path path to list + * @param filter filter to apply + * @return the listing + * @throws IOException IO Problems + */ + private FileStatus[] verifyListStatus(int expected, + Path path, + PathFilter filter) throws IOException { + FileStatus[] result = getFileSystem().listStatus(path, filter); + assertEquals("length of listStatus(" + path + ", " + filter + " )", + expected, result.length); + return result; + } + + /** + * Execute {@link FileSystem#listLocatedStatus(Path, PathFilter)}, + * generate a list from the iterator, verify the length of the list returned + * and then return it. + * @param expected expected length + * @param path path to list + * @param filter filter to apply + * @return the listing + * @throws IOException IO Problems + */ + private List verifyListLocatedStatus(ExtendedFilterFS xfs, + int expected, + Path path, + PathFilter filter) throws IOException { + RemoteIterator it = xfs.listLocatedStatus(path, filter); + List result = toList(it); + assertEquals("length of listLocatedStatus(" + path + ", " + filter + " )", + expected, result.size()); + return result; + } + + /** + * Execute {@link FileSystem#listLocatedStatus(Path, PathFilter)}, + * generate a list from the iterator, verify the length of the list returned + * and then return it. + * Uses {@link ContractTestUtils#toListThroughNextCallsAlone(RemoteIterator)} + * to stress the iteration process. + * @param expected expected length + * @param path path to list + * @param filter filter to apply + * @return the listing + * @throws IOException IO Problems + */ + private List verifyListLocatedStatusNextCalls( + ExtendedFilterFS xfs, + int expected, + Path path, + PathFilter filter) throws IOException { + RemoteIterator it = xfs.listLocatedStatus(path, filter); + List result = toListThroughNextCallsAlone(it); + assertEquals("length of listLocatedStatus(" + path + ", " + filter + " )", + expected, result.size()); + return result; + } + + private static final PathFilter ALL_PATHS = new AllPathsFilter(); + private static final PathFilter NO_PATHS = new NoPathsFilter(); + + /** + * Accept everything. + */ + private static final class AllPathsFilter implements PathFilter { + @Override + public boolean accept(Path path) { + return true; + } + } + + /** + * Accept nothing. + */ + private static final class NoPathsFilter implements PathFilter { + @Override + public boolean accept(Path path) { + return false; + } + } + + /** + * Path filter which only expects paths whose final name element + * equals the {@code match} field. + */ + private static final class MatchesNameFilter implements PathFilter { + private final String match; + + MatchesNameFilter(String match) { + this.match = match; + } + + @Override + public boolean accept(Path path) { + return match.equals(path.getName()); + } + } + + /** + * A filesystem filter which exposes the protected method + * {@link #listLocatedStatus(Path, PathFilter)}. + */ + protected static final class ExtendedFilterFS extends FilterFileSystem { + public ExtendedFilterFS(FileSystem fs) { + super(fs); + } + + @Override + public RemoteIterator listLocatedStatus(Path f, + PathFilter filter) + throws IOException { + return super.listLocatedStatus(f, filter); + } } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java index 7273945efc6..cf3ede5c95c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java @@ -19,16 +19,21 @@ package org.apache.hadoop.fs.contract; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.util.List; + import org.apache.hadoop.fs.FileStatus; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.toList; +import static org.apache.hadoop.fs.contract.ContractTestUtils.treeWalk; /** * This class does things to the root directory. @@ -109,6 +114,8 @@ public abstract class AbstractContractRootDirectoryTest extends AbstractFSContra @Test public void testCreateFileOverRoot() throws Throwable { + //extra sanity checks here to avoid support calls about complete loss of data + skipIfUnsupported(TEST_ROOT_TESTS_ENABLED); Path root = new Path("/"); byte[] dataset = dataset(1024, ' ', 'z'); try { @@ -123,7 +130,6 @@ public abstract class AbstractContractRootDirectoryTest extends AbstractFSContra @Test public void testListEmptyRootDirectory() throws IOException { - //extra sanity checks here to avoid support calls about complete loss of data skipIfUnsupported(TEST_ROOT_TESTS_ENABLED); FileSystem fs = getFileSystem(); Path root = new Path("/"); @@ -133,5 +139,41 @@ public abstract class AbstractContractRootDirectoryTest extends AbstractFSContra } assertEquals("listStatus on empty root-directory returned a non-empty list", 0, fs.listStatus(root).length); + assertFalse("listFiles(/, false).hasNext", + fs.listFiles(root, false).hasNext()); + assertFalse("listFiles(/, true).hasNext", + fs.listFiles(root, true).hasNext()); + assertFalse("listLocatedStatus(/).hasNext", + fs.listLocatedStatus(root).hasNext()); + assertIsDirectory(root); } + + @Test + public void testSimpleRootListing() throws IOException { + describe("test the nonrecursive root listing calls"); + FileSystem fs = getFileSystem(); + Path root = new Path("/"); + FileStatus[] statuses = fs.listStatus(root); + List locatedStatusList = toList( + fs.listLocatedStatus(root)); + assertEquals(statuses.length, locatedStatusList.size()); + List fileList = toList(fs.listFiles(root, false)); + assertTrue(fileList.size() <= statuses.length); + } + + @Test + public void testRecursiveRootListing() throws IOException { + describe("test a recursive root directory listing"); + FileSystem fs = getFileSystem(); + Path root = new Path("/"); + ContractTestUtils.TreeScanResults + listing = new ContractTestUtils.TreeScanResults( + fs.listFiles(root, true)); + describe("verifying consistency with treewalk's files"); + ContractTestUtils.TreeScanResults treeWalk = treeWalk(fs, root); + treeWalk.assertFieldsEquivalent("files", listing, + treeWalk.getFiles(), + listing.getFiles()); + } + } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java index 03bf2aa7e76..baea968b67a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractFSContractTestBase.java @@ -25,8 +25,10 @@ import org.apache.hadoop.fs.Path; import org.junit.After; import org.junit.Assert; import org.junit.Before; +import org.junit.BeforeClass; import org.junit.Rule; import org.junit.internal.AssumptionViolatedException; +import org.junit.rules.TestName; import org.junit.rules.Timeout; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,7 +40,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.cleanup; import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; /** - * This is the base class for all the contract tests + * This is the base class for all the contract tests. */ public abstract class AbstractFSContractTestBase extends Assert implements ContractOptions { @@ -47,39 +49,48 @@ public abstract class AbstractFSContractTestBase extends Assert LoggerFactory.getLogger(AbstractFSContractTestBase.class); /** - * Length of files to work with: {@value} + * Length of files to work with: {@value}. */ public static final int TEST_FILE_LEN = 1024; /** - * standard test timeout: {@value} + * standard test timeout: {@value}. */ public static final int DEFAULT_TEST_TIMEOUT = 180 * 1000; /** - * The FS contract used for these tests + * The FS contract used for these tests. */ private AbstractFSContract contract; /** - * The test filesystem extracted from it + * The test filesystem extracted from it. */ private FileSystem fileSystem; /** - * The path for tests + * The path for tests. */ private Path testPath; + @Rule + public TestName methodName = new TestName(); + + + @BeforeClass + public static void nameTestThread() { + Thread.currentThread().setName("JUnit"); + } + /** - * This must be implemented by all instantiated test cases + * This must be implemented by all instantiated test cases. * -provide the FS contract * @return the FS contract */ protected abstract AbstractFSContract createContract(Configuration conf); /** - * Get the contract + * Get the contract. * @return the contract, which will be non-null once the setup operation has * succeeded */ @@ -88,7 +99,7 @@ public abstract class AbstractFSContractTestBase extends Assert } /** - * Get the filesystem created in startup + * Get the filesystem created in startup. * @return the filesystem to use for tests */ public FileSystem getFileSystem() { @@ -96,7 +107,7 @@ public abstract class AbstractFSContractTestBase extends Assert } /** - * Get the log of the base class + * Get the log of the base class. * @return a logger */ public static Logger getLog() { @@ -104,7 +115,7 @@ public abstract class AbstractFSContractTestBase extends Assert } /** - * Skip a test if a feature is unsupported in this FS + * Skip a test if a feature is unsupported in this FS. * @param feature feature to look for * @throws IOException IO problem */ @@ -141,13 +152,13 @@ public abstract class AbstractFSContractTestBase extends Assert } /** - * Set the timeout for every test + * Set the timeout for every test. */ @Rule public Timeout testTimeout = new Timeout(getTestTimeoutMillis()); /** - * Option for tests to override the default timeout value + * Option for tests to override the default timeout value. * @return the current test timeout */ protected int getTestTimeoutMillis() { @@ -156,11 +167,12 @@ public abstract class AbstractFSContractTestBase extends Assert /** - * Setup: create the contract then init it + * Setup: create the contract then init it. * @throws Exception on any failure */ @Before public void setup() throws Exception { + LOG.debug("== Setup =="); contract = createContract(createConfiguration()); contract.init(); //skip tests if they aren't enabled @@ -179,19 +191,22 @@ public abstract class AbstractFSContractTestBase extends Assert //create the test path testPath = getContract().getTestPath(); mkdirs(testPath); + LOG.debug("== Setup complete =="); } /** - * Teardown + * Teardown. * @throws Exception on any failure */ @After public void teardown() throws Exception { + LOG.debug("== Teardown =="); deleteTestDirInTeardown(); + LOG.debug("== Teardown complete =="); } /** - * Delete the test dir in the per-test teardown + * Delete the test dir in the per-test teardown. * @throws IOException */ protected void deleteTestDirInTeardown() throws IOException { @@ -200,7 +215,7 @@ public abstract class AbstractFSContractTestBase extends Assert /** * Create a path under the test path provided by - * the FS contract + * the FS contract. * @param filepath path string in * @return a path qualified by the test filesystem * @throws IOException IO problems @@ -212,7 +227,7 @@ public abstract class AbstractFSContractTestBase extends Assert /** * Take a simple path like "/something" and turn it into - * a qualified path against the test FS + * a qualified path against the test FS. * @param filepath path string in * @return a path qualified by the test filesystem * @throws IOException IO problems @@ -222,7 +237,7 @@ public abstract class AbstractFSContractTestBase extends Assert } /** - * List a path in the test FS + * List a path in the test FS. * @param path path to list * @return the contents of the path/dir * @throws IOException IO problems @@ -262,7 +277,7 @@ public abstract class AbstractFSContractTestBase extends Assert } /** - * Handle expected exceptions through logging and/or other actions + * Handle expected exceptions through logging and/or other actions. * @param e exception raised. */ protected void handleExpectedException(Exception e) { @@ -270,7 +285,7 @@ public abstract class AbstractFSContractTestBase extends Assert } /** - * assert that a path exists + * assert that a path exists. * @param message message to use in an assertion * @param path path to probe * @throws IOException IO problems @@ -280,7 +295,7 @@ public abstract class AbstractFSContractTestBase extends Assert } /** - * assert that a path does not + * Assert that a path does not exist. * @param message message to use in an assertion * @param path path to probe * @throws IOException IO problems @@ -324,7 +339,7 @@ public abstract class AbstractFSContractTestBase extends Assert } /** - * Assert that a delete succeeded + * Assert that a delete succeeded. * @param path path to delete * @param recursive recursive flag * @throws IOException IO problems @@ -336,7 +351,7 @@ public abstract class AbstractFSContractTestBase extends Assert /** * Assert that the result value == -1; which implies - * that a read was successful + * that a read was successful. * @param text text to include in a message (usually the operation) * @param result read result to validate */ diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java index 20ba0752aaa..b8ca6b28f94 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java @@ -42,12 +42,13 @@ import java.util.Collection; import java.util.HashSet; import java.util.List; import java.util.Locale; +import java.util.NoSuchElementException; import java.util.Properties; import java.util.Set; import java.util.UUID; /** - * Utilities used across test cases + * Utilities used across test cases. */ public class ContractTestUtils extends Assert { @@ -64,7 +65,7 @@ public class ContractTestUtils extends Assert { public static final int DEFAULT_IO_CHUNK_MODULUS_SIZE = 128; /** - * Assert that a property in the property set matches the expected value + * Assert that a property in the property set matches the expected value. * @param props property set * @param key property name * @param expected expected value. If null, the property must not be in the set @@ -170,11 +171,10 @@ public class ContractTestUtils extends Assert { */ public static byte[] readDataset(FileSystem fs, Path path, int len) throws IOException { - FSDataInputStream in = fs.open(path); byte[] dest = new byte[len]; int offset =0; int nread = 0; - try { + try (FSDataInputStream in = fs.open(path)) { while (nread < len) { int nbytes = in.read(dest, offset + nread, len - nread); if (nbytes < 0) { @@ -182,14 +182,12 @@ public class ContractTestUtils extends Assert { } nread += nbytes; } - } finally { - in.close(); } return dest; } /** - * Read a file, verify its length and contents match the expected array + * Read a file, verify its length and contents match the expected array. * @param fs filesystem * @param path path to file * @param original original dataset @@ -208,7 +206,7 @@ public class ContractTestUtils extends Assert { /** * Verify that the read at a specific offset in a stream - * matches that expected + * matches that expected. * @param stm stream * @param fileContents original file contents * @param seekOff seek offset @@ -262,9 +260,9 @@ public class ContractTestUtils extends Assert { byte actual = received[i]; byte expected = original[i]; String letter = toChar(actual); - String line = String.format("[%04d] %2x %s\n", i, actual, letter); + String line = String.format("[%04d] %2x %s%n", i, actual, letter); if (expected != actual) { - line = String.format("[%04d] %2x %s -expected %2x %s\n", + line = String.format("[%04d] %2x %s -expected %2x %s%n", i, actual, letter, @@ -293,7 +291,7 @@ public class ContractTestUtils extends Assert { } /** - * Convert a buffer to a string, character by character + * Convert a buffer to a string, character by character. * @param buffer input bytes * @return a string conversion */ @@ -316,7 +314,7 @@ public class ContractTestUtils extends Assert { } /** - * Cleanup at the end of a test run + * Cleanup at the end of a test run. * @param action action triggering the operation (for use in logging) * @param fileSystem filesystem to work with. May be null * @param cleanupPath path to delete as a string @@ -333,7 +331,7 @@ public class ContractTestUtils extends Assert { } /** - * Cleanup at the end of a test run + * Cleanup at the end of a test run. * @param action action triggering the operation (for use in logging) * @param fileSystem filesystem to work with. May be null * @param path path to delete @@ -403,7 +401,7 @@ public class ContractTestUtils extends Assert { /** * downgrade a failure to a message and a warning, then an - * exception for the Junit test runner to mark as failed + * exception for the Junit test runner to mark as failed. * @param message text message * @param failure what failed * @throws AssumptionViolatedException always @@ -416,7 +414,7 @@ public class ContractTestUtils extends Assert { } /** - * report an overridden test as unsupported + * report an overridden test as unsupported. * @param message message to use in the text * @throws AssumptionViolatedException always */ @@ -425,7 +423,7 @@ public class ContractTestUtils extends Assert { } /** - * report a test has been skipped for some reason + * report a test has been skipped for some reason. * @param message message to use in the text * @throws AssumptionViolatedException always */ @@ -435,7 +433,7 @@ public class ContractTestUtils extends Assert { } /** - * Fail with an exception that was received + * Fail with an exception that was received. * @param text text to use in the exception * @param thrown a (possibly null) throwable to init the cause with * @throws AssertionError with the text and throwable -always @@ -445,7 +443,7 @@ public class ContractTestUtils extends Assert { } /** - * Make an assertion about the length of a file + * Make an assertion about the length of a file. * @param fs filesystem * @param path path of the file * @param expected expected length @@ -461,7 +459,7 @@ public class ContractTestUtils extends Assert { } /** - * Assert that a path refers to a directory + * Assert that a path refers to a directory. * @param fs filesystem * @param path path of the directory * @throws IOException on File IO problems @@ -473,7 +471,7 @@ public class ContractTestUtils extends Assert { } /** - * Assert that a path refers to a directory + * Assert that a path refers to a directory. * @param fileStatus stats to check */ public static void assertIsDirectory(FileStatus fileStatus) { @@ -483,7 +481,7 @@ public class ContractTestUtils extends Assert { /** * Write the text to a file, returning the converted byte array - * for use in validating the round trip + * for use in validating the round trip. * @param fs filesystem * @param path path of file * @param text text to write @@ -504,7 +502,7 @@ public class ContractTestUtils extends Assert { } /** - * Create a file + * Create a file. * @param fs filesystem * @param path path to write * @param overwrite overwrite flag @@ -527,7 +525,7 @@ public class ContractTestUtils extends Assert { } /** - * Touch a file + * Touch a file. * @param fs filesystem * @param path path * @throws IOException IO problems @@ -540,7 +538,7 @@ public class ContractTestUtils extends Assert { /** * Delete a file/dir and assert that delete() returned true * and that the path no longer exists. This variant rejects - * all operations on root directories + * all operations on root directories. * @param fs filesystem * @param file path to delete * @param recursive flag to enable recursive delete @@ -575,7 +573,7 @@ public class ContractTestUtils extends Assert { } /** - * Read in "length" bytes, convert to an ascii string + * Read in "length" bytes, convert to an ascii string. * @param fs filesystem * @param path path to read * @param length #of bytes to read. @@ -593,7 +591,8 @@ public class ContractTestUtils extends Assert { } /** - * Take an array of filestats and convert to a string (prefixed w/ a [01] counter + * Take an array of filestats and convert to a string + * (prefixed with/ a [%02d] counter). * @param stats array of stats * @param separator separator after every entry * @return a stringified set @@ -607,7 +606,7 @@ public class ContractTestUtils extends Assert { } /** - * List a directory + * List a directory. * @param fileSystem FS * @param path path * @return a directory listing or failure message @@ -631,7 +630,8 @@ public class ContractTestUtils extends Assert { } public static String dumpStats(String pathname, FileStatus[] stats) { - return pathname + fileStatsToString(stats, "\n"); + return pathname + ' ' + fileStatsToString(stats, + System.lineSeparator()); } /** @@ -641,8 +641,8 @@ public class ContractTestUtils extends Assert { * @param filename name of the file * @throws IOException IO problems during file operations */ - public static void assertIsFile(FileSystem fileSystem, Path filename) throws - IOException { + public static void assertIsFile(FileSystem fileSystem, Path filename) + throws IOException { assertPathExists(fileSystem, "Expected file", filename); FileStatus status = fileSystem.getFileStatus(filename); assertIsFile(filename, status); @@ -664,7 +664,7 @@ public class ContractTestUtils extends Assert { /** * Create a dataset for use in the tests; all data is in the range - * base to (base+modulo-1) inclusive + * base to (base+modulo-1) inclusive. * @param len length of data * @param base base of the data * @param modulo the modulo @@ -680,7 +680,7 @@ public class ContractTestUtils extends Assert { /** * Assert that a path exists -but make no assertions as to the - * type of that entry + * type of that entry. * * @param fileSystem filesystem to examine * @param message message to include in the assertion failure message @@ -699,7 +699,7 @@ public class ContractTestUtils extends Assert { } /** - * Assert that a path does not exist + * Assert that a path does not exist. * * @param fileSystem filesystem to examine * @param message message to include in the assertion failure message @@ -719,7 +719,7 @@ public class ContractTestUtils extends Assert { } /** - * Assert that a FileSystem.listStatus on a dir finds the subdir/child entry + * Assert that a FileSystem.listStatus on a dir finds the subdir/child entry. * @param fs filesystem * @param dir directory to scan * @param subdir full path to look for @@ -732,7 +732,7 @@ public class ContractTestUtils extends Assert { boolean found = false; StringBuilder builder = new StringBuilder(); for (FileStatus stat : stats) { - builder.append(stat.toString()).append('\n'); + builder.append(stat.toString()).append(System.lineSeparator()); if (stat.getPath().equals(subdir)) { found = true; } @@ -751,7 +751,7 @@ public class ContractTestUtils extends Assert { } /** - * compare content of file operations using a double byte array + * compare content of file operations using a double byte array. * @param concat concatenated files * @param bytes bytes */ @@ -845,9 +845,8 @@ public class ContractTestUtils extends Assert { testBuffer[i] = (byte) (i % modulus); } - final OutputStream outputStream = fs.create(path, false); long bytesWritten = 0; - try { + try (OutputStream outputStream = fs.create(path, false)) { while (bytesWritten < size) { final long diff = size - bytesWritten; if (diff < testBuffer.length) { @@ -860,8 +859,6 @@ public class ContractTestUtils extends Assert { } return bytesWritten; - } finally { - outputStream.close(); } } @@ -1013,6 +1010,24 @@ public class ContractTestUtils extends Assert { return leftSet.containsAll(right) && rightSet.containsAll(left); } + /** + * Take a collection of paths and build a string from them: useful + * for assertion messages. + * @param paths paths to stringify + * @return a string representation + */ + public static String pathsToString(Collection paths) { + StringBuilder builder = new StringBuilder(paths.size() * 100); + String nl = System.lineSeparator(); + builder.append(nl); + for (Path path : paths) { + builder.append(" \"").append(path.toString()) + .append("\"").append(nl); + } + builder.append("]"); + return builder.toString(); + } + /** * Predicate to determine if two lists are equivalent, that is, they * contain the same entries. @@ -1060,6 +1075,52 @@ public class ContractTestUtils extends Assert { return dirsAndFiles; } + /** + * Convert a remote iterator over file status results into a list. + * The utility equivalents in commons collection and guava cannot be + * used here, as this is a different interface, one whose operators + * can throw IOEs. + * @param iterator input iterator + * @return the status entries as a list. + * @throws IOException + */ + public static List toList( + RemoteIterator iterator) throws IOException { + ArrayList list = new ArrayList<>(); + while (iterator.hasNext()) { + list.add(iterator.next()); + } + return list; + } + + /** + * Convert a remote iterator over file status results into a list. + * This uses {@link RemoteIterator#next()} calls only, expecting + * a raised {@link NoSuchElementException} exception to indicate that + * the end of the listing has been reached. This iteration strategy is + * designed to verify that the implementation of the remote iterator + * generates results and terminates consistently with the {@code hasNext/next} + * iteration. More succinctly "verifies that the {@code next()} operator + * isn't relying on {@code hasNext()} to always be called during an iteration. + * @param iterator input iterator + * @return the status entries as a list. + * @throws IOException IO problems + */ + @SuppressWarnings("InfiniteLoopStatement") + public static List toListThroughNextCallsAlone( + RemoteIterator iterator) throws IOException { + ArrayList list = new ArrayList<>(); + try { + while (true) { + list.add(iterator.next()); + } + } catch (NoSuchElementException expected) { + // ignored + } + return list; + } + + /** * Results of recursive directory creation/scan operations. */ @@ -1101,6 +1162,16 @@ public class ContractTestUtils extends Assert { } } + /** + * Construct results from an iterable collection of statistics. + * @param stats statistics source. Must not be null. + */ + public TreeScanResults(Iterable stats) { + for (FileStatus stat : stats) { + add(stat); + } + } + /** * Add all paths in the other set of results to this instance. * @param that the other instance @@ -1140,6 +1211,35 @@ public class ContractTestUtils extends Assert { getFileCount() == 1 ? "" : "s"); } + /** + * Equality check compares files and directory counts. + * As these are non-final fields, this class cannot be used in + * hash tables. + * @param o other object + * @return true iff the file and dir count match. + */ + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + TreeScanResults that = (TreeScanResults) o; + return getFileCount() == that.getFileCount() && + getDirCount() == that.getDirCount(); + } + + /** + * This is a spurious hash code subclass to keep findbugs quiet. + * @return the base {@link Object#hashCode()} + */ + @Override + public int hashCode() { + return super.hashCode(); + } + /** * Assert that the state of a listing has the specific number of files, * directories and other entries. The error text will include @@ -1166,7 +1266,6 @@ public class ContractTestUtils extends Assert { * @param that the other entry */ public void assertEquivalent(TreeScanResults that) { - String details = "this= " + this + "; that=" + that; assertFieldsEquivalent("files", that, files, that.files); assertFieldsEquivalent("directories", that, directories, that.directories); @@ -1183,13 +1282,17 @@ public class ContractTestUtils extends Assert { public void assertFieldsEquivalent(String fieldname, TreeScanResults that, List ours, List theirs) { - assertFalse("Duplicate " + files + " in " + this, + String ourList = pathsToString(ours); + String theirList = pathsToString(theirs); + assertFalse("Duplicate " + fieldname + " in " + this + +": " + ourList, containsDuplicates(ours)); - assertFalse("Duplicate " + files + " in other " + that, + assertFalse("Duplicate " + fieldname + " in other " + that + + ": " + theirList, containsDuplicates(theirs)); - assertTrue(fieldname + " mismatch: between {" + this + "}" + - " and {" + that + "}", - collectionsEquivalent(files, that.files)); + assertTrue(fieldname + " mismatch: between " + ourList + + " and " + theirList, + collectionsEquivalent(ours, theirs)); } public List getFiles() {