HADOOP-9817. FileSystem#globStatus and FileContext#globStatus need to work with symlinks. (Colin Patrick McCabe via Andrew Wang)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1510807 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Wang 2013-08-05 23:28:14 +00:00
parent cc8c703c57
commit 95f9a515aa
10 changed files with 539 additions and 279 deletions

View File

@ -314,6 +314,9 @@ Release 2.3.0 - UNRELEASED
HADOOP-9761. ViewFileSystem#rename fails when using DistributedFileSystem.
(Andrew Wang via Colin Patrick McCabe)
HADOOP-9817. FileSystem#globStatus and FileContext#globStatus need to work
with symlinks. (Colin Patrick McCabe via Andrew Wang)
Release 2.1.1-beta - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -258,7 +258,7 @@ public final class FileContext {
* Hence this method is not called makeAbsolute() and
* has been deliberately declared private.
*/
private Path fixRelativePart(Path p) {
Path fixRelativePart(Path p) {
if (p.isUriPathAbsolute()) {
return p;
} else {
@ -1905,7 +1905,7 @@ public final class FileContext {
public FileStatus[] globStatus(Path pathPattern)
throws AccessControlException, UnsupportedFileSystemException,
IOException {
return globStatus(pathPattern, DEFAULT_FILTER);
return new Globber(FileContext.this, pathPattern, DEFAULT_FILTER).glob();
}
/**
@ -1934,154 +1934,7 @@ public final class FileContext {
public FileStatus[] globStatus(final Path pathPattern,
final PathFilter filter) throws AccessControlException,
UnsupportedFileSystemException, IOException {
URI uri = getFSofPath(fixRelativePart(pathPattern)).getUri();
String filename = pathPattern.toUri().getPath();
List<String> filePatterns = GlobExpander.expand(filename);
if (filePatterns.size() == 1) {
Path absPathPattern = fixRelativePart(pathPattern);
return globStatusInternal(uri, new Path(absPathPattern.toUri()
.getPath()), filter);
} else {
List<FileStatus> results = new ArrayList<FileStatus>();
for (String iFilePattern : filePatterns) {
Path iAbsFilePattern = fixRelativePart(new Path(iFilePattern));
FileStatus[] files = globStatusInternal(uri, iAbsFilePattern, filter);
for (FileStatus file : files) {
results.add(file);
}
}
return results.toArray(new FileStatus[results.size()]);
}
}
/**
*
* @param uri for all the inPathPattern
* @param inPathPattern - without the scheme & authority (take from uri)
* @param filter
*
* @return an array of FileStatus objects
*
* @throws AccessControlException If access is denied
* @throws IOException If an I/O error occurred
*/
private FileStatus[] globStatusInternal(final URI uri,
final Path inPathPattern, final PathFilter filter)
throws AccessControlException, IOException
{
Path[] parents = new Path[1];
int level = 0;
assert(inPathPattern.toUri().getScheme() == null &&
inPathPattern.toUri().getAuthority() == null &&
inPathPattern.isUriPathAbsolute());
String filename = inPathPattern.toUri().getPath();
// path has only zero component
if (filename.isEmpty() || Path.SEPARATOR.equals(filename)) {
Path p = inPathPattern.makeQualified(uri, null);
return getFileStatus(new Path[]{p});
}
// path has at least one component
String[] components = filename.split(Path.SEPARATOR);
// Path is absolute, first component is "/" hence first component
// is the uri root
parents[0] = new Path(new Path(uri), new Path("/"));
level = 1;
// glob the paths that match the parent path, ie. [0, components.length-1]
boolean[] hasGlob = new boolean[]{false};
Path[] relParentPaths =
globPathsLevel(parents, components, level, hasGlob);
FileStatus[] results;
if (relParentPaths == null || relParentPaths.length == 0) {
results = null;
} else {
// fix the pathes to be abs
Path[] parentPaths = new Path [relParentPaths.length];
for(int i=0; i<relParentPaths.length; i++) {
parentPaths[i] = relParentPaths[i].makeQualified(uri, null);
}
// Now work on the last component of the path
GlobFilter fp =
new GlobFilter(components[components.length - 1], filter);
if (fp.hasPattern()) { // last component has a pattern
// list parent directories and then glob the results
try {
results = listStatus(parentPaths, fp);
} catch (FileNotFoundException e) {
results = null;
}
hasGlob[0] = true;
} else { // last component does not have a pattern
// get all the path names
ArrayList<Path> filteredPaths =
new ArrayList<Path>(parentPaths.length);
for (int i = 0; i < parentPaths.length; i++) {
parentPaths[i] = new Path(parentPaths[i],
components[components.length - 1]);
if (fp.accept(parentPaths[i])) {
filteredPaths.add(parentPaths[i]);
}
}
// get all their statuses
results = getFileStatus(
filteredPaths.toArray(new Path[filteredPaths.size()]));
}
}
// Decide if the pathPattern contains a glob or not
if (results == null) {
if (hasGlob[0]) {
results = new FileStatus[0];
}
} else {
if (results.length == 0) {
if (!hasGlob[0]) {
results = null;
}
} else {
Arrays.sort(results);
}
}
return results;
}
/*
* For a path of N components, return a list of paths that match the
* components [<code>level</code>, <code>N-1</code>].
*/
private Path[] globPathsLevel(Path[] parents, String[] filePattern,
int level, boolean[] hasGlob) throws AccessControlException,
FileNotFoundException, IOException {
if (level == filePattern.length - 1) {
return parents;
}
if (parents == null || parents.length == 0) {
return null;
}
GlobFilter fp = new GlobFilter(filePattern[level]);
if (fp.hasPattern()) {
try {
parents = FileUtil.stat2Paths(listStatus(parents, fp));
} catch (FileNotFoundException e) {
parents = null;
}
hasGlob[0] = true;
} else {
for (int i = 0; i < parents.length; i++) {
parents[i] = new Path(parents[i], filePattern[level]);
}
}
return globPathsLevel(parents, filePattern, level + 1, hasGlob);
return new Globber(FileContext.this, pathPattern, filter).glob();
}
/**

View File

@ -1619,7 +1619,7 @@ public abstract class FileSystem extends Configured implements Closeable {
* @throws IOException
*/
public FileStatus[] globStatus(Path pathPattern) throws IOException {
return globStatus(pathPattern, DEFAULT_FILTER);
return new Globber(this, pathPattern, DEFAULT_FILTER).glob();
}
/**
@ -1637,126 +1637,7 @@ public abstract class FileSystem extends Configured implements Closeable {
*/
public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
throws IOException {
String filename = pathPattern.toUri().getPath();
List<FileStatus> allMatches = null;
List<String> filePatterns = GlobExpander.expand(filename);
for (String filePattern : filePatterns) {
Path path = new Path(filePattern.isEmpty() ? Path.CUR_DIR : filePattern);
List<FileStatus> matches = globStatusInternal(path, filter);
if (matches != null) {
if (allMatches == null) {
allMatches = matches;
} else {
allMatches.addAll(matches);
}
}
}
FileStatus[] results = null;
if (allMatches != null) {
results = allMatches.toArray(new FileStatus[allMatches.size()]);
} else if (filePatterns.size() > 1) {
// no matches with multiple expansions is a non-matching glob
results = new FileStatus[0];
}
return results;
}
// sort gripes because FileStatus Comparable isn't parameterized...
@SuppressWarnings("unchecked")
private List<FileStatus> globStatusInternal(Path pathPattern,
PathFilter filter) throws IOException {
boolean patternHasGlob = false; // pathPattern has any globs
List<FileStatus> matches = new ArrayList<FileStatus>();
// determine starting point
int level = 0;
String baseDir = Path.CUR_DIR;
if (pathPattern.isAbsolute()) {
level = 1; // need to skip empty item at beginning of split list
baseDir = Path.SEPARATOR;
}
// parse components and determine if it's a glob
String[] components = null;
GlobFilter[] filters = null;
String filename = pathPattern.toUri().getPath();
if (!filename.isEmpty() && !Path.SEPARATOR.equals(filename)) {
components = filename.split(Path.SEPARATOR);
filters = new GlobFilter[components.length];
for (int i=level; i < components.length; i++) {
filters[i] = new GlobFilter(components[i]);
patternHasGlob |= filters[i].hasPattern();
}
if (!patternHasGlob) {
baseDir = unquotePathComponent(filename);
components = null; // short through to filter check
}
}
// seed the parent directory path, return if it doesn't exist
try {
matches.add(getFileStatus(new Path(baseDir)));
} catch (FileNotFoundException e) {
return patternHasGlob ? matches : null;
}
// skip if there are no components other than the basedir
if (components != null) {
// iterate through each path component
for (int i=level; (i < components.length) && !matches.isEmpty(); i++) {
List<FileStatus> children = new ArrayList<FileStatus>();
for (FileStatus match : matches) {
// don't look for children in a file matched by a glob
if (!match.isDirectory()) {
continue;
}
try {
if (filters[i].hasPattern()) {
// get all children matching the filter
FileStatus[] statuses = listStatus(match.getPath(), filters[i]);
children.addAll(Arrays.asList(statuses));
} else {
// the component does not have a pattern
String component = unquotePathComponent(components[i]);
Path child = new Path(match.getPath(), component);
children.add(getFileStatus(child));
}
} catch (FileNotFoundException e) {
// don't care
}
}
matches = children;
}
}
// remove anything that didn't match the filter
if (!matches.isEmpty()) {
Iterator<FileStatus> iter = matches.iterator();
while (iter.hasNext()) {
if (!filter.accept(iter.next().getPath())) {
iter.remove();
}
}
}
// no final paths, if there were any globs return empty list
if (matches.isEmpty()) {
return patternHasGlob ? matches : null;
}
Collections.sort(matches);
return matches;
}
/**
* The glob filter builds a regexp per path component. If the component
* does not contain a shell metachar, then it falls back to appending the
* raw string to the list of built up paths. This raw path needs to have
* the quoting removed. Ie. convert all occurances of "\X" to "X"
* @param name of the path component
* @return the unquoted path component
*/
private String unquotePathComponent(String name) {
return name.replaceAll("\\\\(.)", "$1");
return new Globber(this, pathPattern, filter).glob();
}
/**

View File

@ -0,0 +1,215 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.Log;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@InterfaceAudience.Private
@InterfaceStability.Unstable
class Globber {
public static final Log LOG = LogFactory.getLog(Globber.class.getName());
private final FileSystem fs;
private final FileContext fc;
private final Path pathPattern;
private final PathFilter filter;
public Globber(FileSystem fs, Path pathPattern, PathFilter filter) {
this.fs = fs;
this.fc = null;
this.pathPattern = pathPattern;
this.filter = filter;
}
public Globber(FileContext fc, Path pathPattern, PathFilter filter) {
this.fs = null;
this.fc = fc;
this.pathPattern = pathPattern;
this.filter = filter;
}
private FileStatus getFileStatus(Path path) {
try {
if (fs != null) {
return fs.getFileStatus(path);
} else {
return fc.getFileStatus(path);
}
} catch (IOException e) {
return null;
}
}
private FileStatus[] listStatus(Path path) {
try {
if (fs != null) {
return fs.listStatus(path);
} else {
return fc.util().listStatus(path);
}
} catch (IOException e) {
return new FileStatus[0];
}
}
private Path fixRelativePart(Path path) {
if (fs != null) {
return fs.fixRelativePart(path);
} else {
return fc.fixRelativePart(path);
}
}
/**
* Translate an absolute path into a list of path components.
* We merge double slashes into a single slash here.
* The first path component (i.e. root) does not get an entry in the list.
*/
private static List<String> getPathComponents(String path)
throws IOException {
ArrayList<String> ret = new ArrayList<String>();
for (String component : path.split(Path.SEPARATOR)) {
if (!component.isEmpty()) {
ret.add(component);
}
}
return ret;
}
private String schemeFromPath(Path path) throws IOException {
String scheme = pathPattern.toUri().getScheme();
if (scheme == null) {
if (fs != null) {
scheme = fs.getUri().getScheme();
} else {
scheme = fc.getFSofPath(path).getUri().getScheme();
}
}
return scheme;
}
private String authorityFromPath(Path path) throws IOException {
String authority = pathPattern.toUri().getAuthority();
if (authority == null) {
if (fs != null) {
authority = fs.getUri().getAuthority();
} else {
authority = fc.getFSofPath(path).getUri().getAuthority();
}
}
return authority ;
}
public FileStatus[] glob() throws IOException {
// First we get the scheme and authority of the pattern that was passed
// in.
String scheme = schemeFromPath(pathPattern);
String authority = authorityFromPath(pathPattern);
// Next we strip off everything except the pathname itself, and expand all
// globs. Expansion is a process which turns "grouping" clauses,
// expressed as brackets, into separate path patterns.
String pathPatternString = pathPattern.toUri().getPath();
List<String> flattenedPatterns = GlobExpander.expand(pathPatternString);
// Now loop over all flattened patterns. In every case, we'll be trying to
// match them to entries in the filesystem.
ArrayList<FileStatus> results =
new ArrayList<FileStatus>(flattenedPatterns.size());
boolean sawWildcard = false;
for (String flatPattern : flattenedPatterns) {
// Get the absolute path for this flattened pattern. We couldn't do
// this prior to flattening because of patterns like {/,a}, where which
// path you go down influences how the path must be made absolute.
Path absPattern =
fixRelativePart(new Path(flatPattern .isEmpty() ? "." : flatPattern ));
// Now we break the flattened, absolute pattern into path components.
// For example, /a/*/c would be broken into the list [a, *, c]
List<String> components =
getPathComponents(absPattern.toUri().getPath());
// Starting out at the root of the filesystem, we try to match
// filesystem entries against pattern components.
ArrayList<FileStatus> candidates = new ArrayList<FileStatus>(1);
candidates.add(new FileStatus(0, true, 0, 0, 0,
new Path(scheme, authority, "/")));
for (String component : components) {
ArrayList<FileStatus> newCandidates =
new ArrayList<FileStatus>(candidates.size());
GlobFilter globFilter = new GlobFilter(component);
if (globFilter.hasPattern()) {
sawWildcard = true;
}
if (candidates.isEmpty() && sawWildcard) {
break;
}
for (FileStatus candidate : candidates) {
FileStatus resolvedCandidate = candidate;
if (candidate.isSymlink()) {
// We have to resolve symlinks, because otherwise we don't know
// whether they are directories.
resolvedCandidate = getFileStatus(candidate.getPath());
}
if (resolvedCandidate == null ||
resolvedCandidate.isDirectory() == false) {
continue;
}
FileStatus[] children = listStatus(candidate.getPath());
for (FileStatus child : children) {
// Set the child path based on the parent path.
// This keeps the symlinks in our path.
child.setPath(new Path(candidate.getPath(),
child.getPath().getName()));
if (globFilter.accept(child.getPath())) {
newCandidates.add(child);
}
}
}
candidates = newCandidates;
}
for (FileStatus status : candidates) {
// HADOOP-3497 semantics: the user-defined filter is applied at the
// end, once the full path is built up.
if (filter.accept(status.getPath())) {
results.add(status);
}
}
}
/*
* When the input pattern "looks" like just a simple filename, and we
* can't find it, we return null rather than an empty array.
* This is a special case which the shell relies on.
*
* To be more precise: if there were no results, AND there were no
* groupings (aka brackets), and no wildcards in the input (aka stars),
* we return null.
*/
if ((!sawWildcard) && results.isEmpty() &&
(flattenedPatterns.size() <= 1)) {
return null;
}
return results.toArray(new FileStatus[0]);
}
}

View File

@ -109,4 +109,7 @@ public interface FSWrapper {
abstract public FileStatus[] listStatus(final Path f)
throws AccessControlException, FileNotFoundException,
UnsupportedFileSystemException, IOException;
abstract public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
throws IOException;
}

View File

@ -332,4 +332,10 @@ public final class FileContextTestWrapper extends FSTestWrapper {
FileNotFoundException, UnsupportedFileSystemException, IOException {
return fc.util().listStatus(f);
}
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
throws IOException {
return fc.util().globStatus(pathPattern, filter);
}
}

View File

@ -397,4 +397,10 @@ public final class FileSystemTestWrapper extends FSTestWrapper {
FileNotFoundException, UnsupportedFileSystemException, IOException {
return fs.listStatus(f);
}
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter)
throws IOException {
return fs.globStatus(pathPattern, filter);
}
}

View File

@ -793,6 +793,8 @@ public class TestFileUtil {
}
}
List<String> actualClassPaths = Arrays.asList(classPathAttr.split(" "));
Collections.sort(expectedClassPaths);
Collections.sort(actualClassPaths);
Assert.assertEquals(expectedClassPaths, actualClassPaths);
} finally {
if (jarFile != null) {

View File

@ -28,11 +28,38 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.AvroTestUtil;
import org.apache.hadoop.util.Shell;
import com.google.common.base.Joiner;
import junit.framework.TestCase;
import static org.junit.Assert.fail;
public class TestPath extends TestCase {
/**
* Merge a bunch of Path objects into a sorted semicolon-separated
* path string.
*/
public static String mergeStatuses(Path paths[]) {
String pathStrings[] = new String[paths.length];
int i = 0;
for (Path path : paths) {
pathStrings[i++] = path.toUri().getPath();
}
Arrays.sort(pathStrings);
return Joiner.on(";").join(pathStrings);
}
/**
* Merge a bunch of FileStatus objects into a sorted semicolon-separated
* path string.
*/
public static String mergeStatuses(FileStatus statuses[]) {
Path paths[] = new Path[statuses.length];
int i = 0;
for (FileStatus status : statuses) {
paths[i++] = status.getPath();
}
return mergeStatuses(paths);
}
@Test (timeout = 30000)
public void testToString() {
toStringTest("/");
@ -352,10 +379,11 @@ public class TestPath extends TestCase {
// ensure globStatus with "*" finds all dir contents
stats = lfs.globStatus(new Path(testRoot, "*"));
Arrays.sort(stats);
assertEquals(paths.length, stats.length);
for (int i=0; i < paths.length; i++) {
assertEquals(paths[i].getParent(), stats[i].getPath());
Path parentPaths[] = new Path[paths.length];
for (int i = 0; i < paths.length; i++) {
parentPaths[i] = paths[i].getParent();
}
assertEquals(mergeStatuses(parentPaths), mergeStatuses(stats));
// ensure that globStatus with an escaped "\*" only finds "*"
stats = lfs.globStatus(new Path(testRoot, "\\*"));
@ -365,9 +393,7 @@ public class TestPath extends TestCase {
// try to glob the inner file for all dirs
stats = lfs.globStatus(new Path(testRoot, "*/f"));
assertEquals(paths.length, stats.length);
for (int i=0; i < paths.length; i++) {
assertEquals(paths[i], stats[i].getPath());
}
assertEquals(mergeStatuses(paths), mergeStatuses(stats));
// try to get the inner file for only the "*" dir
stats = lfs.globStatus(new Path(testRoot, "\\*/f"));

View File

@ -20,14 +20,18 @@ package org.apache.hadoop.fs;
import static org.junit.Assert.*;
import java.io.IOException;
import java.util.Arrays;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.junit.*;
import com.google.common.base.Joiner;
public class TestGlobPaths {
static class RegexPathFilter implements PathFilter {
@ -784,4 +788,265 @@ public class TestGlobPaths {
fs.delete(new Path(USER_DIR), true);
}
/**
* A glob test that can be run on either FileContext or FileSystem.
*/
private static interface FSTestWrapperGlobTest {
void run(FSTestWrapper wrap, FileSystem fs, FileContext fc)
throws Exception;
}
/**
* Run a glob test on FileSystem.
*/
private static void testOnFileSystem(FSTestWrapperGlobTest test) throws Exception {
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
try {
FileSystem fs = FileSystem.get(conf);
test.run(new FileSystemTestWrapper(fs), fs, null);
} finally {
cluster.shutdown();
}
}
/**
* Run a glob test on FileContext.
*/
private static void testOnFileContext(FSTestWrapperGlobTest test) throws Exception {
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
try {
FileContext fc = FileContext.getFileContext(conf);
test.run(new FileContextTestWrapper(fc), null, fc);
} finally {
cluster.shutdown();
}
}
/**
* Accept all paths.
*/
private static class AcceptAllPathFilter implements PathFilter {
@Override
public boolean accept(Path path) {
return true;
}
}
/**
* Accept only paths ending in Z.
*/
private static class AcceptPathsEndingInZ implements PathFilter {
@Override
public boolean accept(Path path) {
String stringPath = path.toUri().getPath();
return stringPath.endsWith("z");
}
}
/**
* Test globbing through symlinks.
*/
private static class TestGlobWithSymlinks implements FSTestWrapperGlobTest {
public void run(FSTestWrapper wrap, FileSystem fs, FileContext fc)
throws Exception {
// Test that globbing through a symlink to a directory yields a path
// containing that symlink.
wrap.mkdir(new Path("/alpha"),
FsPermission.getDirDefault(), false);
wrap.createSymlink(new Path("/alpha"), new Path("/alphaLink"), false);
wrap.mkdir(new Path("/alphaLink/beta"),
FsPermission.getDirDefault(), false);
// Test simple glob
FileStatus[] statuses =
wrap.globStatus(new Path("/alpha/*"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alpha/beta",
statuses[0].getPath().toUri().getPath());
// Test glob through symlink
statuses =
wrap.globStatus(new Path("/alphaLink/*"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alphaLink/beta",
statuses[0].getPath().toUri().getPath());
// If the terminal path component in a globbed path is a symlink,
// we don't dereference that link.
wrap.createSymlink(new Path("beta"), new Path("/alphaLink/betaLink"),
false);
statuses = wrap.globStatus(new Path("/alpha/betaLi*"),
new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alpha/betaLink",
statuses[0].getPath().toUri().getPath());
// todo: test symlink-to-symlink-to-dir, etc.
}
}
@Test
public void testGlobWithSymlinksOnFS() throws Exception {
testOnFileSystem(new TestGlobWithSymlinks());
}
@Test
public void testGlobWithSymlinksOnFC() throws Exception {
testOnFileContext(new TestGlobWithSymlinks());
}
/**
* Test globbing symlinks to symlinks.
*
* Also test globbing dangling symlinks. It should NOT throw any exceptions!
*/
private static class TestGlobWithSymlinksToSymlinks
implements FSTestWrapperGlobTest {
public void run(FSTestWrapper wrap, FileSystem fs, FileContext fc)
throws Exception {
// Test that globbing through a symlink to a symlink to a directory
// fully resolves
wrap.mkdir(new Path("/alpha"), FsPermission.getDirDefault(), false);
wrap.createSymlink(new Path("/alpha"), new Path("/alphaLink"), false);
wrap.createSymlink(new Path("/alphaLink"),
new Path("/alphaLinkLink"), false);
wrap.mkdir(new Path("/alpha/beta"), FsPermission.getDirDefault(), false);
// Test glob through symlink to a symlink to a directory
FileStatus statuses[] =
wrap.globStatus(new Path("/alphaLinkLink"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alphaLinkLink",
statuses[0].getPath().toUri().getPath());
statuses =
wrap.globStatus(new Path("/alphaLinkLink/*"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alphaLinkLink/beta",
statuses[0].getPath().toUri().getPath());
// Test glob of dangling symlink (theta does not actually exist)
wrap.createSymlink(new Path("theta"), new Path("/alpha/kappa"), false);
statuses = wrap.globStatus(new Path("/alpha/kappa/kappa"),
new AcceptAllPathFilter());
Assert.assertNull(statuses);
// Test glob of symlinks
wrap.createFile("/alpha/beta/gamma");
wrap.createSymlink(new Path("gamma"),
new Path("/alpha/beta/gammaLink"), false);
wrap.createSymlink(new Path("gammaLink"),
new Path("/alpha/beta/gammaLinkLink"), false);
wrap.createSymlink(new Path("gammaLinkLink"),
new Path("/alpha/beta/gammaLinkLinkLink"), false);
statuses = wrap.globStatus(new Path("/alpha/*/gammaLinkLinkLink"),
new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alpha/beta/gammaLinkLinkLink",
statuses[0].getPath().toUri().getPath());
statuses = wrap.globStatus(new Path("/alpha/beta/*"),
new AcceptAllPathFilter());
Assert.assertEquals("/alpha/beta/gamma;/alpha/beta/gammaLink;" +
"/alpha/beta/gammaLinkLink;/alpha/beta/gammaLinkLinkLink",
TestPath.mergeStatuses(statuses));
// Let's create two symlinks that point to each other, and glob on them.
wrap.createSymlink(new Path("tweedledee"),
new Path("/tweedledum"), false);
wrap.createSymlink(new Path("tweedledum"),
new Path("/tweedledee"), false);
statuses = wrap.globStatus(new Path("/tweedledee/unobtainium"),
new AcceptAllPathFilter());
Assert.assertNull(statuses);
}
}
@Test
public void testGlobWithSymlinksToSymlinksOnFS() throws Exception {
testOnFileSystem(new TestGlobWithSymlinksToSymlinks());
}
@Test
public void testGlobWithSymlinksToSymlinksOnFC() throws Exception {
testOnFileContext(new TestGlobWithSymlinksToSymlinks());
}
/**
* Test globbing symlinks with a custom PathFilter
*/
private static class TestGlobSymlinksWithCustomPathFilter
implements FSTestWrapperGlobTest {
public void run(FSTestWrapper wrap, FileSystem fs, FileContext fc)
throws Exception {
// Test that globbing through a symlink to a symlink to a directory
// fully resolves
wrap.mkdir(new Path("/alpha"), FsPermission.getDirDefault(), false);
wrap.createSymlink(new Path("/alpha"), new Path("/alphaLinkz"), false);
wrap.mkdir(new Path("/alpha/beta"), FsPermission.getDirDefault(), false);
wrap.mkdir(new Path("/alpha/betaz"), FsPermission.getDirDefault(), false);
// Test glob through symlink to a symlink to a directory, with a PathFilter
FileStatus statuses[] =
wrap.globStatus(new Path("/alpha/beta"), new AcceptPathsEndingInZ());
Assert.assertNull(statuses);
statuses =
wrap.globStatus(new Path("/alphaLinkz/betaz"), new AcceptPathsEndingInZ());
Assert.assertEquals(1, statuses.length);
Assert.assertEquals("/alphaLinkz/betaz",
statuses[0].getPath().toUri().getPath());
statuses =
wrap.globStatus(new Path("/*/*"), new AcceptPathsEndingInZ());
Assert.assertEquals("/alpha/betaz;/alphaLinkz/betaz",
TestPath.mergeStatuses(statuses));
statuses =
wrap.globStatus(new Path("/*/*"), new AcceptAllPathFilter());
Assert.assertEquals("/alpha/beta;/alpha/betaz;" +
"/alphaLinkz/beta;/alphaLinkz/betaz",
TestPath.mergeStatuses(statuses));
}
}
@Test
public void testGlobSymlinksWithCustomPathFilterOnFS() throws Exception {
testOnFileSystem(new TestGlobSymlinksWithCustomPathFilter());
}
@Test
public void testGlobSymlinksWithCustomPathFilterOnFC() throws Exception {
testOnFileContext(new TestGlobSymlinksWithCustomPathFilter());
}
/**
* Test that globStatus fills in the scheme even when it is not provided.
*/
private static class TestGlobFillsInScheme
implements FSTestWrapperGlobTest {
public void run(FSTestWrapper wrap, FileSystem fs, FileContext fc)
throws Exception {
// Verify that the default scheme is hdfs, when we don't supply one.
wrap.mkdir(new Path("/alpha"), FsPermission.getDirDefault(), false);
wrap.createSymlink(new Path("/alpha"), new Path("/alphaLink"), false);
FileStatus statuses[] =
wrap.globStatus(new Path("/alphaLink"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Path path = statuses[0].getPath();
Assert.assertEquals("/alphaLink", path.toUri().getPath());
Assert.assertEquals("hdfs", path.toUri().getScheme());
if (fc != null) {
// If we're using FileContext, then we can list a file:/// URI.
// Since everyone should have the root directory, we list that.
statuses =
wrap.globStatus(new Path("file:///"), new AcceptAllPathFilter());
Assert.assertEquals(1, statuses.length);
Path filePath = statuses[0].getPath();
Assert.assertEquals("file", filePath.toUri().getScheme());
Assert.assertEquals("/", filePath.toUri().getPath());
} else {
// The FileSystem we passed in should have scheme 'hdfs'
Assert.assertEquals("hdfs", fs.getScheme());
}
}
}
@Test
public void testGlobFillsInSchemeOnFS() throws Exception {
testOnFileSystem(new TestGlobFillsInScheme());
}
@Test
public void testGlobFillsInSchemeOnFC() throws Exception {
testOnFileContext(new TestGlobFillsInScheme());
}
}