From 03e036b51e46e0f954cfb57b6af3edf69e766567 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Tue, 13 Mar 2012 19:07:08 +0000 Subject: [PATCH] HADOOP-8164. Back slash as path separator is handled for Windows only. Contributed by Daryn Sharp. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1300290 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../java/org/apache/hadoop/fs/FileSystem.java | 23 ++++++-- .../main/java/org/apache/hadoop/fs/Path.java | 4 +- .../java/org/apache/hadoop/fs/TestPath.java | 58 +++++++++++++++++++ 4 files changed, 83 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index d69488eb5f4..fe1aac0ec53 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -365,6 +365,9 @@ Release 0.23.2 - UNRELEASED HADOOP-8140. dfs -getmerge should process its argments better (Daryn Sharp via bobby) + HADOOP-8164. Back slash as path separator is handled for Windows only. 
+ (Daryn Sharp via suresh) + Release 0.23.1 - 2012-02-17 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 8db9d258299..61db1ed5e1c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -1456,11 +1456,12 @@ private FileStatus[] globStatusInternal(Path pathPattern, PathFilter filter) results = listStatus(parentPaths, fp); hasGlob[0] = true; } else { // last component does not have a pattern + // remove the quoting of metachars in a non-regexp expansion + String name = unquotePathComponent(components[components.length - 1]); // get all the path names ArrayList filteredPaths = new ArrayList(parentPaths.length); for (int i = 0; i < parentPaths.length; i++) { - parentPaths[i] = new Path(parentPaths[i], - components[components.length - 1]); + parentPaths[i] = new Path(parentPaths[i], name); if (fp.accept(parentPaths[i])) { filteredPaths.add(parentPaths[i]); } @@ -1503,14 +1504,28 @@ private Path[] globPathsLevel(Path[] parents, String[] filePattern, if (fp.hasPattern()) { parents = FileUtil.stat2Paths(listStatus(parents, fp)); hasGlob[0] = true; - } else { + } else { // the component does not have a pattern + // remove the quoting of metachars in a non-regexp expansion + String name = unquotePathComponent(filePattern[level]); for (int i = 0; i < parents.length; i++) { - parents[i] = new Path(parents[i], filePattern[level]); + parents[i] = new Path(parents[i], name); } } return globPathsLevel(parents, filePattern, level + 1, hasGlob); } + /** + * The glob filter builds a regexp per path component. If the component + * does not contain a shell metachar, then it falls back to appending the + * raw string to the list of built up paths. 
This raw path needs to have + * the quoting removed. I.e., convert all occurrences of "\X" to "X" + * @param name of the path component + * @return the unquoted path component + */ + private String unquotePathComponent(String name) { + return name.replaceAll("\\\\(.)", "$1"); + } + /** * List the statuses of the files/directories in the given path if the path is * a directory. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java index 81c79dbded7..2fbed2a2bb2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Path.java @@ -161,7 +161,9 @@ private void initialize(String scheme, String authority, String path, private String normalizePath(String path) { // remove double slashes & backslashes path = StringUtils.replace(path, "//", "/"); - path = StringUtils.replace(path, "\\", "/"); + if (Path.WINDOWS) { + path = StringUtils.replace(path, "\\", "/"); + } // trim trailing slash from non-root path (ignoring windows drive) int minLength = hasWindowsDrive(path, true) ? 
4 : 1; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java index b1d92686c26..2be0f9d26b6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestPath.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.util.Arrays; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.AvroTestUtil; @@ -264,4 +265,61 @@ public void testAvroReflect() throws Exception { "{\"type\":\"string\",\"java-class\":\"org.apache.hadoop.fs.Path\"}"); } + public void testGlobEscapeStatus() throws Exception { + FileSystem lfs = FileSystem.getLocal(new Configuration()); + Path testRoot = lfs.makeQualified(new Path( + System.getProperty("test.build.data","test/build/data"), + "testPathGlob")); + lfs.delete(testRoot, true); + lfs.mkdirs(testRoot); + assertTrue(lfs.isDirectory(testRoot)); + lfs.setWorkingDirectory(testRoot); + + // create a couple dirs with file in them + Path paths[] = new Path[]{ + new Path(testRoot, "*/f"), + new Path(testRoot, "d1/f"), + new Path(testRoot, "d2/f") + }; + Arrays.sort(paths); + for (Path p : paths) { + lfs.create(p).close(); + assertTrue(lfs.exists(p)); + } + + // try the non-globbed listStatus + FileStatus stats[] = lfs.listStatus(new Path(testRoot, "*")); + assertEquals(1, stats.length); + assertEquals(new Path(testRoot, "*/f"), stats[0].getPath()); + + // ensure globStatus with "*" finds all dir contents + stats = lfs.globStatus(new Path(testRoot, "*")); + Arrays.sort(stats); + assertEquals(paths.length, stats.length); + for (int i=0; i < paths.length; i++) { + assertEquals(paths[i].getParent(), stats[i].getPath()); + } + + // ensure that globStatus with an escaped "\*" only finds "*" + stats = lfs.globStatus(new 
Path(testRoot, "\\*")); + assertEquals(1, stats.length); + assertEquals(new Path(testRoot, "*"), stats[0].getPath()); + + // try to glob the inner file for all dirs + stats = lfs.globStatus(new Path(testRoot, "*/f")); + assertEquals(paths.length, stats.length); + for (int i=0; i < paths.length; i++) { + assertEquals(paths[i], stats[i].getPath()); + } + + // try to get the inner file for only the "*" dir + stats = lfs.globStatus(new Path(testRoot, "\\*/f")); + assertEquals(1, stats.length); + assertEquals(new Path(testRoot, "*/f"), stats[0].getPath()); + + // try to glob all the contents of the "*" dir + stats = lfs.globStatus(new Path(testRoot, "\\*/*")); + assertEquals(1, stats.length); + assertEquals(new Path(testRoot, "*/f"), stats[0].getPath()); + } }