From 68e588cbee660d55dba518892d064bee3795a002 Mon Sep 17 00:00:00 2001
From: Colin Patrick Mccabe
Date: Tue, 30 Jun 2015 16:40:21 -0700
Subject: [PATCH] HADOOP-10798. globStatus() should always return a sorted list of files (cmccabe)

---
 .../hadoop-common/CHANGES.txt                 |  3 +++
 .../java/org/apache/hadoop/fs/Globber.java    | 11 +++++++-
 .../org/apache/hadoop/fs/TestGlobPaths.java   | 25 +++++++++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt
index 2aab8b44bb7..4d69a229504 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -905,6 +905,9 @@ Release 2.8.0 - UNRELEASED
     HADOOP-12154. FileSystem#getUsed() returns the file length only from root '/'
     (J.Andreina via vinayakumarb)
 
+    HADOOP-10798. globStatus() should always return a sorted list of files
+    (cmccabe)
+
 Release 2.7.2 - UNRELEASED
 
   INCOMPATIBLE CHANGES
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java
index 8a8137a3f01..9cb810fc5cf 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java
@@ -20,6 +20,7 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
 import org.apache.commons.logging.LogFactory;
@@ -285,6 +286,14 @@ public FileStatus[] glob() throws IOException {
         (flattenedPatterns.size() <= 1)) {
       return null;
     }
-    return results.toArray(new FileStatus[0]);
+    /*
+     * In general, the results list will already be sorted, since listStatus
+     * returns results in sorted order for many Hadoop filesystems. However,
+     * not all Hadoop filesystems have this property. So we sort here in order
+     * to get consistent results. See HADOOP-10798 for details.
+     */
+    FileStatus[] ret = results.toArray(new FileStatus[0]);
+    Arrays.sort(ret);
+    return ret;
   }
 }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java
index 50e2e5b0a34..afd8fb2b480 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestGlobPaths.java
@@ -21,9 +21,11 @@
 
 import java.io.IOException;
 import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
 import java.util.UUID;
 import java.util.regex.Pattern;
 
+import com.google.common.collect.Ordering;
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.permission.FsPermission;
@@ -1284,4 +1286,27 @@ public void testNonTerminalGlobsOnFS() throws Exception {
   public void testNonTerminalGlobsOnFC() throws Exception {
     testOnFileContext(new TestNonTerminalGlobs(true));
   }
+
+  @Test
+  public void testLocalFilesystem() throws Exception {
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.getLocal(conf);
+    String localTmp = System.getProperty("java.io.tmpdir");
+    Path base = new Path(new Path(localTmp), UUID.randomUUID().toString());
+    Assert.assertTrue(fs.mkdirs(base));
+    fs.deleteOnExit(base); // register cleanup before anything below can fail
+    Assert.assertTrue(fs.mkdirs(new Path(base, "e")));
+    Assert.assertTrue(fs.mkdirs(new Path(base, "c")));
+    Assert.assertTrue(fs.mkdirs(new Path(base, "a")));
+    Assert.assertTrue(fs.mkdirs(new Path(base, "d")));
+    Assert.assertTrue(fs.mkdirs(new Path(base, "b")));
+    FileStatus[] status = fs.globStatus(new Path(base, "*"));
+    ArrayList<String> list = new ArrayList<String>();
+    for (FileStatus f: status) {
+      list.add(f.getPath().toString());
+    }
+    boolean sorted = Ordering.<String>natural().isOrdered(list);
+    Assert.assertTrue("globStatus results are not sorted: " + list, sorted);
+  }
 }
+