HADOOP-10798. globStatus() should always return a sorted list of files (cmccabe)

This commit is contained in:
Colin Patrick Mccabe 2015-06-30 16:40:21 -07:00
parent d0cc0380b5
commit 68e588cbee
3 changed files with 38 additions and 1 deletion

View File

@ -905,6 +905,9 @@ Release 2.8.0 - UNRELEASED
HADOOP-12154. FileSystem#getUsed() returns the file length only from root '/' HADOOP-12154. FileSystem#getUsed() returns the file length only from root '/'
(J.Andreina via vinayakumarb) (J.Andreina via vinayakumarb)
HADOOP-10798. globStatus() should always return a sorted list of files
(cmccabe)
Release 2.7.2 - UNRELEASED Release 2.7.2 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.fs;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -285,6 +286,14 @@ class Globber {
(flattenedPatterns.size() <= 1)) { (flattenedPatterns.size() <= 1)) {
return null; return null;
} }
return results.toArray(new FileStatus[0]); /*
* In general, the results list will already be sorted, since listStatus
* returns results in sorted order for many Hadoop filesystems. However,
* not all Hadoop filesystems have this property. So we sort here in order
* to get consistent results. See HADOOP-10798 for details.
*/
FileStatus ret[] = results.toArray(new FileStatus[0]);
Arrays.sort(ret);
return ret;
} }
} }

View File

@ -21,9 +21,11 @@ import static org.junit.Assert.*;
import java.io.IOException; import java.io.IOException;
import java.security.PrivilegedExceptionAction; import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.UUID; import java.util.UUID;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import com.google.common.collect.Ordering;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.FsPermission;
@ -1284,4 +1286,27 @@ public class TestGlobPaths {
public void testNonTerminalGlobsOnFC() throws Exception { public void testNonTerminalGlobsOnFC() throws Exception {
testOnFileContext(new TestNonTerminalGlobs(true)); testOnFileContext(new TestNonTerminalGlobs(true));
} }
/**
 * Checks that globStatus() on the local filesystem returns its matches in
 * sorted order, even though the directories are created out of order.
 * See HADOOP-10798.
 */
@Test
public void testLocalFilesystem() throws Exception {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.getLocal(conf);
  String localTmp = System.getProperty("java.io.tmpdir");
  // A random UUID keeps this run's scratch directory from colliding with
  // anything else under the shared temp directory.
  Path base = new Path(new Path(localTmp), UUID.randomUUID().toString());
  Assert.assertTrue(fs.mkdirs(base));
  // Register cleanup as soon as the base directory exists, so that a failed
  // assertion further down does not leave the scratch directory behind.
  fs.deleteOnExit(base);
  // Deliberately create the children in non-alphabetical order.
  for (String child : new String[] {"e", "c", "a", "d", "b"}) {
    Assert.assertTrue(fs.mkdirs(new Path(base, child)));
  }
  FileStatus[] status = fs.globStatus(new Path(base, "*"));
  ArrayList<String> list = new ArrayList<String>(status.length);
  for (FileStatus f : status) {
    list.add(f.getPath().toString());
  }
  boolean sorted = Ordering.natural().isOrdered(list);
  Assert.assertTrue("globStatus() results are not sorted: " + list, sorted);
}
} }