HADOOP-8934. Shell command ls should include sort options (Jonathan Allen via aw)

This commit is contained in:
Allen Wittenauer 2015-02-09 12:50:44 -08:00
parent ab934e8594
commit 30b797ee9d
5 changed files with 362 additions and 34 deletions

View File

@ -24,6 +24,9 @@ Trunk (Unreleased)
HADOOP-11485. Pluggable shell integration (aw)
HADOOP-8934. Shell command ls should include sort options (Jonathan Allen
via aw)
IMPROVEMENTS
HADOOP-8017. Configure hadoop-main pom to get rid of M2E plugin execution

View File

@ -20,6 +20,8 @@
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Date;
import java.util.LinkedList;
import org.apache.hadoop.util.StringUtils;
@ -40,29 +42,59 @@ public static void registerCommands(CommandFactory factory) {
factory.addClass(Ls.class, "-ls");
factory.addClass(Lsr.class, "-lsr");
}
public static final String NAME = "ls";
public static final String USAGE = "[-d] [-h] [-R] [<path> ...]";
public static final String DESCRIPTION =
"List the contents that match the specified file pattern. If " +
"path is not specified, the contents of /user/<currentUser> " +
"will be listed. Directory entries are of the form:\n" +
"\tpermissions - userId groupId sizeOfDirectory(in bytes) modificationDate(yyyy-MM-dd HH:mm) directoryName\n\n" +
"and file entries are of the form:\n" +
"\tpermissions numberOfReplicas userId groupId sizeOfFile(in bytes) modificationDate(yyyy-MM-dd HH:mm) fileName\n" +
"-d: Directories are listed as plain files.\n" +
"-h: Formats the sizes of files in a human-readable fashion " +
"rather than a number of bytes.\n" +
"-R: Recursively list the contents of directories.";
protected final SimpleDateFormat dateFormat =
private static final String OPTION_DIRECTORY = "d";
private static final String OPTION_HUMAN = "h";
private static final String OPTION_RECURSIVE = "R";
private static final String OPTION_REVERSE = "r";
private static final String OPTION_MTIME = "t";
private static final String OPTION_ATIME = "u";
private static final String OPTION_SIZE = "S";
public static final String NAME = "ls";
public static final String USAGE = "[-" + OPTION_DIRECTORY + "] [-"
+ OPTION_HUMAN + "] " + "[-" + OPTION_RECURSIVE + "] [-" + OPTION_MTIME
+ "] [-" + OPTION_SIZE + "] [-" + OPTION_REVERSE + "] " + "[-"
+ OPTION_ATIME + "] [<path> ...]";
public static final String DESCRIPTION =
"List the contents that match the specified file pattern. If " +
"path is not specified, the contents of /user/<currentUser> " +
"will be listed. For a directory a list of its direct children " +
"is returned (unless -" + OPTION_DIRECTORY +
" option is specified).\n\n" +
"Directory entries are of the form:\n" +
"\tpermissions - userId groupId sizeOfDirectory(in bytes) modificationDate(yyyy-MM-dd HH:mm) directoryName\n\n" +
"and file entries are of the form:\n" +
"\tpermissions numberOfReplicas userId groupId sizeOfFile(in bytes) modificationDate(yyyy-MM-dd HH:mm) fileName\n\n" +
" -" + OPTION_DIRECTORY +
" Directories are listed as plain files.\n" +
" -" + OPTION_HUMAN +
" Formats the sizes of files in a human-readable fashion\n" +
" rather than a number of bytes.\n" +
" -" + OPTION_RECURSIVE +
" Recursively list the contents of directories.\n" +
" -" + OPTION_MTIME +
" Sort files by modification time (most recent first).\n" +
" -" + OPTION_SIZE +
" Sort files by size.\n" +
" -" + OPTION_REVERSE +
" Reverse the order of the sort.\n" +
" -" + OPTION_ATIME +
" Use time of last access instead of modification for\n" +
" display and sorting.";
protected static final SimpleDateFormat dateFormat =
new SimpleDateFormat("yyyy-MM-dd HH:mm");
protected int maxRepl = 3, maxLen = 10, maxOwner = 0, maxGroup = 0;
protected String lineFormat;
protected boolean dirRecurse;
private boolean orderReverse;
private boolean orderTime;
private boolean orderSize;
private boolean useAtime;
private Comparator<PathData> orderComparator;
protected boolean humanReadable = false;
@ -75,12 +107,74 @@ protected String formatSize(long size) {
@Override
protected void processOptions(LinkedList<String> args)
throws IOException {
CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE, "d", "h", "R");
CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE,
OPTION_DIRECTORY, OPTION_HUMAN, OPTION_RECURSIVE, OPTION_REVERSE,
OPTION_MTIME, OPTION_SIZE, OPTION_ATIME);
cf.parse(args);
dirRecurse = !cf.getOpt("d");
setRecursive(cf.getOpt("R") && dirRecurse);
humanReadable = cf.getOpt("h");
dirRecurse = !cf.getOpt(OPTION_DIRECTORY);
setRecursive(cf.getOpt(OPTION_RECURSIVE) && dirRecurse);
humanReadable = cf.getOpt(OPTION_HUMAN);
orderReverse = cf.getOpt(OPTION_REVERSE);
orderTime = cf.getOpt(OPTION_MTIME);
orderSize = !orderTime && cf.getOpt(OPTION_SIZE);
useAtime = cf.getOpt(OPTION_ATIME);
if (args.isEmpty()) args.add(Path.CUR_DIR);
initialiseOrderComparator();
}
/**
* Should the contents of the directory be shown or just the directory?
* @return true if directory contents, false if just directory
*/
@InterfaceAudience.Private
boolean isDirRecurse() {
return this.dirRecurse;
}
/**
* Should file sizes be returned in human readable format rather than bytes?
* @return true is human readable, false if bytes
*/
@InterfaceAudience.Private
boolean isHumanReadable() {
return this.humanReadable;
}
/**
* Should directory contents be displayed in reverse order
* @return true reverse order, false default order
*/
@InterfaceAudience.Private
boolean isOrderReverse() {
return this.orderReverse;
}
/**
* Should directory contents be displayed in mtime order.
* @return true mtime order, false default order
*/
@InterfaceAudience.Private
boolean isOrderTime() {
return this.orderTime;
}
/**
* Should directory contents be displayed in size order.
* @return true size order, false default order
*/
@InterfaceAudience.Private
boolean isOrderSize() {
return this.orderSize;
}
/**
* Should access time be used rather than modification time.
* @return true use access time, false use modification time
*/
@InterfaceAudience.Private
boolean isUseAtime() {
return this.useAtime;
}
@Override
@ -98,6 +192,7 @@ protected void processPaths(PathData parent, PathData ... items)
throws IOException {
if (parent != null && !isRecursive() && items.length != 0) {
out.println("Found " + items.length + " items");
Arrays.sort(items, getOrderComparator());
}
adjustColumnWidths(items);
super.processPaths(parent, items);
@ -113,9 +208,10 @@ protected void processPath(PathData item) throws IOException {
stat.getOwner(),
stat.getGroup(),
formatSize(stat.getLen()),
dateFormat.format(new Date(stat.getModificationTime())),
item
);
dateFormat.format(new Date(isUseAtime()
? stat.getAccessTime()
: stat.getModificationTime())),
item);
out.println(line);
}
@ -149,6 +245,49 @@ private int maxLength(int n, Object value) {
return Math.max(n, (value != null) ? String.valueOf(value).length() : 0);
}
/**
* Get the comparator to be used for sorting files.
* @return comparator
*/
private Comparator<PathData> getOrderComparator() {
return this.orderComparator;
}
/**
* Initialise the comparator to be used for sorting files. If multiple options
* are selected then the order is chosen in the following precedence: -
* Modification time (or access time if requested) - File size - File name
*/
private void initialiseOrderComparator() {
if (isOrderTime()) {
// mtime is ordered latest date first in line with the unix ls -t command
this.orderComparator = new Comparator<PathData>() {
public int compare(PathData o1, PathData o2) {
Long o1Time = (isUseAtime() ? o1.stat.getAccessTime()
: o1.stat.getModificationTime());
Long o2Time = (isUseAtime() ? o2.stat.getAccessTime()
: o2.stat.getModificationTime());
return o2Time.compareTo(o1Time) * (isOrderReverse() ? -1 : 1);
}
};
} else if (isOrderSize()) {
// size is ordered largest first in line with the unix ls -S command
this.orderComparator = new Comparator<PathData>() {
public int compare(PathData o1, PathData o2) {
Long o1Length = o1.stat.getLen();
Long o2Length = o2.stat.getLen();
return o2Length.compareTo(o1Length) * (isOrderReverse() ? -1 : 1);
}
};
} else {
this.orderComparator = new Comparator<PathData>() {
public int compare(PathData o1, PathData o2) {
return o1.compareTo(o2) * (isOrderReverse() ? -1 : 1);
}
};
}
}
/**
* Get a recursive listing of all files in that match the file patterns.
* Same as "-ls -R"
@ -162,7 +301,7 @@ protected void processOptions(LinkedList<String> args)
args.addFirst("-R");
super.processOptions(args);
}
@Override
public String getReplacementCommand() {
return "ls -R";

View File

@ -406,14 +406,25 @@ bin/hadoop fs <args>
* ls
Usage: <<<hadoop fs -ls [-R] <args> >>>
Usage: <<<hadoop fs -ls [-d] [-h] [-R] [-t] [-S] [-r] [-u] <args> >>>
Options:
* The -R option will return stat recursively through the directory
structure.
* -d: Directories are listed as plain files.
For a file returns stat on the file with the following format:
* -h: Format file sizes in a human-readable fashion (eg 64.0m instead of 67108864).
* -R: Recursively list subdirectories encountered.
* -t: Sort output by modification time (most recent first).
* -S: Sort output by file size.
* -r: Reverse the sort order.
* -u: Use access time rather than modification time for display and sorting.
For a file ls returns stat on the file with the following format:
+---+
permissions number_of_replicas userid groupid filesize modification_date modification_time filename
@ -425,6 +436,9 @@ permissions number_of_replicas userid groupid filesize modification_date modific
permissions userid groupid modification_date modification_time dirname
+---+
Files within a directory are order by filename by default.
Example:
* <<<hadoop fs -ls /user/hadoop/file1>>>

View File

@ -54,7 +54,7 @@
<comparators>
<comparator>
<type>RegexpComparator</type>
<expected-output>^-ls \[-d\] \[-h\] \[-R\] \[&lt;path&gt; \.\.\.\] :( |\t)*</expected-output>
<expected-output>^-ls \[-d\] \[-h\] \[-R\] \[-t\] \[-S\] \[-r\] \[-u\] \[&lt;path&gt; \.\.\.\] :( |\t)*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
@ -62,11 +62,15 @@
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^\s*specified, the contents of /user/&lt;currentUser&gt; will be listed. Directory entries( )*</expected-output>
<expected-output>^\s*specified, the contents of /user/&lt;currentUser&gt; will be listed. For a directory a( )*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^\s*are of the form:( )*</expected-output>
<expected-output>^( |\t)*list of its direct children is returned \(unless -d option is specified\).*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^( |\t)*Directory entries are of the form:( )*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
@ -94,16 +98,36 @@
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^\s*-h\s+Formats the sizes of files in a human-readable fashion rather than a number( )*</expected-output>
<expected-output>^\s*-h\s+Formats the sizes of files in a human-readable fashion( )*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^\s*of bytes\.( )*</expected-output>
<expected-output>^\s*rather than a number of bytes\.( )*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^\s*-R\s+Recursively list the contents of directories\.( )*</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^( |\t)*-t\s+Sort files by modification time \(most recent first\)\.</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^( |\t)*-S\s+Sort files by size\.</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^( |\t)*-r\s+Reverse the order of the sort\.</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^( |\t)*-u\s+Use time of last access instead of modification for</expected-output>
</comparator>
<comparator>
<type>RegexpComparator</type>
<expected-output>^( |\t)*display and sorting\.</expected-output>
</comparator>
</comparators>
</test>

View File

@ -1046,6 +1046,154 @@
</comparators>
</test>
<test> <!-- TESTED -->
<description>ls: files sorted in default order</description>
<test-commands>
<command>-fs NAMENODE -mkdir /dir</command>
<command>-fs NAMENODE -touchz /dir/file2 /dir/file1 /dir/file3</command>
<command>-fs NAMENODE -ls /dir</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r /dir</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>Found 3 items
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file1
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file2
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file3</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>ls: files sorted in reverse default order</description>
<test-commands>
<command>-fs NAMENODE -mkdir /dir</command>
<command>-fs NAMENODE -touchz /dir/file2 /dir/file1 /dir/file3</command>
<command>-fs NAMENODE -ls -r /dir</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r /dir</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>Found 3 items
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file3
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file2
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file1</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>ls: files sorted in size order</description>
<test-commands>
<command>-fs NAMENODE -mkdir /dir</command>
<command>-fs NAMENODE -put CLITEST_DATA/data30bytes /dir/data30bytes</command>
<command>-fs NAMENODE -put CLITEST_DATA/data15bytes /dir/data15bytes</command>
<command>-fs NAMENODE -put CLITEST_DATA/data120bytes /dir/data120bytes</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes /dir/data60bytes</command>
<command>-fs NAMENODE -ls -S /dir</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r /dir</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>Found 4 items
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*120( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data120bytes
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*60( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data60bytes
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*30( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data30bytes
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*15( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data15bytes</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>ls: files sorted in reverse size order</description>
<test-commands>
<command>-fs NAMENODE -mkdir /dir</command>
<command>-fs NAMENODE -put CLITEST_DATA/data30bytes /dir/data30bytes</command>
<command>-fs NAMENODE -put CLITEST_DATA/data15bytes /dir/data15bytes</command>
<command>-fs NAMENODE -put CLITEST_DATA/data120bytes /dir/data120bytes</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes /dir/data60bytes</command>
<command>-fs NAMENODE -ls -S -r /dir</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r /dir</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>Found 4 items
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*15( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data15bytes
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*30( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data30bytes
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*60( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data60bytes
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*120( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data120bytes</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>ls: files sorted in modification order</description>
<!-- this test assumes that the files take > 1ms each to create -->
<test-commands>
<command>-fs NAMENODE -mkdir /dir</command>
<command>-fs NAMENODE -touchz /dir/first</command>
<command>-fs NAMENODE -touchz /dir/second</command>
<command>-fs NAMENODE -touchz /dir/third</command>
<command>-fs NAMENODE -touchz /dir/fourth</command>
<command>-fs NAMENODE -touchz /dir/fifth</command>
<command>-fs NAMENODE -ls -t /dir</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r /dir</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>Found 5 items
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/fifth
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/fourth
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/third
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/second
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/first</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>ls: files sorted in reverse modification order</description>
<!-- this test assumes that the files take > 1ms each to create -->
<test-commands>
<command>-fs NAMENODE -mkdir /dir</command>
<command>-fs NAMENODE -touchz /dir/first</command>
<command>-fs NAMENODE -touchz /dir/second</command>
<command>-fs NAMENODE -touchz /dir/third</command>
<command>-fs NAMENODE -touchz /dir/fourth</command>
<command>-fs NAMENODE -touchz /dir/fifth</command>
<command>-fs NAMENODE -ls -t -r /dir</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r /dir</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>Found 5 items
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/first
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/second
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/third
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/fourth
-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/fifth</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>rm: Test for quoted globbing </description>
<windows>false</windows>