From 30b797ee9df30260314eeadffc7d51492871b352 Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Mon, 9 Feb 2015 12:50:44 -0800 Subject: [PATCH] HADOOP-8934. Shell command ls should include sort options (Jonathan Allen via aw) --- .../hadoop-common/CHANGES.txt | 3 + .../java/org/apache/hadoop/fs/shell/Ls.java | 189 +++++++++++++++--- .../src/site/apt/FileSystemShell.apt.vm | 22 +- .../src/test/resources/testConf.xml | 34 +++- .../src/test/resources/testHDFSConf.xml | 148 ++++++++++++++ 5 files changed, 362 insertions(+), 34 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index b02e6954a94..55baf8aed0b 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -24,6 +24,9 @@ Trunk (Unreleased) HADOOP-11485. Pluggable shell integration (aw) + HADOOP-8934. Shell command ls should include sort options (Jonathan Allen + via aw) + IMPROVEMENTS HADOOP-8017. Configure hadoop-main pom to get rid of M2E plugin execution diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java index c7e80b6b4af..0e467008d4b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Ls.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Comparator; import java.util.Date; import java.util.LinkedList; import org.apache.hadoop.util.StringUtils; @@ -40,29 +42,59 @@ public static void registerCommands(CommandFactory factory) { factory.addClass(Ls.class, "-ls"); factory.addClass(Lsr.class, "-lsr"); } - - public static final String NAME = "ls"; - public static final String USAGE = "[-d] [-h] [-R] [ ...]"; - public static final String 
DESCRIPTION = - "List the contents that match the specified file pattern. If " + - "path is not specified, the contents of /user/ " + - "will be listed. Directory entries are of the form:\n" + - "\tpermissions - userId groupId sizeOfDirectory(in bytes) modificationDate(yyyy-MM-dd HH:mm) directoryName\n\n" + - "and file entries are of the form:\n" + - "\tpermissions numberOfReplicas userId groupId sizeOfFile(in bytes) modificationDate(yyyy-MM-dd HH:mm) fileName\n" + - "-d: Directories are listed as plain files.\n" + - "-h: Formats the sizes of files in a human-readable fashion " + - "rather than a number of bytes.\n" + - "-R: Recursively list the contents of directories."; - - - protected final SimpleDateFormat dateFormat = + private static final String OPTION_DIRECTORY = "d"; + private static final String OPTION_HUMAN = "h"; + private static final String OPTION_RECURSIVE = "R"; + private static final String OPTION_REVERSE = "r"; + private static final String OPTION_MTIME = "t"; + private static final String OPTION_ATIME = "u"; + private static final String OPTION_SIZE = "S"; + + public static final String NAME = "ls"; + public static final String USAGE = "[-" + OPTION_DIRECTORY + "] [-" + + OPTION_HUMAN + "] " + "[-" + OPTION_RECURSIVE + "] [-" + OPTION_MTIME + + "] [-" + OPTION_SIZE + "] [-" + OPTION_REVERSE + "] " + "[-" + + OPTION_ATIME + "] [ ...]"; + + public static final String DESCRIPTION = + "List the contents that match the specified file pattern. If " + + "path is not specified, the contents of /user/ " + + "will be listed. 
For a directory a list of its direct children " + + "is returned (unless -" + OPTION_DIRECTORY + + " option is specified).\n\n" + + "Directory entries are of the form:\n" + + "\tpermissions - userId groupId sizeOfDirectory(in bytes) modificationDate(yyyy-MM-dd HH:mm) directoryName\n\n" + + "and file entries are of the form:\n" + + "\tpermissions numberOfReplicas userId groupId sizeOfFile(in bytes) modificationDate(yyyy-MM-dd HH:mm) fileName\n\n" + + " -" + OPTION_DIRECTORY + + " Directories are listed as plain files.\n" + + " -" + OPTION_HUMAN + + " Formats the sizes of files in a human-readable fashion\n" + + " rather than a number of bytes.\n" + + " -" + OPTION_RECURSIVE + + " Recursively list the contents of directories.\n" + + " -" + OPTION_MTIME + + " Sort files by modification time (most recent first).\n" + + " -" + OPTION_SIZE + + " Sort files by size.\n" + + " -" + OPTION_REVERSE + + " Reverse the order of the sort.\n" + + " -" + OPTION_ATIME + + " Use time of last access instead of modification for\n" + + " display and sorting."; + + protected static final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm"); protected int maxRepl = 3, maxLen = 10, maxOwner = 0, maxGroup = 0; protected String lineFormat; protected boolean dirRecurse; + private boolean orderReverse; + private boolean orderTime; + private boolean orderSize; + private boolean useAtime; + private Comparator orderComparator; protected boolean humanReadable = false; @@ -75,12 +107,74 @@ protected String formatSize(long size) { @Override protected void processOptions(LinkedList args) throws IOException { - CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE, "d", "h", "R"); + CommandFormat cf = new CommandFormat(0, Integer.MAX_VALUE, + OPTION_DIRECTORY, OPTION_HUMAN, OPTION_RECURSIVE, OPTION_REVERSE, + OPTION_MTIME, OPTION_SIZE, OPTION_ATIME); cf.parse(args); - dirRecurse = !cf.getOpt("d"); - setRecursive(cf.getOpt("R") && dirRecurse); - humanReadable = cf.getOpt("h"); + 
dirRecurse = !cf.getOpt(OPTION_DIRECTORY); + setRecursive(cf.getOpt(OPTION_RECURSIVE) && dirRecurse); + humanReadable = cf.getOpt(OPTION_HUMAN); + orderReverse = cf.getOpt(OPTION_REVERSE); + orderTime = cf.getOpt(OPTION_MTIME); + orderSize = !orderTime && cf.getOpt(OPTION_SIZE); + useAtime = cf.getOpt(OPTION_ATIME); if (args.isEmpty()) args.add(Path.CUR_DIR); + + initialiseOrderComparator(); + } + + /** + * Should the contents of the directory be shown or just the directory? + * @return true if directory contents, false if just directory + */ + @InterfaceAudience.Private + boolean isDirRecurse() { + return this.dirRecurse; + } + + /** + * Should file sizes be returned in human readable format rather than bytes? + * @return true if human readable, false if bytes + */ + @InterfaceAudience.Private + boolean isHumanReadable() { + return this.humanReadable; + } + + /** + * Should directory contents be displayed in reverse order. + * @return true reverse order, false default order + */ + @InterfaceAudience.Private + boolean isOrderReverse() { + return this.orderReverse; + } + + /** + * Should directory contents be displayed in mtime order. + * @return true mtime order, false default order + */ + @InterfaceAudience.Private + boolean isOrderTime() { + return this.orderTime; + } + + /** + * Should directory contents be displayed in size order. + * @return true size order, false default order + */ + @InterfaceAudience.Private + boolean isOrderSize() { + return this.orderSize; + } + + /** + * Should access time be used rather than modification time. + * @return true use access time, false use modification time + */ + @InterfaceAudience.Private + boolean isUseAtime() { + return this.useAtime; } @Override @@ -98,6 +192,7 @@ protected void processPaths(PathData parent, PathData ... 
items) throws IOException { if (parent != null && !isRecursive() && items.length != 0) { out.println("Found " + items.length + " items"); + Arrays.sort(items, getOrderComparator()); } adjustColumnWidths(items); super.processPaths(parent, items); @@ -113,9 +208,10 @@ protected void processPath(PathData item) throws IOException { stat.getOwner(), stat.getGroup(), formatSize(stat.getLen()), - dateFormat.format(new Date(stat.getModificationTime())), - item - ); + dateFormat.format(new Date(isUseAtime() + ? stat.getAccessTime() + : stat.getModificationTime())), + item); out.println(line); } @@ -149,6 +245,49 @@ private int maxLength(int n, Object value) { return Math.max(n, (value != null) ? String.valueOf(value).length() : 0); } + /** + * Get the comparator to be used for sorting files. + * @return comparator + */ + private Comparator getOrderComparator() { + return this.orderComparator; + } + + /** + * Initialise the comparator to be used for sorting files. If multiple options + * are selected then the order is chosen in the following precedence: - + * Modification time (or access time if requested) - File size - File name + */ + private void initialiseOrderComparator() { + if (isOrderTime()) { + // mtime is ordered latest date first in line with the unix ls -t command + this.orderComparator = new Comparator() { + public int compare(PathData o1, PathData o2) { + Long o1Time = (isUseAtime() ? o1.stat.getAccessTime() + : o1.stat.getModificationTime()); + Long o2Time = (isUseAtime() ? o2.stat.getAccessTime() + : o2.stat.getModificationTime()); + return o2Time.compareTo(o1Time) * (isOrderReverse() ? -1 : 1); + } + }; + } else if (isOrderSize()) { + // size is ordered largest first in line with the unix ls -S command + this.orderComparator = new Comparator() { + public int compare(PathData o1, PathData o2) { + Long o1Length = o1.stat.getLen(); + Long o2Length = o2.stat.getLen(); + return o2Length.compareTo(o1Length) * (isOrderReverse() ? 
-1 : 1); + } + }; + } else { + this.orderComparator = new Comparator() { + public int compare(PathData o1, PathData o2) { + return o1.compareTo(o2) * (isOrderReverse() ? -1 : 1); + } + }; + } + } + /** * Get a recursive listing of all files in that match the file patterns. * Same as "-ls -R" @@ -162,7 +301,7 @@ protected void processOptions(LinkedList args) args.addFirst("-R"); super.processOptions(args); } - + @Override public String getReplacementCommand() { return "ls -R"; diff --git a/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm index 3fd56fcb3df..6831ebf5149 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm @@ -406,14 +406,25 @@ bin/hadoop fs * ls - Usage: << >>> + Usage: << >>> Options: - * The -R option will return stat recursively through the directory - structure. + * -d: Directories are listed as plain files. - For a file returns stat on the file with the following format: + * -h: Format file sizes in a human-readable fashion (eg 64.0m instead of 67108864). + + * -R: Recursively list subdirectories encountered. + + * -t: Sort output by modification time (most recent first). + + * -S: Sort output by file size. + + * -r: Reverse the sort order. + + * -u: Use access time rather than modification time for display and sorting. + + For a file ls returns stat on the file with the following format: +---+ permissions number_of_replicas userid groupid filesize modification_date modification_time filename @@ -425,6 +436,9 @@ permissions number_of_replicas userid groupid filesize modification_date modific permissions userid groupid modification_date modification_time dirname +---+ + Files within a directory are ordered by filename by default. 
+ + Example: * <<>> diff --git a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml index f962813b337..ac28192d5d2 100644 --- a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml +++ b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml @@ -54,7 +54,7 @@ RegexpComparator - ^-ls \[-d\] \[-h\] \[-R\] \[<path> \.\.\.\] :( |\t)* + ^-ls \[-d\] \[-h\] \[-R\] \[-t\] \[-S\] \[-r\] \[-u\] \[<path> \.\.\.\] :( |\t)* RegexpComparator @@ -62,11 +62,15 @@ RegexpComparator - ^\s*specified, the contents of /user/<currentUser> will be listed. Directory entries( )* + ^\s*specified, the contents of /user/<currentUser> will be listed. For a directory a( )* RegexpComparator - ^\s*are of the form:( )* + ^( |\t)*list of its direct children is returned \(unless -d option is specified\).* + + + RegexpComparator + ^( |\t)*Directory entries are of the form:( )* RegexpComparator @@ -94,16 +98,36 @@ RegexpComparator - ^\s*-h\s+Formats the sizes of files in a human-readable fashion rather than a number( )* + ^\s*-h\s+Formats the sizes of files in a human-readable fashion( )* RegexpComparator - ^\s*of bytes\.( )* + ^\s*rather than a number of bytes\.( )* RegexpComparator ^\s*-R\s+Recursively list the contents of directories\.( )* + + RegexpComparator + ^( |\t)*-t\s+Sort files by modification time \(most recent first\)\. + + + RegexpComparator + ^( |\t)*-S\s+Sort files by size\. + + + RegexpComparator + ^( |\t)*-r\s+Reverse the order of the sort\. + + + RegexpComparator + ^( |\t)*-u\s+Use time of last access instead of modification for + + + RegexpComparator + ^( |\t)*display and sorting\. 
+ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml index 6c71b6ee1f8..8b221d61807 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml @@ -1046,6 +1046,154 @@ + + ls: files sorted in default order + + -fs NAMENODE -mkdir /dir + -fs NAMENODE -touchz /dir/file2 /dir/file1 /dir/file3 + -fs NAMENODE -ls /dir + + + -fs NAMENODE -rm -r /dir + + + + RegexpAcrossOutputComparator + Found 3 items +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file1 +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file2 +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file3 + + + + + + ls: files sorted in reverse default order + + -fs NAMENODE -mkdir /dir + -fs NAMENODE -touchz /dir/file2 /dir/file1 /dir/file3 + -fs NAMENODE -ls -r /dir + + + -fs NAMENODE -rm -r /dir + + + + RegexpAcrossOutputComparator + Found 3 items +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file3 +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file2 +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/file1 + + + + + + ls: files sorted in size order + + -fs NAMENODE -mkdir /dir + -fs NAMENODE -put CLITEST_DATA/data30bytes /dir/data30bytes + -fs NAMENODE -put CLITEST_DATA/data15bytes /dir/data15bytes + -fs NAMENODE -put CLITEST_DATA/data120bytes /dir/data120bytes + -fs NAMENODE -put CLITEST_DATA/data60bytes /dir/data60bytes + -fs NAMENODE -ls -S /dir + + + -fs NAMENODE -rm -r /dir + + + + RegexpAcrossOutputComparator + Found 4 items 
+-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*120( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data120bytes +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*60( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data60bytes +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*30( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data30bytes +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*15( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data15bytes + + + + + + ls: files sorted in reverse size order + + -fs NAMENODE -mkdir /dir + -fs NAMENODE -put CLITEST_DATA/data30bytes /dir/data30bytes + -fs NAMENODE -put CLITEST_DATA/data15bytes /dir/data15bytes + -fs NAMENODE -put CLITEST_DATA/data120bytes /dir/data120bytes + -fs NAMENODE -put CLITEST_DATA/data60bytes /dir/data60bytes + -fs NAMENODE -ls -S -r /dir + + + -fs NAMENODE -rm -r /dir + + + + RegexpAcrossOutputComparator + Found 4 items +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*15( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data15bytes +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*30( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data30bytes +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*60( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data60bytes +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*120( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/data120bytes + + + + + + ls: files sorted in modification order + + + -fs NAMENODE -mkdir /dir + -fs NAMENODE -touchz /dir/first + -fs NAMENODE -touchz /dir/second + -fs NAMENODE -touchz /dir/third + -fs NAMENODE -touchz /dir/fourth + -fs NAMENODE -touchz /dir/fifth + -fs NAMENODE -ls -t /dir + + + -fs NAMENODE -rm -r /dir + + + + RegexpAcrossOutputComparator + Found 5 items +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/fifth +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( 
)*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/fourth +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/third +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/second +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/first + + + + + + ls: files sorted in reverse modification order + + + -fs NAMENODE -mkdir /dir + -fs NAMENODE -touchz /dir/first + -fs NAMENODE -touchz /dir/second + -fs NAMENODE -touchz /dir/third + -fs NAMENODE -touchz /dir/fourth + -fs NAMENODE -touchz /dir/fifth + -fs NAMENODE -ls -t -r /dir + + + -fs NAMENODE -rm -r /dir + + + + RegexpAcrossOutputComparator + Found 5 items +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/first +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/second +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/third +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/fourth +-rw-r--r--( )*1( )*[a-zA-z0-9]*( )*supergroup( )*0( )*[0-9]{4,}-[0-9]{2,}-[0-9]{2,} [0-9]{2,}:[0-9]{2,}( )*/dir/fifth + + + + rm: Test for quoted globbing false