From c060d60a4016afe71ba7650b92733c3089ba973c Mon Sep 17 00:00:00 2001 From: Allen Wittenauer Date: Thu, 15 Jan 2015 11:56:16 -0800 Subject: [PATCH] HADOOP-8989. hadoop fs -find feature (Jonathan Allen via aw) --- .../hadoop-common/CHANGES.txt | 2 + .../org/apache/hadoop/fs/shell/Command.java | 28 ++- .../hadoop/fs/shell/CommandFactory.java | 1 + .../org/apache/hadoop/fs/shell/FsCommand.java | 2 + .../src/site/apt/FileSystemShell.apt.vm | 43 ++++ .../src/test/resources/testConf.xml | 44 ++++ .../src/test/resources/testHDFSConf.xml | 223 ++++++++++++++++++ 7 files changed, 342 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 9bf7b11edd8..9cc3b8d695f 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -23,6 +23,8 @@ Release 2.7.0 - UNRELEASED Mike Liddell, Chuan Liu, Lengning Liu, Ivan Mitic, Michael Rys, Alexander Stojanovich, Brian Swan, and Min Wei via cnauroth) + HADOOP-8989. hadoop fs -find feature (Jonathan Allen via aw) + IMPROVEMENTS HADOOP-11483. 
HardLink.java should use the jdk7 createLink method (aajisaka) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java index 47a1dc22774..fff07aa95c0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/Command.java @@ -64,6 +64,8 @@ abstract public class Command extends Configured { public PrintStream out = System.out; /** allows stderr to be captured if necessary */ public PrintStream err = System.err; + /** allows the command factory to be used if necessary */ + private CommandFactory commandFactory = null; /** Constructor */ protected Command() { @@ -120,6 +122,15 @@ abstract public class Command extends Configured { return exitCode; } + /** sets the command factory for later use */ + public void setCommandFactory(CommandFactory factory) { + this.commandFactory = factory; + } + /** retrieves the command factory */ + protected CommandFactory getCommandFactory() { + return this.commandFactory; + } + /** * Invokes the command handler. The default behavior is to process options, * expand arguments, and then process each argument. @@ -304,7 +315,7 @@ abstract public class Command extends Configured { for (PathData item : items) { try { processPath(item); - if (recursive && item.stat.isDirectory()) { + if (recursive && isPathRecursable(item)) { recursePath(item); } postProcessPath(item); @@ -314,6 +325,21 @@ abstract public class Command extends Configured { } } + /** + * Determines whether a {@link PathData} item is recursable. Default + * implementation is to recurse directories but can be overridden to recurse + * through symbolic links. 
+ * + * @param item + * a {@link PathData} object + * @return true if the item is recursable, false otherwise + * @throws IOException + * if anything goes wrong in the user-implementation + */ + protected boolean isPathRecursable(PathData item) throws IOException { + return item.stat.isDirectory(); + } + /** * Hook for commands to implement an operation to be applied on each * path for the command. Note implementation of this method is optional diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandFactory.java index dec83738118..9b128cfad58 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandFactory.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/CommandFactory.java @@ -124,6 +124,7 @@ public class CommandFactory extends Configured { if (cmdClass != null) { instance = ReflectionUtils.newInstance(cmdClass, conf); instance.setName(cmdName); + instance.setCommandFactory(this); } } return instance; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java index 3372809022e..cc8fbb4f2f1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/shell/FsCommand.java @@ -25,6 +25,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FsShellPermissions; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.shell.find.Find; /** * Base class for all "hadoop fs" commands @@ -48,6 +49,7 @@ abstract public class FsCommand extends Command { factory.registerCommands(Count.class); 
factory.registerCommands(Delete.class); factory.registerCommands(Display.class); + factory.registerCommands(Find.class); factory.registerCommands(FsShellPermissions.class); factory.registerCommands(FsUsage.class); factory.registerCommands(Ls.class); diff --git a/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm index c43c5179772..c8b4b54c97e 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/FileSystemShell.apt.vm @@ -232,6 +232,49 @@ expunge Empty the Trash. Refer to the {{{../hadoop-hdfs/HdfsDesign.html} HDFS Architecture Guide}} for more information on the Trash feature. +find + + Usage: <<<hadoop fs -find <path> ... <expression> ... >>> + + Finds all files that match the specified expression and applies selected + actions to them. If no <path> is specified then defaults to the current + working directory. If no expression is specified then defaults to -print. + + The following primary expressions are recognised: + + * -name pattern \ + -iname pattern + + Evaluates as true if the basename of the file matches the pattern using + standard file system globbing. If -iname is used then the match is case + insensitive. + + * -print \ + -print0 + + Always evaluates to true. Causes the current pathname to be written to + standard output followed by a newline. If the -print0 expression is used + then an ASCII NULL character is appended rather than a newline. + + The following operators are recognised: + + * expression -a expression \ + expression -and expression \ + expression expression + + Logical AND operator for joining two expressions. Returns true if both + child expressions return true. Implied by the juxtaposition of two + expressions and so does not need to be explicitly specified. The second + expression will not be applied if the first fails. + + Example: + + <<<hadoop fs -find / -name test -print>>> + + Exit Code: + + Returns 0 on success and -1 on error.
+ get Usage: << >>> diff --git a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml index f93399cedb7..5c667e11cd4 100644 --- a/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml +++ b/hadoop-common-project/hadoop-common/src/test/resources/testConf.xml @@ -979,6 +979,50 @@ + + help: help for find + + -help find + + + + + + RegexpAcrossOutputComparator + -find <path> \.\.\. <expression> \.\.\. : + Finds all files that match the specified expression and + applies selected actions to them\. If no <path> is specified + then defaults to the current working directory\. If no + expression is specified then defaults to -print\. + + The following primary expressions are recognised: + -name pattern + -iname pattern + Evaluates as true if the basename of the file matches the + pattern using standard file system globbing\. + If -iname is used then the match is case insensitive\. + + -print + -print0 + Always evaluates to true. Causes the current pathname to be + written to standard output followed by a newline. If the -print0 + expression is used then an ASCII NULL character is appended rather + than a newline. + + The following operators are recognised: + expression -a expression + expression -and expression + expression expression + Logical AND operator for joining two expressions\. Returns + true if both child expressions return true\. Implied by the + juxtaposition of two expressions and so does not need to be + explicitly specified\. The second expression will not be + applied if the first fails\. 
+ + + + + help: help for help diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml index 8f894cb7f43..aef15dacb9c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testHDFSConf.xml @@ -16841,5 +16841,228 @@ + + + + find: default expression + + -fs NAMENODE -mkdir /donotfind + -fs NAMENODE -mkdir donotfind + -fs NAMENODE -mkdir /findtest + -fs NAMENODE -mkdir /findtest/item1 + -fs NAMENODE -mkdir /findtest/item1/item1a + -fs NAMENODE -touchz /findtest/item1/item1a/item1aa + -fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b + -fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2 + -fs NAMENODE -mkdir /findtest/item3 + -fs NAMENODE -mkdir /findtest/item4 + -fs NAMENODE -mkdir /findtest/item4/item4a + -fs NAMENODE -put CLITEST_DATA/data120bytes /findtest/item4/item4b + -fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5 + -fs NAMENODE -find /findtest + + + -fs NAMENODE -rm -r /donotfind + -fs NAMENODE -rm -r donotfind + -fs NAMENODE -rm -r /findtest + + + + RegexpAcrossOutputComparator + ^/findtest +/findtest/item1 +/findtest/item1/item1a +/findtest/item1/item1a/item1aa +/findtest/item1/item1b +/findtest/item2 +/findtest/item3 +/findtest/item4 +/findtest/item4/item4a +/findtest/item4/item4b +/findtest/item5 +$ + + + + + find: -print + + -fs NAMENODE -mkdir /donotfind + -fs NAMENODE -mkdir donotfind + -fs NAMENODE -mkdir /findtest + -fs NAMENODE -mkdir /findtest/item1 + -fs NAMENODE -mkdir /findtest/item1/item1a + -fs NAMENODE -touchz /findtest/item1/item1a/item1aa + -fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b + -fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2 + -fs NAMENODE -mkdir /findtest/item3 + -fs NAMENODE -mkdir /findtest/item4 + -fs NAMENODE -mkdir /findtest/item4/item4a + -fs NAMENODE -put CLITEST_DATA/data120bytes 
/findtest/item4/item4b + -fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5 + -fs NAMENODE -find /findtest -print + + + -fs NAMENODE -rm -r /donotfind + -fs NAMENODE -rm -r donotfind + -fs NAMENODE -rm -r /findtest + + + + RegexpAcrossOutputComparator + ^/findtest +/findtest/item1 +/findtest/item1/item1a +/findtest/item1/item1a/item1aa +/findtest/item1/item1b +/findtest/item2 +/findtest/item3 +/findtest/item4 +/findtest/item4/item4a +/findtest/item4/item4b +/findtest/item5 +$ + + + + + find: -print (relative path) + + -fs NAMENODE -mkdir /donotfind + -fs NAMENODE -mkdir -p donotfind + -fs NAMENODE -mkdir -p findtest + -fs NAMENODE -mkdir -p findtest/item1 + -fs NAMENODE -mkdir -p findtest/item1/item1a + -fs NAMENODE -touchz findtest/item1/item1a/item1aa + -fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item1/item1b + -fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item2 + -fs NAMENODE -mkdir -p findtest/item3 + -fs NAMENODE -mkdir -p findtest/item4 + -fs NAMENODE -mkdir -p findtest/item4/item4a + -fs NAMENODE -put CLITEST_DATA/data120bytes findtest/item4/item4b + -fs NAMENODE -put CLITEST_DATA/data1k findtest/item5 + -fs NAMENODE -find findtest -print + + + -fs NAMENODE -rm -r /donotfind + -fs NAMENODE -rm -r donotfind + -fs NAMENODE -rm -r findtest + + + + RegexpAcrossOutputComparator + ^findtest +findtest/item1 +findtest/item1/item1a +findtest/item1/item1a/item1aa +findtest/item1/item1b +findtest/item2 +findtest/item3 +findtest/item4 +findtest/item4/item4a +findtest/item4/item4b +findtest/item5 +$ + + + + + find: -print (cwd) + + -fs NAMENODE -mkdir /donotfind + -fs NAMENODE -mkdir findtest + -fs NAMENODE -mkdir findtest/item1 + -fs NAMENODE -mkdir findtest/item1/item1a + -fs NAMENODE -touchz findtest/item1/item1a/item1aa + -fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item1/item1b + -fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item2 + -fs NAMENODE -mkdir findtest/item3 + -fs NAMENODE -mkdir findtest/item4 + -fs NAMENODE -mkdir 
findtest/item4/item4a + -fs NAMENODE -put CLITEST_DATA/data120bytes findtest/item4/item4b + -fs NAMENODE -put CLITEST_DATA/data1k findtest/item5 + -fs NAMENODE -find -print + + + -fs NAMENODE -rm -r findtest + -fs NAMENODE -rm -r /donotfind + + + + RegexpAcrossOutputComparator + ^. +findtest +findtest/item1 +findtest/item1/item1a +findtest/item1/item1a/item1aa +findtest/item1/item1b +findtest/item2 +findtest/item3 +findtest/item4 +findtest/item4/item4a +findtest/item4/item4b +findtest/item5 +$ + + + + + find: -name + + -fs NAMENODE -mkdir /findtest + -fs NAMENODE -mkdir /findtest/item1 + -fs NAMENODE -mkdir /findtest/item1/item1a + -fs NAMENODE -touchz /findtest/item1/item1a/item1aa + -fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b + -fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2 + -fs NAMENODE -mkdir /findtest/item3 + -fs NAMENODE -mkdir /findtest/item4 + -fs NAMENODE -mkdir /findtest/item4/item4a + -fs NAMENODE -put CLITEST_DATA/data120bytes /findtest/item4/item4b + -fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5 + -fs NAMENODE -find /findtest -name item*a + + + -fs NAMENODE -rm -r /findtest + + + + RegexpAcrossOutputComparator + ^/findtest/item1/item1a +/findtest/item1/item1a/item1aa +/findtest/item4/item4a +$ + + + + + find: -iname + + -fs NAMENODE -mkdir /findtest + -fs NAMENODE -mkdir /findtest/item1 + -fs NAMENODE -mkdir /findtest/item1/item1a + -fs NAMENODE -touchz /findtest/item1/item1a/item1aa + -fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b + -fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2 + -fs NAMENODE -mkdir /findtest/item3 + -fs NAMENODE -mkdir /findtest/item4 + -fs NAMENODE -mkdir /findtest/item4/item4a + -fs NAMENODE -put CLITEST_DATA/data120bytes /findtest/item4/item4b + -fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5 + -fs NAMENODE -find /findtest -iname ITEM*a + + + -fs NAMENODE -rm -r /findtest + + + + RegexpAcrossOutputComparator + ^/findtest/item1/item1a 
+/findtest/item1/item1a/item1aa +/findtest/item4/item4a +$ + + +