HADOOP-8989. hadoop fs -find feature (Jonathan Allen via aw)

This commit is contained in:
Allen Wittenauer 2015-01-15 11:56:16 -08:00
parent 24a8b8e5a7
commit c060d60a40
7 changed files with 342 additions and 1 deletions

View File

@ -23,6 +23,8 @@ Release 2.7.0 - UNRELEASED
Mike Liddell, Chuan Liu, Lengning Liu, Ivan Mitic, Michael Rys, Mike Liddell, Chuan Liu, Lengning Liu, Ivan Mitic, Michael Rys,
Alexander Stojanovich, Brian Swan, and Min Wei via cnauroth) Alexander Stojanovich, Brian Swan, and Min Wei via cnauroth)
HADOOP-8989. hadoop fs -find feature (Jonathan Allen via aw)
IMPROVEMENTS IMPROVEMENTS
HADOOP-11483. HardLink.java should use the jdk7 createLink method (aajisaka) HADOOP-11483. HardLink.java should use the jdk7 createLink method (aajisaka)

View File

@ -64,6 +64,8 @@ abstract public class Command extends Configured {
public PrintStream out = System.out; public PrintStream out = System.out;
/** allows stderr to be captured if necessary */ /** allows stderr to be captured if necessary */
public PrintStream err = System.err; public PrintStream err = System.err;
/** allows the command factory to be used if necessary */
private CommandFactory commandFactory = null;
/** Constructor */ /** Constructor */
protected Command() { protected Command() {
@ -120,6 +122,15 @@ abstract public class Command extends Configured {
return exitCode; return exitCode;
} }
/**
 * Stores the command factory on this command for later use.
 *
 * @param factory
 *          the {@link CommandFactory} to remember
 */
public void setCommandFactory(CommandFactory factory) {
  commandFactory = factory;
}
/**
 * Retrieves the command factory previously stored on this command.
 *
 * @return the stored {@link CommandFactory}; the field is initialised to
 *         null, so null is returned if no factory has been set
 */
protected CommandFactory getCommandFactory() {
  return commandFactory;
}
/** /**
* Invokes the command handler. The default behavior is to process options, * Invokes the command handler. The default behavior is to process options,
* expand arguments, and then process each argument. * expand arguments, and then process each argument.
@ -304,7 +315,7 @@ abstract public class Command extends Configured {
for (PathData item : items) { for (PathData item : items) {
try { try {
processPath(item); processPath(item);
if (recursive && item.stat.isDirectory()) { if (recursive && isPathRecursable(item)) {
recursePath(item); recursePath(item);
} }
postProcessPath(item); postProcessPath(item);
@ -314,6 +325,21 @@ abstract public class Command extends Configured {
} }
} }
/**
 * Decides whether recursion should descend into the given {@link PathData}
 * item. By default only directories are recursed into; subclasses may
 * override this, for example to recurse through symbolic links.
 *
 * @param item
 *          the {@link PathData} object being considered
 * @return true if the item should be recursed into, false otherwise
 * @throws IOException
 *           if an overriding implementation encounters a failure
 */
protected boolean isPathRecursable(PathData item) throws IOException {
  final boolean directory = item.stat.isDirectory();
  return directory;
}
/** /**
* Hook for commands to implement an operation to be applied on each * Hook for commands to implement an operation to be applied on each
* path for the command. Note implementation of this method is optional * path for the command. Note implementation of this method is optional

View File

@ -124,6 +124,7 @@ public class CommandFactory extends Configured {
if (cmdClass != null) { if (cmdClass != null) {
instance = ReflectionUtils.newInstance(cmdClass, conf); instance = ReflectionUtils.newInstance(cmdClass, conf);
instance.setName(cmdName); instance.setName(cmdName);
instance.setCommandFactory(this);
} }
} }
return instance; return instance;

View File

@ -25,6 +25,7 @@ import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FsShellPermissions; import org.apache.hadoop.fs.FsShellPermissions;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.shell.find.Find;
/** /**
* Base class for all "hadoop fs" commands * Base class for all "hadoop fs" commands
@ -48,6 +49,7 @@ abstract public class FsCommand extends Command {
factory.registerCommands(Count.class); factory.registerCommands(Count.class);
factory.registerCommands(Delete.class); factory.registerCommands(Delete.class);
factory.registerCommands(Display.class); factory.registerCommands(Display.class);
factory.registerCommands(Find.class);
factory.registerCommands(FsShellPermissions.class); factory.registerCommands(FsShellPermissions.class);
factory.registerCommands(FsUsage.class); factory.registerCommands(FsUsage.class);
factory.registerCommands(Ls.class); factory.registerCommands(Ls.class);

View File

@ -232,6 +232,49 @@ expunge
Empty the Trash. Refer to the {{{../hadoop-hdfs/HdfsDesign.html} Empty the Trash. Refer to the {{{../hadoop-hdfs/HdfsDesign.html}
HDFS Architecture Guide}} for more information on the Trash feature. HDFS Architecture Guide}} for more information on the Trash feature.
find
Usage: <<<hdfs dfs -find <path> ... <expression> ... >>>
Finds all files that match the specified expression and applies selected
actions to them. If no <path> is specified then defaults to the current
working directory. If no expression is specified then defaults to -print.
The following primary expressions are recognised:
* -name pattern \
-iname pattern
Evaluates as true if the basename of the file matches the pattern using
standard file system globbing. If -iname is used then the match is case
insensitive.
* -print \
-print0
Always evaluates to true. Causes the current pathname to be written to
standard output followed by a newline. If the -print0 expression is used
then an ASCII NULL character is appended rather than a newline.
The following operators are recognised:
* expression -a expression \
expression -and expression \
expression expression
Logical AND operator for joining two expressions. Returns true if both
child expressions return true. Implied by the juxtaposition of two
expressions and so does not need to be explicitly specified. The second
expression will not be applied if the first fails.
Example:
<<<hdfs dfs -find / -name test -print>>>
Exit Code:
Returns 0 on success and -1 on error.
get get
Usage: <<<hdfs dfs -get [-ignorecrc] [-crc] <src> <localdst> >>> Usage: <<<hdfs dfs -get [-ignorecrc] [-crc] <src> <localdst> >>>

View File

@ -979,6 +979,50 @@
</comparators> </comparators>
</test> </test>
<test> <!-- TESTED -->
<description>help: help for find</description>
<test-commands>
<command>-help find</command>
</test-commands>
<cleanup-commands>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>-find &lt;path&gt; \.\.\. &lt;expression&gt; \.\.\. :
Finds all files that match the specified expression and
applies selected actions to them\. If no &lt;path&gt; is specified
then defaults to the current working directory\. If no
expression is specified then defaults to -print\.
The following primary expressions are recognised:
-name pattern
-iname pattern
Evaluates as true if the basename of the file matches the
pattern using standard file system globbing\.
If -iname is used then the match is case insensitive\.
-print
-print0
Always evaluates to true. Causes the current pathname to be
written to standard output followed by a newline. If the -print0
expression is used then an ASCII NULL character is appended rather
than a newline.
The following operators are recognised:
expression -a expression
expression -and expression
expression expression
Logical AND operator for joining two expressions\. Returns
true if both child expressions return true\. Implied by the
juxtaposition of two expressions and so does not need to be
explicitly specified\. The second expression will not be
applied if the first fails\.
</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED --> <test> <!-- TESTED -->
<description>help: help for help</description> <description>help: help for help</description>
<test-commands> <test-commands>

View File

@ -16841,5 +16841,228 @@
</comparator> </comparator>
</comparators> </comparators>
</test> </test>
<!-- Tests for find -->
<test> <!-- TESTED -->
<description>find: default expression</description>
<test-commands>
<command>-fs NAMENODE -mkdir /donotfind</command>
<command>-fs NAMENODE -mkdir donotfind</command>
<command>-fs NAMENODE -mkdir /findtest</command>
<command>-fs NAMENODE -mkdir /findtest/item1</command>
<command>-fs NAMENODE -mkdir /findtest/item1/item1a</command>
<command>-fs NAMENODE -touchz /findtest/item1/item1a/item1aa</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2</command>
<command>-fs NAMENODE -mkdir /findtest/item3</command>
<command>-fs NAMENODE -mkdir /findtest/item4</command>
<command>-fs NAMENODE -mkdir /findtest/item4/item4a</command>
<command>-fs NAMENODE -put CLITEST_DATA/data120bytes /findtest/item4/item4b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5</command>
<command>-fs NAMENODE -find /findtest</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r /donotfind</command>
<command>-fs NAMENODE -rm -r donotfind</command>
<command>-fs NAMENODE -rm -r /findtest</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>^/findtest
/findtest/item1
/findtest/item1/item1a
/findtest/item1/item1a/item1aa
/findtest/item1/item1b
/findtest/item2
/findtest/item3
/findtest/item4
/findtest/item4/item4a
/findtest/item4/item4b
/findtest/item5
$</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>find: -print </description>
<test-commands>
<command>-fs NAMENODE -mkdir /donotfind</command>
<command>-fs NAMENODE -mkdir donotfind</command>
<command>-fs NAMENODE -mkdir /findtest</command>
<command>-fs NAMENODE -mkdir /findtest/item1</command>
<command>-fs NAMENODE -mkdir /findtest/item1/item1a</command>
<command>-fs NAMENODE -touchz /findtest/item1/item1a/item1aa</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2</command>
<command>-fs NAMENODE -mkdir /findtest/item3</command>
<command>-fs NAMENODE -mkdir /findtest/item4</command>
<command>-fs NAMENODE -mkdir /findtest/item4/item4a</command>
<command>-fs NAMENODE -put CLITEST_DATA/data120bytes /findtest/item4/item4b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5</command>
<command>-fs NAMENODE -find /findtest -print</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r /donotfind</command>
<command>-fs NAMENODE -rm -r donotfind</command>
<command>-fs NAMENODE -rm -r /findtest</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>^/findtest
/findtest/item1
/findtest/item1/item1a
/findtest/item1/item1a/item1aa
/findtest/item1/item1b
/findtest/item2
/findtest/item3
/findtest/item4
/findtest/item4/item4a
/findtest/item4/item4b
/findtest/item5
$</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>find: -print (relative path) </description>
<test-commands>
<command>-fs NAMENODE -mkdir /donotfind</command>
<command>-fs NAMENODE -mkdir -p donotfind</command>
<command>-fs NAMENODE -mkdir -p findtest</command>
<command>-fs NAMENODE -mkdir -p findtest/item1</command>
<command>-fs NAMENODE -mkdir -p findtest/item1/item1a</command>
<command>-fs NAMENODE -touchz findtest/item1/item1a/item1aa</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item1/item1b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item2</command>
<command>-fs NAMENODE -mkdir -p findtest/item3</command>
<command>-fs NAMENODE -mkdir -p findtest/item4</command>
<command>-fs NAMENODE -mkdir -p findtest/item4/item4a</command>
<command>-fs NAMENODE -put CLITEST_DATA/data120bytes findtest/item4/item4b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data1k findtest/item5</command>
<command>-fs NAMENODE -find findtest -print</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r /donotfind</command>
<command>-fs NAMENODE -rm -r donotfind</command>
<command>-fs NAMENODE -rm -r findtest</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>^findtest
findtest/item1
findtest/item1/item1a
findtest/item1/item1a/item1aa
findtest/item1/item1b
findtest/item2
findtest/item3
findtest/item4
findtest/item4/item4a
findtest/item4/item4b
findtest/item5
$</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>find: -print (cwd) </description>
<test-commands>
<command>-fs NAMENODE -mkdir /donotfind</command>
<command>-fs NAMENODE -mkdir findtest</command>
<command>-fs NAMENODE -mkdir findtest/item1</command>
<command>-fs NAMENODE -mkdir findtest/item1/item1a</command>
<command>-fs NAMENODE -touchz findtest/item1/item1a/item1aa</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item1/item1b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes findtest/item2</command>
<command>-fs NAMENODE -mkdir findtest/item3</command>
<command>-fs NAMENODE -mkdir findtest/item4</command>
<command>-fs NAMENODE -mkdir findtest/item4/item4a</command>
<command>-fs NAMENODE -put CLITEST_DATA/data120bytes findtest/item4/item4b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data1k findtest/item5</command>
<command>-fs NAMENODE -find -print</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r findtest</command>
<command>-fs NAMENODE -rm -r /donotfind</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>^.
findtest
findtest/item1
findtest/item1/item1a
findtest/item1/item1a/item1aa
findtest/item1/item1b
findtest/item2
findtest/item3
findtest/item4
findtest/item4/item4a
findtest/item4/item4b
findtest/item5
$</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>find: -name </description>
<test-commands>
<command>-fs NAMENODE -mkdir /findtest</command>
<command>-fs NAMENODE -mkdir /findtest/item1</command>
<command>-fs NAMENODE -mkdir /findtest/item1/item1a</command>
<command>-fs NAMENODE -touchz /findtest/item1/item1a/item1aa</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2</command>
<command>-fs NAMENODE -mkdir /findtest/item3</command>
<command>-fs NAMENODE -mkdir /findtest/item4</command>
<command>-fs NAMENODE -mkdir /findtest/item4/item4a</command>
<command>-fs NAMENODE -put CLITEST_DATA/data120bytes /findtest/item4/item4b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5</command>
<command>-fs NAMENODE -find /findtest -name item*a</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r /findtest</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>^/findtest/item1/item1a
/findtest/item1/item1a/item1aa
/findtest/item4/item4a
$</expected-output>
</comparator>
</comparators>
</test>
<test> <!-- TESTED -->
<description>find: -iname </description>
<test-commands>
<command>-fs NAMENODE -mkdir /findtest</command>
<command>-fs NAMENODE -mkdir /findtest/item1</command>
<command>-fs NAMENODE -mkdir /findtest/item1/item1a</command>
<command>-fs NAMENODE -touchz /findtest/item1/item1a/item1aa</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item1/item1b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data60bytes /findtest/item2</command>
<command>-fs NAMENODE -mkdir /findtest/item3</command>
<command>-fs NAMENODE -mkdir /findtest/item4</command>
<command>-fs NAMENODE -mkdir /findtest/item4/item4a</command>
<command>-fs NAMENODE -put CLITEST_DATA/data120bytes /findtest/item4/item4b</command>
<command>-fs NAMENODE -put CLITEST_DATA/data1k /findtest/item5</command>
<command>-fs NAMENODE -find /findtest -iname ITEM*a</command>
</test-commands>
<cleanup-commands>
<command>-fs NAMENODE -rm -r /findtest</command>
</cleanup-commands>
<comparators>
<comparator>
<type>RegexpAcrossOutputComparator</type>
<expected-output>^/findtest/item1/item1a
/findtest/item1/item1a/item1aa
/findtest/item4/item4a
$</expected-output>
</comparator>
</comparators>
</test>
</tests> </tests>
</configuration> </configuration>