From 00a06e28d46dc46553da7d3a9056df95c191825f Mon Sep 17 00:00:00 2001 From: cnauroth Date: Tue, 18 Nov 2014 17:05:48 -0800 Subject: [PATCH] HADOOP-11201. Hadoop Archives should support globs resolving to files. Contributed by Gera Shegalov. (cherry picked from commit 79301e80d7510f055c01a06970bb409607a4197c) --- .../hadoop-common/CHANGES.txt | 3 + .../apache/hadoop/tools/HadoopArchives.java | 17 ++-- .../hadoop/tools/TestHadoopArchives.java | 77 ++++++++++++++++--- 3 files changed, 75 insertions(+), 22 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index edb8d63b425..b8e9ff12194 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -96,6 +96,9 @@ Release 2.7.0 - UNRELEASED HADOOP-11312. Fix unit tests to not use uppercase key names. (wang) + HADOOP-11201. Hadoop Archives should support globs resolving to files. + (Gera Shegalov via cnauroth) + Release 2.6.0 - 2014-11-18 INCOMPATIBLE CHANGES diff --git a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java index e53576d7d8f..aa3027772d0 100644 --- a/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java +++ b/hadoop-tools/hadoop-archives/src/main/java/org/apache/hadoop/tools/HadoopArchives.java @@ -101,7 +101,7 @@ public class HadoopArchives implements Tool { short repl = 10; private static final String usage = "archive" - + " -archiveName NAME -p <parent path> [-r <replication factor>]" + + + " -archiveName <NAME>.har -p <parent path> [-r <replication factor>]" + "<src>* <dest>" + "\n"; @@ -348,15 +348,10 @@ public class HadoopArchives implements Tool { */ private void writeTopLevelDirs(SequenceFile.Writer srcWriter, List<Path> paths, Path parentPath) throws IOException { - //add all the directories - List<Path> justDirs = new ArrayList<Path>(); + // extract paths from absolute URI's + List<Path> justPaths = new ArrayList<Path>(); for 
(Path p: paths) { - if (!p.getFileSystem(getConf()).isFile(p)) { - justDirs.add(new Path(p.toUri().getPath())); - } - else { - justDirs.add(new Path(p.getParent().toUri().getPath())); - } + justPaths.add(new Path(p.toUri().getPath())); } /* find all the common parents of paths that are valid archive * paths. The below is done so that we do not add a common path @@ -372,7 +367,7 @@ public class HadoopArchives implements Tool { Path root = new Path(Path.SEPARATOR); for (int i = parentPath.depth(); i < deepest.depth(); i++) { List<Path> parents = new ArrayList<Path>(); - for (Path p: justDirs) { + for (Path p: justPaths) { if (p.compareTo(root) == 0){ //do nothing } @@ -392,7 +387,7 @@ public class HadoopArchives implements Tool { } } } - justDirs = parents; + justPaths = parents; } Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet(); for (Map.Entry<String, HashSet<String>> entry : keyVals) { diff --git a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java index e7eef3f9666..3fa59190bc4 100644 --- a/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java +++ b/hadoop-tools/hadoop-archives/src/test/java/org/apache/hadoop/tools/TestHadoopArchives.java @@ -203,9 +203,58 @@ public class TestHadoopArchives { Assert.assertEquals(originalPaths, harPaths); } - private static List<String> lsr(final FsShell shell, String dir) - throws Exception { - System.out.println("lsr root=" + dir); + @Test + public void testSingleFile() throws Exception { + final Path sub1 = new Path(inputPath, "dir1"); + fs.mkdirs(sub1); + String singleFileName = "a"; + createFile(inputPath, fs, sub1.getName(), singleFileName); + final FsShell shell = new FsShell(conf); + + final List<String> originalPaths = lsr(shell, sub1.toString()); + System.out.println("originalPaths: " + originalPaths); + + // make the archive: + final String fullHarPathStr = makeArchive(sub1, singleFileName); + + // compare results: + 
final List<String> harPaths = lsr(shell, fullHarPathStr); + Assert.assertEquals(originalPaths, harPaths); + } + + @Test + public void testGlobFiles() throws Exception { + final Path sub1 = new Path(inputPath, "dir1"); + final Path sub2 = new Path(inputPath, "dir2"); + fs.mkdirs(sub1); + String fileName = "a"; + createFile(inputPath, fs, sub1.getName(), fileName); + createFile(inputPath, fs, sub2.getName(), fileName); + createFile(inputPath, fs, sub1.getName(), "b"); // not part of result + + final String glob = "dir{1,2}/a"; + final FsShell shell = new FsShell(conf); + final List<String> originalPaths = lsr(shell, inputPath.toString(), + inputPath + "/" + glob); + System.out.println("originalPaths: " + originalPaths); + + // make the archive: + final String fullHarPathStr = makeArchive(inputPath, glob); + + // compare results: + final List<String> harPaths = lsr(shell, fullHarPathStr, + fullHarPathStr + "/" + glob); + Assert.assertEquals(originalPaths, harPaths); + } + + private static List<String> lsr(final FsShell shell, String rootDir) throws Exception { + return lsr(shell, rootDir, null); + } + + private static List<String> lsr(final FsShell shell, String rootDir, + String glob) throws Exception { + final String dir = glob == null ? 
rootDir : glob; + System.out.println("lsr root=" + rootDir); final ByteArrayOutputStream bytes = new ByteArrayOutputStream(); final PrintStream out = new PrintStream(bytes); final PrintStream oldOut = System.out; @@ -222,9 +271,9 @@ public class TestHadoopArchives { System.setErr(oldErr); } System.out.println("lsr results:\n" + results); - String dirname = dir; - if (dir.lastIndexOf(Path.SEPARATOR) != -1) { - dirname = dir.substring(dir.lastIndexOf(Path.SEPARATOR)); + String dirname = rootDir; + if (rootDir.lastIndexOf(Path.SEPARATOR) != -1) { + dirname = rootDir.substring(rootDir.lastIndexOf(Path.SEPARATOR)); } final List<String> paths = new ArrayList<String>(); @@ -621,13 +670,19 @@ public class TestHadoopArchives { return bb; } + + private String makeArchive() throws Exception { + return makeArchive(inputPath, null); + } + /* * Run the HadoopArchives tool to create an archive on the * given file system. */ - private String makeArchive() throws Exception { - final String inputPathStr = inputPath.toUri().getPath(); - System.out.println("inputPathStr = " + inputPathStr); + private String makeArchive(Path parentPath, String relGlob) throws Exception { + final String parentPathStr = parentPath.toUri().getPath(); + final String relPathGlob = relGlob == null ? "*" : relGlob; + System.out.println("parentPathStr = " + parentPathStr); final URI uri = fs.getUri(); final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort() @@ -635,8 +690,8 @@ public class TestHadoopArchives { final String harName = "foo.har"; final String fullHarPathStr = prefix + harName; - final String[] args = { "-archiveName", harName, "-p", inputPathStr, "*", - archivePath.toString() }; + final String[] args = { "-archiveName", harName, "-p", parentPathStr, + relPathGlob, archivePath.toString() }; System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH, HADOOP_ARCHIVES_JAR); final HadoopArchives har = new HadoopArchives(conf);