HADOOP-11201. Hadoop Archives should support globs resolving to files. Contributed by Gera Shegalov.

This commit is contained in:
cnauroth 2014-11-18 17:05:48 -08:00
parent 9e81be0114
commit 79301e80d7
3 changed files with 75 additions and 22 deletions

View File

@ -452,6 +452,9 @@ Release 2.7.0 - UNRELEASED
HADOOP-11312. Fix unit tests to not use uppercase key names. (wang) HADOOP-11312. Fix unit tests to not use uppercase key names. (wang)
HADOOP-11201. Hadoop Archives should support globs resolving to files.
(Gera Shegalov via cnauroth)
Release 2.6.0 - 2014-11-18 Release 2.6.0 - 2014-11-18
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -101,7 +101,7 @@ public class HadoopArchives implements Tool {
short repl = 10; short repl = 10;
private static final String usage = "archive" private static final String usage = "archive"
+ " -archiveName NAME -p <parent path> [-r <replication factor>]" + + " -archiveName <NAME>.har -p <parent path> [-r <replication factor>]" +
"<src>* <dest>" + "<src>* <dest>" +
"\n"; "\n";
@ -348,15 +348,10 @@ public class HadoopArchives implements Tool {
*/ */
private void writeTopLevelDirs(SequenceFile.Writer srcWriter, private void writeTopLevelDirs(SequenceFile.Writer srcWriter,
List<Path> paths, Path parentPath) throws IOException { List<Path> paths, Path parentPath) throws IOException {
//add all the directories // extract paths from absolute URI's
List<Path> justDirs = new ArrayList<Path>(); List<Path> justPaths = new ArrayList<Path>();
for (Path p: paths) { for (Path p: paths) {
if (!p.getFileSystem(getConf()).isFile(p)) { justPaths.add(new Path(p.toUri().getPath()));
justDirs.add(new Path(p.toUri().getPath()));
}
else {
justDirs.add(new Path(p.getParent().toUri().getPath()));
}
} }
/* find all the common parents of paths that are valid archive /* find all the common parents of paths that are valid archive
* paths. The below is done so that we do not add a common path * paths. The below is done so that we do not add a common path
@ -372,7 +367,7 @@ public class HadoopArchives implements Tool {
Path root = new Path(Path.SEPARATOR); Path root = new Path(Path.SEPARATOR);
for (int i = parentPath.depth(); i < deepest.depth(); i++) { for (int i = parentPath.depth(); i < deepest.depth(); i++) {
List<Path> parents = new ArrayList<Path>(); List<Path> parents = new ArrayList<Path>();
for (Path p: justDirs) { for (Path p: justPaths) {
if (p.compareTo(root) == 0){ if (p.compareTo(root) == 0){
//do nothing //do nothing
} }
@ -392,7 +387,7 @@ public class HadoopArchives implements Tool {
} }
} }
} }
justDirs = parents; justPaths = parents;
} }
Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet(); Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet();
for (Map.Entry<String, HashSet<String>> entry : keyVals) { for (Map.Entry<String, HashSet<String>> entry : keyVals) {

View File

@ -203,9 +203,58 @@ public class TestHadoopArchives {
Assert.assertEquals(originalPaths, harPaths); Assert.assertEquals(originalPaths, harPaths);
} }
private static List<String> lsr(final FsShell shell, String dir) @Test
throws Exception { public void testSingleFile() throws Exception {
System.out.println("lsr root=" + dir); final Path sub1 = new Path(inputPath, "dir1");
fs.mkdirs(sub1);
String singleFileName = "a";
createFile(inputPath, fs, sub1.getName(), singleFileName);
final FsShell shell = new FsShell(conf);
final List<String> originalPaths = lsr(shell, sub1.toString());
System.out.println("originalPaths: " + originalPaths);
// make the archive:
final String fullHarPathStr = makeArchive(sub1, singleFileName);
// compare results:
final List<String> harPaths = lsr(shell, fullHarPathStr);
Assert.assertEquals(originalPaths, harPaths);
}
/**
 * Archives only the paths selected by the glob {@code dir{1,2}/a} and checks
 * that listing the archive through that glob yields the same entries as
 * listing the input tree through it.
 */
@Test
public void testGlobFiles() throws Exception {
  final String matchedName = "a";
  final Path firstDir = new Path(inputPath, "dir1");
  final Path secondDir = new Path(inputPath, "dir2");
  fs.mkdirs(firstDir);
  // presumably createFile builds <inputPath>/<dir>/<name>; verify against the helper
  createFile(inputPath, fs, firstDir.getName(), matchedName);
  createFile(inputPath, fs, secondDir.getName(), matchedName);
  createFile(inputPath, fs, firstDir.getName(), "b"); // not part of result

  final String pattern = "dir{1,2}/a";
  final FsShell fsShell = new FsShell(conf);

  // expected listing: the input root filtered through the glob
  final String inputGlob = inputPath + "/" + pattern;
  final List<String> expected = lsr(fsShell, inputPath.toString(), inputGlob);
  System.out.println("originalPaths: " + expected);

  // build the archive from the same glob, relative to the input root
  final String harRoot = makeArchive(inputPath, pattern);

  // actual listing: the archive root filtered through the same glob
  final String harGlob = harRoot + "/" + pattern;
  final List<String> actual = lsr(fsShell, harRoot, harGlob);
  Assert.assertEquals(expected, actual);
}
/**
 * Lists {@code rootDir} itself, without a glob.
 *
 * Delegates to the three-argument overload with a null glob, which makes
 * that overload list {@code rootDir} directly.
 */
private static List<String> lsr(final FsShell shell, String rootDir)
    throws Exception {
  final String noGlob = null;
  return lsr(shell, rootDir, noGlob);
}
private static List<String> lsr(final FsShell shell, String rootDir,
String glob) throws Exception {
final String dir = glob == null ? rootDir : glob;
System.out.println("lsr root=" + rootDir);
final ByteArrayOutputStream bytes = new ByteArrayOutputStream(); final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
final PrintStream out = new PrintStream(bytes); final PrintStream out = new PrintStream(bytes);
final PrintStream oldOut = System.out; final PrintStream oldOut = System.out;
@ -222,9 +271,9 @@ public class TestHadoopArchives {
System.setErr(oldErr); System.setErr(oldErr);
} }
System.out.println("lsr results:\n" + results); System.out.println("lsr results:\n" + results);
String dirname = dir; String dirname = rootDir;
if (dir.lastIndexOf(Path.SEPARATOR) != -1) { if (rootDir.lastIndexOf(Path.SEPARATOR) != -1) {
dirname = dir.substring(dir.lastIndexOf(Path.SEPARATOR)); dirname = rootDir.substring(rootDir.lastIndexOf(Path.SEPARATOR));
} }
final List<String> paths = new ArrayList<String>(); final List<String> paths = new ArrayList<String>();
@ -621,13 +670,19 @@ public class TestHadoopArchives {
return bb; return bb;
} }
/**
 * Archives the whole input directory.
 *
 * Delegates to the two-argument overload with {@code inputPath} as parent
 * and a null glob, which that overload replaces with "*".
 */
private String makeArchive() throws Exception {
  final String defaultGlob = null;
  return makeArchive(inputPath, defaultGlob);
}
/* /*
* Run the HadoopArchives tool to create an archive on the * Run the HadoopArchives tool to create an archive on the
* given file system. * given file system.
*/ */
private String makeArchive() throws Exception { private String makeArchive(Path parentPath, String relGlob) throws Exception {
final String inputPathStr = inputPath.toUri().getPath(); final String parentPathStr = parentPath.toUri().getPath();
System.out.println("inputPathStr = " + inputPathStr); final String relPathGlob = relGlob == null ? "*" : relGlob;
System.out.println("parentPathStr = " + parentPathStr);
final URI uri = fs.getUri(); final URI uri = fs.getUri();
final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort() final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
@ -635,8 +690,8 @@ public class TestHadoopArchives {
final String harName = "foo.har"; final String harName = "foo.har";
final String fullHarPathStr = prefix + harName; final String fullHarPathStr = prefix + harName;
final String[] args = { "-archiveName", harName, "-p", inputPathStr, "*", final String[] args = { "-archiveName", harName, "-p", parentPathStr,
archivePath.toString() }; relPathGlob, archivePath.toString() };
System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH, System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
HADOOP_ARCHIVES_JAR); HADOOP_ARCHIVES_JAR);
final HadoopArchives har = new HadoopArchives(conf); final HadoopArchives har = new HadoopArchives(conf);