HADOOP-11201. Hadoop Archives should support globs resolving to files. Contributed by Gera Shegalov.

This commit is contained in:
cnauroth 2014-11-18 17:05:48 -08:00
parent 9e81be0114
commit 79301e80d7
3 changed files with 75 additions and 22 deletions
hadoop-common-project/hadoop-common
hadoop-tools/hadoop-archives/src
main/java/org/apache/hadoop/tools
test/java/org/apache/hadoop/tools

View File

@ -452,6 +452,9 @@ Release 2.7.0 - UNRELEASED
HADOOP-11312. Fix unit tests to not use uppercase key names. (wang)
HADOOP-11201. Hadoop Archives should support globs resolving to files.
(Gera Shegalov via cnauroth)
Release 2.6.0 - 2014-11-18
INCOMPATIBLE CHANGES

View File

@ -101,7 +101,7 @@ public class HadoopArchives implements Tool {
short repl = 10;
private static final String usage = "archive"
+ " -archiveName NAME -p <parent path> [-r <replication factor>]" +
+ " -archiveName <NAME>.har -p <parent path> [-r <replication factor>]" +
"<src>* <dest>" +
"\n";
@ -348,15 +348,10 @@ public class HadoopArchives implements Tool {
*/
private void writeTopLevelDirs(SequenceFile.Writer srcWriter,
List<Path> paths, Path parentPath) throws IOException {
//add all the directories
List<Path> justDirs = new ArrayList<Path>();
// extract paths from absolute URI's
List<Path> justPaths = new ArrayList<Path>();
for (Path p: paths) {
if (!p.getFileSystem(getConf()).isFile(p)) {
justDirs.add(new Path(p.toUri().getPath()));
}
else {
justDirs.add(new Path(p.getParent().toUri().getPath()));
}
justPaths.add(new Path(p.toUri().getPath()));
}
/* find all the common parents of paths that are valid archive
* paths. The below is done so that we do not add a common path
@ -372,7 +367,7 @@ public class HadoopArchives implements Tool {
Path root = new Path(Path.SEPARATOR);
for (int i = parentPath.depth(); i < deepest.depth(); i++) {
List<Path> parents = new ArrayList<Path>();
for (Path p: justDirs) {
for (Path p: justPaths) {
if (p.compareTo(root) == 0){
//do nothing
}
@ -392,7 +387,7 @@ public class HadoopArchives implements Tool {
}
}
}
justDirs = parents;
justPaths = parents;
}
Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet();
for (Map.Entry<String, HashSet<String>> entry : keyVals) {

View File

@ -203,9 +203,58 @@ public class TestHadoopArchives {
Assert.assertEquals(originalPaths, harPaths);
}
private static List<String> lsr(final FsShell shell, String dir)
throws Exception {
System.out.println("lsr root=" + dir);
/**
 * Archives exactly one file and checks that a recursive listing of the
 * resulting archive equals the recursive listing of the source directory.
 */
@Test
public void testSingleFile() throws Exception {
  final String onlyFile = "a";
  final Path parentDir = new Path(inputPath, "dir1");
  fs.mkdirs(parentDir);
  createFile(inputPath, fs, parentDir.getName(), onlyFile);

  // Capture the listing of the source directory before archiving.
  final FsShell fsShell = new FsShell(conf);
  final List<String> expected = lsr(fsShell, parentDir.toString());
  System.out.println("originalPaths: " + expected);

  // Build the archive with dir1 as the parent path and the file as the
  // only source argument.
  final String harRoot = makeArchive(parentDir, onlyFile);

  // The archive listing has to match the source listing.
  final List<String> actual = lsr(fsShell, harRoot);
  Assert.assertEquals(expected, actual);
}
/**
 * Archives the files selected by a glob that resolves to files in two
 * sibling directories, then checks that listing the glob inside the archive
 * equals listing the same glob on the source tree.
 */
@Test
public void testGlobFiles() throws Exception {
  final String matchedName = "a";
  final String globPattern = "dir{1,2}/a";
  final Path firstDir = new Path(inputPath, "dir1");
  final Path secondDir = new Path(inputPath, "dir2");

  // NOTE(review): only dir1 is created explicitly; dir2 presumably comes
  // into existence when createFile writes into it -- confirm.
  fs.mkdirs(firstDir);
  createFile(inputPath, fs, firstDir.getName(), matchedName);
  createFile(inputPath, fs, secondDir.getName(), matchedName);
  createFile(inputPath, fs, firstDir.getName(), "b"); // not part of result

  // Capture the listing of the glob on the source tree before archiving.
  final FsShell fsShell = new FsShell(conf);
  final String sourceRoot = inputPath.toString();
  final List<String> expected =
      lsr(fsShell, sourceRoot, sourceRoot + "/" + globPattern);
  System.out.println("originalPaths: " + expected);

  // Build the archive from the glob, relative to the input path.
  final String harRoot = makeArchive(inputPath, globPattern);

  // The same glob evaluated inside the archive must list the same paths.
  final List<String> actual =
      lsr(fsShell, harRoot, harRoot + "/" + globPattern);
  Assert.assertEquals(expected, actual);
}
/**
 * Convenience overload: lists {@code rootDir} itself by delegating to the
 * three-argument form with no glob.
 */
private static List<String> lsr(final FsShell shell, String rootDir)
    throws Exception {
  final String noGlob = null;
  return lsr(shell, rootDir, noGlob);
}
private static List<String> lsr(final FsShell shell, String rootDir,
String glob) throws Exception {
final String dir = glob == null ? rootDir : glob;
System.out.println("lsr root=" + rootDir);
final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
final PrintStream out = new PrintStream(bytes);
final PrintStream oldOut = System.out;
@ -222,9 +271,9 @@ public class TestHadoopArchives {
System.setErr(oldErr);
}
System.out.println("lsr results:\n" + results);
String dirname = dir;
if (dir.lastIndexOf(Path.SEPARATOR) != -1) {
dirname = dir.substring(dir.lastIndexOf(Path.SEPARATOR));
String dirname = rootDir;
if (rootDir.lastIndexOf(Path.SEPARATOR) != -1) {
dirname = rootDir.substring(rootDir.lastIndexOf(Path.SEPARATOR));
}
final List<String> paths = new ArrayList<String>();
@ -621,13 +670,19 @@ public class TestHadoopArchives {
return bb;
}
/**
 * Convenience overload: archives relative to {@code inputPath}, passing no
 * glob to the two-argument form.
 */
private String makeArchive() throws Exception {
  final String noGlob = null;
  return makeArchive(inputPath, noGlob);
}
/*
* Run the HadoopArchives tool to create an archive on the
* given file system.
*/
private String makeArchive() throws Exception {
final String inputPathStr = inputPath.toUri().getPath();
System.out.println("inputPathStr = " + inputPathStr);
private String makeArchive(Path parentPath, String relGlob) throws Exception {
final String parentPathStr = parentPath.toUri().getPath();
final String relPathGlob = relGlob == null ? "*" : relGlob;
System.out.println("parentPathStr = " + parentPathStr);
final URI uri = fs.getUri();
final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
@ -635,8 +690,8 @@ public class TestHadoopArchives {
final String harName = "foo.har";
final String fullHarPathStr = prefix + harName;
final String[] args = { "-archiveName", harName, "-p", inputPathStr, "*",
archivePath.toString() };
final String[] args = { "-archiveName", harName, "-p", parentPathStr,
relPathGlob, archivePath.toString() };
System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
HADOOP_ARCHIVES_JAR);
final HadoopArchives har = new HadoopArchives(conf);