HADOOP-11201. Hadoop Archives should support globs resolving to files. Contributed by Gera Shegalov.
(cherry picked from commit 79301e80d7)
This commit is contained in:
parent
69eb56ad6d
commit
00a06e28d4
|
@ -96,6 +96,9 @@ Release 2.7.0 - UNRELEASED
|
|||
|
||||
HADOOP-11312. Fix unit tests to not use uppercase key names. (wang)
|
||||
|
||||
HADOOP-11201. Hadoop Archives should support globs resolving to files.
|
||||
(Gera Shegalov via cnauroth)
|
||||
|
||||
Release 2.6.0 - 2014-11-18
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -101,7 +101,7 @@ public class HadoopArchives implements Tool {
|
|||
short repl = 10;
|
||||
|
||||
private static final String usage = "archive"
|
||||
+ " -archiveName NAME -p <parent path> [-r <replication factor>]" +
|
||||
+ " -archiveName <NAME>.har -p <parent path> [-r <replication factor>]" +
|
||||
"<src>* <dest>" +
|
||||
"\n";
|
||||
|
||||
|
@ -348,15 +348,10 @@ public class HadoopArchives implements Tool {
|
|||
*/
|
||||
private void writeTopLevelDirs(SequenceFile.Writer srcWriter,
|
||||
List<Path> paths, Path parentPath) throws IOException {
|
||||
//add all the directories
|
||||
List<Path> justDirs = new ArrayList<Path>();
|
||||
// extract paths from absolute URI's
|
||||
List<Path> justPaths = new ArrayList<Path>();
|
||||
for (Path p: paths) {
|
||||
if (!p.getFileSystem(getConf()).isFile(p)) {
|
||||
justDirs.add(new Path(p.toUri().getPath()));
|
||||
}
|
||||
else {
|
||||
justDirs.add(new Path(p.getParent().toUri().getPath()));
|
||||
}
|
||||
justPaths.add(new Path(p.toUri().getPath()));
|
||||
}
|
||||
/* find all the common parents of paths that are valid archive
|
||||
* paths. The below is done so that we do not add a common path
|
||||
|
@ -372,7 +367,7 @@ public class HadoopArchives implements Tool {
|
|||
Path root = new Path(Path.SEPARATOR);
|
||||
for (int i = parentPath.depth(); i < deepest.depth(); i++) {
|
||||
List<Path> parents = new ArrayList<Path>();
|
||||
for (Path p: justDirs) {
|
||||
for (Path p: justPaths) {
|
||||
if (p.compareTo(root) == 0){
|
||||
//do nothing
|
||||
}
|
||||
|
@ -392,7 +387,7 @@ public class HadoopArchives implements Tool {
|
|||
}
|
||||
}
|
||||
}
|
||||
justDirs = parents;
|
||||
justPaths = parents;
|
||||
}
|
||||
Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet();
|
||||
for (Map.Entry<String, HashSet<String>> entry : keyVals) {
|
||||
|
|
|
@ -203,9 +203,58 @@ public class TestHadoopArchives {
|
|||
Assert.assertEquals(originalPaths, harPaths);
|
||||
}
|
||||
|
||||
private static List<String> lsr(final FsShell shell, String dir)
|
||||
throws Exception {
|
||||
System.out.println("lsr root=" + dir);
|
||||
@Test
|
||||
public void testSingleFile() throws Exception {
|
||||
final Path sub1 = new Path(inputPath, "dir1");
|
||||
fs.mkdirs(sub1);
|
||||
String singleFileName = "a";
|
||||
createFile(inputPath, fs, sub1.getName(), singleFileName);
|
||||
final FsShell shell = new FsShell(conf);
|
||||
|
||||
final List<String> originalPaths = lsr(shell, sub1.toString());
|
||||
System.out.println("originalPaths: " + originalPaths);
|
||||
|
||||
// make the archive:
|
||||
final String fullHarPathStr = makeArchive(sub1, singleFileName);
|
||||
|
||||
// compare results:
|
||||
final List<String> harPaths = lsr(shell, fullHarPathStr);
|
||||
Assert.assertEquals(originalPaths, harPaths);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGlobFiles() throws Exception {
|
||||
final Path sub1 = new Path(inputPath, "dir1");
|
||||
final Path sub2 = new Path(inputPath, "dir2");
|
||||
fs.mkdirs(sub1);
|
||||
String fileName = "a";
|
||||
createFile(inputPath, fs, sub1.getName(), fileName);
|
||||
createFile(inputPath, fs, sub2.getName(), fileName);
|
||||
createFile(inputPath, fs, sub1.getName(), "b"); // not part of result
|
||||
|
||||
final String glob = "dir{1,2}/a";
|
||||
final FsShell shell = new FsShell(conf);
|
||||
final List<String> originalPaths = lsr(shell, inputPath.toString(),
|
||||
inputPath + "/" + glob);
|
||||
System.out.println("originalPaths: " + originalPaths);
|
||||
|
||||
// make the archive:
|
||||
final String fullHarPathStr = makeArchive(inputPath, glob);
|
||||
|
||||
// compare results:
|
||||
final List<String> harPaths = lsr(shell, fullHarPathStr,
|
||||
fullHarPathStr + "/" + glob);
|
||||
Assert.assertEquals(originalPaths, harPaths);
|
||||
}
|
||||
|
||||
private static List<String> lsr(final FsShell shell, String rootDir) throws Exception {
|
||||
return lsr(shell, rootDir, null);
|
||||
}
|
||||
|
||||
private static List<String> lsr(final FsShell shell, String rootDir,
|
||||
String glob) throws Exception {
|
||||
final String dir = glob == null ? rootDir : glob;
|
||||
System.out.println("lsr root=" + rootDir);
|
||||
final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
|
||||
final PrintStream out = new PrintStream(bytes);
|
||||
final PrintStream oldOut = System.out;
|
||||
|
@ -222,9 +271,9 @@ public class TestHadoopArchives {
|
|||
System.setErr(oldErr);
|
||||
}
|
||||
System.out.println("lsr results:\n" + results);
|
||||
String dirname = dir;
|
||||
if (dir.lastIndexOf(Path.SEPARATOR) != -1) {
|
||||
dirname = dir.substring(dir.lastIndexOf(Path.SEPARATOR));
|
||||
String dirname = rootDir;
|
||||
if (rootDir.lastIndexOf(Path.SEPARATOR) != -1) {
|
||||
dirname = rootDir.substring(rootDir.lastIndexOf(Path.SEPARATOR));
|
||||
}
|
||||
|
||||
final List<String> paths = new ArrayList<String>();
|
||||
|
@ -621,13 +670,19 @@ public class TestHadoopArchives {
|
|||
return bb;
|
||||
}
|
||||
|
||||
|
||||
private String makeArchive() throws Exception {
|
||||
return makeArchive(inputPath, null);
|
||||
}
|
||||
|
||||
/*
|
||||
* Run the HadoopArchives tool to create an archive on the
|
||||
* given file system.
|
||||
*/
|
||||
private String makeArchive() throws Exception {
|
||||
final String inputPathStr = inputPath.toUri().getPath();
|
||||
System.out.println("inputPathStr = " + inputPathStr);
|
||||
private String makeArchive(Path parentPath, String relGlob) throws Exception {
|
||||
final String parentPathStr = parentPath.toUri().getPath();
|
||||
final String relPathGlob = relGlob == null ? "*" : relGlob;
|
||||
System.out.println("parentPathStr = " + parentPathStr);
|
||||
|
||||
final URI uri = fs.getUri();
|
||||
final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
|
||||
|
@ -635,8 +690,8 @@ public class TestHadoopArchives {
|
|||
|
||||
final String harName = "foo.har";
|
||||
final String fullHarPathStr = prefix + harName;
|
||||
final String[] args = { "-archiveName", harName, "-p", inputPathStr, "*",
|
||||
archivePath.toString() };
|
||||
final String[] args = { "-archiveName", harName, "-p", parentPathStr,
|
||||
relPathGlob, archivePath.toString() };
|
||||
System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
|
||||
HADOOP_ARCHIVES_JAR);
|
||||
final HadoopArchives har = new HadoopArchives(conf);
|
||||
|
|
Loading…
Reference in New Issue