HADOOP-11201. Hadoop Archives should support globs resolving to files. Contributed by Gera Shegalov.
This commit is contained in:
parent
9e81be0114
commit
79301e80d7
|
@ -452,6 +452,9 @@ Release 2.7.0 - UNRELEASED
|
||||||
|
|
||||||
HADOOP-11312. Fix unit tests to not use uppercase key names. (wang)
|
HADOOP-11312. Fix unit tests to not use uppercase key names. (wang)
|
||||||
|
|
||||||
|
HADOOP-11201. Hadoop Archives should support globs resolving to files.
|
||||||
|
(Gera Shegalov via cnauroth)
|
||||||
|
|
||||||
Release 2.6.0 - 2014-11-18
|
Release 2.6.0 - 2014-11-18
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -101,7 +101,7 @@ public class HadoopArchives implements Tool {
|
||||||
short repl = 10;
|
short repl = 10;
|
||||||
|
|
||||||
private static final String usage = "archive"
|
private static final String usage = "archive"
|
||||||
+ " -archiveName NAME -p <parent path> [-r <replication factor>]" +
|
+ " -archiveName <NAME>.har -p <parent path> [-r <replication factor>]" +
|
||||||
"<src>* <dest>" +
|
"<src>* <dest>" +
|
||||||
"\n";
|
"\n";
|
||||||
|
|
||||||
|
@ -348,15 +348,10 @@ public class HadoopArchives implements Tool {
|
||||||
*/
|
*/
|
||||||
private void writeTopLevelDirs(SequenceFile.Writer srcWriter,
|
private void writeTopLevelDirs(SequenceFile.Writer srcWriter,
|
||||||
List<Path> paths, Path parentPath) throws IOException {
|
List<Path> paths, Path parentPath) throws IOException {
|
||||||
//add all the directories
|
// extract paths from absolute URIs
|
||||||
List<Path> justDirs = new ArrayList<Path>();
|
List<Path> justPaths = new ArrayList<Path>();
|
||||||
for (Path p: paths) {
|
for (Path p: paths) {
|
||||||
if (!p.getFileSystem(getConf()).isFile(p)) {
|
justPaths.add(new Path(p.toUri().getPath()));
|
||||||
justDirs.add(new Path(p.toUri().getPath()));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
justDirs.add(new Path(p.getParent().toUri().getPath()));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
/* find all the common parents of paths that are valid archive
|
/* find all the common parents of paths that are valid archive
|
||||||
* paths. The below is done so that we do not add a common path
|
* paths. The below is done so that we do not add a common path
|
||||||
|
@ -372,7 +367,7 @@ public class HadoopArchives implements Tool {
|
||||||
Path root = new Path(Path.SEPARATOR);
|
Path root = new Path(Path.SEPARATOR);
|
||||||
for (int i = parentPath.depth(); i < deepest.depth(); i++) {
|
for (int i = parentPath.depth(); i < deepest.depth(); i++) {
|
||||||
List<Path> parents = new ArrayList<Path>();
|
List<Path> parents = new ArrayList<Path>();
|
||||||
for (Path p: justDirs) {
|
for (Path p: justPaths) {
|
||||||
if (p.compareTo(root) == 0){
|
if (p.compareTo(root) == 0){
|
||||||
//do nothing
|
//do nothing
|
||||||
}
|
}
|
||||||
|
@ -392,7 +387,7 @@ public class HadoopArchives implements Tool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
justDirs = parents;
|
justPaths = parents;
|
||||||
}
|
}
|
||||||
Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet();
|
Set<Map.Entry<String, HashSet<String>>> keyVals = allpaths.entrySet();
|
||||||
for (Map.Entry<String, HashSet<String>> entry : keyVals) {
|
for (Map.Entry<String, HashSet<String>> entry : keyVals) {
|
||||||
|
|
|
@ -203,9 +203,58 @@ public class TestHadoopArchives {
|
||||||
Assert.assertEquals(originalPaths, harPaths);
|
Assert.assertEquals(originalPaths, harPaths);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<String> lsr(final FsShell shell, String dir)
|
@Test
|
||||||
throws Exception {
|
public void testSingleFile() throws Exception {
|
||||||
System.out.println("lsr root=" + dir);
|
final Path sub1 = new Path(inputPath, "dir1");
|
||||||
|
fs.mkdirs(sub1);
|
||||||
|
String singleFileName = "a";
|
||||||
|
createFile(inputPath, fs, sub1.getName(), singleFileName);
|
||||||
|
final FsShell shell = new FsShell(conf);
|
||||||
|
|
||||||
|
final List<String> originalPaths = lsr(shell, sub1.toString());
|
||||||
|
System.out.println("originalPaths: " + originalPaths);
|
||||||
|
|
||||||
|
// make the archive:
|
||||||
|
final String fullHarPathStr = makeArchive(sub1, singleFileName);
|
||||||
|
|
||||||
|
// compare results:
|
||||||
|
final List<String> harPaths = lsr(shell, fullHarPathStr);
|
||||||
|
Assert.assertEquals(originalPaths, harPaths);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGlobFiles() throws Exception {
|
||||||
|
final Path sub1 = new Path(inputPath, "dir1");
|
||||||
|
final Path sub2 = new Path(inputPath, "dir2");
|
||||||
|
fs.mkdirs(sub1);
|
||||||
|
String fileName = "a";
|
||||||
|
createFile(inputPath, fs, sub1.getName(), fileName);
|
||||||
|
createFile(inputPath, fs, sub2.getName(), fileName);
|
||||||
|
createFile(inputPath, fs, sub1.getName(), "b"); // not part of result
|
||||||
|
|
||||||
|
final String glob = "dir{1,2}/a";
|
||||||
|
final FsShell shell = new FsShell(conf);
|
||||||
|
final List<String> originalPaths = lsr(shell, inputPath.toString(),
|
||||||
|
inputPath + "/" + glob);
|
||||||
|
System.out.println("originalPaths: " + originalPaths);
|
||||||
|
|
||||||
|
// make the archive:
|
||||||
|
final String fullHarPathStr = makeArchive(inputPath, glob);
|
||||||
|
|
||||||
|
// compare results:
|
||||||
|
final List<String> harPaths = lsr(shell, fullHarPathStr,
|
||||||
|
fullHarPathStr + "/" + glob);
|
||||||
|
Assert.assertEquals(originalPaths, harPaths);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> lsr(final FsShell shell, String rootDir) throws Exception {
|
||||||
|
return lsr(shell, rootDir, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> lsr(final FsShell shell, String rootDir,
|
||||||
|
String glob) throws Exception {
|
||||||
|
final String dir = glob == null ? rootDir : glob;
|
||||||
|
System.out.println("lsr root=" + rootDir);
|
||||||
final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
|
final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
|
||||||
final PrintStream out = new PrintStream(bytes);
|
final PrintStream out = new PrintStream(bytes);
|
||||||
final PrintStream oldOut = System.out;
|
final PrintStream oldOut = System.out;
|
||||||
|
@ -222,9 +271,9 @@ public class TestHadoopArchives {
|
||||||
System.setErr(oldErr);
|
System.setErr(oldErr);
|
||||||
}
|
}
|
||||||
System.out.println("lsr results:\n" + results);
|
System.out.println("lsr results:\n" + results);
|
||||||
String dirname = dir;
|
String dirname = rootDir;
|
||||||
if (dir.lastIndexOf(Path.SEPARATOR) != -1) {
|
if (rootDir.lastIndexOf(Path.SEPARATOR) != -1) {
|
||||||
dirname = dir.substring(dir.lastIndexOf(Path.SEPARATOR));
|
dirname = rootDir.substring(rootDir.lastIndexOf(Path.SEPARATOR));
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<String> paths = new ArrayList<String>();
|
final List<String> paths = new ArrayList<String>();
|
||||||
|
@ -621,13 +670,19 @@ public class TestHadoopArchives {
|
||||||
return bb;
|
return bb;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private String makeArchive() throws Exception {
|
||||||
|
return makeArchive(inputPath, null);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Run the HadoopArchives tool to create an archive on the
|
* Run the HadoopArchives tool to create an archive on the
|
||||||
* given file system.
|
* given file system.
|
||||||
*/
|
*/
|
||||||
private String makeArchive() throws Exception {
|
private String makeArchive(Path parentPath, String relGlob) throws Exception {
|
||||||
final String inputPathStr = inputPath.toUri().getPath();
|
final String parentPathStr = parentPath.toUri().getPath();
|
||||||
System.out.println("inputPathStr = " + inputPathStr);
|
final String relPathGlob = relGlob == null ? "*" : relGlob;
|
||||||
|
System.out.println("parentPathStr = " + parentPathStr);
|
||||||
|
|
||||||
final URI uri = fs.getUri();
|
final URI uri = fs.getUri();
|
||||||
final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
|
final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
|
||||||
|
@ -635,8 +690,8 @@ public class TestHadoopArchives {
|
||||||
|
|
||||||
final String harName = "foo.har";
|
final String harName = "foo.har";
|
||||||
final String fullHarPathStr = prefix + harName;
|
final String fullHarPathStr = prefix + harName;
|
||||||
final String[] args = { "-archiveName", harName, "-p", inputPathStr, "*",
|
final String[] args = { "-archiveName", harName, "-p", parentPathStr,
|
||||||
archivePath.toString() };
|
relPathGlob, archivePath.toString() };
|
||||||
System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
|
System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
|
||||||
HADOOP_ARCHIVES_JAR);
|
HADOOP_ARCHIVES_JAR);
|
||||||
final HadoopArchives har = new HadoopArchives(conf);
|
final HadoopArchives har = new HadoopArchives(conf);
|
||||||
|
|
Loading…
Reference in New Issue