HADOOP-11021. Configurable replication factor in the hadoop archive command. Contributed by Zhe Zhang.

This commit is contained in:
Andrew Wang 2014-08-29 14:44:37 -07:00
parent c60da4d3b3
commit ea1c6f31c2
3 changed files with 59 additions and 6 deletions

View File

@ -38,7 +38,7 @@ Overview
How to Create an Archive How to Create an Archive
------------------------ ------------------------
`Usage: hadoop archive -archiveName name -p <parent> <src>* <dest>` `Usage: hadoop archive -archiveName name -p <parent> [-r <replication factor>] <src>* <dest>`
-archiveName is the name of the archive you would like to create. An example -archiveName is the name of the archive you would like to create. An example
would be foo.har. The name should have a \*.har extension. The parent argument would be foo.har. The name should have a \*.har extension. The parent argument
@ -52,9 +52,12 @@ How to Create an Archive
would need a map reduce cluster to run this. For a detailed example see the later would need a map reduce cluster to run this. For a detailed example see the later
sections. sections.
-r indicates the desired replication factor; if this optional argument is
not specified, a replication factor of 10 will be used.
If you just want to archive a single directory /foo/bar then you can just use If you just want to archive a single directory /foo/bar then you can just use
`hadoop archive -archiveName zoo.har -p /foo/bar /outputdir` `hadoop archive -archiveName zoo.har -p /foo/bar -r 3 /outputdir`
How to Look Up Files in Archives How to Look Up Files in Archives
-------------------------------- --------------------------------
@ -90,14 +93,15 @@ Archives Examples
$H3 Creating an Archive $H3 Creating an Archive
`hadoop archive -archiveName foo.har -p /user/hadoop dir1 dir2 /user/zoo` `hadoop archive -archiveName foo.har -p /user/hadoop -r 3 dir1 dir2 /user/zoo`
The above example is creating an archive using /user/hadoop as the relative The above example is creating an archive using /user/hadoop as the relative
archive directory. The directories /user/hadoop/dir1 and /user/hadoop/dir2 archive directory. The directories /user/hadoop/dir1 and /user/hadoop/dir2
will be archived in the following file system directory -- /user/zoo/foo.har. will be archived in the following file system directory -- /user/zoo/foo.har.
Archiving does not delete the input files. If you want to delete the input Archiving does not delete the input files. If you want to delete the input
files after creating the archives (to reduce namespace), you will have to do files after creating the archives (to reduce namespace), you will have to do
it on your own. it on your own. In this example, because `-r 3` is specified, a replication
factor of 3 will be used.
$H3 Looking Up Files $H3 Looking Up Files

View File

@ -97,9 +97,12 @@ public class HadoopArchives implements Tool {
long partSize = 2 * 1024 * 1024 * 1024l; long partSize = 2 * 1024 * 1024 * 1024l;
/** size of blocks in hadoop archives **/ /** size of blocks in hadoop archives **/
long blockSize = 512 * 1024 * 1024l; long blockSize = 512 * 1024 * 1024l;
/** the desired replication degree; default is 10 **/
short repl = 10;
private static final String usage = "archive" private static final String usage = "archive"
+ " -archiveName NAME -p <parent path> <src>* <dest>" + + " -archiveName NAME -p <parent path> [-r <replication factor>]" +
" <src>* <dest>" +
"\n"; "\n";
@ -542,7 +545,7 @@ public class HadoopArchives implements Tool {
srcWriter.close(); srcWriter.close();
} }
//increase the replication of src files //increase the replication of src files
jobfs.setReplication(srcFiles, (short) 10); jobfs.setReplication(srcFiles, repl);
conf.setInt(SRC_COUNT_LABEL, numFiles); conf.setInt(SRC_COUNT_LABEL, numFiles);
conf.setLong(TOTAL_SIZE_LABEL, totalSize); conf.setLong(TOTAL_SIZE_LABEL, totalSize);
int numMaps = (int)(totalSize/partSize); int numMaps = (int)(totalSize/partSize);
@ -835,6 +838,11 @@ public class HadoopArchives implements Tool {
} }
i+=2; i+=2;
if ("-r".equals(args[i])) {
repl = Short.parseShort(args[i+1]);
i+=2;
}
//read the rest of the paths //read the rest of the paths
for (; i < args.length; i++) { for (; i < args.length; i++) {
if (i == (args.length - 1)) { if (i == (args.length - 1)) {

View File

@ -158,6 +158,24 @@ public class TestHadoopArchives {
Assert.assertEquals(originalPaths, harPaths); Assert.assertEquals(originalPaths, harPaths);
} }
/**
 * Archives a small relative-path input tree through
 * {@link #makeArchiveWithRepl()} and checks that the recursive listing of the
 * resulting archive equals the recursive listing of the original input.
 * Only the listings are compared; the replication factor of the produced
 * files is not inspected here.
 */
@Test
public void testRelativePathWitRepl() throws Exception {
  // Populate the input directory with a single sub-directory holding one file.
  final Path subDir = new Path(inputPath, "dir1");
  fs.mkdirs(subDir);
  createFile(inputPath, fs, subDir.getName(), "a");

  // Record what the input looks like before archiving.
  final FsShell fsShell = new FsShell(conf);
  final List<String> expectedListing = lsr(fsShell, "input");
  System.out.println("originalPaths: " + expectedListing);

  // Build the archive, then list it through the har:// path it returns.
  final String harUri = makeArchiveWithRepl();
  final List<String> actualListing = lsr(fsShell, harUri);

  Assert.assertEquals(expectedListing, actualListing);
}
@Test @Test
public void testPathWithSpaces() throws Exception { public void testPathWithSpaces() throws Exception {
// create files/directories with spaces // create files/directories with spaces
@ -626,6 +644,29 @@ public class TestHadoopArchives {
return fullHarPathStr; return fullHarPathStr;
} }
/*
 * Run the HadoopArchives tool to create an archive named "foo.har" under
 * archivePath on the test file system, requesting a replication factor of 3
 * through the "-r" option.
 *
 * Returns the full har:// path string of the created archive.
 * Fails the calling test if the tool exits with a non-zero status.
 */
private String makeArchiveWithRepl() throws Exception {
  final String inputPathStr = inputPath.toUri().getPath();
  System.out.println("inputPathStr = " + inputPathStr);

  final URI uri = fs.getUri();
  final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
      + archivePath.toUri().getPath() + Path.SEPARATOR;

  final String harName = "foo.har";
  final String fullHarPathStr = prefix + harName;

  // The flag and its value must be separate argv elements: the tool matches
  // the flag with an exact "-r" comparison and parses the following element
  // as the replication factor. A single "-r 3" token would never match and
  // would instead be treated as a source path.
  final String[] args = { "-archiveName", harName, "-p", inputPathStr,
      "-r", "3", "*", archivePath.toString() };

  System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
      HADOOP_ARCHIVES_JAR);
  final HadoopArchives har = new HadoopArchives(conf);
  assertEquals(0, ToolRunner.run(har, args));
  return fullHarPathStr;
}
@Test @Test
/* /*
* Tests copying from archive file system to a local file system * Tests copying from archive file system to a local file system