HADOOP-11021. Configurable replication factor in the hadoop archive command. Contributed by Zhe Zhang.
(cherry picked from commit ea1c6f31c2)
This commit is contained in:
parent
5889f4d5f3
commit
075bb9e380
|
@ -38,7 +38,7 @@ Overview
|
||||||
How to Create an Archive
|
How to Create an Archive
|
||||||
------------------------
|
------------------------
|
||||||
|
|
||||||
`Usage: hadoop archive -archiveName name -p <parent> <src>* <dest>`
|
`Usage: hadoop archive -archiveName name -p <parent> [-r <replication factor>] <src>* <dest>`
|
||||||
|
|
||||||
-archiveName is the name of the archive you would like to create. An example
|
-archiveName is the name of the archive you would like to create. An example
|
||||||
would be foo.har. The name should have a \*.har extension. The parent argument
|
would be foo.har. The name should have a \*.har extension. The parent argument
|
||||||
|
@ -52,9 +52,12 @@ How to Create an Archive
|
||||||
would need a map reduce cluster to run this. For a detailed example, see the later
|
would need a map reduce cluster to run this. For a detailed example, see the later
|
||||||
sections.
|
sections.
|
||||||
|
|
||||||
|
-r indicates the desired replication factor; if this optional argument is
|
||||||
|
not specified, a replication factor of 10 will be used.
|
||||||
|
|
||||||
If you just want to archive a single directory /foo/bar then you can just use
|
If you just want to archive a single directory /foo/bar then you can just use
|
||||||
|
|
||||||
`hadoop archive -archiveName zoo.har -p /foo/bar /outputdir`
|
`hadoop archive -archiveName zoo.har -p /foo/bar -r 3 /outputdir`
|
||||||
|
|
||||||
How to Look Up Files in Archives
|
How to Look Up Files in Archives
|
||||||
--------------------------------
|
--------------------------------
|
||||||
|
@ -90,14 +93,15 @@ Archives Examples
|
||||||
|
|
||||||
$H3 Creating an Archive
|
$H3 Creating an Archive
|
||||||
|
|
||||||
`hadoop archive -archiveName foo.har -p /user/hadoop dir1 dir2 /user/zoo`
|
`hadoop archive -archiveName foo.har -p /user/hadoop -r 3 dir1 dir2 /user/zoo`
|
||||||
|
|
||||||
The above example is creating an archive using /user/hadoop as the relative
|
The above example is creating an archive using /user/hadoop as the relative
|
||||||
archive directory. The directories /user/hadoop/dir1 and /user/hadoop/dir2
|
archive directory. The directories /user/hadoop/dir1 and /user/hadoop/dir2
|
||||||
will be archived in the following file system directory -- /user/zoo/foo.har.
|
will be archived in the following file system directory -- /user/zoo/foo.har.
|
||||||
Archiving does not delete the input files. If you want to delete the input
|
Archiving does not delete the input files. If you want to delete the input
|
||||||
files after creating the archives (to reduce namespace), you will have to do
|
files after creating the archives (to reduce namespace), you will have to do
|
||||||
it on your own.
|
it on your own. In this example, because `-r 3` is specified, a replication
|
||||||
|
factor of 3 will be used.
|
||||||
|
|
||||||
$H3 Looking Up Files
|
$H3 Looking Up Files
|
||||||
|
|
||||||
|
|
|
@ -97,9 +97,12 @@ public class HadoopArchives implements Tool {
|
||||||
long partSize = 2 * 1024 * 1024 * 1024l;
|
long partSize = 2 * 1024 * 1024 * 1024l;
|
||||||
/** size of blocks in hadoop archives **/
|
/** size of blocks in hadoop archives **/
|
||||||
long blockSize = 512 * 1024 * 1024l;
|
long blockSize = 512 * 1024 * 1024l;
|
||||||
|
/** the desired replication degree; default is 10 **/
|
||||||
|
short repl = 10;
|
||||||
|
|
||||||
private static final String usage = "archive"
|
private static final String usage = "archive"
|
||||||
+ " -archiveName NAME -p <parent path> <src>* <dest>" +
|
+ " -archiveName NAME -p <parent path> [-r <replication factor>]" +
|
||||||
|
" <src>* <dest>" +
|
||||||
"\n";
|
"\n";
|
||||||
|
|
||||||
|
|
||||||
|
@ -542,7 +545,7 @@ public class HadoopArchives implements Tool {
|
||||||
srcWriter.close();
|
srcWriter.close();
|
||||||
}
|
}
|
||||||
//increase the replication of src files
|
//set the replication of src files to the requested factor
|
||||||
jobfs.setReplication(srcFiles, (short) 10);
|
jobfs.setReplication(srcFiles, repl);
|
||||||
conf.setInt(SRC_COUNT_LABEL, numFiles);
|
conf.setInt(SRC_COUNT_LABEL, numFiles);
|
||||||
conf.setLong(TOTAL_SIZE_LABEL, totalSize);
|
conf.setLong(TOTAL_SIZE_LABEL, totalSize);
|
||||||
int numMaps = (int)(totalSize/partSize);
|
int numMaps = (int)(totalSize/partSize);
|
||||||
|
@ -835,6 +838,11 @@ public class HadoopArchives implements Tool {
|
||||||
}
|
}
|
||||||
|
|
||||||
i+=2;
|
i+=2;
|
||||||
|
|
||||||
|
if ("-r".equals(args[i])) {
|
||||||
|
repl = Short.parseShort(args[i+1]);
|
||||||
|
i+=2;
|
||||||
|
}
|
||||||
//read the rest of the paths
|
//read the rest of the paths
|
||||||
for (; i < args.length; i++) {
|
for (; i < args.length; i++) {
|
||||||
if (i == (args.length - 1)) {
|
if (i == (args.length - 1)) {
|
||||||
|
|
|
@ -158,6 +158,24 @@ public class TestHadoopArchives {
|
||||||
Assert.assertEquals(originalPaths, harPaths);
|
Assert.assertEquals(originalPaths, harPaths);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRelativePathWitRepl() throws Exception {
|
||||||
|
final Path sub1 = new Path(inputPath, "dir1");
|
||||||
|
fs.mkdirs(sub1);
|
||||||
|
createFile(inputPath, fs, sub1.getName(), "a");
|
||||||
|
final FsShell shell = new FsShell(conf);
|
||||||
|
|
||||||
|
final List<String> originalPaths = lsr(shell, "input");
|
||||||
|
System.out.println("originalPaths: " + originalPaths);
|
||||||
|
|
||||||
|
// make the archive:
|
||||||
|
final String fullHarPathStr = makeArchiveWithRepl();
|
||||||
|
|
||||||
|
// compare results:
|
||||||
|
final List<String> harPaths = lsr(shell, fullHarPathStr);
|
||||||
|
Assert.assertEquals(originalPaths, harPaths);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testPathWithSpaces() throws Exception {
|
public void testPathWithSpaces() throws Exception {
|
||||||
// create files/directories with spaces
|
// create files/directories with spaces
|
||||||
|
@ -626,6 +644,29 @@ public class TestHadoopArchives {
|
||||||
return fullHarPathStr;
|
return fullHarPathStr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Run the HadoopArchives tool to create an archive on the
|
||||||
|
* given file system with a specified replication degree.
|
||||||
|
*/
|
||||||
|
private String makeArchiveWithRepl() throws Exception {
|
||||||
|
final String inputPathStr = inputPath.toUri().getPath();
|
||||||
|
System.out.println("inputPathStr = " + inputPathStr);
|
||||||
|
|
||||||
|
final URI uri = fs.getUri();
|
||||||
|
final String prefix = "har://hdfs-" + uri.getHost() + ":" + uri.getPort()
|
||||||
|
+ archivePath.toUri().getPath() + Path.SEPARATOR;
|
||||||
|
|
||||||
|
final String harName = "foo.har";
|
||||||
|
final String fullHarPathStr = prefix + harName;
|
||||||
|
final String[] args = { "-archiveName", harName, "-p", inputPathStr,
|
||||||
|
"-r 3", "*", archivePath.toString() };
|
||||||
|
System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
|
||||||
|
HADOOP_ARCHIVES_JAR);
|
||||||
|
final HadoopArchives har = new HadoopArchives(conf);
|
||||||
|
assertEquals(0, ToolRunner.run(har, args));
|
||||||
|
return fullHarPathStr;
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
/*
|
/*
|
||||||
* Tests copying from archive file system to a local file system
|
* Tests copying from archive file system to a local file system
|
||||||
|
|
Loading…
Reference in New Issue