HADOOP-12214. Parse 'HadoopArchive' commandline using cli Options. (Contributed by Vinayakumar B)

This commit is contained in:
Vinayakumar B 2015-07-21 13:12:46 +05:30
parent df1e8ce44a
commit 87f29c6b8a
3 changed files with 63 additions and 29 deletions

View File

@ -708,6 +708,9 @@ Release 2.8.0 - UNRELEASED
HADOOP-12081. Fix UserGroupInformation.java to support 64-bit zLinux.
(aajisaka)
HADOOP-12214. Parse 'HadoopArchive' commandline using cli Options.
(vinayakumarb)
OPTIMIZATIONS
HADOOP-11785. Reduce the number of listStatus operation in distcp

View File

@ -33,6 +33,11 @@ import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.Parser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@ -81,6 +86,10 @@ public class HadoopArchives implements Tool {
private static final Log LOG = LogFactory.getLog(HadoopArchives.class);
// Short tool name; also used as the prefix of the *_LABEL keys below.
private static final String NAME = "har";
// Names of the command-line options registered with commons-cli in run().
// Name of the archive to create (mandatory option).
private static final String ARCHIVE_NAME = "archiveName";
// Replication factor of the archive files (optional).
private static final String REPLICATION = "r";
// Parent path of the sources (mandatory option).
private static final String PARENT_PATH = "p";
// Flag option: show the usage and exit.
private static final String HELP = "help";
// Keys derived from NAME ("har.src.list", "har.dest.path", "har.tmp.dir").
// NOTE(review): presumably job configuration keys -- their use is not visible here; confirm.
static final String SRC_LIST_LABEL = NAME + ".src.list";
static final String DST_DIR_LABEL = NAME + ".dest.path";
static final String TMP_DIR_LABEL = NAME + ".tmp.dir";
@ -101,9 +110,9 @@ public class HadoopArchives implements Tool {
/** the desired replication degree; default is 10 **/
short repl = 10;
private static final String usage = "Usage: archive"
+ " -archiveName <NAME>.har -p <parent path> [-r <replication factor>]" +
"<src>* <dest>" +
private static final String usage = "archive"
+ " <-archiveName <NAME>.har> <-p <parent path>> [-r <replication factor>]" +
" <src>* <dest>" +
"\n";
@ -794,7 +803,17 @@ public class HadoopArchives implements Tool {
}
}
/**
 * Writes the command usage to standard output.
 *
 * @param opts the command-line options handed to the help formatter when
 *          detailed output is requested
 * @param printDetailed if {@code true}, delegate to
 *          {@link HelpFormatter#printHelp} so the registered options are
 *          listed as well; if {@code false}, print only the usage string
 */
private void printUsage(Options opts, boolean printDetailed) {
  if (!printDetailed) {
    // Short form: just the one-line syntax summary.
    System.out.println(usage);
    return;
  }
  // Leave some slack beyond the usage text so the formatter is less likely
  // to wrap the syntax line.
  final int lineWidth = usage.length() + 10;
  new HelpFormatter().printHelp(lineWidth, usage, null, opts, null, false);
}
/** The main driver for creating the archives.
* It takes at least three command line parameters: the parent path,
* the src and the dest. It does an lsr on the source paths.
@ -804,43 +823,51 @@ public class HadoopArchives implements Tool {
public int run(String[] args) throws Exception {
try {
Path parentPath = null;
List<Path> srcPaths = new ArrayList<Path>();
Path destPath = null;
String archiveName = null;
if (args.length < 5) {
System.out.println(usage);
throw new IOException("Invalid usage.");
// Parse CLI options
Options options = new Options();
options.addOption(ARCHIVE_NAME, true,
"Name of the Archive. This is mandatory option");
options.addOption(PARENT_PATH, true,
"Parent path of sources. This is mandatory option");
options.addOption(REPLICATION, true, "Replication factor archive files");
options.addOption(HELP, false, "Show the usage");
Parser parser = new GnuParser();
CommandLine commandLine = parser.parse(options, args, true);
if (commandLine.hasOption(HELP)) {
printUsage(options, true);
return 0;
}
if (!"-archiveName".equals(args[0])) {
System.out.println(usage);
if (!commandLine.hasOption(ARCHIVE_NAME)) {
printUsage(options, false);
throw new IOException("Archive Name not specified.");
}
archiveName = args[1];
String archiveName = commandLine.getOptionValue(ARCHIVE_NAME);
if (!checkValidName(archiveName)) {
System.out.println(usage);
printUsage(options, false);
throw new IOException("Invalid name for archives. " + archiveName);
}
int i = 2;
//check to see if relative parent has been provided or not
//this is a required parameter.
if (! "-p".equals(args[i])) {
System.out.println(usage);
if (!commandLine.hasOption(PARENT_PATH)) {
printUsage(options, false);
throw new IOException("Parent path not specified.");
}
parentPath = new Path(args[i+1]);
Path parentPath = new Path(commandLine.getOptionValue(PARENT_PATH));
if (!parentPath.isAbsolute()) {
parentPath= parentPath.getFileSystem(getConf()).makeQualified(parentPath);
parentPath = parentPath.getFileSystem(getConf()).makeQualified(
parentPath);
}
i+=2;
if ("-r".equals(args[i])) {
repl = Short.parseShort(args[i+1]);
i+=2;
if (commandLine.hasOption(REPLICATION)) {
repl = Short.parseShort(commandLine.getOptionValue(REPLICATION));
}
// Remaining args
args = commandLine.getArgs();
List<Path> srcPaths = new ArrayList<Path>();
Path destPath = null;
//read the rest of the paths
for (; i < args.length; i++) {
for (int i = 0; i < args.length; i++) {
if (i == (args.length - 1)) {
destPath = new Path(args[i]);
if (!destPath.isAbsolute()) {
@ -850,13 +877,17 @@ public class HadoopArchives implements Tool {
else {
Path argPath = new Path(args[i]);
if (argPath.isAbsolute()) {
System.out.println(usage);
printUsage(options, false);
throw new IOException("Source path " + argPath +
" is not relative to "+ parentPath);
}
srcPaths.add(new Path(parentPath, argPath));
}
}
if (destPath == null) {
printUsage(options, false);
throw new IOException("Destination path not specified.");
}
if (srcPaths.size() == 0) {
// assuming if the user does not specify path for sources
// the whole parent directory needs to be archived.

View File

@ -753,8 +753,8 @@ public class TestHadoopArchives {
final String harName = "foo.har";
final String fullHarPathStr = prefix + harName;
final String[] args = { "-archiveName", harName, "-p", inputPathStr,
"-r 3", "*", archivePath.toString() };
final String[] args = { "-archiveName", harName, "-p", inputPathStr, "-r",
"3", "*", archivePath.toString() };
System.setProperty(HadoopArchives.TEST_HADOOP_ARCHIVES_JAR_PATH,
HADOOP_ARCHIVES_JAR);
final HadoopArchives har = new HadoopArchives(conf);