MAPREDUCE-3829. [Gridmix] Gridmix should give better error message when input data directory already exists and -generate option is given.(ravigummadi)

git-svn-id: 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ravi Gummadi 2012-03-12 11:08:32 +00:00
parent 42900bd080
commit 7d60932060
6 changed files with 373 additions and 97 deletions

View File

@ -52,6 +52,10 @@ Trunk (unreleased changes)
MAPREDUCE-3829. [Gridmix] Gridmix should give better error message when
input data directory already exists and -generate option is
given.(ravigummadi)
MAPREDUCE-2722. [Gridmix] Gridmix simulated job's map's hdfsBytesRead
counter is wrong when compressed input is used.(ravigummadi)

View File

@ -86,11 +86,6 @@ class DistributedCacheEmulator {
static final long AVG_BYTES_PER_MAP = 128 * 1024 * 1024L;// 128MB
// If at least 1 distributed cache file is missing in the expected
// distributed cache dir, Gridmix cannot proceed with emulation of
// distributed cache load.
private Path distCachePath;
@ -154,7 +149,7 @@ public DistributedCacheEmulator(Configuration conf, Path ioPath) {
* <li> execute permission is not there for any of the ascendant directories
* of &lt;ioPath&gt; till root. This is because for emulation of distributed
* cache load, distributed cache files created under
* &lt;ioPath/distributedCache/public/&gt; should be considered by hadoop
* &lt;ioPath/distributedCache/&gt; should be considered by hadoop
* as public distributed cache files.
* <li> creation of pseudo local file system fails.</ol>
* <br> For (2), (3), (4) and (5), generation of distributed cache data
@ -470,7 +465,7 @@ public int compare(Object dc1, Object dc2) {
+ "disable\ndistributed cache emulation by configuring '"
+ "' to false.");
return 0;

View File

@ -145,6 +145,18 @@ public class Gridmix extends Configured implements Tool {
// Shutdown hook
private final Shutdown sdh = new Shutdown();
/** Error while parsing/analyzing the arguments to Gridmix */
static final int ARGS_ERROR = 1;
/** Error while trying to start/setup the Gridmix run */
static final int STARTUP_FAILED_ERROR = 2;
* If at least 1 distributed cache file is missing in the expected
* distributed cache dir, Gridmix cannot proceed with emulation of
* distributed cache load.
static final int MISSING_DIST_CACHE_FILES_ERROR = 3;
Gridmix(String[] args) {
summarizer = new Summarizer(args);
@ -160,13 +172,21 @@ static Path getGridmixInputDataPath(Path ioPath) {
* Write random bytes at the path &lt;inputDir&gt;.
* Write random bytes at the path &lt;inputDir&gt; if needed.
* @see org.apache.hadoop.mapred.gridmix.GenerateData
* @return exit status
protected void writeInputData(long genbytes, Path inputDir)
protected int writeInputData(long genbytes, Path inputDir)
throws IOException, InterruptedException {
if (genbytes > 0) {
final Configuration conf = getConf();
if (inputDir.getFileSystem(conf).exists(inputDir)) {
LOG.error("Gridmix input data directory " + inputDir
+ " already exists when -generate option is used.\n");
// configure the compression ratio if needed
@ -187,6 +207,9 @@ protected void writeInputData(long genbytes, Path inputDir)"Input data generation successful.");
return 0;
* Write random bytes in the distributed cache files that will be used by all
* simulated jobs of current gridmix run, if files are to be generated.
@ -363,31 +386,33 @@ public Integer run() throws Exception {
private int runJob(Configuration conf, String[] argv)
throws IOException, InterruptedException {
if (argv.length < 2) {
LOG.error("Too few arguments to Gridmix.\n");
return 1;
return ARGS_ERROR;
// Should gridmix generate distributed cache data ?
boolean generate = false;
long genbytes = -1L;
String traceIn = null;
Path ioPath = null;
URI userRsrc = null;
userResolver = ReflectionUtils.newInstance(
try {
userResolver = ReflectionUtils.newInstance(conf.getClass(GRIDMIX_USR_RSV,
SubmitterUserResolver.class, UserResolver.class), conf);
for (int i = 0; i < argv.length - 2; ++i) {
if ("-generate".equals(argv[i])) {
genbytes = StringUtils.TraditionalBinaryPrefix.string2long(argv[++i]);
generate = true;
if (genbytes <= 0) {
LOG.error("size of input data to be generated specified using "
+ "-generate option should be nonnegative.\n");
return ARGS_ERROR;
} else if ("-users".equals(argv[i])) {
userRsrc = new URI(argv[++i]);
} else {
LOG.error("Unknown option " + argv[i] + " specified.\n");
return 1;
return ARGS_ERROR;
@ -397,10 +422,10 @@ private int runJob(Configuration conf, String[] argv)
LOG.warn("Ignoring the user resource '" + userRsrc + "'.");
} else {
System.err.println("\n\n" + userResolver.getClass()
+ " needs target user list. Use -users option." + "\n\n");
+ " needs target user list. Use -users option.\n");
return 1;
return ARGS_ERROR;
} else if (userRsrc != null) {
LOG.warn("Ignoring the user resource '" + userRsrc + "'.");
@ -409,11 +434,32 @@ private int runJob(Configuration conf, String[] argv)
ioPath = new Path(argv[argv.length - 2]);
traceIn = argv[argv.length - 1];
} catch (Exception e) {
LOG.error(e.toString() + "\n");
if (LOG.isDebugEnabled()) {
return 1;
return start(conf, traceIn, ioPath, genbytes, userResolver, generate);
return ARGS_ERROR;
// Create <ioPath> with 777 permissions
final FileSystem inputFs = ioPath.getFileSystem(conf);
ioPath = ioPath.makeQualified(inputFs);
boolean succeeded = false;
try {
succeeded = FileSystem.mkdirs(inputFs, ioPath,
new FsPermission((short)0777));
} catch(IOException e) {
// No need to emit this exception message
} finally {
if (!succeeded) {
LOG.error("Failed creation of <ioPath> directory " + ioPath + "\n");
return start(conf, traceIn, ioPath, genbytes, userResolver);
@ -429,17 +475,16 @@ private int runJob(Configuration conf, String[] argv)
* @param genbytes size of input data to be generated under the directory
* &lt;ioPath&gt;/input/
* @param userResolver gridmix user resolver
* @param generate true if -generate option was specified
* @return exit code
* @throws IOException
* @throws InterruptedException
int start(Configuration conf, String traceIn, Path ioPath, long genbytes,
UserResolver userResolver, boolean generate)
UserResolver userResolver)
throws IOException, InterruptedException {
DataStatistics stats = null;
InputStream trace = null;
ioPath = ioPath.makeQualified(ioPath.getFileSystem(conf));
int exitCode = 0;
try {
Path scratchDir = new Path(ioPath, conf.get(GRIDMIX_OUT_DIR, "gridmix"));
@ -455,8 +500,9 @@ int start(Configuration conf, String traceIn, Path ioPath, long genbytes,
Path inputDir = getGridmixInputDataPath(ioPath);
// Write input data if specified
if (genbytes > 0) {
writeInputData(genbytes, inputDir);
exitCode = writeInputData(genbytes, inputDir);
if (exitCode != 0) {
return exitCode;
// publish the data statistics
@ -465,9 +511,10 @@ int start(Configuration conf, String traceIn, Path ioPath, long genbytes,
// scan input dir contents
boolean shouldGenerate = (genbytes > 0);
// set up the needed things for emulation of various loads
int exitCode = setupEmulation(conf, traceIn, scratchDir, ioPath,
exitCode = setupEmulation(conf, traceIn, scratchDir, ioPath,
if (exitCode != 0) {
return exitCode;
@ -478,8 +525,12 @@ int start(Configuration conf, String traceIn, Path ioPath, long genbytes,
} catch (Throwable e) {
LOG.error("Startup failed", e);
LOG.error("Startup failed. " + e.toString() + "\n");
if (LOG.isDebugEnabled()) {
if (factory != null) factory.abort(); // abort pipeline
} finally {
// signal for factory to start; sets start time
@ -510,7 +561,7 @@ int start(Configuration conf, String traceIn, Path ioPath, long genbytes,
IOUtils.cleanup(LOG, trace);
return 0;
return exitCode;

View File

@ -204,7 +204,8 @@ private JobConf runSetupGenerateDistCacheData(boolean generate,
dce = createDistributedCacheEmulator(jobConf, ioPath, generate);
int exitCode = dce.setupGenerateDistCacheData(jobProducer);
int expectedExitCode = generate ? 0 : dce.MISSING_DIST_CACHE_FILES_ERROR;
int expectedExitCode =
generate ? 0 : Gridmix.MISSING_DIST_CACHE_FILES_ERROR;
assertEquals("setupGenerateDistCacheData failed.",
expectedExitCode, exitCode);

View File

@ -0,0 +1,176 @@
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package org.apache.hadoop.mapred.gridmix;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/** Test Gridmix exit codes for different error types */
public class TestGridmixExitCodes {
public static void init() throws IOException {
public static void shutDown() throws IOException {
* Test Gridmix exit codes for different error types like
* <li> when less than 2 arguments are provided to Gridmix
* <li> when input data dir already exists and -generate option is specified
* <li> Specifying negative input-data-size using -generate option
* <li> specifying a non-existing option to Gridmix command-line
* <li> Wrong combination of arguments to Gridmix run
* <li> Unable to create ioPath dir
* <li> Bad class specified as a user resolver
public void testGridmixExitCodes() throws Exception {
* Specify less than 2 arguments to Gridmix and verify the exit code
private void testTooFewArgs() throws Exception {
int expectedExitCode = Gridmix.ARGS_ERROR;
// Provide only 1 argument to Gridmix
String[] argv = new String[1];
argv[0] = "ioPath";
TestGridmixSubmission.testGridmixExitCode(true, argv, expectedExitCode);
* Specify a negative input data size to be generated and verify the exit code
private void testNegativeInputDataSize() throws Exception {
int expectedExitCode = Gridmix.ARGS_ERROR;
String[] argv = new String[4];
argv[0] = "-generate";
argv[1] = "-5m"; // -ve size
argv[2] = "ioPath";
argv[3] = "-";
TestGridmixSubmission.testGridmixExitCode(true, argv, expectedExitCode);
* Specify a non-existing option to Gridmix command-line and verify
* the exit code
private void testNonexistingOption() throws Exception {
int expectedExitCode = Gridmix.ARGS_ERROR;
String[] argv = new String[3];
argv[0] = "-unknownOption";
argv[1] = "dummyArg1";
argv[2] = "dummyArg2";
// No need to call prepareArgs() as -unknownOption should make Gridmix fail
TestGridmixSubmission.testGridmixExitCode(true, argv, expectedExitCode);
* Specify wrong combination of arguments to Gridmix run and verify
* the exit code. This is done by specifying RoundRobinUserResolver and not
* specifying -users option
private void testWrongArgs() throws Exception {
int expectedExitCode = Gridmix.ARGS_ERROR;
String[] argv = TestGridmixSubmission.prepareArgs(true,
TestGridmixSubmission.testGridmixExitCode(true, argv, expectedExitCode);
* <li> Specify a non-existing class as a userResolver class and validate the
* exit code
* <li> Specify an existing class which doesn't implement {@link UserResolver}
* as a userResolver class and validate the exit code
private void testBadUserResolvers() throws Exception {
int expectedExitCode = Gridmix.ARGS_ERROR;
// Verify the case of an existing class that doesn't implement the
// interface UserResolver
String[] argv = TestGridmixSubmission.prepareArgs(true,
TestGridmixSubmission.testGridmixExitCode(true, argv, expectedExitCode);
// Verify the case of a nonexisting class name as user resolver class
argv = TestGridmixSubmission.prepareArgs(true, "NonExistingUserResolver");
TestGridmixSubmission.testGridmixExitCode(true, argv, expectedExitCode);
/** A class which doesn't implement the interface {@link UserResolver} */
static class WrongUserResolver {}
* Setup such that creation of ioPath dir fails and verify the exit code
private void testBadIOPath() throws Exception {
// Create foo as a file (not as a directory).
// This ioPath cannot be created as a directory now.
int expectedExitCode = Gridmix.STARTUP_FAILED_ERROR;
String[] argv = TestGridmixSubmission.prepareArgs(true,
TestGridmixSubmission.testGridmixExitCode(true, argv, expectedExitCode);
* Create the input data dir, specify the -generate option, and verify the exit code
private void testExistingInputDataDir() throws Exception {
int expectedExitCode = Gridmix.STARTUP_FAILED_ERROR;
String[] argv = TestGridmixSubmission.prepareArgs(true,
TestGridmixSubmission.testGridmixExitCode(true, argv, expectedExitCode);
* Create input data directory of Gridmix run
* @param ioPath ioPath argument of Gridmix run
private static void createInputDataDirectory(Path ioPath)
throws IOException {
Path inputDir = Gridmix.getGridmixInputDataPath(ioPath);
FileSystem.mkdirs(GridmixTestUtils.dfs, inputDir,
new FsPermission((short)0777));

View File

@ -80,9 +80,15 @@ public class TestGridmixSubmission {
private static final long GENDATA = 30; // in megabytes
private static final int GENSLOP = 100 * 1024; // +/- 100k for logs
static Path ioPath;
private static Path out;
private static final Path root = new Path("/user");
public static void init() throws IOException {
ioPath = new Path("foo").makeQualified(GridmixTestUtils.dfs);
out = GridmixTestUtils.DEST.makeQualified(GridmixTestUtils.dfs);
@ -499,35 +505,46 @@ public void testSerialSubmit() throws Exception {
System.out.println("Serial ended at " + System.currentTimeMillis());
/** Submit Gridmix run and verify that it succeeds */
private void doSubmission(boolean useDefaultQueue,
boolean defaultOutputPath) throws Exception {
final Path in = new Path("foo").makeQualified(GridmixTestUtils.dfs);
final Path out = GridmixTestUtils.DEST.makeQualified(GridmixTestUtils.dfs);
final Path root = new Path("/user");
Configuration conf = null;
ArrayList<String> argsList = new ArrayList<String>();
argsList.add("-D" + FilePool.GRIDMIX_MIN_FILE + "=0");
argsList.add("-D" + Gridmix.GRIDMIX_USR_RSV + "="
+ EchoUserResolver.class.getName());
// Set the config property only if
// defaultOutputPath is false. If defaultOutputPath is true, then
// let us allow gridmix to use the path foo/gridmix/ as output dir.
if (!defaultOutputPath) {
argsList.add("-D" + Gridmix.GRIDMIX_OUT_DIR + "=" + out);
String[] argv = prepareArgs(defaultOutputPath,
testGridmixExitCode(useDefaultQueue, argv, 0);
argsList.add(String.valueOf(GENDATA) + "m");
argsList.add("-"); // ignored by DebugGridmix
String[] argv = argsList.toArray(new String[argsList.size()]);
* Setup args for Gridmix run and run gridmix and verify the exit code.
* @param useDefaultQueue whether to use default queue or not
* @param argv array of arguments to gridmix
* @param expectedExitCode the expected exit code of Gridmix run
static void testGridmixExitCode(boolean useDefaultQueue, String[] argv,
int expectedExitCode) throws Exception {
try {
// Allow synthetic users to create home directories
FileSystem.mkdirs(GridmixTestUtils.dfs, root,
new FsPermission((short)0777));
runGridmix(useDefaultQueue, argv, expectedExitCode);
} catch (Exception e) {
// fail the test if there is an exception
throw new RuntimeException(e);
} finally {
/** Run gridmix with specified arguments and verify the exit code. */
private static void runGridmix(boolean useDefaultQueue, String[] argv,
int expectedExitCode) throws Exception {
DebugGridmix client = new DebugGridmix();
conf = new Configuration();
Configuration conf = new Configuration();
conf.set("mapreduce.job.hdfs-servers", "");
if (useDefaultQueue) {
@ -537,21 +554,53 @@ private void doSubmission(boolean useDefaultQueue,
conf.setBoolean(GridmixJob.GRIDMIX_USE_QUEUE_IN_TRACE, true);
conf = GridmixTestUtils.mrCluster.createJobConf(new JobConf(conf));
// allow synthetic users to create home directories
GridmixTestUtils.dfs.mkdirs(root, new FsPermission((short)0777));
GridmixTestUtils.dfs.setPermission(root, new FsPermission((short)0777));
int res =, client, argv);
assertEquals("Client exited with nonzero status", 0, res);
assertEquals("Gridmix exited with wrong exit status",
expectedExitCode, res);
if (expectedExitCode == 0) {
} catch (Exception e) {
// fail the test if there is an exception
throw new RuntimeException(e);
} finally {
in.getFileSystem(conf).delete(in, true);
out.getFileSystem(conf).delete(out, true);
// Verify the permissions of ioPath
FsPermission perm =
assertEquals("Wrong permissions of ioPath",
new FsPermission((short)0777), perm);
* Create the list of arguments for the Gridmix run.
* @param defaultOutputPath Should the default output path be used for the
* Gridmix run ?
* @param userResolver the user resolver for the Gridmix run
* @return the array of arguments to Gridmix
static String[] prepareArgs(boolean defaultOutputPath, String userResolver) {
ArrayList<String> argsList = new ArrayList<String>();
argsList.add("-D" + FilePool.GRIDMIX_MIN_FILE + "=0");
argsList.add("-D" + Gridmix.GRIDMIX_USR_RSV + "=" + userResolver);
// Set the config property only if
// defaultOutputPath is false. If defaultOutputPath is true, then
// let us allow gridmix to use the path foo/gridmix/ as output dir.
if (!defaultOutputPath) {
argsList.add("-D" + Gridmix.GRIDMIX_OUT_DIR + "=" + out);
argsList.add(String.valueOf(GENDATA) + "m");
argsList.add("-"); // ignored by DebugGridmix
return argsList.toArray(new String[argsList.size()]);
/** If the given path exists, deletes it and its contents recursively */
private static void deletePath(Path dir) throws IOException {
if (GridmixTestUtils.dfs.exists(dir)) {
GridmixTestUtils.dfs.setPermission(dir, new FsPermission((short)0777));
GridmixTestUtils.dfs.delete(dir, true);