HBASE-23352: Allow chaos monkeys to access cmd line params, and improve FillDiskCommandAction (#885)

Instead of reading the chaos monkey properties from a freshly created
default configuration, read them from the configuration that has already
been extended with the command line params.
Change FillDiskCommandAction to try to stop the remote process if the
fill command fails with an exception.

Signed-off-by: stack <stack@apache.org>
This commit is contained in:
BukrosSzabolcs 2019-12-02 03:29:06 +01:00 committed by binlijin
parent 25d83c1aa7
commit d69ecf6092
3 changed files with 14 additions and 9 deletions

View File

@ -92,7 +92,7 @@ public abstract class IntegrationTestBase extends AbstractHBaseTool {
// Add entries for the CM from hbase-site.xml as a convenience.
// Do this prior to loading from the properties file to make sure those in the properties
// file are given precedence to those in hbase-site.xml (backwards compatibility).
loadMonkeyProperties(monkeyProps, HBaseConfiguration.create());
loadMonkeyProperties(monkeyProps, conf);
if (cmd.hasOption(CHAOS_MONKEY_PROPS)) {
String chaosMonkeyPropsFile = cmd.getOptionValue(CHAOS_MONKEY_PROPS);
if (StringUtils.isNotEmpty(chaosMonkeyPropsFile)) {
@ -183,6 +183,7 @@ public abstract class IntegrationTestBase extends AbstractHBaseTool {
if (fact == null) {
fact = getDefaultMonkeyFactory();
}
LOG.info("Using chaos monkey factory: {}", fact.getClass());
monkey = fact.setUtil(util)
.setTableName(getTablename())
.setProperties(monkeyProps)

View File

@ -58,15 +58,15 @@ public class FillDiskCommandAction extends SudoCommandAction {
String hostname = server.getHostname();
try {
clusterManager.execSudoWithRetries(hostname, timeout, getFillCommand());
Thread.sleep(duration);
} catch (InterruptedException e) {
LOG.debug("Failed to run the command for the full duration", e);
clusterManager.execSudo(hostname, duration, getFillCommand());
} catch (IOException ex) {
LOG.info("Potential timeout. We try to stop the dd process on target machine");
clusterManager.execSudoWithRetries(hostname, timeout, getStopCommand());
throw ex;
} finally {
clusterManager.execSudoWithRetries(hostname, timeout, getClearCommand());
LOG.info("Finished to execute FillDiskCommandAction");
}
LOG.info("Finished to execute FillDiskCommandAction");
}
private String getFillCommand(){
@ -80,4 +80,8 @@ public class FillDiskCommandAction extends SudoCommandAction {
/**
 * Builds the shell command that deletes the {@code garbage} file located
 * directly under {@code path}.
 *
 * @return an {@code rm -f} command line targeting {@code <path>/garbage}
 */
private String getClearCommand() {
  // -f keeps rm quiet when the file is missing.
  final String clearTemplate = "rm -f %s/garbage";
  return String.format(clearTemplate, path);
}
/**
 * Returns the shell command used to stop the {@code dd} process on the
 * target machine after the fill command failed.
 *
 * <p>NOTE(review): {@code killall} selects processes by name, so this
 * presumably stops every {@code dd} running on the host, not only the one
 * started by this action - confirm that is acceptable for the test cluster.
 *
 * @return the constant command line {@code killall dd}
 */
private String getStopCommand() {
  // No format arguments are involved, so return the literal directly instead
  // of routing it through String.format (which would also misinterpret any
  // '%' character added to the command later).
  return "killall dd";
}
}

View File

@ -55,7 +55,7 @@ public interface MonkeyConstants {
String NETWORK_ISSUE_RATIO = "network.issue.ratio";
String NETWORK_ISSUE_DELAY = "network.issue.delay";
String NETWORK_ISSUE_INTERFACE = "network.issue.interface";
//should be big enough to create the file
//should be higher than the usual timeout because the target machine might respond slowly
String FILL_DISK_COMMAND_TIMEOUT = "fill.disk.command.timeout";
String FILL_DISK_PATH = "fill.disk.path";
String FILL_DISK_FILE_SIZE = "fill.disk.file.size";
@ -101,7 +101,7 @@ public interface MonkeyConstants {
float DEFAULT_NETWORK_ISSUE_RATIO = 0.1f;
long DEFAULT_NETWORK_ISSUE_DELAY = 100;
String DEFAULT_NETWORK_ISSUE_INTERFACE = "eth0";
long DEFAULT_FILL_DISK_COMMAND_TIMEOUT = 5 * 60 * 1000 + 30 * 1000;//duration + timeout
long DEFAULT_FILL_DISK_COMMAND_TIMEOUT = 60 * 1000;
String DEFAULT_FILL_DISK_PATH = "/tmp";
long DEFAULT_FILL_DISK_FILE_SIZE = 0;
long DEFAULT_FILL_DISK_ISSUE_DURATION = 5 * 60 * 1000;