HBASE-14261 Enhance Chaos Monkey framework by adding zookeeper and datanode fault injections.
commit 1717de65a4 (parent 90b8a3c894)
@@ -39,6 +39,7 @@ interface ClusterManager extends Configurable {
     HADOOP_DATANODE("datanode"),
     HADOOP_JOBTRACKER("jobtracker"),
     HADOOP_TASKTRACKER("tasktracker"),
+    ZOOKEEPER_SERVER("QuorumPeerMain"),
     HBASE_MASTER("master"),
     HBASE_REGIONSERVER("regionserver");
 
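The string each enum constant carries is what the cluster manager greps for in `ps` output to locate the service's pid, which is why ZooKeeper is registered under its JVM main class, `QuorumPeerMain`, rather than a script name. A minimal, hedged usage sketch of the new service type through the `ClusterManager` API (host and port are placeholders, not values from the patch):

```java
// Hedged sketch: bounce a remote ZooKeeper peer via the ClusterManager API.
// "zk1.example.com" and 2181 are hypothetical values.
ClusterManager manager = new HBaseClusterManager();
manager.setConf(conf); // an org.apache.hadoop.conf.Configuration with SSH settings
manager.stop(ClusterManager.ServiceType.ZOOKEEPER_SERVER, "zk1.example.com", 2181);
manager.start(ClusterManager.ServiceType.ZOOKEEPER_SERVER, "zk1.example.com", 2181);
```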
@@ -114,16 +114,14 @@ public class DistributedHBaseCluster extends HBaseCluster {
   public void killRegionServer(ServerName serverName) throws IOException {
     LOG.info("Aborting RS: " + serverName.getServerName());
     clusterManager.kill(ServiceType.HBASE_REGIONSERVER,
-      serverName.getHostname(),
-      serverName.getPort());
+      serverName.getHostname(), serverName.getPort());
   }
 
   @Override
   public void stopRegionServer(ServerName serverName) throws IOException {
     LOG.info("Stopping RS: " + serverName.getServerName());
     clusterManager.stop(ServiceType.HBASE_REGIONSERVER,
-      serverName.getHostname(),
-      serverName.getPort());
+      serverName.getHostname(), serverName.getPort());
   }
 
   @Override
@@ -131,20 +129,96 @@ public class DistributedHBaseCluster extends HBaseCluster {
     waitForServiceToStop(ServiceType.HBASE_REGIONSERVER, serverName, timeout);
   }
 
+  @Override
+  public void startZkNode(String hostname, int port) throws IOException {
+    LOG.info("Starting Zookeeper node on: " + hostname);
+    clusterManager.start(ServiceType.ZOOKEEPER_SERVER, hostname, port);
+  }
+
+  @Override
+  public void killZkNode(ServerName serverName) throws IOException {
+    LOG.info("Aborting Zookeeper node on: " + serverName.getServerName());
+    clusterManager.kill(ServiceType.ZOOKEEPER_SERVER,
+      serverName.getHostname(), serverName.getPort());
+  }
+
+  @Override
+  public void stopZkNode(ServerName serverName) throws IOException {
+    LOG.info("Stopping Zookeeper node: " + serverName.getServerName());
+    clusterManager.stop(ServiceType.ZOOKEEPER_SERVER,
+      serverName.getHostname(), serverName.getPort());
+  }
+
+  @Override
+  public void waitForZkNodeToStart(ServerName serverName, long timeout) throws IOException {
+    waitForServiceToStart(ServiceType.ZOOKEEPER_SERVER, serverName, timeout);
+  }
+
+  @Override
+  public void waitForZkNodeToStop(ServerName serverName, long timeout) throws IOException {
+    waitForServiceToStop(ServiceType.ZOOKEEPER_SERVER, serverName, timeout);
+  }
+
+  @Override
+  public void startDataNode(ServerName serverName) throws IOException {
+    LOG.info("Starting data node on: " + serverName.getServerName());
+    clusterManager.start(ServiceType.HADOOP_DATANODE,
+      serverName.getHostname(), serverName.getPort());
+  }
+
+  @Override
+  public void killDataNode(ServerName serverName) throws IOException {
+    LOG.info("Aborting data node on: " + serverName.getServerName());
+    clusterManager.kill(ServiceType.HADOOP_DATANODE,
+      serverName.getHostname(), serverName.getPort());
+  }
+
+  @Override
+  public void stopDataNode(ServerName serverName) throws IOException {
+    LOG.info("Stopping data node on: " + serverName.getServerName());
+    clusterManager.stop(ServiceType.HADOOP_DATANODE,
+      serverName.getHostname(), serverName.getPort());
+  }
+
+  @Override
+  public void waitForDataNodeToStart(ServerName serverName, long timeout) throws IOException {
+    waitForServiceToStart(ServiceType.HADOOP_DATANODE, serverName, timeout);
+  }
+
+  @Override
+  public void waitForDataNodeToStop(ServerName serverName, long timeout) throws IOException {
+    waitForServiceToStop(ServiceType.HADOOP_DATANODE, serverName, timeout);
+  }
+
   private void waitForServiceToStop(ServiceType service, ServerName serverName, long timeout)
     throws IOException {
-    LOG.info("Waiting service:" + service + " to stop: " + serverName.getServerName());
+    LOG.info("Waiting for service: " + service + " to stop: " + serverName.getServerName());
     long start = System.currentTimeMillis();
 
     while ((System.currentTimeMillis() - start) < timeout) {
       if (!clusterManager.isRunning(service, serverName.getHostname(), serverName.getPort())) {
         return;
       }
-      Threads.sleep(1000);
+      Threads.sleep(100);
     }
     throw new IOException("did timeout waiting for service to stop:" + serverName);
   }
 
+  private void waitForServiceToStart(ServiceType service, ServerName serverName, long timeout)
+    throws IOException {
+    LOG.info("Waiting for service: " + service + " to start: " + serverName.getServerName());
+    long start = System.currentTimeMillis();
+
+    while ((System.currentTimeMillis() - start) < timeout) {
+      if (clusterManager.isRunning(service, serverName.getHostname(), serverName.getPort())) {
+        return;
+      }
+      Threads.sleep(100);
+    }
+    throw new IOException("did timeout waiting for service to start:" + serverName);
+  }
+
   @Override
   public MasterService.BlockingInterface getMasterAdminService()
     throws IOException {
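These wrappers follow the existing region-server pattern: issue the remote start/stop/kill over SSH, then poll `isRunning` every 100 ms until the state change is visible or the timeout elapses, at which point an `IOException` is thrown. A hedged usage sketch (the host name and timeout are placeholders):

```java
// Illustrative only: restart a datanode through the new cluster API and
// block until the process is observed down, then up again.
ServerName dn = ServerName.valueOf("dn1.example.com", -1, -1); // hypothetical host
cluster.killDataNode(dn);
cluster.waitForDataNodeToStop(dn, 60000);  // throws IOException on timeout
cluster.startDataNode(dn);
cluster.waitForDataNodeToStart(dn, 60000);
```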
@@ -54,9 +54,11 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
 
   /**
    * The command format that is used to execute the remote command. Arguments:
-   * 1 SSH options, 2 user name , 3 "@" if username is set, 4 host, 5 original command.
+   * 1 SSH options, 2 user name , 3 "@" if username is set, 4 host,
+   * 5 original command, 6 service user.
    */
-  private static final String DEFAULT_TUNNEL_CMD = "/usr/bin/ssh %1$s %2$s%3$s%4$s \"%5$s\"";
+  private static final String DEFAULT_TUNNEL_CMD =
+      "/usr/bin/ssh %1$s %2$s%3$s%4$s \"sudo -u %6$s %5$s\"";
   private String tunnelCmd;
 
   private static final String RETRY_ATTEMPTS_KEY = "hbase.it.clustermanager.retry.attempts";
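Every remote command is now wrapped in `sudo -u <service user>`, so datanode and ZooKeeper scripts run under their own accounts instead of the SSH login user. A worked expansion of the format string (the SSH options, login user, host, and command below are made up for illustration):

```java
// Hedged expansion of DEFAULT_TUNNEL_CMD; only the format string is from
// the patch, the six arguments are illustrative.
String cmd = String.format("/usr/bin/ssh %1$s %2$s%3$s%4$s \"sudo -u %6$s %5$s\"",
    "-o StrictHostKeyChecking=no",                           // %1$s SSH options
    "ec2-user", "@", "dn1.example.com",                      // %2$s user, %3$s "@", %4$s host
    "/usr/local/hadoop/sbin/hadoop-daemon.sh stop datanode", // %5$s original command
    "hdfs");                                                 // %6$s service user
// cmd -> /usr/bin/ssh -o StrictHostKeyChecking=no ec2-user@dn1.example.com
//        "sudo -u hdfs /usr/local/hadoop/sbin/hadoop-daemon.sh stop datanode"
```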
@@ -93,11 +95,24 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
         .setSleepInterval(conf.getLong(RETRY_SLEEP_INTERVAL_KEY, DEFAULT_RETRY_SLEEP_INTERVAL)));
   }
 
+  private String getServiceUser(ServiceType service) {
+    Configuration conf = getConf();
+    switch (service) {
+      case HADOOP_DATANODE:
+        return conf.get("hbase.it.clustermanager.hadoop.hdfs.user", "hdfs");
+      case ZOOKEEPER_SERVER:
+        return conf.get("hbase.it.clustermanager.zookeeper.user", "zookeeper");
+      default:
+        return conf.get("hbase.it.clustermanager.hbase.user", "hbase");
+    }
+  }
+
   /**
    * Executes commands over SSH
    */
   protected class RemoteShell extends Shell.ShellCommandExecutor {
     private String hostname;
+    private String user;
 
     public RemoteShell(String hostname, String[] execString, File dir, Map<String, String> env,
         long timeout) {
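`getServiceUser` keeps the per-service accounts configurable, with `hdfs`, `zookeeper`, and `hbase` as defaults. A hedged override sketch using the exact property keys read above (the values are arbitrary):

```java
// Sketch: point the chaos tooling at non-default service accounts.
Configuration conf = HBaseConfiguration.create();
conf.set("hbase.it.clustermanager.hadoop.hdfs.user", "hadoop");
conf.set("hbase.it.clustermanager.zookeeper.user", "zk");
conf.set("hbase.it.clustermanager.hbase.user", "hbase");
```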
@@ -120,11 +135,17 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
       this.hostname = hostname;
     }
 
+    public RemoteShell(String hostname, String user, String[] execString) {
+      super(execString);
+      this.hostname = hostname;
+      this.user = user;
+    }
+
     @Override
     public String[] getExecString() {
       String at = sshUserName.isEmpty() ? "" : "@";
       String remoteCmd = StringUtils.join(super.getExecString(), " ");
-      String cmd = String.format(tunnelCmd, sshOptions, sshUserName, at, hostname, remoteCmd);
+      String cmd = String.format(tunnelCmd, sshOptions, sshUserName, at, hostname, remoteCmd, user);
       LOG.info("Executing full command [" + cmd + "]");
       return new String[] { "/usr/bin/env", "bash", "-c", cmd };
     }
@@ -188,24 +209,95 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
     }
   }
 
+  /**
+   * CommandProvider to manage the service using sbin/hadoop-* scripts.
+   */
+  static class HadoopShellCommandProvider extends CommandProvider {
+    private final String hadoopHome;
+    private final String confDir;
+
+    HadoopShellCommandProvider(Configuration conf) throws IOException {
+      hadoopHome = conf.get("hbase.it.clustermanager.hadoop.home",
+          System.getenv("HADOOP_HOME"));
+      String tmp = conf.get("hbase.it.clustermanager.hadoop.conf.dir",
+          System.getenv("HADOOP_CONF_DIR"));
+      if (hadoopHome == null) {
+        throw new IOException("Hadoop home configuration parameter i.e. " +
+          "'hbase.it.clustermanager.hadoop.home' is not configured properly.");
+      }
+      if (tmp != null) {
+        confDir = String.format("--config %s", tmp);
+      } else {
+        confDir = "";
+      }
+    }
+
+    @Override
+    public String getCommand(ServiceType service, Operation op) {
+      return String.format("%s/sbin/hadoop-daemon.sh %s %s %s", hadoopHome, confDir,
+          op.toString().toLowerCase(), service);
+    }
+  }
+
+  /**
+   * CommandProvider to manage the service using bin/zk* scripts.
+   */
+  static class ZookeeperShellCommandProvider extends CommandProvider {
+    private final String zookeeperHome;
+    private final String confDir;
+
+    ZookeeperShellCommandProvider(Configuration conf) throws IOException {
+      zookeeperHome = conf.get("hbase.it.clustermanager.zookeeper.home",
+          System.getenv("ZOOBINDIR"));
+      String tmp = conf.get("hbase.it.clustermanager.zookeeper.conf.dir",
+          System.getenv("ZOOCFGDIR"));
+      if (zookeeperHome == null) {
+        throw new IOException("Zookeeper home configuration parameter i.e. " +
+          "'hbase.it.clustermanager.zookeeper.home' is not configured properly.");
+      }
+      if (tmp != null) {
+        confDir = String.format("--config %s", tmp);
+      } else {
+        confDir = "";
+      }
+    }
+
+    @Override
+    public String getCommand(ServiceType service, Operation op) {
+      return String.format("%s/bin/zkServer.sh %s", zookeeperHome, op.toString().toLowerCase());
+    }
+
+    @Override
+    protected String findPidCommand(ServiceType service) {
+      return String.format("ps aux | grep %s | grep -v grep | tr -s ' ' | cut -d ' ' -f2",
+        service);
+    }
+  }
+
   public HBaseClusterManager() {
   }
 
-  protected CommandProvider getCommandProvider(ServiceType service) {
-    //TODO: make it pluggable, or auto-detect the best command provider, should work with
-    //hadoop daemons as well
-    return new HBaseShellCommandProvider(getConf());
+  protected CommandProvider getCommandProvider(ServiceType service) throws IOException {
+    switch (service) {
+      case HADOOP_DATANODE:
+        return new HadoopShellCommandProvider(getConf());
+      case ZOOKEEPER_SERVER:
+        return new ZookeeperShellCommandProvider(getConf());
+      default:
+        return new HBaseShellCommandProvider(getConf());
+    }
   }
 
   /**
    * Execute the given command on the host using SSH
    * @return pair of exit code and command output
    * @throws IOException if something goes wrong.
    */
-  private Pair<Integer, String> exec(String hostname, String... cmd) throws IOException {
+  private Pair<Integer, String> exec(String hostname, ServiceType service, String... cmd)
+    throws IOException {
     LOG.info("Executing remote command: " + StringUtils.join(cmd, " ") + " , hostname:" + hostname);
 
-    RemoteShell shell = new RemoteShell(hostname, cmd);
+    RemoteShell shell = new RemoteShell(hostname, getServiceUser(service), cmd);
     try {
       shell.execute();
     } catch (Shell.ExitCodeException ex) {
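The providers reduce each (service, operation) pair to a plain shell string. Assuming `HADOOP_HOME=/opt/hadoop`, `HADOOP_CONF_DIR=/etc/hadoop/conf`, and `ZOOBINDIR=/opt/zookeeper` (all made up here), and assuming `ServiceType`'s string form is its process name (which the `QuorumPeerMain` pid grep implies), the generated commands look like:

```java
// Hedged trace of getCommand() for the two new providers; paths are placeholders.
String hadoopCmd = String.format("%s/sbin/hadoop-daemon.sh %s %s %s",
    "/opt/hadoop", "--config /etc/hadoop/conf", "stop", "datanode");
// -> /opt/hadoop/sbin/hadoop-daemon.sh --config /etc/hadoop/conf stop datanode
String zkCmd = String.format("%s/bin/zkServer.sh %s", "/opt/zookeeper", "start");
// -> /opt/zookeeper/bin/zkServer.sh start
```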
@@ -222,12 +314,12 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
     return new Pair<Integer, String>(shell.getExitCode(), shell.getOutput());
   }
 
-  private Pair<Integer, String> execWithRetries(String hostname, String... cmd)
+  private Pair<Integer, String> execWithRetries(String hostname, ServiceType service, String... cmd)
     throws IOException {
     RetryCounter retryCounter = retryCounterFactory.create();
     while (true) {
       try {
-        return exec(hostname, cmd);
+        return exec(hostname, service, cmd);
       } catch (IOException e) {
         retryOrThrow(retryCounter, e, hostname, cmd);
       }
@@ -252,7 +344,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
   }
 
   private void exec(String hostname, ServiceType service, Operation op) throws IOException {
-    execWithRetries(hostname, getCommandProvider(service).getCommand(service, op));
+    execWithRetries(hostname, service, getCommandProvider(service).getCommand(service, op));
   }
 
   @Override
@@ -271,13 +363,13 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
   }
 
   public void signal(ServiceType service, String signal, String hostname) throws IOException {
-    execWithRetries(hostname, getCommandProvider(service).signalCommand(service, signal));
+    execWithRetries(hostname, service, getCommandProvider(service).signalCommand(service, signal));
   }
 
   @Override
   public boolean isRunning(ServiceType service, String hostname, int port) throws IOException {
-    String ret = execWithRetries(hostname, getCommandProvider(service).isRunningCommand(service))
-        .getSecond();
+    String ret = execWithRetries(hostname, service,
+      getCommandProvider(service).isRunningCommand(service)).getSecond();
     return ret.length() > 0;
   }
@@ -321,6 +321,7 @@ public class RESTApiClusterManager extends Configured implements ClusterManager
 
   // The RoleCommand enum is used by the doRoleCommand method to guard against non-existent methods
   // being invoked on a given role.
+  // TODO: Integrate zookeeper and hdfs related failure injections (Ref: HBASE-14261).
   private enum RoleCommand {
     START, STOP, RESTART;
@@ -27,6 +27,7 @@ import java.util.List;
 import org.apache.commons.lang.math.RandomUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ClusterStatus;
 import org.apache.hadoop.hbase.HBaseCluster;
 import org.apache.hadoop.hbase.HRegionInfo;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
import org.apache.hadoop.hbase.client.Admin;
|
import org.apache.hadoop.hbase.client.Admin;
|
||||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -49,6 +49,14 @@ public class Action {
       "hbase.chaosmonkey.action.startmastertimeout";
   public static final String KILL_RS_TIMEOUT_KEY = "hbase.chaosmonkey.action.killrstimeout";
   public static final String START_RS_TIMEOUT_KEY = "hbase.chaosmonkey.action.startrstimeout";
+  public static final String KILL_ZK_NODE_TIMEOUT_KEY =
+      "hbase.chaosmonkey.action.killzknodetimeout";
+  public static final String START_ZK_NODE_TIMEOUT_KEY =
+      "hbase.chaosmonkey.action.startzknodetimeout";
+  public static final String KILL_DATANODE_TIMEOUT_KEY =
+      "hbase.chaosmonkey.action.killdatanodetimeout";
+  public static final String START_DATANODE_TIMEOUT_KEY =
+      "hbase.chaosmonkey.action.startdatanodetimeout";
 
   protected static final Log LOG = LogFactory.getLog(Action.class);
@@ -56,6 +64,10 @@ public class Action {
   protected static final long START_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
   protected static final long KILL_RS_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
   protected static final long START_RS_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
+  protected static final long KILL_ZK_NODE_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
+  protected static final long START_ZK_NODE_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
+  protected static final long KILL_DATANODE_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
+  protected static final long START_DATANODE_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
 
   protected ActionContext context;
   protected HBaseCluster cluster;
@@ -66,6 +78,10 @@ public class Action {
   protected long startMasterTimeout;
   protected long killRsTimeout;
   protected long startRsTimeout;
+  protected long killZkNodeTimeout;
+  protected long startZkNodeTimeout;
+  protected long killDataNodeTimeout;
+  protected long startDataNodeTimeout;
 
   public void init(ActionContext context) throws IOException {
     this.context = context;
@@ -80,6 +96,14 @@ public class Action {
         START_MASTER_TIMEOUT_DEFAULT);
     killRsTimeout = cluster.getConf().getLong(KILL_RS_TIMEOUT_KEY, KILL_RS_TIMEOUT_DEFAULT);
     startRsTimeout = cluster.getConf().getLong(START_RS_TIMEOUT_KEY, START_RS_TIMEOUT_DEFAULT);
+    killZkNodeTimeout = cluster.getConf().getLong(KILL_ZK_NODE_TIMEOUT_KEY,
+      KILL_ZK_NODE_TIMEOUT_DEFAULT);
+    startZkNodeTimeout = cluster.getConf().getLong(START_ZK_NODE_TIMEOUT_KEY,
+      START_ZK_NODE_TIMEOUT_DEFAULT);
+    killDataNodeTimeout = cluster.getConf().getLong(KILL_DATANODE_TIMEOUT_KEY,
+      KILL_DATANODE_TIMEOUT_DEFAULT);
+    startDataNodeTimeout = cluster.getConf().getLong(START_DATANODE_TIMEOUT_KEY,
+      START_DATANODE_TIMEOUT_DEFAULT);
   }
 
   public void perform() throws Exception { }
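All of the new timeouts fall back to `PolicyBasedChaosMonkey.TIMEOUT` and can be tuned per key. A hedged example (only the property keys come from the patch; the values are made up):

```java
// Sketch: tighten the ZK kill window and widen the datanode start window.
Configuration conf = HBaseConfiguration.create();
conf.setLong("hbase.chaosmonkey.action.killzknodetimeout", 30000L);
conf.setLong("hbase.chaosmonkey.action.startdatanodetimeout", 180000L);
```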
@@ -135,6 +159,36 @@ public class Action {
         + cluster.getClusterStatus().getServersSize());
   }
 
+  protected void killZKNode(ServerName server) throws IOException {
+    LOG.info("Killing zookeeper node:" + server);
+    cluster.killZkNode(server);
+    cluster.waitForZkNodeToStop(server, killZkNodeTimeout);
+    LOG.info("Killed zookeeper node:" + server + ". Reported num of rs:"
+      + cluster.getClusterStatus().getServersSize());
+  }
+
+  protected void startZKNode(ServerName server) throws IOException {
+    LOG.info("Starting zookeeper node:" + server.getHostname());
+    cluster.startZkNode(server.getHostname(), server.getPort());
+    cluster.waitForZkNodeToStart(server, startZkNodeTimeout);
+    LOG.info("Started zookeeper node:" + server);
+  }
+
+  protected void killDataNode(ServerName server) throws IOException {
+    LOG.info("Killing datanode:" + server);
+    cluster.killDataNode(server);
+    cluster.waitForDataNodeToStop(server, killDataNodeTimeout);
+    LOG.info("Killed datanode:" + server + ". Reported num of rs:"
+      + cluster.getClusterStatus().getServersSize());
+  }
+
+  protected void startDataNode(ServerName server) throws IOException {
+    LOG.info("Starting datanode:" + server.getHostname());
+    cluster.startDataNode(server);
+    cluster.waitForDataNodeToStart(server, startDataNodeTimeout);
+    LOG.info("Started datanode:" + server);
+  }
+
   protected void unbalanceRegions(ClusterStatus clusterStatus,
     List<ServerName> fromServers, List<ServerName> toServers,
     double fractionOfRegions) throws Exception {
@@ -174,6 +228,10 @@ public class Action {
     }
   }
 
+  public Configuration getConf() {
+    return cluster.getConf();
+  }
+
   /**
    * Context for Action's
    */
@@ -51,4 +51,18 @@ public class RestartActionBaseAction extends Action {
     sleep(sleepTime);
     startRs(server);
   }
+
+  void restartZKNode(ServerName server, long sleepTime) throws IOException {
+    sleepTime = Math.max(sleepTime, 1000);
+    killZKNode(server);
+    sleep(sleepTime);
+    startZKNode(server);
+  }
+
+  void restartDataNode(ServerName server, long sleepTime) throws IOException {
+    sleepTime = Math.max(sleepTime, 1000);
+    killDataNode(server);
+    sleep(sleepTime);
+    startDataNode(server);
+  }
 }
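Both helpers clamp the pause between kill and start to at least one second. A hedged sketch of driving the two new restart actions (defined in the files just below) standalone, outside any monkey policy; the `util` handle and the 60 s sleep are assumptions, and the `ActionContext` wiring follows the existing chaos framework:

```java
// Illustrative only: perform a single random datanode and ZK restart.
// `util` is an IntegrationTestingUtility wired to a distributed cluster.
Action restartDn = new RestartRandomDataNodeAction(60000);
restartDn.init(new Action.ActionContext(util));
restartDn.perform();
Action restartZk = new RestartRandomZKNodeAction(60000);
restartZk.init(new Action.ActionContext(util));
restartZk.perform();
```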
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hdfs.DFSClient;
+import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Action that restarts a random datanode.
+ */
+public class RestartRandomDataNodeAction extends RestartActionBaseAction {
+  public RestartRandomDataNodeAction(long sleepTime) {
+    super(sleepTime);
+  }
+
+  @Override
+  public void perform() throws Exception {
+    LOG.info("Performing action: Restart random data node");
+    ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getDataNodes());
+    restartDataNode(server, sleepTime);
+  }
+
+  public ServerName[] getDataNodes() throws IOException {
+    DistributedFileSystem fs = (DistributedFileSystem) FSUtils.getRootDir(getConf())
+        .getFileSystem(getConf());
+    DFSClient dfsClient = fs.getClient();
+    List<ServerName> hosts = new LinkedList<ServerName>();
+    for (DatanodeInfo dataNode: dfsClient.datanodeReport(HdfsConstants.DatanodeReportType.LIVE)) {
+      hosts.add(ServerName.valueOf(dataNode.getHostName(), -1, -1));
+    }
+    return hosts.toArray(new ServerName[hosts.size()]);
+  }
+}
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.actions;
+
+import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+import org.apache.hadoop.hbase.zookeeper.ZKServerTool;
+
+/**
+ * Action that restarts a random zookeeper node.
+ */
+public class RestartRandomZKNodeAction extends RestartActionBaseAction {
+  public RestartRandomZKNodeAction(long sleepTime) {
+    super(sleepTime);
+  }
+
+  @Override
+  public void perform() throws Exception {
+    LOG.info("Performing action: Restart random zookeeper node");
+    ServerName server = PolicyBasedChaosMonkey.selectRandomItem(
+        ZKServerTool.readZKNodes(getConf()));
+    restartZKNode(server, sleepTime);
+  }
+}
@@ -70,12 +70,14 @@ public abstract class MonkeyFactory {
   public static final String STRESS_AM = "stressAM";
   public static final String NO_KILL = "noKill";
   public static final String MASTER_KILLING = "masterKilling";
+  public static final String SERVER_AND_DEPENDENCIES_KILLING = "serverAndDependenciesKilling";
 
   public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap.<String,MonkeyFactory>builder()
     .put(CALM, new CalmMonkeyFactory())
     .put(SLOW_DETERMINISTIC, new SlowDeterministicMonkeyFactory())
     .put(UNBALANCE, new UnbalanceMonkeyFactory())
     .put(SERVER_KILLING, new ServerKillingMonkeyFactory())
+    .put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory())
     .put(STRESS_AM, new StressAssignmentManagerMonkeyFactory())
     .put(NO_KILL, new NoKillMonkeyFactory())
     .put(MASTER_KILLING, new MasterKillingMonkeyFactory())
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.chaos.factories;
+
+import org.apache.hadoop.hbase.chaos.actions.Action;
+import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction;
+import org.apache.hadoop.hbase.chaos.actions.ForceBalancerAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartRandomDataNodeAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartRandomRsExceptMetaAction;
+import org.apache.hadoop.hbase.chaos.actions.RestartRandomZKNodeAction;
+import org.apache.hadoop.hbase.chaos.actions.RollingBatchRestartRsExceptMetaAction;
+import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
+import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
+import org.apache.hadoop.hbase.chaos.policies.CompositeSequentialPolicy;
+import org.apache.hadoop.hbase.chaos.policies.DoActionsOncePolicy;
+import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
+
+/**
+ * Creates ChaosMonkeys for doing server restart actions, but not
+ * flush / compact / snapshot kind of actions.
+ */
+public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory {
+
+  @Override
+  public ChaosMonkey build() {
+
+    // Destructive actions to mess things around. Cannot run batch restart.
+    Action[] actions1 = new Action[]{
+      new RestartRandomRsExceptMetaAction(60000),
+      new RestartActiveMasterAction(5000),
+      new RollingBatchRestartRsExceptMetaAction(5000, 1.0f, 2), // only allow 2 servers to be dead.
+      new ForceBalancerAction(),
+      new RestartRandomDataNodeAction(60000),
+      new RestartRandomZKNodeAction(60000)
+    };
+
+    // Action to log more info for debugging
+    Action[] actions2 = new Action[]{
+      new DumpClusterStatusAction()
+    };
+
+    return new PolicyBasedChaosMonkey(util,
+      new CompositeSequentialPolicy(
+        new DoActionsOncePolicy(60 * 1000, actions1),
+        new PeriodicRandomActionPolicy(60 * 1000, actions1)),
+      new PeriodicRandomActionPolicy(60 * 1000, actions2));
+  }
+}
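Once registered, the new monkey is reachable by the name added to `MonkeyFactory` above. A hedged lookup sketch (the lookup and builder-style setter follow the existing factory API; `util` is assumed to be the test's `IntegrationTestingUtility`):

```java
// Sketch: build and start the new monkey by its registered name.
ChaosMonkey monkey = MonkeyFactory.getFactory("serverAndDependenciesKilling")
    .setUtil(util)
    .build();
monkey.start();
```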
@@ -19,9 +19,13 @@
 
 package org.apache.hadoop.hbase.zookeeper;
 
+import java.util.LinkedList;
+import java.util.List;
 import java.util.Properties;
 import java.util.Map.Entry;
 
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
@@ -33,12 +37,10 @@ import org.apache.hadoop.hbase.HBaseInterfaceAudience;
  */
 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
 public class ZKServerTool {
-  /**
-   * Run the tool.
-   * @param args Command line arguments.
-   */
-  public static void main(String args[]) {
-    Configuration conf = HBaseConfiguration.create();
+
+  public static ServerName[] readZKNodes(Configuration conf) {
+    List<ServerName> hosts = new LinkedList<ServerName>();
     // Note that we do not simply grab the property
     // HConstants.ZOOKEEPER_QUORUM from the HBaseConfiguration because the
     // user may be using a zoo.cfg file.
@@ -49,8 +51,24 @@ public class ZKServerTool {
       if (key.startsWith("server.")) {
         String[] parts = value.split(":");
         String host = parts[0];
-        System.out.println("ZK host:" + host);
+
+        int port = HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT;
+        if (parts.length > 1) {
+          port = Integer.parseInt(parts[1]);
+        }
+        hosts.add(ServerName.valueOf(host, port, -1));
       }
     }
+    return hosts.toArray(new ServerName[hosts.size()]);
+  }
+
+  /**
+   * Run the tool.
+   * @param args Command line arguments.
+   */
+  public static void main(String args[]) {
+    for(ServerName server: readZKNodes(HBaseConfiguration.create())) {
+      System.out.println("Zk host: " + server.getHostname());
+    }
+  }
 }
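A worked trace of the parsing above (the host is hypothetical): a `zoo.cfg`-style property `server.1=zk1.example.com:2888:3888` splits into `parts[0] = "zk1.example.com"` and `parts[1] = "2888"`, so the reported `ServerName` carries the first port in the entry; the 2181 default only applies when no port is present.

```java
// Hedged, standalone trace of readZKNodes' parsing on a made-up entry.
String value = "zk1.example.com:2888:3888"; // hypothetical server.1 entry
String[] parts = value.split(":");
int port = 2181; // HConstants.DEFAULT_ZOOKEPER_CLIENT_PORT
if (parts.length > 1) {
  port = Integer.parseInt(parts[1]);
}
System.out.println(parts[0] + "," + port + ",-1"); // zk1.example.com,2888,-1
```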
@@ -163,6 +163,81 @@ public abstract class HBaseCluster implements Closeable, Configurable {
   public abstract void waitForRegionServerToStop(ServerName serverName, long timeout)
     throws IOException;
 
+  /**
+   * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster,
+   * silently logs warning message.
+   * @param hostname the hostname to start the regionserver on
+   * @throws IOException if something goes wrong
+   */
+  public abstract void startZkNode(String hostname, int port) throws IOException;
+
+  /**
+   * Kills the zookeeper node process if this is a distributed cluster, otherwise,
+   * this causes master to exit doing basic clean up only.
+   * @throws IOException if something goes wrong
+   */
+  public abstract void killZkNode(ServerName serverName) throws IOException;
+
+  /**
+   * Stops the region zookeeper if this is a distributed cluster, otherwise
+   * silently logs warning message.
+   * @throws IOException if something goes wrong
+   */
+  public abstract void stopZkNode(ServerName serverName) throws IOException;
+
+  /**
+   * Wait for the specified zookeeper node to join the cluster
+   * @return whether the operation finished with success
+   * @throws IOException if something goes wrong or timeout occurs
+   */
+  public abstract void waitForZkNodeToStart(ServerName serverName, long timeout)
+    throws IOException;
+
+  /**
+   * Wait for the specified zookeeper node to stop the thread / process.
+   * @return whether the operation finished with success
+   * @throws IOException if something goes wrong or timeout occurs
+   */
+  public abstract void waitForZkNodeToStop(ServerName serverName, long timeout)
+    throws IOException;
+
+  /**
+   * Starts a new datanode on the given hostname or if this is a mini/local cluster,
+   * silently logs warning message.
+   * @throws IOException if something goes wrong
+   */
+  public abstract void startDataNode(ServerName serverName) throws IOException;
+
+  /**
+   * Kills the datanode process if this is a distributed cluster, otherwise,
+   * this causes master to exit doing basic clean up only.
+   * @throws IOException if something goes wrong
+   */
+  public abstract void killDataNode(ServerName serverName) throws IOException;
+
+  /**
+   * Stops the datanode if this is a distributed cluster, otherwise
+   * silently logs warning message.
+   * @throws IOException if something goes wrong
+   */
+  public abstract void stopDataNode(ServerName serverName) throws IOException;
+
+  /**
+   * Wait for the specified datanode to join the cluster
+   * @return whether the operation finished with success
+   * @throws IOException if something goes wrong or timeout occurs
+   */
+  public abstract void waitForDataNodeToStart(ServerName serverName, long timeout)
+    throws IOException;
+
+  /**
+   * Wait for the specified datanode to stop the thread / process.
+   * @return whether the operation finished with success
+   * @throws IOException if something goes wrong or timeout occurs
+   */
+  public abstract void waitForDataNodeToStop(ServerName serverName, long timeout)
+    throws IOException;
+
   /**
    * Starts a new master on the given hostname or if this is a mini/local cluster,
    * starts a master locally.
@@ -260,6 +260,56 @@ public class MiniHBaseCluster extends HBaseCluster {
     waitOnRegionServer(getRegionServerIndex(serverName));
   }
 
+  @Override
+  public void startZkNode(String hostname, int port) throws IOException {
+    LOG.warn("Starting zookeeper nodes on mini cluster is not supported");
+  }
+
+  @Override
+  public void killZkNode(ServerName serverName) throws IOException {
+    LOG.warn("Aborting zookeeper nodes on mini cluster is not supported");
+  }
+
+  @Override
+  public void stopZkNode(ServerName serverName) throws IOException {
+    LOG.warn("Stopping zookeeper nodes on mini cluster is not supported");
+  }
+
+  @Override
+  public void waitForZkNodeToStart(ServerName serverName, long timeout) throws IOException {
+    LOG.warn("Waiting for zookeeper nodes to start on mini cluster is not supported");
+  }
+
+  @Override
+  public void waitForZkNodeToStop(ServerName serverName, long timeout) throws IOException {
+    LOG.warn("Waiting for zookeeper nodes to stop on mini cluster is not supported");
+  }
+
+  @Override
+  public void startDataNode(ServerName serverName) throws IOException {
+    LOG.warn("Starting datanodes on mini cluster is not supported");
+  }
+
+  @Override
+  public void killDataNode(ServerName serverName) throws IOException {
+    LOG.warn("Aborting datanodes on mini cluster is not supported");
+  }
+
+  @Override
+  public void stopDataNode(ServerName serverName) throws IOException {
+    LOG.warn("Stopping datanodes on mini cluster is not supported");
+  }
+
+  @Override
+  public void waitForDataNodeToStart(ServerName serverName, long timeout) throws IOException {
+    LOG.warn("Waiting for datanodes to start on mini cluster is not supported");
+  }
+
+  @Override
+  public void waitForDataNodeToStop(ServerName serverName, long timeout) throws IOException {
+    LOG.warn("Waiting for datanodes to stop on mini cluster is not supported");
+  }
+
   @Override
   public void startMaster(String hostname, int port) throws IOException {
     this.startMaster();