HBASE-24295 [Chaos Monkey] abstract logging through the class hierarchy
Adds `protected abstract Logger getLogger()` to `Action` so that implementation's names are logged when actions are performed. Signed-off-by: stack <stack@apache.org> Signed-off-by: Jan Hentschel <jan.hentschel@ultratendency.com>
This commit is contained in:
parent
e37aafcfc2
commit
204a1fad92
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -50,12 +50,11 @@ import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||||
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
|
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A (possibly mischievous) action that the ChaosMonkey can perform.
|
* A (possibly mischievous) action that the ChaosMonkey can perform.
|
||||||
*/
|
*/
|
||||||
public class Action {
|
public abstract class Action {
|
||||||
|
|
||||||
public static final String KILL_MASTER_TIMEOUT_KEY =
|
public static final String KILL_MASTER_TIMEOUT_KEY =
|
||||||
"hbase.chaosmonkey.action.killmastertimeout";
|
"hbase.chaosmonkey.action.killmastertimeout";
|
||||||
|
@ -76,8 +75,6 @@ public class Action {
|
||||||
public static final String START_NAMENODE_TIMEOUT_KEY =
|
public static final String START_NAMENODE_TIMEOUT_KEY =
|
||||||
"hbase.chaosmonkey.action.startnamenodetimeout";
|
"hbase.chaosmonkey.action.startnamenodetimeout";
|
||||||
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(Action.class);
|
|
||||||
|
|
||||||
protected static final long KILL_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
|
protected static final long KILL_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
|
||||||
protected static final long START_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
|
protected static final long START_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
|
||||||
protected static final long KILL_RS_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
|
protected static final long KILL_RS_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
|
||||||
|
@ -107,12 +104,17 @@ public class Action {
|
||||||
protected long startNameNodeTimeout;
|
protected long startNameNodeTimeout;
|
||||||
protected boolean skipMetaRS;
|
protected boolean skipMetaRS;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve the instance's {@link Logger}, for use throughout the class hierarchy.
|
||||||
|
*/
|
||||||
|
protected abstract Logger getLogger();
|
||||||
|
|
||||||
public void init(ActionContext context) throws IOException {
|
public void init(ActionContext context) throws IOException {
|
||||||
this.context = context;
|
this.context = context;
|
||||||
cluster = context.getHBaseCluster();
|
cluster = context.getHBaseCluster();
|
||||||
initialStatus = cluster.getInitialClusterMetrics();
|
initialStatus = cluster.getInitialClusterMetrics();
|
||||||
Collection<ServerName> regionServers = initialStatus.getLiveServerMetrics().keySet();
|
Collection<ServerName> regionServers = initialStatus.getLiveServerMetrics().keySet();
|
||||||
initialServers = regionServers.toArray(new ServerName[regionServers.size()]);
|
initialServers = regionServers.toArray(new ServerName[0]);
|
||||||
|
|
||||||
monkeyProps = context.getMonkeyProps();
|
monkeyProps = context.getMonkeyProps();
|
||||||
if (monkeyProps == null){
|
if (monkeyProps == null){
|
||||||
|
@ -150,12 +152,12 @@ public class Action {
|
||||||
protected ServerName[] getCurrentServers() throws IOException {
|
protected ServerName[] getCurrentServers() throws IOException {
|
||||||
ClusterMetrics clusterStatus = cluster.getClusterMetrics();
|
ClusterMetrics clusterStatus = cluster.getClusterMetrics();
|
||||||
Collection<ServerName> regionServers = clusterStatus.getLiveServerMetrics().keySet();
|
Collection<ServerName> regionServers = clusterStatus.getLiveServerMetrics().keySet();
|
||||||
int count = regionServers == null ? 0 : regionServers.size();
|
int count = regionServers.size();
|
||||||
if (count <= 0) {
|
if (count <= 0) {
|
||||||
return new ServerName [] {};
|
return new ServerName [] {};
|
||||||
}
|
}
|
||||||
ServerName master = clusterStatus.getMasterName();
|
ServerName master = clusterStatus.getMasterName();
|
||||||
Set<ServerName> masters = new HashSet<ServerName>();
|
Set<ServerName> masters = new HashSet<>();
|
||||||
masters.add(master);
|
masters.add(master);
|
||||||
masters.addAll(clusterStatus.getBackupMasterNames());
|
masters.addAll(clusterStatus.getBackupMasterNames());
|
||||||
ArrayList<ServerName> tmp = new ArrayList<>(count);
|
ArrayList<ServerName> tmp = new ArrayList<>(count);
|
||||||
|
@ -167,110 +169,110 @@ public class Action {
|
||||||
tmp.remove(metaServer);
|
tmp.remove(metaServer);
|
||||||
}
|
}
|
||||||
|
|
||||||
return tmp.toArray(new ServerName[tmp.size()]);
|
return tmp.toArray(new ServerName[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void killMaster(ServerName server) throws IOException {
|
protected void killMaster(ServerName server) throws IOException {
|
||||||
LOG.info("Killing master {}", server);
|
getLogger().info("Killing master {}", server);
|
||||||
cluster.killMaster(server);
|
cluster.killMaster(server);
|
||||||
cluster.waitForMasterToStop(server, killMasterTimeout);
|
cluster.waitForMasterToStop(server, killMasterTimeout);
|
||||||
LOG.info("Killed master " + server);
|
getLogger().info("Killed master " + server);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void startMaster(ServerName server) throws IOException {
|
protected void startMaster(ServerName server) throws IOException {
|
||||||
LOG.info("Starting master {}", server.getHostname());
|
getLogger().info("Starting master {}", server.getHostname());
|
||||||
cluster.startMaster(server.getHostname(), server.getPort());
|
cluster.startMaster(server.getHostname(), server.getPort());
|
||||||
cluster.waitForActiveAndReadyMaster(startMasterTimeout);
|
cluster.waitForActiveAndReadyMaster(startMasterTimeout);
|
||||||
LOG.info("Started master " + server.getHostname());
|
getLogger().info("Started master " + server.getHostname());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void stopRs(ServerName server) throws IOException {
|
protected void stopRs(ServerName server) throws IOException {
|
||||||
LOG.info("Stopping regionserver {}", server);
|
getLogger().info("Stopping regionserver {}", server);
|
||||||
cluster.stopRegionServer(server);
|
cluster.stopRegionServer(server);
|
||||||
cluster.waitForRegionServerToStop(server, killRsTimeout);
|
cluster.waitForRegionServerToStop(server, killRsTimeout);
|
||||||
LOG.info("Stoppiong regionserver {}. Reported num of rs:{}", server,
|
getLogger().info("Stoppiong regionserver {}. Reported num of rs:{}", server,
|
||||||
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void suspendRs(ServerName server) throws IOException {
|
protected void suspendRs(ServerName server) throws IOException {
|
||||||
LOG.info("Suspending regionserver {}", server);
|
getLogger().info("Suspending regionserver {}", server);
|
||||||
cluster.suspendRegionServer(server);
|
cluster.suspendRegionServer(server);
|
||||||
if(!(cluster instanceof MiniHBaseCluster)){
|
if(!(cluster instanceof MiniHBaseCluster)){
|
||||||
cluster.waitForRegionServerToStop(server, killRsTimeout);
|
cluster.waitForRegionServerToStop(server, killRsTimeout);
|
||||||
}
|
}
|
||||||
LOG.info("Suspending regionserver {}. Reported num of rs:{}", server,
|
getLogger().info("Suspending regionserver {}. Reported num of rs:{}", server,
|
||||||
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void resumeRs(ServerName server) throws IOException {
|
protected void resumeRs(ServerName server) throws IOException {
|
||||||
LOG.info("Resuming regionserver {}", server);
|
getLogger().info("Resuming regionserver {}", server);
|
||||||
cluster.resumeRegionServer(server);
|
cluster.resumeRegionServer(server);
|
||||||
if(!(cluster instanceof MiniHBaseCluster)){
|
if(!(cluster instanceof MiniHBaseCluster)){
|
||||||
cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(), startRsTimeout);
|
cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(), startRsTimeout);
|
||||||
}
|
}
|
||||||
LOG.info("Resuming regionserver {}. Reported num of rs:{}", server,
|
getLogger().info("Resuming regionserver {}. Reported num of rs:{}", server,
|
||||||
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void killRs(ServerName server) throws IOException {
|
protected void killRs(ServerName server) throws IOException {
|
||||||
LOG.info("Killing regionserver {}", server);
|
getLogger().info("Killing regionserver {}", server);
|
||||||
cluster.killRegionServer(server);
|
cluster.killRegionServer(server);
|
||||||
cluster.waitForRegionServerToStop(server, killRsTimeout);
|
cluster.waitForRegionServerToStop(server, killRsTimeout);
|
||||||
LOG.info("Killed regionserver {}. Reported num of rs:{}", server,
|
getLogger().info("Killed regionserver {}. Reported num of rs:{}", server,
|
||||||
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void startRs(ServerName server) throws IOException {
|
protected void startRs(ServerName server) throws IOException {
|
||||||
LOG.info("Starting regionserver {}", server.getAddress());
|
getLogger().info("Starting regionserver {}", server.getAddress());
|
||||||
cluster.startRegionServer(server.getHostname(), server.getPort());
|
cluster.startRegionServer(server.getHostname(), server.getPort());
|
||||||
cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(), startRsTimeout);
|
cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(), startRsTimeout);
|
||||||
LOG.info("Started regionserver {}. Reported num of rs:{}", server.getAddress(),
|
getLogger().info("Started regionserver {}. Reported num of rs:{}", server.getAddress(),
|
||||||
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void killZKNode(ServerName server) throws IOException {
|
protected void killZKNode(ServerName server) throws IOException {
|
||||||
LOG.info("Killing zookeeper node {}", server);
|
getLogger().info("Killing zookeeper node {}", server);
|
||||||
cluster.killZkNode(server);
|
cluster.killZkNode(server);
|
||||||
cluster.waitForZkNodeToStop(server, killZkNodeTimeout);
|
cluster.waitForZkNodeToStop(server, killZkNodeTimeout);
|
||||||
LOG.info("Killed zookeeper node {}. Reported num of rs:{}", server,
|
getLogger().info("Killed zookeeper node {}. Reported num of rs:{}", server,
|
||||||
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void startZKNode(ServerName server) throws IOException {
|
protected void startZKNode(ServerName server) throws IOException {
|
||||||
LOG.info("Starting zookeeper node {}", server.getHostname());
|
getLogger().info("Starting zookeeper node {}", server.getHostname());
|
||||||
cluster.startZkNode(server.getHostname(), server.getPort());
|
cluster.startZkNode(server.getHostname(), server.getPort());
|
||||||
cluster.waitForZkNodeToStart(server, startZkNodeTimeout);
|
cluster.waitForZkNodeToStart(server, startZkNodeTimeout);
|
||||||
LOG.info("Started zookeeper node {}", server);
|
getLogger().info("Started zookeeper node {}", server);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void killDataNode(ServerName server) throws IOException {
|
protected void killDataNode(ServerName server) throws IOException {
|
||||||
LOG.info("Killing datanode {}", server);
|
getLogger().info("Killing datanode {}", server);
|
||||||
cluster.killDataNode(server);
|
cluster.killDataNode(server);
|
||||||
cluster.waitForDataNodeToStop(server, killDataNodeTimeout);
|
cluster.waitForDataNodeToStop(server, killDataNodeTimeout);
|
||||||
LOG.info("Killed datanode {}. Reported num of rs:{}", server,
|
getLogger().info("Killed datanode {}. Reported num of rs:{}", server,
|
||||||
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void startDataNode(ServerName server) throws IOException {
|
protected void startDataNode(ServerName server) throws IOException {
|
||||||
LOG.info("Starting datanode {}", server.getHostname());
|
getLogger().info("Starting datanode {}", server.getHostname());
|
||||||
cluster.startDataNode(server);
|
cluster.startDataNode(server);
|
||||||
cluster.waitForDataNodeToStart(server, startDataNodeTimeout);
|
cluster.waitForDataNodeToStart(server, startDataNodeTimeout);
|
||||||
LOG.info("Started datanode {}", server);
|
getLogger().info("Started datanode {}", server);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void killNameNode(ServerName server) throws IOException {
|
protected void killNameNode(ServerName server) throws IOException {
|
||||||
LOG.info("Killing namenode :-{}", server.getHostname());
|
getLogger().info("Killing namenode :-{}", server.getHostname());
|
||||||
cluster.killNameNode(server);
|
cluster.killNameNode(server);
|
||||||
cluster.waitForNameNodeToStop(server, killNameNodeTimeout);
|
cluster.waitForNameNodeToStop(server, killNameNodeTimeout);
|
||||||
LOG.info("Killed namenode:{}. Reported num of rs:{}", server,
|
getLogger().info("Killed namenode:{}. Reported num of rs:{}", server,
|
||||||
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
cluster.getClusterMetrics().getLiveServerMetrics().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void startNameNode(ServerName server) throws IOException {
|
protected void startNameNode(ServerName server) throws IOException {
|
||||||
LOG.info("Starting Namenode :-{}", server.getHostname());
|
getLogger().info("Starting Namenode :-{}", server.getHostname());
|
||||||
cluster.startNameNode(server);
|
cluster.startNameNode(server);
|
||||||
cluster.waitForNameNodeToStart(server, startNameNodeTimeout);
|
cluster.waitForNameNodeToStart(server, startNameNodeTimeout);
|
||||||
LOG.info("Started namenode:{}", server);
|
getLogger().info("Started namenode:{}", server);
|
||||||
}
|
}
|
||||||
protected void unbalanceRegions(ClusterMetrics clusterStatus,
|
protected void unbalanceRegions(ClusterMetrics clusterStatus,
|
||||||
List<ServerName> fromServers, List<ServerName> toServers,
|
List<ServerName> fromServers, List<ServerName> toServers,
|
||||||
|
@ -283,7 +285,7 @@ public class Action {
|
||||||
// Ugh.
|
// Ugh.
|
||||||
List<byte[]> regions = new LinkedList<>(serverLoad.getRegionMetrics().keySet());
|
List<byte[]> regions = new LinkedList<>(serverLoad.getRegionMetrics().keySet());
|
||||||
int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size());
|
int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size());
|
||||||
LOG.debug("Removing {} regions from {}", victimRegionCount, sn);
|
getLogger().debug("Removing {} regions from {}", victimRegionCount, sn);
|
||||||
for (int i = 0; i < victimRegionCount; ++i) {
|
for (int i = 0; i < victimRegionCount; ++i) {
|
||||||
int victimIx = RandomUtils.nextInt(0, regions.size());
|
int victimIx = RandomUtils.nextInt(0, regions.size());
|
||||||
String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx));
|
String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx));
|
||||||
|
@ -291,8 +293,8 @@ public class Action {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Moving {} regions from {} servers to {} different servers", victimRegions.size(),
|
getLogger().info("Moving {} regions from {} servers to {} different servers",
|
||||||
fromServers.size(), toServers.size());
|
victimRegions.size(), fromServers.size(), toServers.size());
|
||||||
Admin admin = this.context.getHBaseIntegrationTestingUtility().getAdmin();
|
Admin admin = this.context.getHBaseIntegrationTestingUtility().getAdmin();
|
||||||
for (byte[] victimRegion : victimRegions) {
|
for (byte[] victimRegion : victimRegions) {
|
||||||
// Don't keep moving regions if we're
|
// Don't keep moving regions if we're
|
||||||
|
@ -311,10 +313,10 @@ public class Action {
|
||||||
try {
|
try {
|
||||||
result = admin.balance();
|
result = admin.balance();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.warn("Got exception while doing balance ", e);
|
getLogger().warn("Got exception while doing balance ", e);
|
||||||
}
|
}
|
||||||
if (!result) {
|
if (!result) {
|
||||||
LOG.error("Balancer didn't succeed");
|
getLogger().error("Balancer didn't succeed");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -323,7 +325,7 @@ public class Action {
|
||||||
try {
|
try {
|
||||||
admin.balancerSwitch(onOrOff, synchronous);
|
admin.balancerSwitch(onOrOff, synchronous);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.warn("Got exception while switching balance ", e);
|
getLogger().warn("Got exception while switching balance ", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -338,7 +340,8 @@ public class Action {
|
||||||
* @param transform the modification to perform. Callers will have the
|
* @param transform the modification to perform. Callers will have the
|
||||||
* column name as a string and a column family builder available to them
|
* column name as a string and a column family builder available to them
|
||||||
*/
|
*/
|
||||||
protected void modifyAllTableColumns(TableName tableName, BiConsumer<String, ColumnFamilyDescriptorBuilder> transform) throws IOException {
|
protected void modifyAllTableColumns(TableName tableName,
|
||||||
|
BiConsumer<String, ColumnFamilyDescriptorBuilder> transform) throws IOException {
|
||||||
HBaseTestingUtility util = this.context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = this.context.getHBaseIntegrationTestingUtility();
|
||||||
Admin admin = util.getAdmin();
|
Admin admin = util.getAdmin();
|
||||||
|
|
||||||
|
@ -369,7 +372,8 @@ public class Action {
|
||||||
* @param tableName the table to modify
|
* @param tableName the table to modify
|
||||||
* @param transform the modification to perform on each column family descriptor builder
|
* @param transform the modification to perform on each column family descriptor builder
|
||||||
*/
|
*/
|
||||||
protected void modifyAllTableColumns(TableName tableName, Consumer<ColumnFamilyDescriptorBuilder> transform) throws IOException {
|
protected void modifyAllTableColumns(TableName tableName,
|
||||||
|
Consumer<ColumnFamilyDescriptorBuilder> transform) throws IOException {
|
||||||
modifyAllTableColumns(tableName, (name, cfd) -> transform.accept(cfd));
|
modifyAllTableColumns(tableName, (name, cfd) -> transform.accept(cfd));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -29,13 +28,13 @@ import org.slf4j.LoggerFactory;
|
||||||
* Action that adds high cpu load to a random regionserver for a given duration
|
* Action that adds high cpu load to a random regionserver for a given duration
|
||||||
*/
|
*/
|
||||||
public class AddCPULoadAction extends SudoCommandAction {
|
public class AddCPULoadAction extends SudoCommandAction {
|
||||||
protected static final Logger LOG = LoggerFactory.getLogger(AddCPULoadAction.class);
|
private static final Logger LOG = LoggerFactory.getLogger(AddCPULoadAction.class);
|
||||||
private static final String CPU_LOAD_COMMAND =
|
private static final String CPU_LOAD_COMMAND =
|
||||||
"seq 1 %s | xargs -I{} -n 1 -P %s timeout %s dd if=/dev/urandom of=/dev/null bs=1M " +
|
"seq 1 %s | xargs -I{} -n 1 -P %s timeout %s dd if=/dev/urandom of=/dev/null bs=1M " +
|
||||||
"iflag=fullblock";
|
"iflag=fullblock";
|
||||||
|
|
||||||
private final long duration;
|
private final long duration;
|
||||||
private long processes;
|
private final long processes;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add high load to cpu
|
* Add high load to cpu
|
||||||
|
@ -49,18 +48,22 @@ public class AddCPULoadAction extends SudoCommandAction {
|
||||||
this.processes = processes;
|
this.processes = processes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
protected void localPerform() throws IOException {
|
protected void localPerform() throws IOException {
|
||||||
LOG.info("Starting to execute AddCPULoadAction");
|
getLogger().info("Starting to execute AddCPULoadAction");
|
||||||
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
||||||
String hostname = server.getHostname();
|
String hostname = server.getHostname();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
clusterManager.execSudo(hostname, timeout, getCommand());
|
clusterManager.execSudo(hostname, timeout, getCommand());
|
||||||
} catch (IOException ex){
|
} catch (IOException ex){
|
||||||
//This will always happen. We use timeout to kill a continously running process
|
//This will always happen. We use timeout to kill a continuously running process
|
||||||
//after the duration expires
|
//after the duration expires
|
||||||
}
|
}
|
||||||
LOG.info("Finished to execute AddCPULoadAction");
|
getLogger().info("Finished to execute AddCPULoadAction");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getCommand(){
|
private String getCommand(){
|
||||||
|
|
|
@ -42,6 +42,10 @@ public class AddColumnAction extends Action {
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void init(ActionContext context) throws IOException {
|
public void init(ActionContext context) throws IOException {
|
||||||
super.init(context);
|
super.init(context);
|
||||||
|
@ -63,7 +67,7 @@ public class AddColumnAction extends Action {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.debug("Performing action: Adding " + columnDescriptor + " to " + tableName);
|
getLogger().debug("Performing action: Adding " + columnDescriptor + " to " + tableName);
|
||||||
|
|
||||||
TableDescriptor modifiedTable = TableDescriptorBuilder.newBuilder(tableDescriptor)
|
TableDescriptor modifiedTable = TableDescriptorBuilder.newBuilder(tableDescriptor)
|
||||||
.setColumnFamily(columnDescriptor).build();
|
.setColumnFamily(columnDescriptor).build();
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.chaos.actions;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -32,17 +31,20 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
public class BatchRestartRsAction extends RestartActionBaseAction {
|
public class BatchRestartRsAction extends RestartActionBaseAction {
|
||||||
float ratio; //ratio of regionservers to restart
|
float ratio; //ratio of regionservers to restart
|
||||||
private static final Logger LOG =
|
private static final Logger LOG = LoggerFactory.getLogger(BatchRestartRsAction.class);
|
||||||
LoggerFactory.getLogger(BatchRestartRsAction.class);
|
|
||||||
|
|
||||||
public BatchRestartRsAction(long sleepTime, float ratio) {
|
public BatchRestartRsAction(long sleepTime, float ratio) {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
this.ratio = ratio;
|
this.ratio = ratio;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
|
getLogger().info(String.format("Performing action: Batch restarting %d%% of region servers",
|
||||||
(int)(ratio * 100)));
|
(int)(ratio * 100)));
|
||||||
List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(),
|
List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(),
|
||||||
ratio);
|
ratio);
|
||||||
|
@ -55,7 +57,7 @@ public class BatchRestartRsAction extends RestartActionBaseAction {
|
||||||
if (context.isStopping()) {
|
if (context.isStopping()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
LOG.info("Killing region server:" + server);
|
getLogger().info("Killing region server:" + server);
|
||||||
cluster.killRegionServer(server);
|
cluster.killRegionServer(server);
|
||||||
killedServers.add(server);
|
killedServers.add(server);
|
||||||
}
|
}
|
||||||
|
@ -64,13 +66,13 @@ public class BatchRestartRsAction extends RestartActionBaseAction {
|
||||||
cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT);
|
cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Killed " + killedServers.size() + " region servers. Reported num of rs:"
|
getLogger().info("Killed " + killedServers.size() + " region servers. Reported num of rs:"
|
||||||
+ cluster.getClusterMetrics().getLiveServerMetrics().size());
|
+ cluster.getClusterMetrics().getLiveServerMetrics().size());
|
||||||
|
|
||||||
sleep(sleepTime);
|
sleep(sleepTime);
|
||||||
|
|
||||||
for (ServerName server : killedServers) {
|
for (ServerName server : killedServers) {
|
||||||
LOG.info("Starting region server:" + server.getHostname());
|
getLogger().info("Starting region server:" + server.getHostname());
|
||||||
cluster.startRegionServer(server.getHostname(), server.getPort());
|
cluster.startRegionServer(server.getHostname(), server.getPort());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -79,7 +81,7 @@ public class BatchRestartRsAction extends RestartActionBaseAction {
|
||||||
server.getPort(),
|
server.getPort(),
|
||||||
PolicyBasedChaosMonkey.TIMEOUT);
|
PolicyBasedChaosMonkey.TIMEOUT);
|
||||||
}
|
}
|
||||||
LOG.info("Started " + killedServers.size() +" region servers. Reported num of rs:"
|
getLogger().info("Started " + killedServers.size() +" region servers. Reported num of rs:"
|
||||||
+ cluster.getClusterMetrics().getLiveServerMetrics().size());
|
+ cluster.getClusterMetrics().getLiveServerMetrics().size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.regionserver.BloomType;
|
import org.apache.hadoop.hbase.regionserver.BloomType;
|
||||||
import org.apache.hadoop.hbase.util.BloomFilterUtil;
|
import org.apache.hadoop.hbase.util.BloomFilterUtil;
|
||||||
|
@ -44,17 +43,21 @@ public class ChangeBloomFilterAction extends Action {
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
final Random random = new Random();
|
final Random random = new Random();
|
||||||
final BloomType[] bloomArray = BloomType.values();
|
final BloomType[] bloomArray = BloomType.values();
|
||||||
final int bloomArraySize = bloomArray.length;
|
final int bloomArraySize = bloomArray.length;
|
||||||
|
|
||||||
LOG.info("Performing action: Change bloom filter on all columns of table " + tableName);
|
getLogger().info("Performing action: Change bloom filter on all columns of table " + tableName);
|
||||||
|
|
||||||
modifyAllTableColumns(tableName, (columnName, columnBuilder) -> {
|
modifyAllTableColumns(tableName, (columnName, columnBuilder) -> {
|
||||||
BloomType bloomType = bloomArray[random.nextInt(bloomArraySize)];
|
BloomType bloomType = bloomArray[random.nextInt(bloomArraySize)];
|
||||||
LOG.debug("Performing action: About to set bloom filter type to "
|
getLogger().debug("Performing action: About to set bloom filter type to "
|
||||||
+ bloomType + " on column " + columnName + " of table " + tableName);
|
+ bloomType + " on column " + columnName + " of table " + tableName);
|
||||||
columnBuilder.setBloomFilterType(bloomType);
|
columnBuilder.setBloomFilterType(bloomType);
|
||||||
if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) {
|
if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) {
|
||||||
|
@ -62,6 +65,6 @@ public class ChangeBloomFilterAction extends Action {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
LOG.debug("Performing action: Just set bloom filter types on table " + tableName);
|
getLogger().debug("Performing action: Just set bloom filter types on table " + tableName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
|
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
|
||||||
import org.apache.hadoop.io.compress.Compressor;
|
import org.apache.hadoop.io.compress.Compressor;
|
||||||
|
@ -40,6 +39,10 @@ public class ChangeCompressionAction extends Action {
|
||||||
this.random = new Random();
|
this.random = new Random();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws IOException {
|
public void perform() throws IOException {
|
||||||
// Possible compression algorithms. If an algorithm is not supported,
|
// Possible compression algorithms. If an algorithm is not supported,
|
||||||
|
@ -63,13 +66,13 @@ public class ChangeCompressionAction extends Action {
|
||||||
algo.returnCompressor(c);
|
algo.returnCompressor(c);
|
||||||
break;
|
break;
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
LOG.info("Performing action: Changing compression algorithms to " + algo +
|
getLogger().info("Performing action: Changing compression algorithms to " + algo +
|
||||||
" is not supported, pick another one");
|
" is not supported, pick another one");
|
||||||
}
|
}
|
||||||
} while (true);
|
} while (true);
|
||||||
|
|
||||||
final Algorithm chosenAlgo = algo; // for use in lambda
|
final Algorithm chosenAlgo = algo; // for use in lambda
|
||||||
LOG.debug("Performing action: Changing compression algorithms on "
|
getLogger().debug("Performing action: Changing compression algorithms on "
|
||||||
+ tableName.getNameAsString() + " to " + chosenAlgo);
|
+ tableName.getNameAsString() + " to " + chosenAlgo);
|
||||||
modifyAllTableColumns(tableName, columnFamilyDescriptorBuilder -> {
|
modifyAllTableColumns(tableName, columnFamilyDescriptorBuilder -> {
|
||||||
if (random.nextBoolean()) {
|
if (random.nextBoolean()) {
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
|
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -39,9 +38,13 @@ public class ChangeEncodingAction extends Action {
|
||||||
this.random = new Random();
|
this.random = new Random();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws IOException {
|
public void perform() throws IOException {
|
||||||
LOG.debug("Performing action: Changing encodings on " + tableName);
|
getLogger().debug("Performing action: Changing encodings on " + tableName);
|
||||||
// possible DataBlockEncoding id's
|
// possible DataBlockEncoding id's
|
||||||
final int[] possibleIds = {0, 2, 3, 4, 6};
|
final int[] possibleIds = {0, 2, 3, 4, 6};
|
||||||
|
|
||||||
|
@ -49,7 +52,7 @@ public class ChangeEncodingAction extends Action {
|
||||||
short id = (short) possibleIds[random.nextInt(possibleIds.length)];
|
short id = (short) possibleIds[random.nextInt(possibleIds.length)];
|
||||||
DataBlockEncoding encoding = DataBlockEncoding.getEncodingById(id);
|
DataBlockEncoding encoding = DataBlockEncoding.getEncodingById(id);
|
||||||
columnBuilder.setDataBlockEncoding(encoding);
|
columnBuilder.setDataBlockEncoding(encoding);
|
||||||
LOG.debug("Set encoding of column family " + columnName + " to: " + encoding);
|
getLogger().debug("Set encoding of column family " + columnName + " to: " + encoding);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -18,7 +18,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.client.Admin;
|
import org.apache.hadoop.hbase.client.Admin;
|
||||||
|
@ -46,18 +45,21 @@ public class ChangeSplitPolicyAction extends Action {
|
||||||
this.random = new Random();
|
this.random = new Random();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
Admin admin = util.getAdmin();
|
Admin admin = util.getAdmin();
|
||||||
|
|
||||||
LOG.info("Performing action: Change split policy of table " + tableName);
|
getLogger().info("Performing action: Change split policy of table " + tableName);
|
||||||
TableDescriptor tableDescriptor = admin.getDescriptor(tableName);
|
TableDescriptor tableDescriptor = admin.getDescriptor(tableName);
|
||||||
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableDescriptor);
|
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableDescriptor);
|
||||||
String chosenPolicy = possiblePolicies[random.nextInt(possiblePolicies.length)];
|
String chosenPolicy = possiblePolicies[random.nextInt(possiblePolicies.length)];
|
||||||
builder.setRegionSplitPolicyClassName(chosenPolicy);
|
builder.setRegionSplitPolicyClassName(chosenPolicy);
|
||||||
LOG.info("Changing " + tableName + " split policy to " + chosenPolicy);
|
getLogger().info("Changing " + tableName + " split policy to " + chosenPolicy);
|
||||||
admin.modifyTable(builder.build());
|
admin.modifyTable(builder.build());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -31,24 +30,28 @@ import org.slf4j.LoggerFactory;
|
||||||
* Always keeps at least 1 as the number of versions.
|
* Always keeps at least 1 as the number of versions.
|
||||||
*/
|
*/
|
||||||
public class ChangeVersionsAction extends Action {
|
public class ChangeVersionsAction extends Action {
|
||||||
private final TableName tableName;
|
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(ChangeVersionsAction.class);
|
private static final Logger LOG = LoggerFactory.getLogger(ChangeVersionsAction.class);
|
||||||
|
private final TableName tableName;
|
||||||
|
|
||||||
private Random random;
|
private final Random random;
|
||||||
|
|
||||||
public ChangeVersionsAction(TableName tableName) {
|
public ChangeVersionsAction(TableName tableName) {
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
this.random = new Random();
|
this.random = new Random();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws IOException {
|
public void perform() throws IOException {
|
||||||
final int versions = random.nextInt(3) + 1;
|
final int versions = random.nextInt(3) + 1;
|
||||||
|
|
||||||
LOG.debug("Performing action: Changing versions on " + tableName + " to " + versions);
|
getLogger().debug("Performing action: Changing versions on " + tableName + " to " + versions);
|
||||||
modifyAllTableColumns(tableName, columnBuilder -> {
|
modifyAllTableColumns(tableName, columnBuilder -> {
|
||||||
columnBuilder.setMinVersions(versions).setMaxVersions(versions);
|
columnBuilder.setMinVersions(versions).setMaxVersions(versions);
|
||||||
});
|
});
|
||||||
LOG.debug("Performing action: Just changed versions on " + tableName);
|
getLogger().debug("Performing action: Just changed versions on " + tableName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -46,6 +46,10 @@ public class CompactMobAction extends Action {
|
||||||
this.sleepTime = sleepTime;
|
this.sleepTime = sleepTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
|
@ -57,7 +61,7 @@ public class CompactMobAction extends Action {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Performing action: Compact mob of table " + tableName + ", major=" + major);
|
getLogger().info("Performing action: Compact mob of table " + tableName + ", major=" + major);
|
||||||
try {
|
try {
|
||||||
if (major) {
|
if (major) {
|
||||||
admin.majorCompact(tableName, CompactType.MOB);
|
admin.majorCompact(tableName, CompactType.MOB);
|
||||||
|
@ -65,7 +69,7 @@ public class CompactMobAction extends Action {
|
||||||
admin.compact(tableName, CompactType.MOB);
|
admin.compact(tableName, CompactType.MOB);
|
||||||
}
|
}
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
LOG.warn("Mob Compaction failed, might be caused by other chaos: " + ex.getMessage());
|
getLogger().warn("Mob Compaction failed, might be caused by other chaos: " + ex.getMessage());
|
||||||
}
|
}
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
Thread.sleep(sleepTime);
|
Thread.sleep(sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.lang3.RandomUtils;
|
import org.apache.commons.lang3.RandomUtils;
|
||||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
|
@ -33,11 +32,11 @@ import org.slf4j.LoggerFactory;
|
||||||
* Region that queues a compaction of a random region from the table.
|
* Region that queues a compaction of a random region from the table.
|
||||||
*/
|
*/
|
||||||
public class CompactRandomRegionOfTableAction extends Action {
|
public class CompactRandomRegionOfTableAction extends Action {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(CompactRandomRegionOfTableAction.class);
|
||||||
|
|
||||||
private final int majorRatio;
|
private final int majorRatio;
|
||||||
private final long sleepTime;
|
private final long sleepTime;
|
||||||
private final TableName tableName;
|
private final TableName tableName;
|
||||||
private static final Logger LOG =
|
|
||||||
LoggerFactory.getLogger(CompactRandomRegionOfTableAction.class);
|
|
||||||
|
|
||||||
public CompactRandomRegionOfTableAction(
|
public CompactRandomRegionOfTableAction(
|
||||||
TableName tableName, float majorRatio) {
|
TableName tableName, float majorRatio) {
|
||||||
|
@ -51,33 +50,37 @@ public class CompactRandomRegionOfTableAction extends Action {
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
Admin admin = util.getAdmin();
|
Admin admin = util.getAdmin();
|
||||||
boolean major = RandomUtils.nextInt(0, 100) < majorRatio;
|
boolean major = RandomUtils.nextInt(0, 100) < majorRatio;
|
||||||
|
|
||||||
LOG.info("Performing action: Compact random region of table "
|
getLogger().info("Performing action: Compact random region of table "
|
||||||
+ tableName + ", major=" + major);
|
+ tableName + ", major=" + major);
|
||||||
List<RegionInfo> regions = admin.getRegions(tableName);
|
List<RegionInfo> regions = admin.getRegions(tableName);
|
||||||
if (regions == null || regions.isEmpty()) {
|
if (regions == null || regions.isEmpty()) {
|
||||||
LOG.info("Table " + tableName + " doesn't have regions to compact");
|
getLogger().info("Table " + tableName + " doesn't have regions to compact");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
|
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
|
||||||
regions.toArray(new RegionInfo[regions.size()]));
|
regions.toArray(new RegionInfo[0]));
|
||||||
|
|
||||||
try {
|
try {
|
||||||
if (major) {
|
if (major) {
|
||||||
LOG.debug("Major compacting region " + region.getRegionNameAsString());
|
getLogger().debug("Major compacting region " + region.getRegionNameAsString());
|
||||||
admin.majorCompactRegion(region.getRegionName());
|
admin.majorCompactRegion(region.getRegionName());
|
||||||
} else {
|
} else {
|
||||||
LOG.debug("Compacting region " + region.getRegionNameAsString());
|
getLogger().debug("Compacting region " + region.getRegionNameAsString());
|
||||||
admin.compactRegion(region.getRegionName());
|
admin.compactRegion(region.getRegionName());
|
||||||
}
|
}
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
LOG.warn("Compaction failed, might be caused by other chaos: " + ex.getMessage());
|
getLogger().warn("Compaction failed, might be caused by other chaos: " + ex.getMessage());
|
||||||
}
|
}
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
Thread.sleep(sleepTime);
|
Thread.sleep(sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -29,10 +29,11 @@ import org.slf4j.LoggerFactory;
|
||||||
* Action that queues a table compaction.
|
* Action that queues a table compaction.
|
||||||
*/
|
*/
|
||||||
public class CompactTableAction extends Action {
|
public class CompactTableAction extends Action {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(CompactTableAction.class);
|
||||||
|
|
||||||
private final TableName tableName;
|
private final TableName tableName;
|
||||||
private final int majorRatio;
|
private final int majorRatio;
|
||||||
private final long sleepTime;
|
private final long sleepTime;
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(CompactTableAction.class);
|
|
||||||
|
|
||||||
public CompactTableAction(TableName tableName, float majorRatio) {
|
public CompactTableAction(TableName tableName, float majorRatio) {
|
||||||
this(-1, tableName, majorRatio);
|
this(-1, tableName, majorRatio);
|
||||||
|
@ -45,13 +46,17 @@ public class CompactTableAction extends Action {
|
||||||
this.sleepTime = sleepTime;
|
this.sleepTime = sleepTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
Admin admin = util.getAdmin();
|
Admin admin = util.getAdmin();
|
||||||
boolean major = RandomUtils.nextInt(0, 100) < majorRatio;
|
boolean major = RandomUtils.nextInt(0, 100) < majorRatio;
|
||||||
|
|
||||||
LOG.info("Performing action: Compact table " + tableName + ", major=" + major);
|
getLogger().info("Performing action: Compact table " + tableName + ", major=" + major);
|
||||||
try {
|
try {
|
||||||
if (major) {
|
if (major) {
|
||||||
admin.majorCompact(tableName);
|
admin.majorCompact(tableName);
|
||||||
|
@ -59,7 +64,7 @@ public class CompactTableAction extends Action {
|
||||||
admin.compact(tableName);
|
admin.compact(tableName);
|
||||||
}
|
}
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
LOG.warn("Compaction failed, might be caused by other chaos: " + ex.getMessage());
|
getLogger().warn("Compaction failed, might be caused by other chaos: " + ex.getMessage());
|
||||||
}
|
}
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
Thread.sleep(sleepTime);
|
Thread.sleep(sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -34,7 +34,7 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
public class CorruptDataFilesAction extends Action {
|
public class CorruptDataFilesAction extends Action {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(CorruptDataFilesAction.class);
|
private static final Logger LOG = LoggerFactory.getLogger(CorruptDataFilesAction.class);
|
||||||
private float chance;
|
private final float chance;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Corrupts HFiles with a certain chance
|
* Corrupts HFiles with a certain chance
|
||||||
|
@ -44,9 +44,13 @@ public class CorruptDataFilesAction extends Action {
|
||||||
this.chance = chance * 100;
|
this.chance = chance * 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info("Start corrupting data files");
|
getLogger().info("Start corrupting data files");
|
||||||
|
|
||||||
FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
|
FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
|
||||||
Path rootDir = CommonFSUtils.getRootDir(getConf());
|
Path rootDir = CommonFSUtils.getRootDir(getConf());
|
||||||
|
@ -67,9 +71,9 @@ public class CorruptDataFilesAction extends Action {
|
||||||
} finally {
|
} finally {
|
||||||
out.close();
|
out.close();
|
||||||
}
|
}
|
||||||
LOG.info("Corrupting {}", status.getPath());
|
getLogger().info("Corrupting {}", status.getPath());
|
||||||
}
|
}
|
||||||
LOG.info("Done corrupting data files");
|
getLogger().info("Done corrupting data files");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -31,8 +30,8 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
public class CorruptPacketsCommandAction extends TCCommandAction {
|
public class CorruptPacketsCommandAction extends TCCommandAction {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(CorruptPacketsCommandAction.class);
|
private static final Logger LOG = LoggerFactory.getLogger(CorruptPacketsCommandAction.class);
|
||||||
private float ratio;
|
private final float ratio;
|
||||||
private long duration;
|
private final long duration;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Corrupt network packets on a random regionserver.
|
* Corrupt network packets on a random regionserver.
|
||||||
|
@ -48,8 +47,12 @@ public class CorruptPacketsCommandAction extends TCCommandAction {
|
||||||
this.duration = duration;
|
this.duration = duration;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
protected void localPerform() throws IOException {
|
protected void localPerform() throws IOException {
|
||||||
LOG.info("Starting to execute CorruptPacketsCommandAction");
|
getLogger().info("Starting to execute CorruptPacketsCommandAction");
|
||||||
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
||||||
String hostname = server.getHostname();
|
String hostname = server.getHostname();
|
||||||
|
|
||||||
|
@ -57,12 +60,12 @@ public class CorruptPacketsCommandAction extends TCCommandAction {
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
|
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
|
||||||
Thread.sleep(duration);
|
Thread.sleep(duration);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
LOG.debug("Failed to run the command for the full duration", e);
|
getLogger().debug("Failed to run the command for the full duration", e);
|
||||||
} finally {
|
} finally {
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
|
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Finished to execute CorruptPacketsCommandAction");
|
getLogger().info("Finished to execute CorruptPacketsCommandAction");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getCommand(String operation){
|
private String getCommand(String operation){
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -20,16 +20,18 @@ package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.client.Admin;
|
import org.apache.hadoop.hbase.client.Admin;
|
||||||
import org.apache.hadoop.hbase.client.TableDescriptor;
|
import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||||
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
|
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
public class DecreaseMaxHFileSizeAction extends Action {
|
public class DecreaseMaxHFileSizeAction extends Action {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(DecreaseMaxHFileSizeAction.class);
|
||||||
|
|
||||||
private static final long minFileSize = 1 * 1024 * 1024 * 1024L;
|
private static final long minFileSize = 1024 * 1024 * 1024L;
|
||||||
|
|
||||||
private final long sleepTime;
|
private final long sleepTime;
|
||||||
private final TableName tableName;
|
private final TableName tableName;
|
||||||
|
@ -42,6 +44,10 @@ public class DecreaseMaxHFileSizeAction extends Action {
|
||||||
this.random = new Random();
|
this.random = new Random();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void init(ActionContext context) throws IOException {
|
public void init(ActionContext context) throws IOException {
|
||||||
super.init(context);
|
super.init(context);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -30,8 +29,8 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
public class DelayPacketsCommandAction extends TCCommandAction {
|
public class DelayPacketsCommandAction extends TCCommandAction {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(DelayPacketsCommandAction.class);
|
private static final Logger LOG = LoggerFactory.getLogger(DelayPacketsCommandAction.class);
|
||||||
private long delay;
|
private final long delay;
|
||||||
private long duration;
|
private final long duration;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds latency to communication on a random region server
|
* Adds latency to communication on a random region server
|
||||||
|
@ -47,8 +46,12 @@ public class DelayPacketsCommandAction extends TCCommandAction {
|
||||||
this.duration = duration;
|
this.duration = duration;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
protected void localPerform() throws IOException {
|
protected void localPerform() throws IOException {
|
||||||
LOG.info("Starting to execute DelayPacketsCommandAction");
|
getLogger().info("Starting to execute DelayPacketsCommandAction");
|
||||||
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
||||||
String hostname = server.getHostname();
|
String hostname = server.getHostname();
|
||||||
|
|
||||||
|
@ -56,12 +59,12 @@ public class DelayPacketsCommandAction extends TCCommandAction {
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
|
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
|
||||||
Thread.sleep(duration);
|
Thread.sleep(duration);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
LOG.debug("Failed to run the command for the full duration", e);
|
getLogger().debug("Failed to run the command for the full duration", e);
|
||||||
} finally {
|
} finally {
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
|
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Finished to execute DelayPacketsCommandAction");
|
getLogger().info("Finished to execute DelayPacketsCommandAction");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getCommand(String operation){
|
private String getCommand(String operation){
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -33,7 +33,7 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
public class DeleteDataFilesAction extends Action {
|
public class DeleteDataFilesAction extends Action {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(DeleteDataFilesAction.class);
|
private static final Logger LOG = LoggerFactory.getLogger(DeleteDataFilesAction.class);
|
||||||
private float chance;
|
private final float chance;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delets HFiles with a certain chance
|
* Delets HFiles with a certain chance
|
||||||
|
@ -43,9 +43,13 @@ public class DeleteDataFilesAction extends Action {
|
||||||
this.chance = chance * 100;
|
this.chance = chance * 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info("Start deleting data files");
|
getLogger().info("Start deleting data files");
|
||||||
FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
|
FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
|
||||||
Path rootDir = CommonFSUtils.getRootDir(getConf());
|
Path rootDir = CommonFSUtils.getRootDir(getConf());
|
||||||
Path defaultDir = rootDir.suffix("/data/default");
|
Path defaultDir = rootDir.suffix("/data/default");
|
||||||
|
@ -58,9 +62,9 @@ public class DeleteDataFilesAction extends Action {
|
||||||
if(RandomUtils.nextFloat(0, 100) > chance){
|
if(RandomUtils.nextFloat(0, 100) > chance){
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
fs.delete(status.getPath());
|
fs.delete(status.getPath(), true);
|
||||||
LOG.info("Deleting {}", status.getPath());
|
getLogger().info("Deleting {}", status.getPath());
|
||||||
}
|
}
|
||||||
LOG.info("Done deleting data files");
|
getLogger().info("Done deleting data files");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -27,8 +26,11 @@ import org.slf4j.LoggerFactory;
|
||||||
* Action to dump the cluster status.
|
* Action to dump the cluster status.
|
||||||
*/
|
*/
|
||||||
public class DumpClusterStatusAction extends Action {
|
public class DumpClusterStatusAction extends Action {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG = LoggerFactory.getLogger(DumpClusterStatusAction.class);
|
||||||
LoggerFactory.getLogger(DumpClusterStatusAction.class);
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void init(ActionContext context) throws IOException {
|
public void init(ActionContext context) throws IOException {
|
||||||
|
@ -37,7 +39,7 @@ public class DumpClusterStatusAction extends Action {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.debug("Performing action: Dump cluster status");
|
getLogger().debug("Performing action: Dump cluster status");
|
||||||
LOG.info("Cluster status\n" + cluster.getClusterMetrics());
|
getLogger().info("Cluster status\n" + cluster.getClusterMetrics());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -31,8 +30,8 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
public class DuplicatePacketsCommandAction extends TCCommandAction {
|
public class DuplicatePacketsCommandAction extends TCCommandAction {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(DuplicatePacketsCommandAction.class);
|
private static final Logger LOG = LoggerFactory.getLogger(DuplicatePacketsCommandAction.class);
|
||||||
private float ratio;
|
private final float ratio;
|
||||||
private long duration;
|
private final long duration;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Duplicate network packets on a random regionserver.
|
* Duplicate network packets on a random regionserver.
|
||||||
|
@ -48,8 +47,12 @@ public class DuplicatePacketsCommandAction extends TCCommandAction {
|
||||||
this.duration = duration;
|
this.duration = duration;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
protected void localPerform() throws IOException {
|
protected void localPerform() throws IOException {
|
||||||
LOG.info("Starting to execute DuplicatePacketsCommandAction");
|
getLogger().info("Starting to execute DuplicatePacketsCommandAction");
|
||||||
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
||||||
String hostname = server.getHostname();
|
String hostname = server.getHostname();
|
||||||
|
|
||||||
|
@ -57,12 +60,12 @@ public class DuplicatePacketsCommandAction extends TCCommandAction {
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
|
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
|
||||||
Thread.sleep(duration);
|
Thread.sleep(duration);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
LOG.debug("Failed to run the command for the full duration", e);
|
getLogger().debug("Failed to run the command for the full duration", e);
|
||||||
} finally {
|
} finally {
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
|
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Finished to execute DuplicatePacketsCommandAction");
|
getLogger().info("Finished to execute DuplicatePacketsCommandAction");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getCommand(String operation){
|
private String getCommand(String operation){
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -31,9 +30,9 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
public class FillDiskCommandAction extends SudoCommandAction {
|
public class FillDiskCommandAction extends SudoCommandAction {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(FillDiskCommandAction.class);
|
private static final Logger LOG = LoggerFactory.getLogger(FillDiskCommandAction.class);
|
||||||
private long size;
|
private final long size;
|
||||||
private long duration;
|
private final long duration;
|
||||||
private String path;
|
private final String path;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fill the disk on a random regionserver.
|
* Fill the disk on a random regionserver.
|
||||||
|
@ -52,20 +51,24 @@ public class FillDiskCommandAction extends SudoCommandAction {
|
||||||
this.path = path;
|
this.path = path;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
protected void localPerform() throws IOException {
|
protected void localPerform() throws IOException {
|
||||||
LOG.info("Starting to execute FillDiskCommandAction");
|
getLogger().info("Starting to execute FillDiskCommandAction");
|
||||||
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
||||||
String hostname = server.getHostname();
|
String hostname = server.getHostname();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
clusterManager.execSudo(hostname, duration, getFillCommand());
|
clusterManager.execSudo(hostname, duration, getFillCommand());
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
LOG.info("Potential timeout. We try to stop the dd process on target machine");
|
getLogger().info("Potential timeout. We try to stop the dd process on target machine");
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getStopCommand());
|
clusterManager.execSudoWithRetries(hostname, timeout, getStopCommand());
|
||||||
throw ex;
|
throw ex;
|
||||||
} finally {
|
} finally {
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getClearCommand());
|
clusterManager.execSudoWithRetries(hostname, timeout, getClearCommand());
|
||||||
LOG.info("Finished to execute FillDiskCommandAction");
|
getLogger().info("Finished to execute FillDiskCommandAction");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -82,6 +85,6 @@ public class FillDiskCommandAction extends SudoCommandAction {
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getStopCommand() {
|
private String getStopCommand() {
|
||||||
return String.format("killall dd");
|
return "killall dd";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -45,25 +45,29 @@ public class FlushRandomRegionOfTableAction extends Action {
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
Admin admin = util.getAdmin();
|
Admin admin = util.getAdmin();
|
||||||
|
|
||||||
LOG.info("Performing action: Flush random region of table " + tableName);
|
getLogger().info("Performing action: Flush random region of table " + tableName);
|
||||||
List<RegionInfo> regions = admin.getRegions(tableName);
|
List<RegionInfo> regions = admin.getRegions(tableName);
|
||||||
if (regions == null || regions.isEmpty()) {
|
if (regions == null || regions.isEmpty()) {
|
||||||
LOG.info("Table " + tableName + " doesn't have regions to flush");
|
getLogger().info("Table " + tableName + " doesn't have regions to flush");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
|
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
|
||||||
regions.toArray(new RegionInfo[regions.size()]));
|
regions.toArray(new RegionInfo[0]));
|
||||||
LOG.debug("Flushing region " + region.getRegionNameAsString());
|
getLogger().debug("Flushing region " + region.getRegionNameAsString());
|
||||||
try {
|
try {
|
||||||
admin.flushRegion(region.getRegionName());
|
admin.flushRegion(region.getRegionName());
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
LOG.warn("Flush failed, might be caused by other chaos: " + ex.getMessage());
|
getLogger().warn("Flush failed, might be caused by other chaos: " + ex.getMessage());
|
||||||
}
|
}
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
Thread.sleep(sleepTime);
|
Thread.sleep(sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -28,8 +28,7 @@ import org.slf4j.LoggerFactory;
|
||||||
* Action that tries to flush a table.
|
* Action that tries to flush a table.
|
||||||
*/
|
*/
|
||||||
public class FlushTableAction extends Action {
|
public class FlushTableAction extends Action {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG = LoggerFactory.getLogger(FlushTableAction.class);
|
||||||
LoggerFactory.getLogger(FlushTableAction.class);
|
|
||||||
private final long sleepTime;
|
private final long sleepTime;
|
||||||
private final TableName tableName;
|
private final TableName tableName;
|
||||||
|
|
||||||
|
@ -42,6 +41,10 @@ public class FlushTableAction extends Action {
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
|
@ -52,11 +55,11 @@ public class FlushTableAction extends Action {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Performing action: Flush table " + tableName);
|
getLogger().info("Performing action: Flush table " + tableName);
|
||||||
try {
|
try {
|
||||||
admin.flush(tableName);
|
admin.flush(tableName);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
LOG.warn("Flush failed, might be caused by other chaos: " + ex.getMessage());
|
getLogger().warn("Flush failed, might be caused by other chaos: " + ex.getMessage());
|
||||||
}
|
}
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
Thread.sleep(sleepTime);
|
Thread.sleep(sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -25,8 +25,11 @@ import org.slf4j.LoggerFactory;
|
||||||
* Action that tries to force a balancer run.
|
* Action that tries to force a balancer run.
|
||||||
*/
|
*/
|
||||||
public class ForceBalancerAction extends Action {
|
public class ForceBalancerAction extends Action {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG = LoggerFactory.getLogger(ForceBalancerAction.class);
|
||||||
LoggerFactory.getLogger(ForceBalancerAction.class);
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
|
@ -34,7 +37,7 @@ public class ForceBalancerAction extends Action {
|
||||||
if (context.isStopping()) {
|
if (context.isStopping()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
LOG.info("Balancing regions");
|
getLogger().info("Balancing regions");
|
||||||
forceBalancer();
|
forceBalancer();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -39,35 +39,38 @@ public class GracefulRollingRestartRsAction extends RestartActionBaseAction {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info("Performing action: Rolling restarting non-master region servers");
|
getLogger().info("Performing action: Rolling restarting non-master region servers");
|
||||||
List<ServerName> selectedServers = selectServers();
|
List<ServerName> selectedServers = selectServers();
|
||||||
|
|
||||||
LOG.info("Disabling balancer to make unloading possible");
|
getLogger().info("Disabling balancer to make unloading possible");
|
||||||
setBalancer(false, true);
|
setBalancer(false, true);
|
||||||
|
|
||||||
for (ServerName server : selectedServers) {
|
for (ServerName server : selectedServers) {
|
||||||
String rsName = server.getAddress().toString();
|
String rsName = server.getAddress().toString();
|
||||||
try (RegionMover rm =
|
try (RegionMover rm =
|
||||||
new RegionMover.RegionMoverBuilder(rsName, getConf()).ack(true).build()) {
|
new RegionMover.RegionMoverBuilder(rsName, getConf()).ack(true).build()) {
|
||||||
LOG.info("Unloading {}", server);
|
getLogger().info("Unloading {}", server);
|
||||||
rm.unload();
|
rm.unload();
|
||||||
LOG.info("Restarting {}", server);
|
getLogger().info("Restarting {}", server);
|
||||||
gracefulRestartRs(server, sleepTime);
|
gracefulRestartRs(server, sleepTime);
|
||||||
LOG.info("Loading {}", server);
|
getLogger().info("Loading {}", server);
|
||||||
rm.load();
|
rm.load();
|
||||||
} catch (Shell.ExitCodeException e) {
|
} catch (Shell.ExitCodeException e) {
|
||||||
LOG.info("Problem restarting but presume successful; code={}", e.getExitCode(), e);
|
getLogger().info("Problem restarting but presume successful; code={}", e.getExitCode(), e);
|
||||||
}
|
}
|
||||||
sleep(RandomUtils.nextInt(0, (int)sleepTime));
|
sleep(RandomUtils.nextInt(0, (int)sleepTime));
|
||||||
}
|
}
|
||||||
LOG.info("Enabling balancer");
|
getLogger().info("Enabling balancer");
|
||||||
setBalancer(true, true);
|
setBalancer(true, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected List<ServerName> selectServers() throws IOException {
|
protected List<ServerName> selectServers() throws IOException {
|
||||||
return Arrays.asList(getCurrentServers());
|
return Arrays.asList(getCurrentServers());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -31,8 +30,8 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
public class LosePacketsCommandAction extends TCCommandAction {
|
public class LosePacketsCommandAction extends TCCommandAction {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(LosePacketsCommandAction.class);
|
private static final Logger LOG = LoggerFactory.getLogger(LosePacketsCommandAction.class);
|
||||||
private float ratio;
|
private final float ratio;
|
||||||
private long duration;
|
private final long duration;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Lose network packets on a random regionserver.
|
* Lose network packets on a random regionserver.
|
||||||
|
@ -48,8 +47,12 @@ public class LosePacketsCommandAction extends TCCommandAction {
|
||||||
this.duration = duration;
|
this.duration = duration;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
protected void localPerform() throws IOException {
|
protected void localPerform() throws IOException {
|
||||||
LOG.info("Starting to execute LosePacketsCommandAction");
|
getLogger().info("Starting to execute LosePacketsCommandAction");
|
||||||
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
||||||
String hostname = server.getHostname();
|
String hostname = server.getHostname();
|
||||||
|
|
||||||
|
@ -57,12 +60,12 @@ public class LosePacketsCommandAction extends TCCommandAction {
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
|
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
|
||||||
Thread.sleep(duration);
|
Thread.sleep(duration);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
LOG.debug("Failed to run the command for the full duration", e);
|
getLogger().debug("Failed to run the command for the full duration", e);
|
||||||
} finally {
|
} finally {
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
|
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Finished to execute LosePacketsCommandAction");
|
getLogger().info("Finished to execute LosePacketsCommandAction");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getCommand(String operation){
|
private String getCommand(String operation){
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -45,22 +45,26 @@ public class MergeRandomAdjacentRegionsOfTableAction extends Action {
|
||||||
this.sleepTime = sleepTime;
|
this.sleepTime = sleepTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
Admin admin = util.getAdmin();
|
Admin admin = util.getAdmin();
|
||||||
|
|
||||||
LOG.info("Performing action: Merge random adjacent regions of table " + tableName);
|
getLogger().info("Performing action: Merge random adjacent regions of table " + tableName);
|
||||||
List<RegionInfo> regions = admin.getRegions(tableName);
|
List<RegionInfo> regions = admin.getRegions(tableName);
|
||||||
if (regions == null || regions.size() < 2) {
|
if (regions == null || regions.size() < 2) {
|
||||||
LOG.info("Table " + tableName + " doesn't have enough regions to merge");
|
getLogger().info("Table " + tableName + " doesn't have enough regions to merge");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int i = RandomUtils.nextInt(0, regions.size() - 1);
|
int i = RandomUtils.nextInt(0, regions.size() - 1);
|
||||||
RegionInfo a = regions.get(i++);
|
RegionInfo a = regions.get(i++);
|
||||||
RegionInfo b = regions.get(i);
|
RegionInfo b = regions.get(i);
|
||||||
LOG.debug("Merging " + a.getRegionNameAsString() + " and " + b.getRegionNameAsString());
|
getLogger().debug("Merging " + a.getRegionNameAsString() + " and " + b.getRegionNameAsString());
|
||||||
|
|
||||||
// Don't try the merge if we're stopping
|
// Don't try the merge if we're stopping
|
||||||
if (context.isStopping()) {
|
if (context.isStopping()) {
|
||||||
|
@ -70,7 +74,7 @@ public class MergeRandomAdjacentRegionsOfTableAction extends Action {
|
||||||
try {
|
try {
|
||||||
admin.mergeRegionsAsync(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false);
|
admin.mergeRegionsAsync(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
LOG.warn("Merge failed, might be caused by other chaos: " + ex.getMessage());
|
getLogger().warn("Merge failed, might be caused by other chaos: " + ex.getMessage());
|
||||||
}
|
}
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
Thread.sleep(sleepTime);
|
Thread.sleep(sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
|
@ -32,8 +31,7 @@ import org.slf4j.LoggerFactory;
|
||||||
* Action that tries to move a random region of a table.
|
* Action that tries to move a random region of a table.
|
||||||
*/
|
*/
|
||||||
public class MoveRandomRegionOfTableAction extends Action {
|
public class MoveRandomRegionOfTableAction extends Action {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG = LoggerFactory.getLogger(MoveRandomRegionOfTableAction.class);
|
||||||
LoggerFactory.getLogger(MoveRandomRegionOfTableAction.class);
|
|
||||||
private final long sleepTime;
|
private final long sleepTime;
|
||||||
private final TableName tableName;
|
private final TableName tableName;
|
||||||
|
|
||||||
|
@ -46,6 +44,10 @@ public class MoveRandomRegionOfTableAction extends Action {
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
|
@ -55,18 +57,19 @@ public class MoveRandomRegionOfTableAction extends Action {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
Admin admin = util.getAdmin();
|
Admin admin = util.getAdmin();
|
||||||
|
|
||||||
LOG.info("Performing action: Move random region of table " + tableName);
|
getLogger().info("Performing action: Move random region of table " + tableName);
|
||||||
List<RegionInfo> regions = admin.getRegions(tableName);
|
List<RegionInfo> regions = admin.getRegions(tableName);
|
||||||
if (regions == null || regions.isEmpty()) {
|
if (regions == null || regions.isEmpty()) {
|
||||||
LOG.info("Table " + tableName + " doesn't have regions to move");
|
getLogger().info("Table " + tableName + " doesn't have regions to move");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
|
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
|
||||||
regions.toArray(new RegionInfo[regions.size()]));
|
regions.toArray(new RegionInfo[0]));
|
||||||
LOG.debug("Move random region {}", region.getRegionNameAsString());
|
getLogger().debug("Move random region {}", region.getRegionNameAsString());
|
||||||
// Use facility over in MoveRegionsOfTableAction...
|
// Use facility over in MoveRegionsOfTableAction...
|
||||||
MoveRegionsOfTableAction.moveRegion(admin, MoveRegionsOfTableAction.getServers(admin), region);
|
MoveRegionsOfTableAction.moveRegion(admin, MoveRegionsOfTableAction.getServers(admin),
|
||||||
|
region, getLogger());
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
Thread.sleep(sleepTime);
|
Thread.sleep(sleepTime);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -22,11 +22,9 @@ import java.io.IOException;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.lang3.RandomUtils;
|
import org.apache.commons.lang3.RandomUtils;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
|
|
||||||
import org.apache.hadoop.hbase.client.Admin;
|
import org.apache.hadoop.hbase.client.Admin;
|
||||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -36,22 +34,21 @@ import org.slf4j.LoggerFactory;
|
||||||
* Action that tries to move every region of a table.
|
* Action that tries to move every region of a table.
|
||||||
*/
|
*/
|
||||||
public class MoveRegionsOfTableAction extends Action {
|
public class MoveRegionsOfTableAction extends Action {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG = LoggerFactory.getLogger(MoveRegionsOfTableAction.class);
|
||||||
LoggerFactory.getLogger(MoveRegionsOfTableAction.class);
|
|
||||||
private final long sleepTime;
|
private final long sleepTime;
|
||||||
private final TableName tableName;
|
private final TableName tableName;
|
||||||
private final long maxTime;
|
private final long maxTime;
|
||||||
|
|
||||||
public MoveRegionsOfTableAction(TableName tableName) {
|
|
||||||
this(-1, MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, tableName);
|
|
||||||
}
|
|
||||||
|
|
||||||
public MoveRegionsOfTableAction(long sleepTime, long maxSleepTime, TableName tableName) {
|
public MoveRegionsOfTableAction(long sleepTime, long maxSleepTime, TableName tableName) {
|
||||||
this.sleepTime = sleepTime;
|
this.sleepTime = sleepTime;
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
this.maxTime = maxSleepTime;
|
this.maxTime = maxSleepTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
|
@ -61,10 +58,10 @@ public class MoveRegionsOfTableAction extends Action {
|
||||||
Admin admin = this.context.getHBaseIntegrationTestingUtility().getAdmin();
|
Admin admin = this.context.getHBaseIntegrationTestingUtility().getAdmin();
|
||||||
ServerName[] servers = getServers(admin);
|
ServerName[] servers = getServers(admin);
|
||||||
|
|
||||||
LOG.info("Performing action: Move regions of table {}", tableName);
|
getLogger().info("Performing action: Move regions of table {}", tableName);
|
||||||
List<RegionInfo> regions = admin.getRegions(tableName);
|
List<RegionInfo> regions = admin.getRegions(tableName);
|
||||||
if (regions == null || regions.isEmpty()) {
|
if (regions == null || regions.isEmpty()) {
|
||||||
LOG.info("Table {} doesn't have regions to move", tableName);
|
getLogger().info("Table {} doesn't have regions to move", tableName);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,7 +74,7 @@ public class MoveRegionsOfTableAction extends Action {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
moveRegion(admin, servers, regionInfo);
|
moveRegion(admin, servers, regionInfo, getLogger());
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
Thread.sleep(sleepTime);
|
Thread.sleep(sleepTime);
|
||||||
}
|
}
|
||||||
|
@ -92,16 +89,16 @@ public class MoveRegionsOfTableAction extends Action {
|
||||||
|
|
||||||
static ServerName [] getServers(Admin admin) throws IOException {
|
static ServerName [] getServers(Admin admin) throws IOException {
|
||||||
Collection<ServerName> serversList = admin.getRegionServers();
|
Collection<ServerName> serversList = admin.getRegionServers();
|
||||||
return serversList.toArray(new ServerName[serversList.size()]);
|
return serversList.toArray(new ServerName[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void moveRegion(Admin admin, ServerName [] servers, RegionInfo regionInfo) {
|
static void moveRegion(Admin admin, ServerName [] servers, RegionInfo regionInfo, Logger logger) {
|
||||||
try {
|
try {
|
||||||
ServerName destServerName = servers[RandomUtils.nextInt(0, servers.length)];
|
ServerName destServerName = servers[RandomUtils.nextInt(0, servers.length)];
|
||||||
LOG.debug("Moving {} to {}", regionInfo.getRegionNameAsString(), destServerName);
|
logger.debug("Moving {} to {}", regionInfo.getRegionNameAsString(), destServerName);
|
||||||
admin.move(regionInfo.getEncodedNameAsBytes(), destServerName);
|
admin.move(regionInfo.getEncodedNameAsBytes(), destServerName);
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
LOG.warn("Move failed, might be caused by other chaos: {}", ex.getMessage());
|
logger.warn("Move failed, might be caused by other chaos: {}", ex.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.chaos.actions;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.client.Admin;
|
import org.apache.hadoop.hbase.client.Admin;
|
||||||
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
|
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
|
||||||
|
@ -40,7 +39,7 @@ public class RemoveColumnAction extends Action {
|
||||||
private final TableName tableName;
|
private final TableName tableName;
|
||||||
private final Set<String> protectedColumns;
|
private final Set<String> protectedColumns;
|
||||||
private Admin admin;
|
private Admin admin;
|
||||||
private Random random;
|
private final Random random;
|
||||||
|
|
||||||
public RemoveColumnAction(TableName tableName, Set<String> protectedColumns) {
|
public RemoveColumnAction(TableName tableName, Set<String> protectedColumns) {
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
|
@ -48,6 +47,10 @@ public class RemoveColumnAction extends Action {
|
||||||
random = new Random();
|
random = new Random();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void init(ActionContext context) throws IOException {
|
public void init(ActionContext context) throws IOException {
|
||||||
super.init(context);
|
super.init(context);
|
||||||
|
@ -69,7 +72,7 @@ public class RemoveColumnAction extends Action {
|
||||||
index = random.nextInt(columnDescriptors.length);
|
index = random.nextInt(columnDescriptors.length);
|
||||||
}
|
}
|
||||||
byte[] colDescName = columnDescriptors[index].getName();
|
byte[] colDescName = columnDescriptors[index].getName();
|
||||||
LOG.debug("Performing action: Removing " + Bytes.toString(colDescName)+ " from "
|
getLogger().debug("Performing action: Removing " + Bytes.toString(colDescName)+ " from "
|
||||||
+ tableName.getNameAsString());
|
+ tableName.getNameAsString());
|
||||||
|
|
||||||
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableDescriptor);
|
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableDescriptor);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
|
@ -31,9 +30,9 @@ import org.slf4j.LoggerFactory;
|
||||||
*/
|
*/
|
||||||
public class ReorderPacketsCommandAction extends TCCommandAction {
|
public class ReorderPacketsCommandAction extends TCCommandAction {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(ReorderPacketsCommandAction.class);
|
private static final Logger LOG = LoggerFactory.getLogger(ReorderPacketsCommandAction.class);
|
||||||
private float ratio;
|
private final float ratio;
|
||||||
private long duration;
|
private final long duration;
|
||||||
private long delay;
|
private final long delay;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reorder network packets on a random regionserver.
|
* Reorder network packets on a random regionserver.
|
||||||
|
@ -52,8 +51,12 @@ public class ReorderPacketsCommandAction extends TCCommandAction {
|
||||||
this.delay = delay;
|
this.delay = delay;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
protected void localPerform() throws IOException {
|
protected void localPerform() throws IOException {
|
||||||
LOG.info("Starting to execute ReorderPacketsCommandAction");
|
getLogger().info("Starting to execute ReorderPacketsCommandAction");
|
||||||
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
||||||
String hostname = server.getHostname();
|
String hostname = server.getHostname();
|
||||||
|
|
||||||
|
@ -61,12 +64,12 @@ public class ReorderPacketsCommandAction extends TCCommandAction {
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
|
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
|
||||||
Thread.sleep(duration);
|
Thread.sleep(duration);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
LOG.debug("Failed to run the command for the full duration", e);
|
getLogger().debug("Failed to run the command for the full duration", e);
|
||||||
} finally {
|
} finally {
|
||||||
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
|
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Finished to execute ReorderPacketsCommandAction");
|
getLogger().info("Finished to execute ReorderPacketsCommandAction");
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getCommand(String operation){
|
private String getCommand(String operation){
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,18 +19,13 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.util.Threads;
|
import org.apache.hadoop.hbase.util.Threads;
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class for restarting HBaseServer's
|
* Base class for restarting HBaseServer's
|
||||||
*/
|
*/
|
||||||
public class RestartActionBaseAction extends Action {
|
public abstract class RestartActionBaseAction extends Action {
|
||||||
private static final Logger LOG =
|
|
||||||
LoggerFactory.getLogger(RestartActionBaseAction.class);
|
|
||||||
long sleepTime; // how long should we sleep
|
long sleepTime; // how long should we sleep
|
||||||
|
|
||||||
public RestartActionBaseAction(long sleepTime) {
|
public RestartActionBaseAction(long sleepTime) {
|
||||||
|
@ -38,7 +33,7 @@ public class RestartActionBaseAction extends Action {
|
||||||
}
|
}
|
||||||
|
|
||||||
void sleep(long sleepTime) {
|
void sleep(long sleepTime) {
|
||||||
LOG.info("Sleeping for:" + sleepTime);
|
getLogger().info("Sleeping for:" + sleepTime);
|
||||||
Threads.sleep(sleepTime);
|
Threads.sleep(sleepTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -49,10 +44,10 @@ public class RestartActionBaseAction extends Action {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Killing master: {}", server);
|
getLogger().info("Killing master: {}", server);
|
||||||
killMaster(server);
|
killMaster(server);
|
||||||
sleep(sleepTime);
|
sleep(sleepTime);
|
||||||
LOG.info("Starting master: {}", server);
|
getLogger().info("Starting master: {}", server);
|
||||||
startMaster(server);
|
startMaster(server);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,10 +63,10 @@ public class RestartActionBaseAction extends Action {
|
||||||
if (context.isStopping()) {
|
if (context.isStopping()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
LOG.info("Stopping region server: {}", server);
|
getLogger().info("Stopping region server: {}", server);
|
||||||
stopRs(server);
|
stopRs(server);
|
||||||
sleep(sleepTime);
|
sleep(sleepTime);
|
||||||
LOG.info("Starting region server: {}", server);
|
getLogger().info("Starting region server: {}", server);
|
||||||
startRs(server);
|
startRs(server);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,10 +76,10 @@ public class RestartActionBaseAction extends Action {
|
||||||
if (context.isStopping()) {
|
if (context.isStopping()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
LOG.info("Killing region server: {}", server);
|
getLogger().info("Killing region server: {}", server);
|
||||||
killRs(server);
|
killRs(server);
|
||||||
sleep(sleepTime);
|
sleep(sleepTime);
|
||||||
LOG.info("Starting region server: {}", server);
|
getLogger().info("Starting region server: {}", server);
|
||||||
startRs(server);
|
startRs(server);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -94,10 +89,10 @@ public class RestartActionBaseAction extends Action {
|
||||||
if (context.isStopping()) {
|
if (context.isStopping()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
LOG.info("Killing zookeeper node: {}", server);
|
getLogger().info("Killing zookeeper node: {}", server);
|
||||||
killZKNode(server);
|
killZKNode(server);
|
||||||
sleep(sleepTime);
|
sleep(sleepTime);
|
||||||
LOG.info("Starting zookeeper node: {}", server);
|
getLogger().info("Starting zookeeper node: {}", server);
|
||||||
startZKNode(server);
|
startZKNode(server);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -107,10 +102,10 @@ public class RestartActionBaseAction extends Action {
|
||||||
if (context.isStopping()) {
|
if (context.isStopping()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
LOG.info("Killing data node: {}", server);
|
getLogger().info("Killing data node: {}", server);
|
||||||
killDataNode(server);
|
killDataNode(server);
|
||||||
sleep(sleepTime);
|
sleep(sleepTime);
|
||||||
LOG.info("Starting data node: {}", server);
|
getLogger().info("Starting data node: {}", server);
|
||||||
startDataNode(server);
|
startDataNode(server);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,11 +115,10 @@ public class RestartActionBaseAction extends Action {
|
||||||
if (context.isStopping()) {
|
if (context.isStopping()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
LOG.info("Killing name node: {}", server);
|
getLogger().info("Killing name node: {}", server);
|
||||||
killNameNode(server);
|
killNameNode(server);
|
||||||
sleep(sleepTime);
|
sleep(sleepTime);
|
||||||
LOG.info("Starting name node: {}", server);
|
getLogger().info("Starting name node: {}", server);
|
||||||
startNameNode(server);
|
startNameNode(server);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -31,9 +31,14 @@ public class RestartActiveMasterAction extends RestartActionBaseAction {
|
||||||
public RestartActiveMasterAction(long sleepTime) {
|
public RestartActiveMasterAction(long sleepTime) {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info("Performing action: Restart active master");
|
getLogger().info("Performing action: Restart active master");
|
||||||
|
|
||||||
ServerName master = cluster.getClusterMetrics().getMasterName();
|
ServerName master = cluster.getClusterMetrics().getMasterName();
|
||||||
restartMaster(master, sleepTime);
|
restartMaster(master, sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -51,9 +51,13 @@ public class RestartActiveNameNodeAction extends RestartActionBaseAction {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info("Performing action: Restart active namenode");
|
getLogger().info("Performing action: Restart active namenode");
|
||||||
Configuration conf = CommonFSUtils.getRootDir(getConf()).getFileSystem(getConf()).getConf();
|
Configuration conf = CommonFSUtils.getRootDir(getConf()).getFileSystem(getConf()).getConf();
|
||||||
String nameServiceID = DFSUtil.getNamenodeNameServiceId(conf);
|
String nameServiceID = DFSUtil.getNamenodeNameServiceId(conf);
|
||||||
if (!HAUtil.isHAEnabled(conf, nameServiceID)) {
|
if (!HAUtil.isHAEnabled(conf, nameServiceID)) {
|
||||||
|
@ -85,9 +89,9 @@ public class RestartActiveNameNodeAction extends RestartActionBaseAction {
|
||||||
if (activeNamenode == null) {
|
if (activeNamenode == null) {
|
||||||
throw new Exception("No active Name node found in zookeeper under " + hadoopHAZkNode);
|
throw new Exception("No active Name node found in zookeeper under " + hadoopHAZkNode);
|
||||||
}
|
}
|
||||||
LOG.info("Found active namenode host:" + activeNamenode);
|
getLogger().info("Found active namenode host:" + activeNamenode);
|
||||||
ServerName activeNNHost = ServerName.valueOf(activeNamenode, -1, -1);
|
ServerName activeNNHost = ServerName.valueOf(activeNamenode, -1, -1);
|
||||||
LOG.info("Restarting Active NameNode :" + activeNamenode);
|
getLogger().info("Restarting Active NameNode :" + activeNamenode);
|
||||||
restartNameNode(activeNNHost, sleepTime);
|
restartNameNode(activeNNHost, sleepTime);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -41,9 +41,13 @@ public class RestartRandomDataNodeAction extends RestartActionBaseAction {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info("Performing action: Restart random data node");
|
getLogger().info("Performing action: Restart random data node");
|
||||||
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getDataNodes());
|
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getDataNodes());
|
||||||
restartDataNode(server, sleepTime);
|
restartDataNode(server, sleepTime);
|
||||||
}
|
}
|
||||||
|
@ -56,6 +60,6 @@ public class RestartRandomDataNodeAction extends RestartActionBaseAction {
|
||||||
for (DatanodeInfo dataNode: dfsClient.datanodeReport(HdfsConstants.DatanodeReportType.LIVE)) {
|
for (DatanodeInfo dataNode: dfsClient.datanodeReport(HdfsConstants.DatanodeReportType.LIVE)) {
|
||||||
hosts.add(ServerName.valueOf(dataNode.getHostName(), -1, -1));
|
hosts.add(ServerName.valueOf(dataNode.getHostName(), -1, -1));
|
||||||
}
|
}
|
||||||
return hosts.toArray(new ServerName[hosts.size()]);
|
return hosts.toArray(new ServerName[0]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -33,9 +33,13 @@ public class RestartRandomRsAction extends RestartActionBaseAction {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info("Performing action: Restart random region server");
|
getLogger().info("Performing action: Restart random region server");
|
||||||
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
|
||||||
|
|
||||||
restartRs(server, sleepTime);
|
restartRs(server, sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -20,12 +20,20 @@ package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
public class RestartRandomRsExceptMetaAction extends RestartRandomRsAction {
|
public class RestartRandomRsExceptMetaAction extends RestartRandomRsAction {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(RestartRandomRsExceptMetaAction.class);
|
||||||
|
|
||||||
public RestartRandomRsExceptMetaAction(long sleepTime) {
|
public RestartRandomRsExceptMetaAction(long sleepTime) {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
int tries = 10;
|
int tries = 10;
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -34,9 +34,13 @@ public class RestartRandomZKNodeAction extends RestartActionBaseAction {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info("Performing action: Restart random zookeeper node");
|
getLogger().info("Performing action: Restart random zookeeper node");
|
||||||
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(
|
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(
|
||||||
ZKServerTool.readZKNodes(getConf()));
|
ZKServerTool.readZKNodes(getConf()));
|
||||||
restartZKNode(server, sleepTime);
|
restartZKNode(server, sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -33,12 +33,17 @@ public class RestartRsHoldingMetaAction extends RestartActionBaseAction {
|
||||||
public RestartRsHoldingMetaAction(long sleepTime) {
|
public RestartRsHoldingMetaAction(long sleepTime) {
|
||||||
super(sleepTime);
|
super(sleepTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info("Performing action: Restart region server holding META");
|
getLogger().info("Performing action: Restart region server holding META");
|
||||||
ServerName server = cluster.getServerHoldingMeta();
|
ServerName server = cluster.getServerHoldingMeta();
|
||||||
if (server == null) {
|
if (server == null) {
|
||||||
LOG.warn("No server is holding hbase:meta right now.");
|
getLogger().warn("No server is holding hbase:meta right now.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
ClusterMetrics clusterStatus = cluster.getClusterMetrics();
|
ClusterMetrics clusterStatus = cluster.getClusterMetrics();
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.lang3.RandomUtils;
|
import org.apache.commons.lang3.RandomUtils;
|
||||||
import org.apache.hadoop.hbase.HRegionLocation;
|
import org.apache.hadoop.hbase.HRegionLocation;
|
||||||
import org.apache.hadoop.hbase.client.RegionLocator;
|
import org.apache.hadoop.hbase.client.RegionLocator;
|
||||||
|
@ -30,8 +29,7 @@ import org.slf4j.LoggerFactory;
|
||||||
* Action that restarts an HRegionServer holding one of the regions of the table.
|
* Action that restarts an HRegionServer holding one of the regions of the table.
|
||||||
*/
|
*/
|
||||||
public class RestartRsHoldingTableAction extends RestartActionBaseAction {
|
public class RestartRsHoldingTableAction extends RestartActionBaseAction {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG = LoggerFactory.getLogger(RestartRsHoldingTableAction.class);
|
||||||
LoggerFactory.getLogger(RestartRsHoldingTableAction.class);
|
|
||||||
|
|
||||||
private final RegionLocator locator;
|
private final RegionLocator locator;
|
||||||
|
|
||||||
|
@ -40,9 +38,14 @@ public class RestartRsHoldingTableAction extends RestartActionBaseAction {
|
||||||
this.locator = locator;
|
this.locator = locator;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info("Performing action: Restart random RS holding table " + this.locator.getName());
|
getLogger().info(
|
||||||
|
"Performing action: Restart random RS holding table " + this.locator.getName());
|
||||||
|
|
||||||
List<HRegionLocation> locations = locator.getAllRegionLocations();
|
List<HRegionLocation> locations = locator.getAllRegionLocations();
|
||||||
restartRs(locations.get(RandomUtils.nextInt(0, locations.size())).getServerName(), sleepTime);
|
restartRs(locations.get(RandomUtils.nextInt(0, locations.size())).getServerName(), sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -23,7 +23,6 @@ import java.util.ArrayList;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Queue;
|
import java.util.Queue;
|
||||||
|
|
||||||
import org.apache.commons.lang3.RandomUtils;
|
import org.apache.commons.lang3.RandomUtils;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
|
@ -60,10 +59,14 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
|
||||||
START
|
START
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
|
getLogger().info("Performing action: Rolling batch restarting {}% of region servers",
|
||||||
(int)(ratio * 100)));
|
(int)(ratio * 100));
|
||||||
List<ServerName> selectedServers = selectServers();
|
List<ServerName> selectedServers = selectServers();
|
||||||
|
|
||||||
Queue<ServerName> serversToBeKilled = new LinkedList<>(selectedServers);
|
Queue<ServerName> serversToBeKilled = new LinkedList<>(selectedServers);
|
||||||
|
@ -71,8 +74,8 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
|
||||||
|
|
||||||
// loop while there are servers to be killed or dead servers to be restarted
|
// loop while there are servers to be killed or dead servers to be restarted
|
||||||
while ((!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) && !context.isStopping()) {
|
while ((!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) && !context.isStopping()) {
|
||||||
KillOrStart action = KillOrStart.KILL;
|
|
||||||
|
|
||||||
|
final KillOrStart action;
|
||||||
if (serversToBeKilled.isEmpty()) { // no more servers to kill
|
if (serversToBeKilled.isEmpty()) { // no more servers to kill
|
||||||
action = KillOrStart.START;
|
action = KillOrStart.START;
|
||||||
} else if (deadServers.isEmpty()) {
|
} else if (deadServers.isEmpty()) {
|
||||||
|
@ -95,7 +98,7 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
|
||||||
} catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
|
} catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
|
||||||
// We've seen this in test runs where we timeout but the kill went through. HBASE-9743
|
// We've seen this in test runs where we timeout but the kill went through. HBASE-9743
|
||||||
// So, add to deadServers even if exception so the start gets called.
|
// So, add to deadServers even if exception so the start gets called.
|
||||||
LOG.info("Problem killing but presume successful; code=" + e.getExitCode(), e);
|
getLogger().info("Problem killing but presume successful; code={}", e.getExitCode(), e);
|
||||||
}
|
}
|
||||||
deadServers.add(server);
|
deadServers.add(server);
|
||||||
break;
|
break;
|
||||||
|
@ -106,7 +109,7 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
|
||||||
} catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
|
} catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
|
||||||
// The start may fail but better to just keep going though we may lose server.
|
// The start may fail but better to just keep going though we may lose server.
|
||||||
//
|
//
|
||||||
LOG.info("Problem starting, will retry; code=" + e.getExitCode(), e);
|
getLogger().info("Problem starting, will retry; code={}", e.getExitCode(), e);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -121,25 +124,23 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Small test to ensure the class basically works.
|
* Small test to ensure the class basically works.
|
||||||
* @param args
|
|
||||||
* @throws Exception
|
|
||||||
*/
|
*/
|
||||||
public static void main(final String[] args) throws Exception {
|
public static void main(final String[] args) throws Exception {
|
||||||
RollingBatchRestartRsAction action = new RollingBatchRestartRsAction(1, 1.0f) {
|
RollingBatchRestartRsAction action = new RollingBatchRestartRsAction(1, 1.0f) {
|
||||||
private int invocations = 0;
|
private int invocations = 0;
|
||||||
@Override
|
@Override
|
||||||
protected ServerName[] getCurrentServers() throws IOException {
|
protected ServerName[] getCurrentServers() {
|
||||||
final int count = 4;
|
final int count = 4;
|
||||||
List<ServerName> serverNames = new ArrayList<>(count);
|
List<ServerName> serverNames = new ArrayList<>(count);
|
||||||
for (int i = 0; i < 4; i++) {
|
for (int i = 0; i < 4; i++) {
|
||||||
serverNames.add(ServerName.valueOf(i + ".example.org", i, i));
|
serverNames.add(ServerName.valueOf(i + ".example.org", i, i));
|
||||||
}
|
}
|
||||||
return serverNames.toArray(new ServerName[serverNames.size()]);
|
return serverNames.toArray(new ServerName[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void killRs(ServerName server) throws IOException {
|
protected void killRs(ServerName server) throws IOException {
|
||||||
LOG.info("Killed " + server);
|
LOG.info("Killed {}", server);
|
||||||
if (this.invocations++ % 3 == 0) {
|
if (this.invocations++ % 3 == 0) {
|
||||||
throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed");
|
throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed");
|
||||||
}
|
}
|
||||||
|
@ -147,7 +148,7 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void startRs(ServerName server) throws IOException {
|
protected void startRs(ServerName server) throws IOException {
|
||||||
LOG.info("Started " + server);
|
LOG.info("Started {}", server);
|
||||||
if (this.invocations++ % 3 == 0) {
|
if (this.invocations++ % 3 == 0) {
|
||||||
throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed");
|
throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed");
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -22,7 +22,6 @@ import java.io.IOException;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Queue;
|
import java.util.Queue;
|
||||||
|
|
||||||
import org.apache.commons.lang3.RandomUtils;
|
import org.apache.commons.lang3.RandomUtils;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
|
@ -40,9 +39,9 @@ import org.slf4j.LoggerFactory;
|
||||||
public class RollingBatchSuspendResumeRsAction extends Action {
|
public class RollingBatchSuspendResumeRsAction extends Action {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(RollingBatchSuspendResumeRsAction.class);
|
LoggerFactory.getLogger(RollingBatchSuspendResumeRsAction.class);
|
||||||
private float ratio;
|
private final float ratio;
|
||||||
private long sleepTime;
|
private final long sleepTime;
|
||||||
private int maxSuspendedServers; // number of maximum suspended servers at any given time.
|
private final int maxSuspendedServers; // number of maximum suspended servers at any given time.
|
||||||
|
|
||||||
public RollingBatchSuspendResumeRsAction(long sleepTime, float ratio) {
|
public RollingBatchSuspendResumeRsAction(long sleepTime, float ratio) {
|
||||||
this(sleepTime, ratio, 5);
|
this(sleepTime, ratio, 5);
|
||||||
|
@ -58,10 +57,14 @@ public class RollingBatchSuspendResumeRsAction extends Action {
|
||||||
SUSPEND, RESUME
|
SUSPEND, RESUME
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
|
getLogger().info("Performing action: Rolling batch restarting {}% of region servers",
|
||||||
(int) (ratio * 100)));
|
(int) (ratio * 100));
|
||||||
List<ServerName> selectedServers = selectServers();
|
List<ServerName> selectedServers = selectServers();
|
||||||
|
|
||||||
Queue<ServerName> serversToBeSuspended = new LinkedList<>(selectedServers);
|
Queue<ServerName> serversToBeSuspended = new LinkedList<>(selectedServers);
|
||||||
|
@ -70,8 +73,8 @@ public class RollingBatchSuspendResumeRsAction extends Action {
|
||||||
// loop while there are servers to be suspended or suspended servers to be resumed
|
// loop while there are servers to be suspended or suspended servers to be resumed
|
||||||
while ((!serversToBeSuspended.isEmpty() || !suspendedServers.isEmpty()) && !context
|
while ((!serversToBeSuspended.isEmpty() || !suspendedServers.isEmpty()) && !context
|
||||||
.isStopping()) {
|
.isStopping()) {
|
||||||
SuspendOrResume action;
|
|
||||||
|
|
||||||
|
final SuspendOrResume action;
|
||||||
if (serversToBeSuspended.isEmpty()) { // no more servers to suspend
|
if (serversToBeSuspended.isEmpty()) { // no more servers to suspend
|
||||||
action = SuspendOrResume.RESUME;
|
action = SuspendOrResume.RESUME;
|
||||||
} else if (suspendedServers.isEmpty()) {
|
} else if (suspendedServers.isEmpty()) {
|
||||||
|
@ -105,7 +108,7 @@ public class RollingBatchSuspendResumeRsAction extends Action {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Sleeping for:{}", sleepTime);
|
getLogger().info("Sleeping for:{}", sleepTime);
|
||||||
Threads.sleep(sleepTime);
|
Threads.sleep(sleepTime);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -28,8 +28,7 @@ import org.slf4j.LoggerFactory;
|
||||||
* Action that tries to take a snapshot of a table.
|
* Action that tries to take a snapshot of a table.
|
||||||
*/
|
*/
|
||||||
public class SnapshotTableAction extends Action {
|
public class SnapshotTableAction extends Action {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG = LoggerFactory.getLogger(SnapshotTableAction.class);
|
||||||
LoggerFactory.getLogger(SnapshotTableAction.class);
|
|
||||||
private final TableName tableName;
|
private final TableName tableName;
|
||||||
private final long sleepTime;
|
private final long sleepTime;
|
||||||
|
|
||||||
|
@ -42,6 +41,10 @@ public class SnapshotTableAction extends Action {
|
||||||
this.sleepTime = sleepTime;
|
this.sleepTime = sleepTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
|
@ -53,7 +56,7 @@ public class SnapshotTableAction extends Action {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("Performing action: Snapshot table " + tableName);
|
getLogger().info("Performing action: Snapshot table {}", tableName);
|
||||||
admin.snapshot(snapshotName, tableName);
|
admin.snapshot(snapshotName, tableName);
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
Thread.sleep(sleepTime);
|
Thread.sleep(sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,14 +19,12 @@ package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.concurrent.ThreadLocalRandom;
|
import java.util.concurrent.ThreadLocalRandom;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.client.Admin;
|
import org.apache.hadoop.hbase.client.Admin;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
|
||||||
public class SplitAllRegionOfTableAction extends Action {
|
public class SplitAllRegionOfTableAction extends Action {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(SplitAllRegionOfTableAction.class);
|
LoggerFactory.getLogger(SplitAllRegionOfTableAction.class);
|
||||||
|
@ -47,6 +45,10 @@ public class SplitAllRegionOfTableAction extends Action {
|
||||||
this.maxFullTableSplits = getConf().getInt(MAX_SPLIT_KEY, DEFAULT_MAX_SPLITS);
|
this.maxFullTableSplits = getConf().getInt(MAX_SPLIT_KEY, DEFAULT_MAX_SPLITS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
|
@ -61,10 +63,10 @@ public class SplitAllRegionOfTableAction extends Action {
|
||||||
if (ThreadLocalRandom.current().nextDouble()
|
if (ThreadLocalRandom.current().nextDouble()
|
||||||
< (((double) splits) / ((double) maxFullTableSplits)) / ((double) 2)) {
|
< (((double) splits) / ((double) maxFullTableSplits)) / ((double) 2)) {
|
||||||
splits++;
|
splits++;
|
||||||
LOG.info("Performing action: Split all regions of " + tableName);
|
getLogger().info("Performing action: Split all regions of {}", tableName);
|
||||||
admin.split(tableName);
|
admin.split(tableName);
|
||||||
} else {
|
} else {
|
||||||
LOG.info("Skipping split of all regions.");
|
getLogger().info("Skipping split of all regions.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
|
||||||
|
@ -46,15 +45,19 @@ public class SplitRandomRegionOfTableAction extends Action {
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
Admin admin = util.getAdmin();
|
Admin admin = util.getAdmin();
|
||||||
|
|
||||||
LOG.info("Performing action: Split random region of table " + tableName);
|
getLogger().info("Performing action: Split random region of table " + tableName);
|
||||||
List<RegionInfo> regions = admin.getRegions(tableName);
|
List<RegionInfo> regions = admin.getRegions(tableName);
|
||||||
if (regions == null || regions.isEmpty()) {
|
if (regions == null || regions.isEmpty()) {
|
||||||
LOG.info("Table " + tableName + " doesn't have regions to split");
|
getLogger().info("Table " + tableName + " doesn't have regions to split");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Don't try the split if we're stopping
|
// Don't try the split if we're stopping
|
||||||
|
@ -63,12 +66,12 @@ public class SplitRandomRegionOfTableAction extends Action {
|
||||||
}
|
}
|
||||||
|
|
||||||
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
|
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
|
||||||
regions.toArray(new RegionInfo[regions.size()]));
|
regions.toArray(new RegionInfo[0]));
|
||||||
LOG.debug("Splitting region " + region.getRegionNameAsString());
|
getLogger().debug("Splitting region " + region.getRegionNameAsString());
|
||||||
try {
|
try {
|
||||||
admin.splitRegionAsync(region.getRegionName()).get();
|
admin.splitRegionAsync(region.getRegionName()).get();
|
||||||
} catch (Exception ex) {
|
} catch (Exception ex) {
|
||||||
LOG.warn("Split failed, might be caused by other chaos: " + ex.getMessage());
|
getLogger().warn("Split failed, might be caused by other chaos: " + ex.getMessage());
|
||||||
}
|
}
|
||||||
if (sleepTime > 0) {
|
if (sleepTime > 0) {
|
||||||
Thread.sleep(sleepTime);
|
Thread.sleep(sleepTime);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,18 +19,14 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.DistributedHBaseCluster;
|
import org.apache.hadoop.hbase.DistributedHBaseCluster;
|
||||||
import org.apache.hadoop.hbase.HBaseCluster;
|
import org.apache.hadoop.hbase.HBaseCluster;
|
||||||
import org.apache.hadoop.hbase.HBaseClusterManager;
|
import org.apache.hadoop.hbase.HBaseClusterManager;
|
||||||
import org.slf4j.Logger;
|
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base class for performing Actions based on linux commands requiring sudo privileges
|
* Base class for performing Actions based on linux commands requiring sudo privileges
|
||||||
*/
|
*/
|
||||||
abstract public class SudoCommandAction extends Action {
|
abstract public class SudoCommandAction extends Action {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(SudoCommandAction.class);
|
|
||||||
|
|
||||||
protected long timeout;
|
protected long timeout;
|
||||||
protected HBaseClusterManager clusterManager;
|
protected HBaseClusterManager clusterManager;
|
||||||
|
@ -43,9 +39,9 @@ abstract public class SudoCommandAction extends Action {
|
||||||
public void init(ActionContext context) throws IOException {
|
public void init(ActionContext context) throws IOException {
|
||||||
super.init(context);
|
super.init(context);
|
||||||
HBaseCluster cluster = context.getHBaseCluster();
|
HBaseCluster cluster = context.getHBaseCluster();
|
||||||
if(cluster != null && cluster instanceof DistributedHBaseCluster){
|
if (cluster instanceof DistributedHBaseCluster){
|
||||||
Object manager = ((DistributedHBaseCluster)cluster).getClusterManager();
|
Object manager = ((DistributedHBaseCluster)cluster).getClusterManager();
|
||||||
if(manager != null && manager instanceof HBaseClusterManager){
|
if (manager instanceof HBaseClusterManager){
|
||||||
clusterManager = (HBaseClusterManager) manager;
|
clusterManager = (HBaseClusterManager) manager;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -54,7 +50,7 @@ abstract public class SudoCommandAction extends Action {
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
if (clusterManager == null){
|
if (clusterManager == null){
|
||||||
LOG.info("Couldn't perform command action, it requires a distributed cluster.");
|
getLogger().info("Couldn't perform command action, it requires a distributed cluster.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -33,16 +33,14 @@ import org.junit.experimental.categories.Category;
|
||||||
import org.mockito.Mockito;
|
import org.mockito.Mockito;
|
||||||
|
|
||||||
@Category({MediumTests.class})
|
@Category({MediumTests.class})
|
||||||
public class TestChangeSplitPolicyAction extends Action {
|
public class TestChangeSplitPolicyAction {
|
||||||
|
|
||||||
@ClassRule
|
@ClassRule
|
||||||
public static final HBaseClassTestRule CLASS_RULE =
|
public static final HBaseClassTestRule CLASS_RULE =
|
||||||
HBaseClassTestRule.forClass(TestChangeSplitPolicyAction.class);
|
HBaseClassTestRule.forClass(TestChangeSplitPolicyAction.class);
|
||||||
|
|
||||||
private final static IntegrationTestingUtility TEST_UTIL = new IntegrationTestingUtility();
|
private final static IntegrationTestingUtility TEST_UTIL = new IntegrationTestingUtility();
|
||||||
private static ChangeSplitPolicyAction action;
|
private final TableName tableName = TableName.valueOf("ChangeSplitPolicyAction");
|
||||||
private Admin admin;
|
|
||||||
private TableName tableName = TableName.valueOf("ChangeSplitPolicyAction");
|
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void setUpBeforeClass() throws Exception {
|
public static void setUpBeforeClass() throws Exception {
|
||||||
|
@ -54,17 +52,17 @@ public class TestChangeSplitPolicyAction extends Action {
|
||||||
}
|
}
|
||||||
@Before
|
@Before
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
this.admin = TEST_UTIL.getAdmin();
|
Admin admin = TEST_UTIL.getAdmin();
|
||||||
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
|
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
|
||||||
admin.createTable(builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build());
|
admin.createTable(builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testChangeSplitPolicyAction() throws Exception {
|
public void testChangeSplitPolicyAction() throws Exception {
|
||||||
ActionContext ctx = Mockito.mock(ActionContext.class);
|
Action.ActionContext ctx = Mockito.mock(Action.ActionContext.class);
|
||||||
Mockito.when(ctx.getHBaseIntegrationTestingUtility()).thenReturn(TEST_UTIL);
|
Mockito.when(ctx.getHBaseIntegrationTestingUtility()).thenReturn(TEST_UTIL);
|
||||||
Mockito.when(ctx.getHBaseCluster()).thenReturn(TEST_UTIL.getHBaseCluster());
|
Mockito.when(ctx.getHBaseCluster()).thenReturn(TEST_UTIL.getHBaseCluster());
|
||||||
action = new ChangeSplitPolicyAction(tableName);
|
ChangeSplitPolicyAction action = new ChangeSplitPolicyAction(tableName);
|
||||||
action.init(ctx);
|
action.init(ctx);
|
||||||
action.perform();
|
action.perform();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -19,7 +19,6 @@
|
||||||
package org.apache.hadoop.hbase.chaos.actions;
|
package org.apache.hadoop.hbase.chaos.actions;
|
||||||
|
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
import org.apache.hadoop.hbase.client.Admin;
|
import org.apache.hadoop.hbase.client.Admin;
|
||||||
|
@ -30,8 +29,7 @@ import org.slf4j.LoggerFactory;
|
||||||
* Action that tries to truncate of a table.
|
* Action that tries to truncate of a table.
|
||||||
*/
|
*/
|
||||||
public class TruncateTableAction extends Action {
|
public class TruncateTableAction extends Action {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG = LoggerFactory.getLogger(TruncateTableAction.class);
|
||||||
LoggerFactory.getLogger(TruncateTableAction.class);
|
|
||||||
private final TableName tableName;
|
private final TableName tableName;
|
||||||
private final Random random;
|
private final Random random;
|
||||||
|
|
||||||
|
@ -40,6 +38,10 @@ public class TruncateTableAction extends Action {
|
||||||
this.random = new Random();
|
this.random = new Random();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
|
||||||
|
@ -51,8 +53,8 @@ public class TruncateTableAction extends Action {
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean preserveSplits = random.nextBoolean();
|
boolean preserveSplits = random.nextBoolean();
|
||||||
LOG.info("Performing action: Truncate table " + tableName.getNameAsString() +
|
getLogger().info("Performing action: Truncate table {} preserve splits {}",
|
||||||
"preserve splits " + preserveSplits);
|
tableName.getNameAsString(), preserveSplits);
|
||||||
admin.truncateTable(tableName, preserveSplits);
|
admin.truncateTable(tableName, preserveSplits);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -23,7 +23,6 @@ import java.util.HashSet;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.commons.lang3.RandomUtils;
|
import org.apache.commons.lang3.RandomUtils;
|
||||||
import org.apache.hadoop.hbase.ClusterMetrics;
|
import org.apache.hadoop.hbase.ClusterMetrics;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
|
@ -42,10 +41,10 @@ public class UnbalanceKillAndRebalanceAction extends Action {
|
||||||
private static final double HOARD_FRC_OF_REGIONS = 0.8;
|
private static final double HOARD_FRC_OF_REGIONS = 0.8;
|
||||||
/** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance
|
/** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance
|
||||||
* and restarting the servers; to make sure these events have time to impact the cluster. */
|
* and restarting the servers; to make sure these events have time to impact the cluster. */
|
||||||
private long waitForUnbalanceMilliSec;
|
private final long waitForUnbalanceMilliSec;
|
||||||
private long waitForKillsMilliSec;
|
private final long waitForKillsMilliSec;
|
||||||
private long waitAfterBalanceMilliSec;
|
private final long waitAfterBalanceMilliSec;
|
||||||
private boolean killMetaRs;
|
private final boolean killMetaRs;
|
||||||
|
|
||||||
public UnbalanceKillAndRebalanceAction(long waitUnbalance, long waitKill, long waitAfterBalance,
|
public UnbalanceKillAndRebalanceAction(long waitUnbalance, long waitKill, long waitAfterBalance,
|
||||||
boolean killMetaRs) {
|
boolean killMetaRs) {
|
||||||
|
@ -56,6 +55,10 @@ public class UnbalanceKillAndRebalanceAction extends Action {
|
||||||
this.killMetaRs = killMetaRs;
|
this.killMetaRs = killMetaRs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
ClusterMetrics status = this.cluster.getClusterMetrics();
|
ClusterMetrics status = this.cluster.getClusterMetrics();
|
||||||
|
@ -86,7 +89,7 @@ public class UnbalanceKillAndRebalanceAction extends Action {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!killMetaRs && targetServer.equals(metaServer)) {
|
if (!killMetaRs && targetServer.equals(metaServer)) {
|
||||||
LOG.info("Not killing server because it holds hbase:meta.");
|
getLogger().info("Not killing server because it holds hbase:meta.");
|
||||||
} else {
|
} else {
|
||||||
killRs(targetServer);
|
killRs(targetServer);
|
||||||
killedServers.add(targetServer);
|
killedServers.add(targetServer);
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.chaos.actions;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.commons.lang3.RandomUtils;
|
import org.apache.commons.lang3.RandomUtils;
|
||||||
import org.apache.hadoop.hbase.ClusterMetrics;
|
import org.apache.hadoop.hbase.ClusterMetrics;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
|
@ -32,10 +31,9 @@ import org.slf4j.LoggerFactory;
|
||||||
* Action that tries to unbalance the regions of a cluster.
|
* Action that tries to unbalance the regions of a cluster.
|
||||||
*/
|
*/
|
||||||
public class UnbalanceRegionsAction extends Action {
|
public class UnbalanceRegionsAction extends Action {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG = LoggerFactory.getLogger(UnbalanceRegionsAction.class);
|
||||||
LoggerFactory.getLogger(UnbalanceRegionsAction.class);
|
private final double fractionOfRegions;
|
||||||
private double fractionOfRegions;
|
private final double fractionOfServers;
|
||||||
private double fractionOfServers;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unbalances the regions on the cluster by choosing "target" servers, and moving
|
* Unbalances the regions on the cluster by choosing "target" servers, and moving
|
||||||
|
@ -48,9 +46,13 @@ public class UnbalanceRegionsAction extends Action {
|
||||||
this.fractionOfServers = fractionOfServers;
|
this.fractionOfServers = fractionOfServers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override protected Logger getLogger() {
|
||||||
|
return LOG;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void perform() throws Exception {
|
public void perform() throws Exception {
|
||||||
LOG.info("Unbalancing regions");
|
getLogger().info("Unbalancing regions");
|
||||||
ClusterMetrics status = this.cluster.getClusterMetrics();
|
ClusterMetrics status = this.cluster.getClusterMetrics();
|
||||||
List<ServerName> victimServers = new LinkedList<>(status.getLiveServerMetrics().keySet());
|
List<ServerName> victimServers = new LinkedList<>(status.getLiveServerMetrics().keySet());
|
||||||
int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size());
|
int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size());
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
/**
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
* or more contributor license agreements. See the NOTICE file
|
* or more contributor license agreements. See the NOTICE file
|
||||||
* distributed with this work for additional information
|
* distributed with this work for additional information
|
||||||
|
@ -23,7 +23,7 @@ import java.util.List;
|
||||||
|
|
||||||
/** A policy that runs multiple other policies one after the other */
|
/** A policy that runs multiple other policies one after the other */
|
||||||
public class CompositeSequentialPolicy extends Policy {
|
public class CompositeSequentialPolicy extends Policy {
|
||||||
private List<Policy> policies;
|
private final List<Policy> policies;
|
||||||
public CompositeSequentialPolicy(Policy... policies) {
|
public CompositeSequentialPolicy(Policy... policies) {
|
||||||
this.policies = Arrays.asList(policies);
|
this.policies = Arrays.asList(policies);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue