HBASE-24295 [Chaos Monkey] abstract logging through the class hierarchy

Adds `protected abstract Logger getLogger()` to `Action` so that
implementation's names are logged when actions are performed.

Signed-off-by: stack <stack@apache.org>
Signed-off-by: Jan Hentschel <jan.hentschel@ultratendency.com>
This commit is contained in:
Nick Dimiduk 2020-04-30 15:00:57 -07:00 committed by Nick Dimiduk
parent e37aafcfc2
commit 204a1fad92
50 changed files with 484 additions and 347 deletions

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -50,12 +50,11 @@ import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** /**
* A (possibly mischievous) action that the ChaosMonkey can perform. * A (possibly mischievous) action that the ChaosMonkey can perform.
*/ */
public class Action { public abstract class Action {
public static final String KILL_MASTER_TIMEOUT_KEY = public static final String KILL_MASTER_TIMEOUT_KEY =
"hbase.chaosmonkey.action.killmastertimeout"; "hbase.chaosmonkey.action.killmastertimeout";
@ -76,8 +75,6 @@ public class Action {
public static final String START_NAMENODE_TIMEOUT_KEY = public static final String START_NAMENODE_TIMEOUT_KEY =
"hbase.chaosmonkey.action.startnamenodetimeout"; "hbase.chaosmonkey.action.startnamenodetimeout";
private static final Logger LOG = LoggerFactory.getLogger(Action.class);
protected static final long KILL_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT; protected static final long KILL_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
protected static final long START_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT; protected static final long START_MASTER_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
protected static final long KILL_RS_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT; protected static final long KILL_RS_TIMEOUT_DEFAULT = PolicyBasedChaosMonkey.TIMEOUT;
@ -107,12 +104,17 @@ public class Action {
protected long startNameNodeTimeout; protected long startNameNodeTimeout;
protected boolean skipMetaRS; protected boolean skipMetaRS;
/**
* Retrieve the instance's {@link Logger}, for use throughout the class hierarchy.
*/
protected abstract Logger getLogger();
public void init(ActionContext context) throws IOException { public void init(ActionContext context) throws IOException {
this.context = context; this.context = context;
cluster = context.getHBaseCluster(); cluster = context.getHBaseCluster();
initialStatus = cluster.getInitialClusterMetrics(); initialStatus = cluster.getInitialClusterMetrics();
Collection<ServerName> regionServers = initialStatus.getLiveServerMetrics().keySet(); Collection<ServerName> regionServers = initialStatus.getLiveServerMetrics().keySet();
initialServers = regionServers.toArray(new ServerName[regionServers.size()]); initialServers = regionServers.toArray(new ServerName[0]);
monkeyProps = context.getMonkeyProps(); monkeyProps = context.getMonkeyProps();
if (monkeyProps == null){ if (monkeyProps == null){
@ -150,12 +152,12 @@ public class Action {
protected ServerName[] getCurrentServers() throws IOException { protected ServerName[] getCurrentServers() throws IOException {
ClusterMetrics clusterStatus = cluster.getClusterMetrics(); ClusterMetrics clusterStatus = cluster.getClusterMetrics();
Collection<ServerName> regionServers = clusterStatus.getLiveServerMetrics().keySet(); Collection<ServerName> regionServers = clusterStatus.getLiveServerMetrics().keySet();
int count = regionServers == null ? 0 : regionServers.size(); int count = regionServers.size();
if (count <= 0) { if (count <= 0) {
return new ServerName [] {}; return new ServerName [] {};
} }
ServerName master = clusterStatus.getMasterName(); ServerName master = clusterStatus.getMasterName();
Set<ServerName> masters = new HashSet<ServerName>(); Set<ServerName> masters = new HashSet<>();
masters.add(master); masters.add(master);
masters.addAll(clusterStatus.getBackupMasterNames()); masters.addAll(clusterStatus.getBackupMasterNames());
ArrayList<ServerName> tmp = new ArrayList<>(count); ArrayList<ServerName> tmp = new ArrayList<>(count);
@ -167,110 +169,110 @@ public class Action {
tmp.remove(metaServer); tmp.remove(metaServer);
} }
return tmp.toArray(new ServerName[tmp.size()]); return tmp.toArray(new ServerName[0]);
} }
protected void killMaster(ServerName server) throws IOException { protected void killMaster(ServerName server) throws IOException {
LOG.info("Killing master {}", server); getLogger().info("Killing master {}", server);
cluster.killMaster(server); cluster.killMaster(server);
cluster.waitForMasterToStop(server, killMasterTimeout); cluster.waitForMasterToStop(server, killMasterTimeout);
LOG.info("Killed master " + server); getLogger().info("Killed master " + server);
} }
protected void startMaster(ServerName server) throws IOException { protected void startMaster(ServerName server) throws IOException {
LOG.info("Starting master {}", server.getHostname()); getLogger().info("Starting master {}", server.getHostname());
cluster.startMaster(server.getHostname(), server.getPort()); cluster.startMaster(server.getHostname(), server.getPort());
cluster.waitForActiveAndReadyMaster(startMasterTimeout); cluster.waitForActiveAndReadyMaster(startMasterTimeout);
LOG.info("Started master " + server.getHostname()); getLogger().info("Started master " + server.getHostname());
} }
protected void stopRs(ServerName server) throws IOException { protected void stopRs(ServerName server) throws IOException {
LOG.info("Stopping regionserver {}", server); getLogger().info("Stopping regionserver {}", server);
cluster.stopRegionServer(server); cluster.stopRegionServer(server);
cluster.waitForRegionServerToStop(server, killRsTimeout); cluster.waitForRegionServerToStop(server, killRsTimeout);
LOG.info("Stoppiong regionserver {}. Reported num of rs:{}", server, getLogger().info("Stoppiong regionserver {}. Reported num of rs:{}", server,
cluster.getClusterMetrics().getLiveServerMetrics().size()); cluster.getClusterMetrics().getLiveServerMetrics().size());
} }
protected void suspendRs(ServerName server) throws IOException { protected void suspendRs(ServerName server) throws IOException {
LOG.info("Suspending regionserver {}", server); getLogger().info("Suspending regionserver {}", server);
cluster.suspendRegionServer(server); cluster.suspendRegionServer(server);
if(!(cluster instanceof MiniHBaseCluster)){ if(!(cluster instanceof MiniHBaseCluster)){
cluster.waitForRegionServerToStop(server, killRsTimeout); cluster.waitForRegionServerToStop(server, killRsTimeout);
} }
LOG.info("Suspending regionserver {}. Reported num of rs:{}", server, getLogger().info("Suspending regionserver {}. Reported num of rs:{}", server,
cluster.getClusterMetrics().getLiveServerMetrics().size()); cluster.getClusterMetrics().getLiveServerMetrics().size());
} }
protected void resumeRs(ServerName server) throws IOException { protected void resumeRs(ServerName server) throws IOException {
LOG.info("Resuming regionserver {}", server); getLogger().info("Resuming regionserver {}", server);
cluster.resumeRegionServer(server); cluster.resumeRegionServer(server);
if(!(cluster instanceof MiniHBaseCluster)){ if(!(cluster instanceof MiniHBaseCluster)){
cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(), startRsTimeout); cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(), startRsTimeout);
} }
LOG.info("Resuming regionserver {}. Reported num of rs:{}", server, getLogger().info("Resuming regionserver {}. Reported num of rs:{}", server,
cluster.getClusterMetrics().getLiveServerMetrics().size()); cluster.getClusterMetrics().getLiveServerMetrics().size());
} }
protected void killRs(ServerName server) throws IOException { protected void killRs(ServerName server) throws IOException {
LOG.info("Killing regionserver {}", server); getLogger().info("Killing regionserver {}", server);
cluster.killRegionServer(server); cluster.killRegionServer(server);
cluster.waitForRegionServerToStop(server, killRsTimeout); cluster.waitForRegionServerToStop(server, killRsTimeout);
LOG.info("Killed regionserver {}. Reported num of rs:{}", server, getLogger().info("Killed regionserver {}. Reported num of rs:{}", server,
cluster.getClusterMetrics().getLiveServerMetrics().size()); cluster.getClusterMetrics().getLiveServerMetrics().size());
} }
protected void startRs(ServerName server) throws IOException { protected void startRs(ServerName server) throws IOException {
LOG.info("Starting regionserver {}", server.getAddress()); getLogger().info("Starting regionserver {}", server.getAddress());
cluster.startRegionServer(server.getHostname(), server.getPort()); cluster.startRegionServer(server.getHostname(), server.getPort());
cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(), startRsTimeout); cluster.waitForRegionServerToStart(server.getHostname(), server.getPort(), startRsTimeout);
LOG.info("Started regionserver {}. Reported num of rs:{}", server.getAddress(), getLogger().info("Started regionserver {}. Reported num of rs:{}", server.getAddress(),
cluster.getClusterMetrics().getLiveServerMetrics().size()); cluster.getClusterMetrics().getLiveServerMetrics().size());
} }
protected void killZKNode(ServerName server) throws IOException { protected void killZKNode(ServerName server) throws IOException {
LOG.info("Killing zookeeper node {}", server); getLogger().info("Killing zookeeper node {}", server);
cluster.killZkNode(server); cluster.killZkNode(server);
cluster.waitForZkNodeToStop(server, killZkNodeTimeout); cluster.waitForZkNodeToStop(server, killZkNodeTimeout);
LOG.info("Killed zookeeper node {}. Reported num of rs:{}", server, getLogger().info("Killed zookeeper node {}. Reported num of rs:{}", server,
cluster.getClusterMetrics().getLiveServerMetrics().size()); cluster.getClusterMetrics().getLiveServerMetrics().size());
} }
protected void startZKNode(ServerName server) throws IOException { protected void startZKNode(ServerName server) throws IOException {
LOG.info("Starting zookeeper node {}", server.getHostname()); getLogger().info("Starting zookeeper node {}", server.getHostname());
cluster.startZkNode(server.getHostname(), server.getPort()); cluster.startZkNode(server.getHostname(), server.getPort());
cluster.waitForZkNodeToStart(server, startZkNodeTimeout); cluster.waitForZkNodeToStart(server, startZkNodeTimeout);
LOG.info("Started zookeeper node {}", server); getLogger().info("Started zookeeper node {}", server);
} }
protected void killDataNode(ServerName server) throws IOException { protected void killDataNode(ServerName server) throws IOException {
LOG.info("Killing datanode {}", server); getLogger().info("Killing datanode {}", server);
cluster.killDataNode(server); cluster.killDataNode(server);
cluster.waitForDataNodeToStop(server, killDataNodeTimeout); cluster.waitForDataNodeToStop(server, killDataNodeTimeout);
LOG.info("Killed datanode {}. Reported num of rs:{}", server, getLogger().info("Killed datanode {}. Reported num of rs:{}", server,
cluster.getClusterMetrics().getLiveServerMetrics().size()); cluster.getClusterMetrics().getLiveServerMetrics().size());
} }
protected void startDataNode(ServerName server) throws IOException { protected void startDataNode(ServerName server) throws IOException {
LOG.info("Starting datanode {}", server.getHostname()); getLogger().info("Starting datanode {}", server.getHostname());
cluster.startDataNode(server); cluster.startDataNode(server);
cluster.waitForDataNodeToStart(server, startDataNodeTimeout); cluster.waitForDataNodeToStart(server, startDataNodeTimeout);
LOG.info("Started datanode {}", server); getLogger().info("Started datanode {}", server);
} }
protected void killNameNode(ServerName server) throws IOException { protected void killNameNode(ServerName server) throws IOException {
LOG.info("Killing namenode :-{}", server.getHostname()); getLogger().info("Killing namenode :-{}", server.getHostname());
cluster.killNameNode(server); cluster.killNameNode(server);
cluster.waitForNameNodeToStop(server, killNameNodeTimeout); cluster.waitForNameNodeToStop(server, killNameNodeTimeout);
LOG.info("Killed namenode:{}. Reported num of rs:{}", server, getLogger().info("Killed namenode:{}. Reported num of rs:{}", server,
cluster.getClusterMetrics().getLiveServerMetrics().size()); cluster.getClusterMetrics().getLiveServerMetrics().size());
} }
protected void startNameNode(ServerName server) throws IOException { protected void startNameNode(ServerName server) throws IOException {
LOG.info("Starting Namenode :-{}", server.getHostname()); getLogger().info("Starting Namenode :-{}", server.getHostname());
cluster.startNameNode(server); cluster.startNameNode(server);
cluster.waitForNameNodeToStart(server, startNameNodeTimeout); cluster.waitForNameNodeToStart(server, startNameNodeTimeout);
LOG.info("Started namenode:{}", server); getLogger().info("Started namenode:{}", server);
} }
protected void unbalanceRegions(ClusterMetrics clusterStatus, protected void unbalanceRegions(ClusterMetrics clusterStatus,
List<ServerName> fromServers, List<ServerName> toServers, List<ServerName> fromServers, List<ServerName> toServers,
@ -283,7 +285,7 @@ public class Action {
// Ugh. // Ugh.
List<byte[]> regions = new LinkedList<>(serverLoad.getRegionMetrics().keySet()); List<byte[]> regions = new LinkedList<>(serverLoad.getRegionMetrics().keySet());
int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size()); int victimRegionCount = (int)Math.ceil(fractionOfRegions * regions.size());
LOG.debug("Removing {} regions from {}", victimRegionCount, sn); getLogger().debug("Removing {} regions from {}", victimRegionCount, sn);
for (int i = 0; i < victimRegionCount; ++i) { for (int i = 0; i < victimRegionCount; ++i) {
int victimIx = RandomUtils.nextInt(0, regions.size()); int victimIx = RandomUtils.nextInt(0, regions.size());
String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx)); String regionId = HRegionInfo.encodeRegionName(regions.remove(victimIx));
@ -291,8 +293,8 @@ public class Action {
} }
} }
LOG.info("Moving {} regions from {} servers to {} different servers", victimRegions.size(), getLogger().info("Moving {} regions from {} servers to {} different servers",
fromServers.size(), toServers.size()); victimRegions.size(), fromServers.size(), toServers.size());
Admin admin = this.context.getHBaseIntegrationTestingUtility().getAdmin(); Admin admin = this.context.getHBaseIntegrationTestingUtility().getAdmin();
for (byte[] victimRegion : victimRegions) { for (byte[] victimRegion : victimRegions) {
// Don't keep moving regions if we're // Don't keep moving regions if we're
@ -311,10 +313,10 @@ public class Action {
try { try {
result = admin.balance(); result = admin.balance();
} catch (Exception e) { } catch (Exception e) {
LOG.warn("Got exception while doing balance ", e); getLogger().warn("Got exception while doing balance ", e);
} }
if (!result) { if (!result) {
LOG.error("Balancer didn't succeed"); getLogger().error("Balancer didn't succeed");
} }
} }
@ -323,7 +325,7 @@ public class Action {
try { try {
admin.balancerSwitch(onOrOff, synchronous); admin.balancerSwitch(onOrOff, synchronous);
} catch (Exception e) { } catch (Exception e) {
LOG.warn("Got exception while switching balance ", e); getLogger().warn("Got exception while switching balance ", e);
} }
} }
@ -338,7 +340,8 @@ public class Action {
* @param transform the modification to perform. Callers will have the * @param transform the modification to perform. Callers will have the
* column name as a string and a column family builder available to them * column name as a string and a column family builder available to them
*/ */
protected void modifyAllTableColumns(TableName tableName, BiConsumer<String, ColumnFamilyDescriptorBuilder> transform) throws IOException { protected void modifyAllTableColumns(TableName tableName,
BiConsumer<String, ColumnFamilyDescriptorBuilder> transform) throws IOException {
HBaseTestingUtility util = this.context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = this.context.getHBaseIntegrationTestingUtility();
Admin admin = util.getAdmin(); Admin admin = util.getAdmin();
@ -369,7 +372,8 @@ public class Action {
* @param tableName the table to modify * @param tableName the table to modify
* @param transform the modification to perform on each column family descriptor builder * @param transform the modification to perform on each column family descriptor builder
*/ */
protected void modifyAllTableColumns(TableName tableName, Consumer<ColumnFamilyDescriptorBuilder> transform) throws IOException { protected void modifyAllTableColumns(TableName tableName,
Consumer<ColumnFamilyDescriptorBuilder> transform) throws IOException {
modifyAllTableColumns(tableName, (name, cfd) -> transform.accept(cfd)); modifyAllTableColumns(tableName, (name, cfd) -> transform.accept(cfd));
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -29,13 +28,13 @@ import org.slf4j.LoggerFactory;
* Action that adds high cpu load to a random regionserver for a given duration * Action that adds high cpu load to a random regionserver for a given duration
*/ */
public class AddCPULoadAction extends SudoCommandAction { public class AddCPULoadAction extends SudoCommandAction {
protected static final Logger LOG = LoggerFactory.getLogger(AddCPULoadAction.class); private static final Logger LOG = LoggerFactory.getLogger(AddCPULoadAction.class);
private static final String CPU_LOAD_COMMAND = private static final String CPU_LOAD_COMMAND =
"seq 1 %s | xargs -I{} -n 1 -P %s timeout %s dd if=/dev/urandom of=/dev/null bs=1M " + "seq 1 %s | xargs -I{} -n 1 -P %s timeout %s dd if=/dev/urandom of=/dev/null bs=1M " +
"iflag=fullblock"; "iflag=fullblock";
private final long duration; private final long duration;
private long processes; private final long processes;
/** /**
* Add high load to cpu * Add high load to cpu
@ -49,18 +48,22 @@ public class AddCPULoadAction extends SudoCommandAction {
this.processes = processes; this.processes = processes;
} }
@Override protected Logger getLogger() {
return LOG;
}
protected void localPerform() throws IOException { protected void localPerform() throws IOException {
LOG.info("Starting to execute AddCPULoadAction"); getLogger().info("Starting to execute AddCPULoadAction");
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
String hostname = server.getHostname(); String hostname = server.getHostname();
try { try {
clusterManager.execSudo(hostname, timeout, getCommand()); clusterManager.execSudo(hostname, timeout, getCommand());
} catch (IOException ex){ } catch (IOException ex){
//This will always happen. We use timeout to kill a continously running process //This will always happen. We use timeout to kill a continuously running process
//after the duration expires //after the duration expires
} }
LOG.info("Finished to execute AddCPULoadAction"); getLogger().info("Finished to execute AddCPULoadAction");
} }
private String getCommand(){ private String getCommand(){

View File

@ -42,6 +42,10 @@ public class AddColumnAction extends Action {
this.tableName = tableName; this.tableName = tableName;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void init(ActionContext context) throws IOException { public void init(ActionContext context) throws IOException {
super.init(context); super.init(context);
@ -63,7 +67,7 @@ public class AddColumnAction extends Action {
return; return;
} }
LOG.debug("Performing action: Adding " + columnDescriptor + " to " + tableName); getLogger().debug("Performing action: Adding " + columnDescriptor + " to " + tableName);
TableDescriptor modifiedTable = TableDescriptorBuilder.newBuilder(tableDescriptor) TableDescriptor modifiedTable = TableDescriptorBuilder.newBuilder(tableDescriptor)
.setColumnFamily(columnDescriptor).build(); .setColumnFamily(columnDescriptor).build();

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.chaos.actions;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -32,17 +31,20 @@ import org.slf4j.LoggerFactory;
*/ */
public class BatchRestartRsAction extends RestartActionBaseAction { public class BatchRestartRsAction extends RestartActionBaseAction {
float ratio; //ratio of regionservers to restart float ratio; //ratio of regionservers to restart
private static final Logger LOG = private static final Logger LOG = LoggerFactory.getLogger(BatchRestartRsAction.class);
LoggerFactory.getLogger(BatchRestartRsAction.class);
public BatchRestartRsAction(long sleepTime, float ratio) { public BatchRestartRsAction(long sleepTime, float ratio) {
super(sleepTime); super(sleepTime);
this.ratio = ratio; this.ratio = ratio;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info(String.format("Performing action: Batch restarting %d%% of region servers", getLogger().info(String.format("Performing action: Batch restarting %d%% of region servers",
(int)(ratio * 100))); (int)(ratio * 100)));
List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(), List<ServerName> selectedServers = PolicyBasedChaosMonkey.selectRandomItems(getCurrentServers(),
ratio); ratio);
@ -55,7 +57,7 @@ public class BatchRestartRsAction extends RestartActionBaseAction {
if (context.isStopping()) { if (context.isStopping()) {
break; break;
} }
LOG.info("Killing region server:" + server); getLogger().info("Killing region server:" + server);
cluster.killRegionServer(server); cluster.killRegionServer(server);
killedServers.add(server); killedServers.add(server);
} }
@ -64,13 +66,13 @@ public class BatchRestartRsAction extends RestartActionBaseAction {
cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT); cluster.waitForRegionServerToStop(server, PolicyBasedChaosMonkey.TIMEOUT);
} }
LOG.info("Killed " + killedServers.size() + " region servers. Reported num of rs:" getLogger().info("Killed " + killedServers.size() + " region servers. Reported num of rs:"
+ cluster.getClusterMetrics().getLiveServerMetrics().size()); + cluster.getClusterMetrics().getLiveServerMetrics().size());
sleep(sleepTime); sleep(sleepTime);
for (ServerName server : killedServers) { for (ServerName server : killedServers) {
LOG.info("Starting region server:" + server.getHostname()); getLogger().info("Starting region server:" + server.getHostname());
cluster.startRegionServer(server.getHostname(), server.getPort()); cluster.startRegionServer(server.getHostname(), server.getPort());
} }
@ -79,7 +81,7 @@ public class BatchRestartRsAction extends RestartActionBaseAction {
server.getPort(), server.getPort(),
PolicyBasedChaosMonkey.TIMEOUT); PolicyBasedChaosMonkey.TIMEOUT);
} }
LOG.info("Started " + killedServers.size() +" region servers. Reported num of rs:" getLogger().info("Started " + killedServers.size() +" region servers. Reported num of rs:"
+ cluster.getClusterMetrics().getLiveServerMetrics().size()); + cluster.getClusterMetrics().getLiveServerMetrics().size());
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.util.Random; import java.util.Random;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.regionserver.BloomType; import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.BloomFilterUtil; import org.apache.hadoop.hbase.util.BloomFilterUtil;
@ -44,17 +43,21 @@ public class ChangeBloomFilterAction extends Action {
this.tableName = tableName; this.tableName = tableName;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
final Random random = new Random(); final Random random = new Random();
final BloomType[] bloomArray = BloomType.values(); final BloomType[] bloomArray = BloomType.values();
final int bloomArraySize = bloomArray.length; final int bloomArraySize = bloomArray.length;
LOG.info("Performing action: Change bloom filter on all columns of table " + tableName); getLogger().info("Performing action: Change bloom filter on all columns of table " + tableName);
modifyAllTableColumns(tableName, (columnName, columnBuilder) -> { modifyAllTableColumns(tableName, (columnName, columnBuilder) -> {
BloomType bloomType = bloomArray[random.nextInt(bloomArraySize)]; BloomType bloomType = bloomArray[random.nextInt(bloomArraySize)];
LOG.debug("Performing action: About to set bloom filter type to " getLogger().debug("Performing action: About to set bloom filter type to "
+ bloomType + " on column " + columnName + " of table " + tableName); + bloomType + " on column " + columnName + " of table " + tableName);
columnBuilder.setBloomFilterType(bloomType); columnBuilder.setBloomFilterType(bloomType);
if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) { if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) {
@ -62,6 +65,6 @@ public class ChangeBloomFilterAction extends Action {
} }
}); });
LOG.debug("Performing action: Just set bloom filter types on table " + tableName); getLogger().debug("Performing action: Just set bloom filter types on table " + tableName);
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import java.util.Random; import java.util.Random;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.io.compress.Compressor; import org.apache.hadoop.io.compress.Compressor;
@ -40,6 +39,10 @@ public class ChangeCompressionAction extends Action {
this.random = new Random(); this.random = new Random();
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws IOException { public void perform() throws IOException {
// Possible compression algorithms. If an algorithm is not supported, // Possible compression algorithms. If an algorithm is not supported,
@ -63,13 +66,13 @@ public class ChangeCompressionAction extends Action {
algo.returnCompressor(c); algo.returnCompressor(c);
break; break;
} catch (Throwable t) { } catch (Throwable t) {
LOG.info("Performing action: Changing compression algorithms to " + algo + getLogger().info("Performing action: Changing compression algorithms to " + algo +
" is not supported, pick another one"); " is not supported, pick another one");
} }
} while (true); } while (true);
final Algorithm chosenAlgo = algo; // for use in lambda final Algorithm chosenAlgo = algo; // for use in lambda
LOG.debug("Performing action: Changing compression algorithms on " getLogger().debug("Performing action: Changing compression algorithms on "
+ tableName.getNameAsString() + " to " + chosenAlgo); + tableName.getNameAsString() + " to " + chosenAlgo);
modifyAllTableColumns(tableName, columnFamilyDescriptorBuilder -> { modifyAllTableColumns(tableName, columnFamilyDescriptorBuilder -> {
if (random.nextBoolean()) { if (random.nextBoolean()) {

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import java.util.Random; import java.util.Random;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -39,9 +38,13 @@ public class ChangeEncodingAction extends Action {
this.random = new Random(); this.random = new Random();
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws IOException { public void perform() throws IOException {
LOG.debug("Performing action: Changing encodings on " + tableName); getLogger().debug("Performing action: Changing encodings on " + tableName);
// possible DataBlockEncoding id's // possible DataBlockEncoding id's
final int[] possibleIds = {0, 2, 3, 4, 6}; final int[] possibleIds = {0, 2, 3, 4, 6};
@ -49,7 +52,7 @@ public class ChangeEncodingAction extends Action {
short id = (short) possibleIds[random.nextInt(possibleIds.length)]; short id = (short) possibleIds[random.nextInt(possibleIds.length)];
DataBlockEncoding encoding = DataBlockEncoding.getEncodingById(id); DataBlockEncoding encoding = DataBlockEncoding.getEncodingById(id);
columnBuilder.setDataBlockEncoding(encoding); columnBuilder.setDataBlockEncoding(encoding);
LOG.debug("Set encoding of column family " + columnName + " to: " + encoding); getLogger().debug("Set encoding of column family " + columnName + " to: " + encoding);
}); });
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -18,7 +18,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.util.Random; import java.util.Random;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
@ -46,18 +45,21 @@ public class ChangeSplitPolicyAction extends Action {
this.random = new Random(); this.random = new Random();
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
Admin admin = util.getAdmin(); Admin admin = util.getAdmin();
LOG.info("Performing action: Change split policy of table " + tableName); getLogger().info("Performing action: Change split policy of table " + tableName);
TableDescriptor tableDescriptor = admin.getDescriptor(tableName); TableDescriptor tableDescriptor = admin.getDescriptor(tableName);
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableDescriptor); TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableDescriptor);
String chosenPolicy = possiblePolicies[random.nextInt(possiblePolicies.length)]; String chosenPolicy = possiblePolicies[random.nextInt(possiblePolicies.length)];
builder.setRegionSplitPolicyClassName(chosenPolicy); builder.setRegionSplitPolicyClassName(chosenPolicy);
LOG.info("Changing " + tableName + " split policy to " + chosenPolicy); getLogger().info("Changing " + tableName + " split policy to " + chosenPolicy);
admin.modifyTable(builder.build()); admin.modifyTable(builder.build());
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import java.util.Random; import java.util.Random;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -31,24 +30,28 @@ import org.slf4j.LoggerFactory;
* Always keeps at least 1 as the number of versions. * Always keeps at least 1 as the number of versions.
*/ */
public class ChangeVersionsAction extends Action { public class ChangeVersionsAction extends Action {
private final TableName tableName;
private static final Logger LOG = LoggerFactory.getLogger(ChangeVersionsAction.class); private static final Logger LOG = LoggerFactory.getLogger(ChangeVersionsAction.class);
private final TableName tableName;
private Random random; private final Random random;
public ChangeVersionsAction(TableName tableName) { public ChangeVersionsAction(TableName tableName) {
this.tableName = tableName; this.tableName = tableName;
this.random = new Random(); this.random = new Random();
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws IOException { public void perform() throws IOException {
final int versions = random.nextInt(3) + 1; final int versions = random.nextInt(3) + 1;
LOG.debug("Performing action: Changing versions on " + tableName + " to " + versions); getLogger().debug("Performing action: Changing versions on " + tableName + " to " + versions);
modifyAllTableColumns(tableName, columnBuilder -> { modifyAllTableColumns(tableName, columnBuilder -> {
columnBuilder.setMinVersions(versions).setMaxVersions(versions); columnBuilder.setMinVersions(versions).setMaxVersions(versions);
}); });
LOG.debug("Performing action: Just changed versions on " + tableName); getLogger().debug("Performing action: Just changed versions on " + tableName);
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -46,6 +46,10 @@ public class CompactMobAction extends Action {
this.sleepTime = sleepTime; this.sleepTime = sleepTime;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
@ -57,7 +61,7 @@ public class CompactMobAction extends Action {
return; return;
} }
LOG.info("Performing action: Compact mob of table " + tableName + ", major=" + major); getLogger().info("Performing action: Compact mob of table " + tableName + ", major=" + major);
try { try {
if (major) { if (major) {
admin.majorCompact(tableName, CompactType.MOB); admin.majorCompact(tableName, CompactType.MOB);
@ -65,7 +69,7 @@ public class CompactMobAction extends Action {
admin.compact(tableName, CompactType.MOB); admin.compact(tableName, CompactType.MOB);
} }
} catch (Exception ex) { } catch (Exception ex) {
LOG.warn("Mob Compaction failed, might be caused by other chaos: " + ex.getMessage()); getLogger().warn("Mob Compaction failed, might be caused by other chaos: " + ex.getMessage());
} }
if (sleepTime > 0) { if (sleepTime > 0) {
Thread.sleep(sleepTime); Thread.sleep(sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.util.List; import java.util.List;
import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
@ -33,11 +32,11 @@ import org.slf4j.LoggerFactory;
* Region that queues a compaction of a random region from the table. * Region that queues a compaction of a random region from the table.
*/ */
public class CompactRandomRegionOfTableAction extends Action { public class CompactRandomRegionOfTableAction extends Action {
private static final Logger LOG = LoggerFactory.getLogger(CompactRandomRegionOfTableAction.class);
private final int majorRatio; private final int majorRatio;
private final long sleepTime; private final long sleepTime;
private final TableName tableName; private final TableName tableName;
private static final Logger LOG =
LoggerFactory.getLogger(CompactRandomRegionOfTableAction.class);
public CompactRandomRegionOfTableAction( public CompactRandomRegionOfTableAction(
TableName tableName, float majorRatio) { TableName tableName, float majorRatio) {
@ -51,33 +50,37 @@ public class CompactRandomRegionOfTableAction extends Action {
this.tableName = tableName; this.tableName = tableName;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
Admin admin = util.getAdmin(); Admin admin = util.getAdmin();
boolean major = RandomUtils.nextInt(0, 100) < majorRatio; boolean major = RandomUtils.nextInt(0, 100) < majorRatio;
LOG.info("Performing action: Compact random region of table " getLogger().info("Performing action: Compact random region of table "
+ tableName + ", major=" + major); + tableName + ", major=" + major);
List<RegionInfo> regions = admin.getRegions(tableName); List<RegionInfo> regions = admin.getRegions(tableName);
if (regions == null || regions.isEmpty()) { if (regions == null || regions.isEmpty()) {
LOG.info("Table " + tableName + " doesn't have regions to compact"); getLogger().info("Table " + tableName + " doesn't have regions to compact");
return; return;
} }
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem( RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
regions.toArray(new RegionInfo[regions.size()])); regions.toArray(new RegionInfo[0]));
try { try {
if (major) { if (major) {
LOG.debug("Major compacting region " + region.getRegionNameAsString()); getLogger().debug("Major compacting region " + region.getRegionNameAsString());
admin.majorCompactRegion(region.getRegionName()); admin.majorCompactRegion(region.getRegionName());
} else { } else {
LOG.debug("Compacting region " + region.getRegionNameAsString()); getLogger().debug("Compacting region " + region.getRegionNameAsString());
admin.compactRegion(region.getRegionName()); admin.compactRegion(region.getRegionName());
} }
} catch (Exception ex) { } catch (Exception ex) {
LOG.warn("Compaction failed, might be caused by other chaos: " + ex.getMessage()); getLogger().warn("Compaction failed, might be caused by other chaos: " + ex.getMessage());
} }
if (sleepTime > 0) { if (sleepTime > 0) {
Thread.sleep(sleepTime); Thread.sleep(sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -29,10 +29,11 @@ import org.slf4j.LoggerFactory;
* Action that queues a table compaction. * Action that queues a table compaction.
*/ */
public class CompactTableAction extends Action { public class CompactTableAction extends Action {
private static final Logger LOG = LoggerFactory.getLogger(CompactTableAction.class);
private final TableName tableName; private final TableName tableName;
private final int majorRatio; private final int majorRatio;
private final long sleepTime; private final long sleepTime;
private static final Logger LOG = LoggerFactory.getLogger(CompactTableAction.class);
public CompactTableAction(TableName tableName, float majorRatio) { public CompactTableAction(TableName tableName, float majorRatio) {
this(-1, tableName, majorRatio); this(-1, tableName, majorRatio);
@ -45,13 +46,17 @@ public class CompactTableAction extends Action {
this.sleepTime = sleepTime; this.sleepTime = sleepTime;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
Admin admin = util.getAdmin(); Admin admin = util.getAdmin();
boolean major = RandomUtils.nextInt(0, 100) < majorRatio; boolean major = RandomUtils.nextInt(0, 100) < majorRatio;
LOG.info("Performing action: Compact table " + tableName + ", major=" + major); getLogger().info("Performing action: Compact table " + tableName + ", major=" + major);
try { try {
if (major) { if (major) {
admin.majorCompact(tableName); admin.majorCompact(tableName);
@ -59,7 +64,7 @@ public class CompactTableAction extends Action {
admin.compact(tableName); admin.compact(tableName);
} }
} catch (Exception ex) { } catch (Exception ex) {
LOG.warn("Compaction failed, might be caused by other chaos: " + ex.getMessage()); getLogger().warn("Compaction failed, might be caused by other chaos: " + ex.getMessage());
} }
if (sleepTime > 0) { if (sleepTime > 0) {
Thread.sleep(sleepTime); Thread.sleep(sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -34,7 +34,7 @@ import org.slf4j.LoggerFactory;
*/ */
public class CorruptDataFilesAction extends Action { public class CorruptDataFilesAction extends Action {
private static final Logger LOG = LoggerFactory.getLogger(CorruptDataFilesAction.class); private static final Logger LOG = LoggerFactory.getLogger(CorruptDataFilesAction.class);
private float chance; private final float chance;
/** /**
* Corrupts HFiles with a certain chance * Corrupts HFiles with a certain chance
@ -44,9 +44,13 @@ public class CorruptDataFilesAction extends Action {
this.chance = chance * 100; this.chance = chance * 100;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info("Start corrupting data files"); getLogger().info("Start corrupting data files");
FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf()); FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
Path rootDir = CommonFSUtils.getRootDir(getConf()); Path rootDir = CommonFSUtils.getRootDir(getConf());
@ -67,9 +71,9 @@ public class CorruptDataFilesAction extends Action {
} finally { } finally {
out.close(); out.close();
} }
LOG.info("Corrupting {}", status.getPath()); getLogger().info("Corrupting {}", status.getPath());
} }
LOG.info("Done corrupting data files"); getLogger().info("Done corrupting data files");
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -31,8 +30,8 @@ import org.slf4j.LoggerFactory;
*/ */
public class CorruptPacketsCommandAction extends TCCommandAction { public class CorruptPacketsCommandAction extends TCCommandAction {
private static final Logger LOG = LoggerFactory.getLogger(CorruptPacketsCommandAction.class); private static final Logger LOG = LoggerFactory.getLogger(CorruptPacketsCommandAction.class);
private float ratio; private final float ratio;
private long duration; private final long duration;
/** /**
* Corrupt network packets on a random regionserver. * Corrupt network packets on a random regionserver.
@ -48,8 +47,12 @@ public class CorruptPacketsCommandAction extends TCCommandAction {
this.duration = duration; this.duration = duration;
} }
@Override protected Logger getLogger() {
return LOG;
}
protected void localPerform() throws IOException { protected void localPerform() throws IOException {
LOG.info("Starting to execute CorruptPacketsCommandAction"); getLogger().info("Starting to execute CorruptPacketsCommandAction");
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
String hostname = server.getHostname(); String hostname = server.getHostname();
@ -57,12 +60,12 @@ public class CorruptPacketsCommandAction extends TCCommandAction {
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD)); clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
Thread.sleep(duration); Thread.sleep(duration);
} catch (InterruptedException e) { } catch (InterruptedException e) {
LOG.debug("Failed to run the command for the full duration", e); getLogger().debug("Failed to run the command for the full duration", e);
} finally { } finally {
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE)); clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
} }
LOG.info("Finished to execute CorruptPacketsCommandAction"); getLogger().info("Finished to execute CorruptPacketsCommandAction");
} }
private String getCommand(String operation){ private String getCommand(String operation){

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -20,16 +20,18 @@ package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import java.util.Random; import java.util.Random;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class DecreaseMaxHFileSizeAction extends Action { public class DecreaseMaxHFileSizeAction extends Action {
private static final Logger LOG = LoggerFactory.getLogger(DecreaseMaxHFileSizeAction.class);
private static final long minFileSize = 1 * 1024 * 1024 * 1024L; private static final long minFileSize = 1024 * 1024 * 1024L;
private final long sleepTime; private final long sleepTime;
private final TableName tableName; private final TableName tableName;
@ -42,6 +44,10 @@ public class DecreaseMaxHFileSizeAction extends Action {
this.random = new Random(); this.random = new Random();
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void init(ActionContext context) throws IOException { public void init(ActionContext context) throws IOException {
super.init(context); super.init(context);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -30,8 +29,8 @@ import org.slf4j.LoggerFactory;
*/ */
public class DelayPacketsCommandAction extends TCCommandAction { public class DelayPacketsCommandAction extends TCCommandAction {
private static final Logger LOG = LoggerFactory.getLogger(DelayPacketsCommandAction.class); private static final Logger LOG = LoggerFactory.getLogger(DelayPacketsCommandAction.class);
private long delay; private final long delay;
private long duration; private final long duration;
/** /**
* Adds latency to communication on a random region server * Adds latency to communication on a random region server
@ -47,8 +46,12 @@ public class DelayPacketsCommandAction extends TCCommandAction {
this.duration = duration; this.duration = duration;
} }
@Override protected Logger getLogger() {
return LOG;
}
protected void localPerform() throws IOException { protected void localPerform() throws IOException {
LOG.info("Starting to execute DelayPacketsCommandAction"); getLogger().info("Starting to execute DelayPacketsCommandAction");
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
String hostname = server.getHostname(); String hostname = server.getHostname();
@ -56,12 +59,12 @@ public class DelayPacketsCommandAction extends TCCommandAction {
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD)); clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
Thread.sleep(duration); Thread.sleep(duration);
} catch (InterruptedException e) { } catch (InterruptedException e) {
LOG.debug("Failed to run the command for the full duration", e); getLogger().debug("Failed to run the command for the full duration", e);
} finally { } finally {
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE)); clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
} }
LOG.info("Finished to execute DelayPacketsCommandAction"); getLogger().info("Finished to execute DelayPacketsCommandAction");
} }
private String getCommand(String operation){ private String getCommand(String operation){

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -33,7 +33,7 @@ import org.slf4j.LoggerFactory;
*/ */
public class DeleteDataFilesAction extends Action { public class DeleteDataFilesAction extends Action {
private static final Logger LOG = LoggerFactory.getLogger(DeleteDataFilesAction.class); private static final Logger LOG = LoggerFactory.getLogger(DeleteDataFilesAction.class);
private float chance; private final float chance;
/** /**
* Delets HFiles with a certain chance * Delets HFiles with a certain chance
@ -43,9 +43,13 @@ public class DeleteDataFilesAction extends Action {
this.chance = chance * 100; this.chance = chance * 100;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info("Start deleting data files"); getLogger().info("Start deleting data files");
FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf()); FileSystem fs = CommonFSUtils.getRootDirFileSystem(getConf());
Path rootDir = CommonFSUtils.getRootDir(getConf()); Path rootDir = CommonFSUtils.getRootDir(getConf());
Path defaultDir = rootDir.suffix("/data/default"); Path defaultDir = rootDir.suffix("/data/default");
@ -58,9 +62,9 @@ public class DeleteDataFilesAction extends Action {
if(RandomUtils.nextFloat(0, 100) > chance){ if(RandomUtils.nextFloat(0, 100) > chance){
continue; continue;
} }
fs.delete(status.getPath()); fs.delete(status.getPath(), true);
LOG.info("Deleting {}", status.getPath()); getLogger().info("Deleting {}", status.getPath());
} }
LOG.info("Done deleting data files"); getLogger().info("Done deleting data files");
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -27,8 +26,11 @@ import org.slf4j.LoggerFactory;
* Action to dump the cluster status. * Action to dump the cluster status.
*/ */
public class DumpClusterStatusAction extends Action { public class DumpClusterStatusAction extends Action {
private static final Logger LOG = private static final Logger LOG = LoggerFactory.getLogger(DumpClusterStatusAction.class);
LoggerFactory.getLogger(DumpClusterStatusAction.class);
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void init(ActionContext context) throws IOException { public void init(ActionContext context) throws IOException {
@ -37,7 +39,7 @@ public class DumpClusterStatusAction extends Action {
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.debug("Performing action: Dump cluster status"); getLogger().debug("Performing action: Dump cluster status");
LOG.info("Cluster status\n" + cluster.getClusterMetrics()); getLogger().info("Cluster status\n" + cluster.getClusterMetrics());
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -31,8 +30,8 @@ import org.slf4j.LoggerFactory;
*/ */
public class DuplicatePacketsCommandAction extends TCCommandAction { public class DuplicatePacketsCommandAction extends TCCommandAction {
private static final Logger LOG = LoggerFactory.getLogger(DuplicatePacketsCommandAction.class); private static final Logger LOG = LoggerFactory.getLogger(DuplicatePacketsCommandAction.class);
private float ratio; private final float ratio;
private long duration; private final long duration;
/** /**
* Duplicate network packets on a random regionserver. * Duplicate network packets on a random regionserver.
@ -48,8 +47,12 @@ public class DuplicatePacketsCommandAction extends TCCommandAction {
this.duration = duration; this.duration = duration;
} }
@Override protected Logger getLogger() {
return LOG;
}
protected void localPerform() throws IOException { protected void localPerform() throws IOException {
LOG.info("Starting to execute DuplicatePacketsCommandAction"); getLogger().info("Starting to execute DuplicatePacketsCommandAction");
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
String hostname = server.getHostname(); String hostname = server.getHostname();
@ -57,12 +60,12 @@ public class DuplicatePacketsCommandAction extends TCCommandAction {
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD)); clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
Thread.sleep(duration); Thread.sleep(duration);
} catch (InterruptedException e) { } catch (InterruptedException e) {
LOG.debug("Failed to run the command for the full duration", e); getLogger().debug("Failed to run the command for the full duration", e);
} finally { } finally {
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE)); clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
} }
LOG.info("Finished to execute DuplicatePacketsCommandAction"); getLogger().info("Finished to execute DuplicatePacketsCommandAction");
} }
private String getCommand(String operation){ private String getCommand(String operation){

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -31,9 +30,9 @@ import org.slf4j.LoggerFactory;
*/ */
public class FillDiskCommandAction extends SudoCommandAction { public class FillDiskCommandAction extends SudoCommandAction {
private static final Logger LOG = LoggerFactory.getLogger(FillDiskCommandAction.class); private static final Logger LOG = LoggerFactory.getLogger(FillDiskCommandAction.class);
private long size; private final long size;
private long duration; private final long duration;
private String path; private final String path;
/** /**
* Fill the disk on a random regionserver. * Fill the disk on a random regionserver.
@ -52,20 +51,24 @@ public class FillDiskCommandAction extends SudoCommandAction {
this.path = path; this.path = path;
} }
@Override protected Logger getLogger() {
return LOG;
}
protected void localPerform() throws IOException { protected void localPerform() throws IOException {
LOG.info("Starting to execute FillDiskCommandAction"); getLogger().info("Starting to execute FillDiskCommandAction");
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
String hostname = server.getHostname(); String hostname = server.getHostname();
try { try {
clusterManager.execSudo(hostname, duration, getFillCommand()); clusterManager.execSudo(hostname, duration, getFillCommand());
} catch (IOException ex) { } catch (IOException ex) {
LOG.info("Potential timeout. We try to stop the dd process on target machine"); getLogger().info("Potential timeout. We try to stop the dd process on target machine");
clusterManager.execSudoWithRetries(hostname, timeout, getStopCommand()); clusterManager.execSudoWithRetries(hostname, timeout, getStopCommand());
throw ex; throw ex;
} finally { } finally {
clusterManager.execSudoWithRetries(hostname, timeout, getClearCommand()); clusterManager.execSudoWithRetries(hostname, timeout, getClearCommand());
LOG.info("Finished to execute FillDiskCommandAction"); getLogger().info("Finished to execute FillDiskCommandAction");
} }
} }
@ -82,6 +85,6 @@ public class FillDiskCommandAction extends SudoCommandAction {
} }
private String getStopCommand() { private String getStopCommand() {
return String.format("killall dd"); return "killall dd";
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -45,25 +45,29 @@ public class FlushRandomRegionOfTableAction extends Action {
this.tableName = tableName; this.tableName = tableName;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
Admin admin = util.getAdmin(); Admin admin = util.getAdmin();
LOG.info("Performing action: Flush random region of table " + tableName); getLogger().info("Performing action: Flush random region of table " + tableName);
List<RegionInfo> regions = admin.getRegions(tableName); List<RegionInfo> regions = admin.getRegions(tableName);
if (regions == null || regions.isEmpty()) { if (regions == null || regions.isEmpty()) {
LOG.info("Table " + tableName + " doesn't have regions to flush"); getLogger().info("Table " + tableName + " doesn't have regions to flush");
return; return;
} }
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem( RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
regions.toArray(new RegionInfo[regions.size()])); regions.toArray(new RegionInfo[0]));
LOG.debug("Flushing region " + region.getRegionNameAsString()); getLogger().debug("Flushing region " + region.getRegionNameAsString());
try { try {
admin.flushRegion(region.getRegionName()); admin.flushRegion(region.getRegionName());
} catch (Exception ex) { } catch (Exception ex) {
LOG.warn("Flush failed, might be caused by other chaos: " + ex.getMessage()); getLogger().warn("Flush failed, might be caused by other chaos: " + ex.getMessage());
} }
if (sleepTime > 0) { if (sleepTime > 0) {
Thread.sleep(sleepTime); Thread.sleep(sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -28,8 +28,7 @@ import org.slf4j.LoggerFactory;
* Action that tries to flush a table. * Action that tries to flush a table.
*/ */
public class FlushTableAction extends Action { public class FlushTableAction extends Action {
private static final Logger LOG = private static final Logger LOG = LoggerFactory.getLogger(FlushTableAction.class);
LoggerFactory.getLogger(FlushTableAction.class);
private final long sleepTime; private final long sleepTime;
private final TableName tableName; private final TableName tableName;
@ -42,6 +41,10 @@ public class FlushTableAction extends Action {
this.tableName = tableName; this.tableName = tableName;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
@ -52,11 +55,11 @@ public class FlushTableAction extends Action {
return; return;
} }
LOG.info("Performing action: Flush table " + tableName); getLogger().info("Performing action: Flush table " + tableName);
try { try {
admin.flush(tableName); admin.flush(tableName);
} catch (Exception ex) { } catch (Exception ex) {
LOG.warn("Flush failed, might be caused by other chaos: " + ex.getMessage()); getLogger().warn("Flush failed, might be caused by other chaos: " + ex.getMessage());
} }
if (sleepTime > 0) { if (sleepTime > 0) {
Thread.sleep(sleepTime); Thread.sleep(sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -25,8 +25,11 @@ import org.slf4j.LoggerFactory;
* Action that tries to force a balancer run. * Action that tries to force a balancer run.
*/ */
public class ForceBalancerAction extends Action { public class ForceBalancerAction extends Action {
private static final Logger LOG = private static final Logger LOG = LoggerFactory.getLogger(ForceBalancerAction.class);
LoggerFactory.getLogger(ForceBalancerAction.class);
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
@ -34,7 +37,7 @@ public class ForceBalancerAction extends Action {
if (context.isStopping()) { if (context.isStopping()) {
return; return;
} }
LOG.info("Balancing regions"); getLogger().info("Balancing regions");
forceBalancer(); forceBalancer();
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -39,35 +39,38 @@ public class GracefulRollingRestartRsAction extends RestartActionBaseAction {
super(sleepTime); super(sleepTime);
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info("Performing action: Rolling restarting non-master region servers"); getLogger().info("Performing action: Rolling restarting non-master region servers");
List<ServerName> selectedServers = selectServers(); List<ServerName> selectedServers = selectServers();
LOG.info("Disabling balancer to make unloading possible"); getLogger().info("Disabling balancer to make unloading possible");
setBalancer(false, true); setBalancer(false, true);
for (ServerName server : selectedServers) { for (ServerName server : selectedServers) {
String rsName = server.getAddress().toString(); String rsName = server.getAddress().toString();
try (RegionMover rm = try (RegionMover rm =
new RegionMover.RegionMoverBuilder(rsName, getConf()).ack(true).build()) { new RegionMover.RegionMoverBuilder(rsName, getConf()).ack(true).build()) {
LOG.info("Unloading {}", server); getLogger().info("Unloading {}", server);
rm.unload(); rm.unload();
LOG.info("Restarting {}", server); getLogger().info("Restarting {}", server);
gracefulRestartRs(server, sleepTime); gracefulRestartRs(server, sleepTime);
LOG.info("Loading {}", server); getLogger().info("Loading {}", server);
rm.load(); rm.load();
} catch (Shell.ExitCodeException e) { } catch (Shell.ExitCodeException e) {
LOG.info("Problem restarting but presume successful; code={}", e.getExitCode(), e); getLogger().info("Problem restarting but presume successful; code={}", e.getExitCode(), e);
} }
sleep(RandomUtils.nextInt(0, (int)sleepTime)); sleep(RandomUtils.nextInt(0, (int)sleepTime));
} }
LOG.info("Enabling balancer"); getLogger().info("Enabling balancer");
setBalancer(true, true); setBalancer(true, true);
} }
protected List<ServerName> selectServers() throws IOException { protected List<ServerName> selectServers() throws IOException {
return Arrays.asList(getCurrentServers()); return Arrays.asList(getCurrentServers());
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -31,8 +30,8 @@ import org.slf4j.LoggerFactory;
*/ */
public class LosePacketsCommandAction extends TCCommandAction { public class LosePacketsCommandAction extends TCCommandAction {
private static final Logger LOG = LoggerFactory.getLogger(LosePacketsCommandAction.class); private static final Logger LOG = LoggerFactory.getLogger(LosePacketsCommandAction.class);
private float ratio; private final float ratio;
private long duration; private final long duration;
/** /**
* Lose network packets on a random regionserver. * Lose network packets on a random regionserver.
@ -48,8 +47,12 @@ public class LosePacketsCommandAction extends TCCommandAction {
this.duration = duration; this.duration = duration;
} }
@Override protected Logger getLogger() {
return LOG;
}
protected void localPerform() throws IOException { protected void localPerform() throws IOException {
LOG.info("Starting to execute LosePacketsCommandAction"); getLogger().info("Starting to execute LosePacketsCommandAction");
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
String hostname = server.getHostname(); String hostname = server.getHostname();
@ -57,12 +60,12 @@ public class LosePacketsCommandAction extends TCCommandAction {
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD)); clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
Thread.sleep(duration); Thread.sleep(duration);
} catch (InterruptedException e) { } catch (InterruptedException e) {
LOG.debug("Failed to run the command for the full duration", e); getLogger().debug("Failed to run the command for the full duration", e);
} finally { } finally {
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE)); clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
} }
LOG.info("Finished to execute LosePacketsCommandAction"); getLogger().info("Finished to execute LosePacketsCommandAction");
} }
private String getCommand(String operation){ private String getCommand(String operation){

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -45,22 +45,26 @@ public class MergeRandomAdjacentRegionsOfTableAction extends Action {
this.sleepTime = sleepTime; this.sleepTime = sleepTime;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
Admin admin = util.getAdmin(); Admin admin = util.getAdmin();
LOG.info("Performing action: Merge random adjacent regions of table " + tableName); getLogger().info("Performing action: Merge random adjacent regions of table " + tableName);
List<RegionInfo> regions = admin.getRegions(tableName); List<RegionInfo> regions = admin.getRegions(tableName);
if (regions == null || regions.size() < 2) { if (regions == null || regions.size() < 2) {
LOG.info("Table " + tableName + " doesn't have enough regions to merge"); getLogger().info("Table " + tableName + " doesn't have enough regions to merge");
return; return;
} }
int i = RandomUtils.nextInt(0, regions.size() - 1); int i = RandomUtils.nextInt(0, regions.size() - 1);
RegionInfo a = regions.get(i++); RegionInfo a = regions.get(i++);
RegionInfo b = regions.get(i); RegionInfo b = regions.get(i);
LOG.debug("Merging " + a.getRegionNameAsString() + " and " + b.getRegionNameAsString()); getLogger().debug("Merging " + a.getRegionNameAsString() + " and " + b.getRegionNameAsString());
// Don't try the merge if we're stopping // Don't try the merge if we're stopping
if (context.isStopping()) { if (context.isStopping()) {
@ -70,7 +74,7 @@ public class MergeRandomAdjacentRegionsOfTableAction extends Action {
try { try {
admin.mergeRegionsAsync(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false); admin.mergeRegionsAsync(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false);
} catch (Exception ex) { } catch (Exception ex) {
LOG.warn("Merge failed, might be caused by other chaos: " + ex.getMessage()); getLogger().warn("Merge failed, might be caused by other chaos: " + ex.getMessage());
} }
if (sleepTime > 0) { if (sleepTime > 0) {
Thread.sleep(sleepTime); Thread.sleep(sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.util.List; import java.util.List;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
@ -32,8 +31,7 @@ import org.slf4j.LoggerFactory;
* Action that tries to move a random region of a table. * Action that tries to move a random region of a table.
*/ */
public class MoveRandomRegionOfTableAction extends Action { public class MoveRandomRegionOfTableAction extends Action {
private static final Logger LOG = private static final Logger LOG = LoggerFactory.getLogger(MoveRandomRegionOfTableAction.class);
LoggerFactory.getLogger(MoveRandomRegionOfTableAction.class);
private final long sleepTime; private final long sleepTime;
private final TableName tableName; private final TableName tableName;
@ -46,6 +44,10 @@ public class MoveRandomRegionOfTableAction extends Action {
this.tableName = tableName; this.tableName = tableName;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
if (sleepTime > 0) { if (sleepTime > 0) {
@ -55,18 +57,19 @@ public class MoveRandomRegionOfTableAction extends Action {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
Admin admin = util.getAdmin(); Admin admin = util.getAdmin();
LOG.info("Performing action: Move random region of table " + tableName); getLogger().info("Performing action: Move random region of table " + tableName);
List<RegionInfo> regions = admin.getRegions(tableName); List<RegionInfo> regions = admin.getRegions(tableName);
if (regions == null || regions.isEmpty()) { if (regions == null || regions.isEmpty()) {
LOG.info("Table " + tableName + " doesn't have regions to move"); getLogger().info("Table " + tableName + " doesn't have regions to move");
return; return;
} }
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem( RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
regions.toArray(new RegionInfo[regions.size()])); regions.toArray(new RegionInfo[0]));
LOG.debug("Move random region {}", region.getRegionNameAsString()); getLogger().debug("Move random region {}", region.getRegionNameAsString());
// Use facility over in MoveRegionsOfTableAction... // Use facility over in MoveRegionsOfTableAction...
MoveRegionsOfTableAction.moveRegion(admin, MoveRegionsOfTableAction.getServers(admin), region); MoveRegionsOfTableAction.moveRegion(admin, MoveRegionsOfTableAction.getServers(admin),
region, getLogger());
if (sleepTime > 0) { if (sleepTime > 0) {
Thread.sleep(sleepTime); Thread.sleep(sleepTime);
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -22,11 +22,9 @@ import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.chaos.factories.MonkeyConstants;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -36,22 +34,21 @@ import org.slf4j.LoggerFactory;
* Action that tries to move every region of a table. * Action that tries to move every region of a table.
*/ */
public class MoveRegionsOfTableAction extends Action { public class MoveRegionsOfTableAction extends Action {
private static final Logger LOG = private static final Logger LOG = LoggerFactory.getLogger(MoveRegionsOfTableAction.class);
LoggerFactory.getLogger(MoveRegionsOfTableAction.class);
private final long sleepTime; private final long sleepTime;
private final TableName tableName; private final TableName tableName;
private final long maxTime; private final long maxTime;
public MoveRegionsOfTableAction(TableName tableName) {
this(-1, MonkeyConstants.DEFAULT_MOVE_REGIONS_MAX_TIME, tableName);
}
public MoveRegionsOfTableAction(long sleepTime, long maxSleepTime, TableName tableName) { public MoveRegionsOfTableAction(long sleepTime, long maxSleepTime, TableName tableName) {
this.sleepTime = sleepTime; this.sleepTime = sleepTime;
this.tableName = tableName; this.tableName = tableName;
this.maxTime = maxSleepTime; this.maxTime = maxSleepTime;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
if (sleepTime > 0) { if (sleepTime > 0) {
@ -61,10 +58,10 @@ public class MoveRegionsOfTableAction extends Action {
Admin admin = this.context.getHBaseIntegrationTestingUtility().getAdmin(); Admin admin = this.context.getHBaseIntegrationTestingUtility().getAdmin();
ServerName[] servers = getServers(admin); ServerName[] servers = getServers(admin);
LOG.info("Performing action: Move regions of table {}", tableName); getLogger().info("Performing action: Move regions of table {}", tableName);
List<RegionInfo> regions = admin.getRegions(tableName); List<RegionInfo> regions = admin.getRegions(tableName);
if (regions == null || regions.isEmpty()) { if (regions == null || regions.isEmpty()) {
LOG.info("Table {} doesn't have regions to move", tableName); getLogger().info("Table {} doesn't have regions to move", tableName);
return; return;
} }
@ -77,7 +74,7 @@ public class MoveRegionsOfTableAction extends Action {
return; return;
} }
moveRegion(admin, servers, regionInfo); moveRegion(admin, servers, regionInfo, getLogger());
if (sleepTime > 0) { if (sleepTime > 0) {
Thread.sleep(sleepTime); Thread.sleep(sleepTime);
} }
@ -92,16 +89,16 @@ public class MoveRegionsOfTableAction extends Action {
static ServerName [] getServers(Admin admin) throws IOException { static ServerName [] getServers(Admin admin) throws IOException {
Collection<ServerName> serversList = admin.getRegionServers(); Collection<ServerName> serversList = admin.getRegionServers();
return serversList.toArray(new ServerName[serversList.size()]); return serversList.toArray(new ServerName[0]);
} }
static void moveRegion(Admin admin, ServerName [] servers, RegionInfo regionInfo) { static void moveRegion(Admin admin, ServerName [] servers, RegionInfo regionInfo, Logger logger) {
try { try {
ServerName destServerName = servers[RandomUtils.nextInt(0, servers.length)]; ServerName destServerName = servers[RandomUtils.nextInt(0, servers.length)];
LOG.debug("Moving {} to {}", regionInfo.getRegionNameAsString(), destServerName); logger.debug("Moving {} to {}", regionInfo.getRegionNameAsString(), destServerName);
admin.move(regionInfo.getEncodedNameAsBytes(), destServerName); admin.move(regionInfo.getEncodedNameAsBytes(), destServerName);
} catch (Exception ex) { } catch (Exception ex) {
LOG.warn("Move failed, might be caused by other chaos: {}", ex.getMessage()); logger.warn("Move failed, might be caused by other chaos: {}", ex.getMessage());
} }
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import java.util.Random; import java.util.Random;
import java.util.Set; import java.util.Set;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
@ -40,7 +39,7 @@ public class RemoveColumnAction extends Action {
private final TableName tableName; private final TableName tableName;
private final Set<String> protectedColumns; private final Set<String> protectedColumns;
private Admin admin; private Admin admin;
private Random random; private final Random random;
public RemoveColumnAction(TableName tableName, Set<String> protectedColumns) { public RemoveColumnAction(TableName tableName, Set<String> protectedColumns) {
this.tableName = tableName; this.tableName = tableName;
@ -48,6 +47,10 @@ public class RemoveColumnAction extends Action {
random = new Random(); random = new Random();
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void init(ActionContext context) throws IOException { public void init(ActionContext context) throws IOException {
super.init(context); super.init(context);
@ -69,7 +72,7 @@ public class RemoveColumnAction extends Action {
index = random.nextInt(columnDescriptors.length); index = random.nextInt(columnDescriptors.length);
} }
byte[] colDescName = columnDescriptors[index].getName(); byte[] colDescName = columnDescriptors[index].getName();
LOG.debug("Performing action: Removing " + Bytes.toString(colDescName)+ " from " getLogger().debug("Performing action: Removing " + Bytes.toString(colDescName)+ " from "
+ tableName.getNameAsString()); + tableName.getNameAsString());
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableDescriptor); TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableDescriptor);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -31,9 +30,9 @@ import org.slf4j.LoggerFactory;
*/ */
public class ReorderPacketsCommandAction extends TCCommandAction { public class ReorderPacketsCommandAction extends TCCommandAction {
private static final Logger LOG = LoggerFactory.getLogger(ReorderPacketsCommandAction.class); private static final Logger LOG = LoggerFactory.getLogger(ReorderPacketsCommandAction.class);
private float ratio; private final float ratio;
private long duration; private final long duration;
private long delay; private final long delay;
/** /**
* Reorder network packets on a random regionserver. * Reorder network packets on a random regionserver.
@ -52,8 +51,12 @@ public class ReorderPacketsCommandAction extends TCCommandAction {
this.delay = delay; this.delay = delay;
} }
@Override protected Logger getLogger() {
return LOG;
}
protected void localPerform() throws IOException { protected void localPerform() throws IOException {
LOG.info("Starting to execute ReorderPacketsCommandAction"); getLogger().info("Starting to execute ReorderPacketsCommandAction");
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
String hostname = server.getHostname(); String hostname = server.getHostname();
@ -61,12 +64,12 @@ public class ReorderPacketsCommandAction extends TCCommandAction {
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD)); clusterManager.execSudoWithRetries(hostname, timeout, getCommand(ADD));
Thread.sleep(duration); Thread.sleep(duration);
} catch (InterruptedException e) { } catch (InterruptedException e) {
LOG.debug("Failed to run the command for the full duration", e); getLogger().debug("Failed to run the command for the full duration", e);
} finally { } finally {
clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE)); clusterManager.execSudoWithRetries(hostname, timeout, getCommand(DELETE));
} }
LOG.info("Finished to execute ReorderPacketsCommandAction"); getLogger().info("Finished to execute ReorderPacketsCommandAction");
} }
private String getCommand(String operation){ private String getCommand(String operation){

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,18 +19,13 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.util.Threads;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** /**
* Base class for restarting HBaseServer's * Base class for restarting HBaseServer's
*/ */
public class RestartActionBaseAction extends Action { public abstract class RestartActionBaseAction extends Action {
private static final Logger LOG =
LoggerFactory.getLogger(RestartActionBaseAction.class);
long sleepTime; // how long should we sleep long sleepTime; // how long should we sleep
public RestartActionBaseAction(long sleepTime) { public RestartActionBaseAction(long sleepTime) {
@ -38,7 +33,7 @@ public class RestartActionBaseAction extends Action {
} }
void sleep(long sleepTime) { void sleep(long sleepTime) {
LOG.info("Sleeping for:" + sleepTime); getLogger().info("Sleeping for:" + sleepTime);
Threads.sleep(sleepTime); Threads.sleep(sleepTime);
} }
@ -49,10 +44,10 @@ public class RestartActionBaseAction extends Action {
return; return;
} }
LOG.info("Killing master: {}", server); getLogger().info("Killing master: {}", server);
killMaster(server); killMaster(server);
sleep(sleepTime); sleep(sleepTime);
LOG.info("Starting master: {}", server); getLogger().info("Starting master: {}", server);
startMaster(server); startMaster(server);
} }
@ -68,10 +63,10 @@ public class RestartActionBaseAction extends Action {
if (context.isStopping()) { if (context.isStopping()) {
return; return;
} }
LOG.info("Stopping region server: {}", server); getLogger().info("Stopping region server: {}", server);
stopRs(server); stopRs(server);
sleep(sleepTime); sleep(sleepTime);
LOG.info("Starting region server: {}", server); getLogger().info("Starting region server: {}", server);
startRs(server); startRs(server);
} }
@ -81,10 +76,10 @@ public class RestartActionBaseAction extends Action {
if (context.isStopping()) { if (context.isStopping()) {
return; return;
} }
LOG.info("Killing region server: {}", server); getLogger().info("Killing region server: {}", server);
killRs(server); killRs(server);
sleep(sleepTime); sleep(sleepTime);
LOG.info("Starting region server: {}", server); getLogger().info("Starting region server: {}", server);
startRs(server); startRs(server);
} }
@ -94,10 +89,10 @@ public class RestartActionBaseAction extends Action {
if (context.isStopping()) { if (context.isStopping()) {
return; return;
} }
LOG.info("Killing zookeeper node: {}", server); getLogger().info("Killing zookeeper node: {}", server);
killZKNode(server); killZKNode(server);
sleep(sleepTime); sleep(sleepTime);
LOG.info("Starting zookeeper node: {}", server); getLogger().info("Starting zookeeper node: {}", server);
startZKNode(server); startZKNode(server);
} }
@ -107,10 +102,10 @@ public class RestartActionBaseAction extends Action {
if (context.isStopping()) { if (context.isStopping()) {
return; return;
} }
LOG.info("Killing data node: {}", server); getLogger().info("Killing data node: {}", server);
killDataNode(server); killDataNode(server);
sleep(sleepTime); sleep(sleepTime);
LOG.info("Starting data node: {}", server); getLogger().info("Starting data node: {}", server);
startDataNode(server); startDataNode(server);
} }
@ -120,11 +115,10 @@ public class RestartActionBaseAction extends Action {
if (context.isStopping()) { if (context.isStopping()) {
return; return;
} }
LOG.info("Killing name node: {}", server); getLogger().info("Killing name node: {}", server);
killNameNode(server); killNameNode(server);
sleep(sleepTime); sleep(sleepTime);
LOG.info("Starting name node: {}", server); getLogger().info("Starting name node: {}", server);
startNameNode(server); startNameNode(server);
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -31,9 +31,14 @@ public class RestartActiveMasterAction extends RestartActionBaseAction {
public RestartActiveMasterAction(long sleepTime) { public RestartActiveMasterAction(long sleepTime) {
super(sleepTime); super(sleepTime);
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info("Performing action: Restart active master"); getLogger().info("Performing action: Restart active master");
ServerName master = cluster.getClusterMetrics().getMasterName(); ServerName master = cluster.getClusterMetrics().getMasterName();
restartMaster(master, sleepTime); restartMaster(master, sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -51,9 +51,13 @@ public class RestartActiveNameNodeAction extends RestartActionBaseAction {
super(sleepTime); super(sleepTime);
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info("Performing action: Restart active namenode"); getLogger().info("Performing action: Restart active namenode");
Configuration conf = CommonFSUtils.getRootDir(getConf()).getFileSystem(getConf()).getConf(); Configuration conf = CommonFSUtils.getRootDir(getConf()).getFileSystem(getConf()).getConf();
String nameServiceID = DFSUtil.getNamenodeNameServiceId(conf); String nameServiceID = DFSUtil.getNamenodeNameServiceId(conf);
if (!HAUtil.isHAEnabled(conf, nameServiceID)) { if (!HAUtil.isHAEnabled(conf, nameServiceID)) {
@ -85,9 +89,9 @@ public class RestartActiveNameNodeAction extends RestartActionBaseAction {
if (activeNamenode == null) { if (activeNamenode == null) {
throw new Exception("No active Name node found in zookeeper under " + hadoopHAZkNode); throw new Exception("No active Name node found in zookeeper under " + hadoopHAZkNode);
} }
LOG.info("Found active namenode host:" + activeNamenode); getLogger().info("Found active namenode host:" + activeNamenode);
ServerName activeNNHost = ServerName.valueOf(activeNamenode, -1, -1); ServerName activeNNHost = ServerName.valueOf(activeNamenode, -1, -1);
LOG.info("Restarting Active NameNode :" + activeNamenode); getLogger().info("Restarting Active NameNode :" + activeNamenode);
restartNameNode(activeNNHost, sleepTime); restartNameNode(activeNNHost, sleepTime);
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -41,9 +41,13 @@ public class RestartRandomDataNodeAction extends RestartActionBaseAction {
super(sleepTime); super(sleepTime);
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info("Performing action: Restart random data node"); getLogger().info("Performing action: Restart random data node");
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getDataNodes()); ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getDataNodes());
restartDataNode(server, sleepTime); restartDataNode(server, sleepTime);
} }
@ -56,6 +60,6 @@ public class RestartRandomDataNodeAction extends RestartActionBaseAction {
for (DatanodeInfo dataNode: dfsClient.datanodeReport(HdfsConstants.DatanodeReportType.LIVE)) { for (DatanodeInfo dataNode: dfsClient.datanodeReport(HdfsConstants.DatanodeReportType.LIVE)) {
hosts.add(ServerName.valueOf(dataNode.getHostName(), -1, -1)); hosts.add(ServerName.valueOf(dataNode.getHostName(), -1, -1));
} }
return hosts.toArray(new ServerName[hosts.size()]); return hosts.toArray(new ServerName[0]);
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -33,9 +33,13 @@ public class RestartRandomRsAction extends RestartActionBaseAction {
super(sleepTime); super(sleepTime);
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info("Performing action: Restart random region server"); getLogger().info("Performing action: Restart random region server");
ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers()); ServerName server = PolicyBasedChaosMonkey.selectRandomItem(getCurrentServers());
restartRs(server, sleepTime); restartRs(server, sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -20,12 +20,20 @@ package org.apache.hadoop.hbase.chaos.actions;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class RestartRandomRsExceptMetaAction extends RestartRandomRsAction { public class RestartRandomRsExceptMetaAction extends RestartRandomRsAction {
private static final Logger LOG = LoggerFactory.getLogger(RestartRandomRsExceptMetaAction.class);
public RestartRandomRsExceptMetaAction(long sleepTime) { public RestartRandomRsExceptMetaAction(long sleepTime) {
super(sleepTime); super(sleepTime);
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
int tries = 10; int tries = 10;

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -34,9 +34,13 @@ public class RestartRandomZKNodeAction extends RestartActionBaseAction {
super(sleepTime); super(sleepTime);
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info("Performing action: Restart random zookeeper node"); getLogger().info("Performing action: Restart random zookeeper node");
ServerName server = PolicyBasedChaosMonkey.selectRandomItem( ServerName server = PolicyBasedChaosMonkey.selectRandomItem(
ZKServerTool.readZKNodes(getConf())); ZKServerTool.readZKNodes(getConf()));
restartZKNode(server, sleepTime); restartZKNode(server, sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -33,12 +33,17 @@ public class RestartRsHoldingMetaAction extends RestartActionBaseAction {
public RestartRsHoldingMetaAction(long sleepTime) { public RestartRsHoldingMetaAction(long sleepTime) {
super(sleepTime); super(sleepTime);
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info("Performing action: Restart region server holding META"); getLogger().info("Performing action: Restart region server holding META");
ServerName server = cluster.getServerHoldingMeta(); ServerName server = cluster.getServerHoldingMeta();
if (server == null) { if (server == null) {
LOG.warn("No server is holding hbase:meta right now."); getLogger().warn("No server is holding hbase:meta right now.");
return; return;
} }
ClusterMetrics clusterStatus = cluster.getClusterMetrics(); ClusterMetrics clusterStatus = cluster.getClusterMetrics();

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.util.List; import java.util.List;
import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.client.RegionLocator; import org.apache.hadoop.hbase.client.RegionLocator;
@ -30,8 +29,7 @@ import org.slf4j.LoggerFactory;
* Action that restarts an HRegionServer holding one of the regions of the table. * Action that restarts an HRegionServer holding one of the regions of the table.
*/ */
public class RestartRsHoldingTableAction extends RestartActionBaseAction { public class RestartRsHoldingTableAction extends RestartActionBaseAction {
private static final Logger LOG = private static final Logger LOG = LoggerFactory.getLogger(RestartRsHoldingTableAction.class);
LoggerFactory.getLogger(RestartRsHoldingTableAction.class);
private final RegionLocator locator; private final RegionLocator locator;
@ -40,9 +38,14 @@ public class RestartRsHoldingTableAction extends RestartActionBaseAction {
this.locator = locator; this.locator = locator;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info("Performing action: Restart random RS holding table " + this.locator.getName()); getLogger().info(
"Performing action: Restart random RS holding table " + this.locator.getName());
List<HRegionLocation> locations = locator.getAllRegionLocations(); List<HRegionLocation> locations = locator.getAllRegionLocations();
restartRs(locations.get(RandomUtils.nextInt(0, locations.size())).getServerName(), sleepTime); restartRs(locations.get(RandomUtils.nextInt(0, locations.size())).getServerName(), sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -23,7 +23,6 @@ import java.util.ArrayList;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Queue; import java.util.Queue;
import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
@ -60,10 +59,14 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
START START
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers", getLogger().info("Performing action: Rolling batch restarting {}% of region servers",
(int)(ratio * 100))); (int)(ratio * 100));
List<ServerName> selectedServers = selectServers(); List<ServerName> selectedServers = selectServers();
Queue<ServerName> serversToBeKilled = new LinkedList<>(selectedServers); Queue<ServerName> serversToBeKilled = new LinkedList<>(selectedServers);
@ -71,8 +74,8 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
// loop while there are servers to be killed or dead servers to be restarted // loop while there are servers to be killed or dead servers to be restarted
while ((!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) && !context.isStopping()) { while ((!serversToBeKilled.isEmpty() || !deadServers.isEmpty()) && !context.isStopping()) {
KillOrStart action = KillOrStart.KILL;
final KillOrStart action;
if (serversToBeKilled.isEmpty()) { // no more servers to kill if (serversToBeKilled.isEmpty()) { // no more servers to kill
action = KillOrStart.START; action = KillOrStart.START;
} else if (deadServers.isEmpty()) { } else if (deadServers.isEmpty()) {
@ -95,7 +98,7 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
} catch (org.apache.hadoop.util.Shell.ExitCodeException e) { } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
// We've seen this in test runs where we timeout but the kill went through. HBASE-9743 // We've seen this in test runs where we timeout but the kill went through. HBASE-9743
// So, add to deadServers even if exception so the start gets called. // So, add to deadServers even if exception so the start gets called.
LOG.info("Problem killing but presume successful; code=" + e.getExitCode(), e); getLogger().info("Problem killing but presume successful; code={}", e.getExitCode(), e);
} }
deadServers.add(server); deadServers.add(server);
break; break;
@ -106,7 +109,7 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
} catch (org.apache.hadoop.util.Shell.ExitCodeException e) { } catch (org.apache.hadoop.util.Shell.ExitCodeException e) {
// The start may fail but better to just keep going though we may lose server. // The start may fail but better to just keep going though we may lose server.
// //
LOG.info("Problem starting, will retry; code=" + e.getExitCode(), e); getLogger().info("Problem starting, will retry; code={}", e.getExitCode(), e);
} }
break; break;
} }
@ -121,25 +124,23 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
/** /**
* Small test to ensure the class basically works. * Small test to ensure the class basically works.
* @param args
* @throws Exception
*/ */
public static void main(final String[] args) throws Exception { public static void main(final String[] args) throws Exception {
RollingBatchRestartRsAction action = new RollingBatchRestartRsAction(1, 1.0f) { RollingBatchRestartRsAction action = new RollingBatchRestartRsAction(1, 1.0f) {
private int invocations = 0; private int invocations = 0;
@Override @Override
protected ServerName[] getCurrentServers() throws IOException { protected ServerName[] getCurrentServers() {
final int count = 4; final int count = 4;
List<ServerName> serverNames = new ArrayList<>(count); List<ServerName> serverNames = new ArrayList<>(count);
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
serverNames.add(ServerName.valueOf(i + ".example.org", i, i)); serverNames.add(ServerName.valueOf(i + ".example.org", i, i));
} }
return serverNames.toArray(new ServerName[serverNames.size()]); return serverNames.toArray(new ServerName[0]);
} }
@Override @Override
protected void killRs(ServerName server) throws IOException { protected void killRs(ServerName server) throws IOException {
LOG.info("Killed " + server); LOG.info("Killed {}", server);
if (this.invocations++ % 3 == 0) { if (this.invocations++ % 3 == 0) {
throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed"); throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed");
} }
@ -147,7 +148,7 @@ public class RollingBatchRestartRsAction extends BatchRestartRsAction {
@Override @Override
protected void startRs(ServerName server) throws IOException { protected void startRs(ServerName server) throws IOException {
LOG.info("Started " + server); LOG.info("Started {}", server);
if (this.invocations++ % 3 == 0) { if (this.invocations++ % 3 == 0) {
throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed"); throw new org.apache.hadoop.util.Shell.ExitCodeException(-1, "Failed");
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -22,7 +22,6 @@ import java.io.IOException;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Queue; import java.util.Queue;
import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
@ -40,9 +39,9 @@ import org.slf4j.LoggerFactory;
public class RollingBatchSuspendResumeRsAction extends Action { public class RollingBatchSuspendResumeRsAction extends Action {
private static final Logger LOG = private static final Logger LOG =
LoggerFactory.getLogger(RollingBatchSuspendResumeRsAction.class); LoggerFactory.getLogger(RollingBatchSuspendResumeRsAction.class);
private float ratio; private final float ratio;
private long sleepTime; private final long sleepTime;
private int maxSuspendedServers; // number of maximum suspended servers at any given time. private final int maxSuspendedServers; // number of maximum suspended servers at any given time.
public RollingBatchSuspendResumeRsAction(long sleepTime, float ratio) { public RollingBatchSuspendResumeRsAction(long sleepTime, float ratio) {
this(sleepTime, ratio, 5); this(sleepTime, ratio, 5);
@ -58,10 +57,14 @@ public class RollingBatchSuspendResumeRsAction extends Action {
SUSPEND, RESUME SUSPEND, RESUME
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers", getLogger().info("Performing action: Rolling batch restarting {}% of region servers",
(int) (ratio * 100))); (int) (ratio * 100));
List<ServerName> selectedServers = selectServers(); List<ServerName> selectedServers = selectServers();
Queue<ServerName> serversToBeSuspended = new LinkedList<>(selectedServers); Queue<ServerName> serversToBeSuspended = new LinkedList<>(selectedServers);
@ -70,8 +73,8 @@ public class RollingBatchSuspendResumeRsAction extends Action {
// loop while there are servers to be suspended or suspended servers to be resumed // loop while there are servers to be suspended or suspended servers to be resumed
while ((!serversToBeSuspended.isEmpty() || !suspendedServers.isEmpty()) && !context while ((!serversToBeSuspended.isEmpty() || !suspendedServers.isEmpty()) && !context
.isStopping()) { .isStopping()) {
SuspendOrResume action;
final SuspendOrResume action;
if (serversToBeSuspended.isEmpty()) { // no more servers to suspend if (serversToBeSuspended.isEmpty()) { // no more servers to suspend
action = SuspendOrResume.RESUME; action = SuspendOrResume.RESUME;
} else if (suspendedServers.isEmpty()) { } else if (suspendedServers.isEmpty()) {
@ -105,7 +108,7 @@ public class RollingBatchSuspendResumeRsAction extends Action {
break; break;
} }
LOG.info("Sleeping for:{}", sleepTime); getLogger().info("Sleeping for:{}", sleepTime);
Threads.sleep(sleepTime); Threads.sleep(sleepTime);
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -28,8 +28,7 @@ import org.slf4j.LoggerFactory;
* Action that tries to take a snapshot of a table. * Action that tries to take a snapshot of a table.
*/ */
public class SnapshotTableAction extends Action { public class SnapshotTableAction extends Action {
private static final Logger LOG = private static final Logger LOG = LoggerFactory.getLogger(SnapshotTableAction.class);
LoggerFactory.getLogger(SnapshotTableAction.class);
private final TableName tableName; private final TableName tableName;
private final long sleepTime; private final long sleepTime;
@ -42,6 +41,10 @@ public class SnapshotTableAction extends Action {
this.sleepTime = sleepTime; this.sleepTime = sleepTime;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
@ -53,7 +56,7 @@ public class SnapshotTableAction extends Action {
return; return;
} }
LOG.info("Performing action: Snapshot table " + tableName); getLogger().info("Performing action: Snapshot table {}", tableName);
admin.snapshot(snapshotName, tableName); admin.snapshot(snapshotName, tableName);
if (sleepTime > 0) { if (sleepTime > 0) {
Thread.sleep(sleepTime); Thread.sleep(sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,14 +19,12 @@ package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.ThreadLocalRandom;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
public class SplitAllRegionOfTableAction extends Action { public class SplitAllRegionOfTableAction extends Action {
private static final Logger LOG = private static final Logger LOG =
LoggerFactory.getLogger(SplitAllRegionOfTableAction.class); LoggerFactory.getLogger(SplitAllRegionOfTableAction.class);
@ -47,6 +45,10 @@ public class SplitAllRegionOfTableAction extends Action {
this.maxFullTableSplits = getConf().getInt(MAX_SPLIT_KEY, DEFAULT_MAX_SPLITS); this.maxFullTableSplits = getConf().getInt(MAX_SPLIT_KEY, DEFAULT_MAX_SPLITS);
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
@ -61,10 +63,10 @@ public class SplitAllRegionOfTableAction extends Action {
if (ThreadLocalRandom.current().nextDouble() if (ThreadLocalRandom.current().nextDouble()
< (((double) splits) / ((double) maxFullTableSplits)) / ((double) 2)) { < (((double) splits) / ((double) maxFullTableSplits)) / ((double) 2)) {
splits++; splits++;
LOG.info("Performing action: Split all regions of " + tableName); getLogger().info("Performing action: Split all regions of {}", tableName);
admin.split(tableName); admin.split(tableName);
} else { } else {
LOG.info("Skipping split of all regions."); getLogger().info("Skipping split of all regions.");
} }
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.util.List; import java.util.List;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey; import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
@ -46,15 +45,19 @@ public class SplitRandomRegionOfTableAction extends Action {
this.tableName = tableName; this.tableName = tableName;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
Admin admin = util.getAdmin(); Admin admin = util.getAdmin();
LOG.info("Performing action: Split random region of table " + tableName); getLogger().info("Performing action: Split random region of table " + tableName);
List<RegionInfo> regions = admin.getRegions(tableName); List<RegionInfo> regions = admin.getRegions(tableName);
if (regions == null || regions.isEmpty()) { if (regions == null || regions.isEmpty()) {
LOG.info("Table " + tableName + " doesn't have regions to split"); getLogger().info("Table " + tableName + " doesn't have regions to split");
return; return;
} }
// Don't try the split if we're stopping // Don't try the split if we're stopping
@ -63,12 +66,12 @@ public class SplitRandomRegionOfTableAction extends Action {
} }
RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem( RegionInfo region = PolicyBasedChaosMonkey.selectRandomItem(
regions.toArray(new RegionInfo[regions.size()])); regions.toArray(new RegionInfo[0]));
LOG.debug("Splitting region " + region.getRegionNameAsString()); getLogger().debug("Splitting region " + region.getRegionNameAsString());
try { try {
admin.splitRegionAsync(region.getRegionName()).get(); admin.splitRegionAsync(region.getRegionName()).get();
} catch (Exception ex) { } catch (Exception ex) {
LOG.warn("Split failed, might be caused by other chaos: " + ex.getMessage()); getLogger().warn("Split failed, might be caused by other chaos: " + ex.getMessage());
} }
if (sleepTime > 0) { if (sleepTime > 0) {
Thread.sleep(sleepTime); Thread.sleep(sleepTime);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,18 +19,14 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.hbase.DistributedHBaseCluster; import org.apache.hadoop.hbase.DistributedHBaseCluster;
import org.apache.hadoop.hbase.HBaseCluster; import org.apache.hadoop.hbase.HBaseCluster;
import org.apache.hadoop.hbase.HBaseClusterManager; import org.apache.hadoop.hbase.HBaseClusterManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** /**
* Base class for performing Actions based on linux commands requiring sudo privileges * Base class for performing Actions based on linux commands requiring sudo privileges
*/ */
abstract public class SudoCommandAction extends Action { abstract public class SudoCommandAction extends Action {
private static final Logger LOG = LoggerFactory.getLogger(SudoCommandAction.class);
protected long timeout; protected long timeout;
protected HBaseClusterManager clusterManager; protected HBaseClusterManager clusterManager;
@ -43,9 +39,9 @@ abstract public class SudoCommandAction extends Action {
public void init(ActionContext context) throws IOException { public void init(ActionContext context) throws IOException {
super.init(context); super.init(context);
HBaseCluster cluster = context.getHBaseCluster(); HBaseCluster cluster = context.getHBaseCluster();
if(cluster != null && cluster instanceof DistributedHBaseCluster){ if (cluster instanceof DistributedHBaseCluster){
Object manager = ((DistributedHBaseCluster)cluster).getClusterManager(); Object manager = ((DistributedHBaseCluster)cluster).getClusterManager();
if(manager != null && manager instanceof HBaseClusterManager){ if (manager instanceof HBaseClusterManager){
clusterManager = (HBaseClusterManager) manager; clusterManager = (HBaseClusterManager) manager;
} }
} }
@ -54,7 +50,7 @@ abstract public class SudoCommandAction extends Action {
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
if (clusterManager == null){ if (clusterManager == null){
LOG.info("Couldn't perform command action, it requires a distributed cluster."); getLogger().info("Couldn't perform command action, it requires a distributed cluster.");
return; return;
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -33,16 +33,14 @@ import org.junit.experimental.categories.Category;
import org.mockito.Mockito; import org.mockito.Mockito;
@Category({MediumTests.class}) @Category({MediumTests.class})
public class TestChangeSplitPolicyAction extends Action { public class TestChangeSplitPolicyAction {
@ClassRule @ClassRule
public static final HBaseClassTestRule CLASS_RULE = public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestChangeSplitPolicyAction.class); HBaseClassTestRule.forClass(TestChangeSplitPolicyAction.class);
private final static IntegrationTestingUtility TEST_UTIL = new IntegrationTestingUtility(); private final static IntegrationTestingUtility TEST_UTIL = new IntegrationTestingUtility();
private static ChangeSplitPolicyAction action; private final TableName tableName = TableName.valueOf("ChangeSplitPolicyAction");
private Admin admin;
private TableName tableName = TableName.valueOf("ChangeSplitPolicyAction");
@BeforeClass @BeforeClass
public static void setUpBeforeClass() throws Exception { public static void setUpBeforeClass() throws Exception {
@ -54,17 +52,17 @@ public class TestChangeSplitPolicyAction extends Action {
} }
@Before @Before
public void setUp() throws Exception { public void setUp() throws Exception {
this.admin = TEST_UTIL.getAdmin(); Admin admin = TEST_UTIL.getAdmin();
TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName); TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName);
admin.createTable(builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build()); admin.createTable(builder.setColumnFamily(ColumnFamilyDescriptorBuilder.of("fam")).build());
} }
@Test @Test
public void testChangeSplitPolicyAction() throws Exception { public void testChangeSplitPolicyAction() throws Exception {
ActionContext ctx = Mockito.mock(ActionContext.class); Action.ActionContext ctx = Mockito.mock(Action.ActionContext.class);
Mockito.when(ctx.getHBaseIntegrationTestingUtility()).thenReturn(TEST_UTIL); Mockito.when(ctx.getHBaseIntegrationTestingUtility()).thenReturn(TEST_UTIL);
Mockito.when(ctx.getHBaseCluster()).thenReturn(TEST_UTIL.getHBaseCluster()); Mockito.when(ctx.getHBaseCluster()).thenReturn(TEST_UTIL.getHBaseCluster());
action = new ChangeSplitPolicyAction(tableName); ChangeSplitPolicyAction action = new ChangeSplitPolicyAction(tableName);
action.init(ctx); action.init(ctx);
action.perform(); action.perform();
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -19,7 +19,6 @@
package org.apache.hadoop.hbase.chaos.actions; package org.apache.hadoop.hbase.chaos.actions;
import java.util.Random; import java.util.Random;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
@ -30,8 +29,7 @@ import org.slf4j.LoggerFactory;
* Action that tries to truncate of a table. * Action that tries to truncate of a table.
*/ */
public class TruncateTableAction extends Action { public class TruncateTableAction extends Action {
private static final Logger LOG = private static final Logger LOG = LoggerFactory.getLogger(TruncateTableAction.class);
LoggerFactory.getLogger(TruncateTableAction.class);
private final TableName tableName; private final TableName tableName;
private final Random random; private final Random random;
@ -40,6 +38,10 @@ public class TruncateTableAction extends Action {
this.random = new Random(); this.random = new Random();
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility(); HBaseTestingUtility util = context.getHBaseIntegrationTestingUtility();
@ -51,8 +53,8 @@ public class TruncateTableAction extends Action {
} }
boolean preserveSplits = random.nextBoolean(); boolean preserveSplits = random.nextBoolean();
LOG.info("Performing action: Truncate table " + tableName.getNameAsString() + getLogger().info("Performing action: Truncate table {} preserve splits {}",
"preserve splits " + preserveSplits); tableName.getNameAsString(), preserveSplits);
admin.truncateTable(tableName, preserveSplits); admin.truncateTable(tableName, preserveSplits);
} }
} }

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -23,7 +23,6 @@ import java.util.HashSet;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hbase.ClusterMetrics; import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
@ -42,10 +41,10 @@ public class UnbalanceKillAndRebalanceAction extends Action {
private static final double HOARD_FRC_OF_REGIONS = 0.8; private static final double HOARD_FRC_OF_REGIONS = 0.8;
/** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance /** Waits between calling unbalance and killing servers, kills and rebalance, and rebalance
* and restarting the servers; to make sure these events have time to impact the cluster. */ * and restarting the servers; to make sure these events have time to impact the cluster. */
private long waitForUnbalanceMilliSec; private final long waitForUnbalanceMilliSec;
private long waitForKillsMilliSec; private final long waitForKillsMilliSec;
private long waitAfterBalanceMilliSec; private final long waitAfterBalanceMilliSec;
private boolean killMetaRs; private final boolean killMetaRs;
public UnbalanceKillAndRebalanceAction(long waitUnbalance, long waitKill, long waitAfterBalance, public UnbalanceKillAndRebalanceAction(long waitUnbalance, long waitKill, long waitAfterBalance,
boolean killMetaRs) { boolean killMetaRs) {
@ -56,6 +55,10 @@ public class UnbalanceKillAndRebalanceAction extends Action {
this.killMetaRs = killMetaRs; this.killMetaRs = killMetaRs;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
ClusterMetrics status = this.cluster.getClusterMetrics(); ClusterMetrics status = this.cluster.getClusterMetrics();
@ -86,7 +89,7 @@ public class UnbalanceKillAndRebalanceAction extends Action {
} }
if (!killMetaRs && targetServer.equals(metaServer)) { if (!killMetaRs && targetServer.equals(metaServer)) {
LOG.info("Not killing server because it holds hbase:meta."); getLogger().info("Not killing server because it holds hbase:meta.");
} else { } else {
killRs(targetServer); killRs(targetServer);
killedServers.add(targetServer); killedServers.add(targetServer);

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -21,7 +21,6 @@ package org.apache.hadoop.hbase.chaos.actions;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.RandomUtils;
import org.apache.hadoop.hbase.ClusterMetrics; import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ServerName;
@ -32,10 +31,9 @@ import org.slf4j.LoggerFactory;
* Action that tries to unbalance the regions of a cluster. * Action that tries to unbalance the regions of a cluster.
*/ */
public class UnbalanceRegionsAction extends Action { public class UnbalanceRegionsAction extends Action {
private static final Logger LOG = private static final Logger LOG = LoggerFactory.getLogger(UnbalanceRegionsAction.class);
LoggerFactory.getLogger(UnbalanceRegionsAction.class); private final double fractionOfRegions;
private double fractionOfRegions; private final double fractionOfServers;
private double fractionOfServers;
/** /**
* Unbalances the regions on the cluster by choosing "target" servers, and moving * Unbalances the regions on the cluster by choosing "target" servers, and moving
@ -48,9 +46,13 @@ public class UnbalanceRegionsAction extends Action {
this.fractionOfServers = fractionOfServers; this.fractionOfServers = fractionOfServers;
} }
@Override protected Logger getLogger() {
return LOG;
}
@Override @Override
public void perform() throws Exception { public void perform() throws Exception {
LOG.info("Unbalancing regions"); getLogger().info("Unbalancing regions");
ClusterMetrics status = this.cluster.getClusterMetrics(); ClusterMetrics status = this.cluster.getClusterMetrics();
List<ServerName> victimServers = new LinkedList<>(status.getLiveServerMetrics().keySet()); List<ServerName> victimServers = new LinkedList<>(status.getLiveServerMetrics().keySet());
int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size()); int targetServerCount = (int)Math.ceil(fractionOfServers * victimServers.size());

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -23,7 +23,7 @@ import java.util.List;
/** A policy that runs multiple other policies one after the other */ /** A policy that runs multiple other policies one after the other */
public class CompositeSequentialPolicy extends Policy { public class CompositeSequentialPolicy extends Policy {
private List<Policy> policies; private final List<Policy> policies;
public CompositeSequentialPolicy(Policy... policies) { public CompositeSequentialPolicy(Policy... policies) {
this.policies = Arrays.asList(policies); this.policies = Arrays.asList(policies);
} }