HBASE-26556 IT and Chaos Monkey improvements (#3932)

Signed-off-by: Josh Elser <elserj@apache.org>
Reviewed-by: Tak Lon (Stephen) Wu <taklwu@apache.org>
(cherry picked from commit a36d41af73)
Authored by Wellington Ramos Chevreuil on 2021-12-14 21:22:28 +00:00; committed by Wellington Chevreuil
parent 348877d257
commit 0866975238
4 changed files with 155 additions and 37 deletions

HBaseClusterManager.java

@@ -27,6 +27,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.HBaseClusterManager.CommandProvider.Operation;
import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.hadoop.hbase.util.RetryCounter.RetryConfig;
import org.apache.hadoop.hbase.util.RetryCounterFactory;
@@ -216,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
}
public String signalCommand(ServiceType service, String signal) {
return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal);
}
}
@@ -322,7 +323,10 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
case ZOOKEEPER_SERVER:
return new ZookeeperShellCommandProvider(getConf());
default:
-return new HBaseShellCommandProvider(getConf());
+Class<? extends CommandProvider> provider = getConf()
+  .getClass("hbase.it.clustermanager.hbase.command.provider",
+    HBaseShellCommandProvider.class, CommandProvider.class);
+return ReflectionUtils.newInstance(provider, getConf());
}
}
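With this change the command provider for HBase services is no longer hard-coded: the default branch reads the hbase.it.clustermanager.hbase.command.provider setting and instantiates the configured class via ReflectionUtils.newInstance(provider, getConf()), falling back to HBaseShellCommandProvider when nothing is set. A minimal sketch of pointing the cluster manager at a custom provider; org.example.MyCommandProvider is hypothetical and stands in for any CommandProvider subclass on the test classpath (presumably one with a Configuration-argument constructor, given how it is instantiated above):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class CommandProviderConfigExample {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        // Hypothetical class name; any CommandProvider implementation reachable
        // on the classpath can be plugged in here.
        conf.set("hbase.it.clustermanager.hbase.command.provider",
            "org.example.MyCommandProvider");
        // Service types that fall through to the default branch above are now
        // managed through the configured provider instead of HBaseShellCommandProvider.
      }
    }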

ConfigurableSlowDeterministicMonkeyFactory.java (new file)

@@ -0,0 +1,100 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.chaos.factories;
import java.lang.reflect.Constructor;
import java.util.function.Function;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.chaos.actions.Action;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ConfigurableSlowDeterministicMonkeyFactory extends SlowDeterministicMonkeyFactory {
private static final Logger LOG =
LoggerFactory.getLogger(ConfigurableSlowDeterministicMonkeyFactory.class);
final static String HEAVY_ACTIONS = "heavy.actions";
final static String TABLE_PARAM = "\\$table_name";
public enum SupportedTypes {
FLOAT(p->Float.parseFloat(p)),
LONG(p-> Long.parseLong(p)),
INT(p-> Integer.parseInt(p)),
TABLENAME(p-> TableName.valueOf(p));
final Function<String,Object> converter;
SupportedTypes(Function<String,Object> converter){
this.converter = converter;
}
Object convert(String param){
return converter.apply(param);
}
}
@Override
protected Action[] getHeavyWeightedActions() {
String actions = this.properties.getProperty(HEAVY_ACTIONS);
if(actions==null || actions.isEmpty()){
return super.getHeavyWeightedActions();
} else {
try {
String[] actionClasses = actions.split(";");
Action[] heavyActions = new Action[actionClasses.length];
for (int i = 0; i < actionClasses.length; i++) {
heavyActions[i] = instantiateAction(actionClasses[i]);
}
LOG.info("Created actions {}", heavyActions);
return heavyActions;
} catch(Exception e) {
LOG.error("Error trying to instantiate heavy actions. Returning null array.", e);
}
return null;
}
}
private Action instantiateAction(String actionString) throws Exception {
final String packageName = "org.apache.hadoop.hbase.chaos.actions";
String[] classAndParams = actionString.split("\\)")[0].split("\\(");
String className = packageName + "." + classAndParams[0];
String[] params = classAndParams[1].replaceAll(TABLE_PARAM,
tableName.getNameAsString()).split(",");
LOG.info("About to instantiate action class: {}; With constructor params: {}",
className, params);
Class<? extends Action> actionClass = (Class<? extends Action>)Class.forName(className);
Constructor<? extends Action>[] constructors =
(Constructor<? extends Action>[]) actionClass.getDeclaredConstructors();
for(Constructor<? extends Action> c : constructors){
if (c.getParameterCount() != params.length){
continue;
}
Class[] paramTypes = c.getParameterTypes();
Object[] constructorParams = new Object[paramTypes.length];
for(int i=0; i<paramTypes.length; i++){
constructorParams[i] = SupportedTypes.valueOf(paramTypes[i].getSimpleName().toUpperCase())
.convert(params[i]);
}
return c.newInstance(constructorParams);
}
throw new IllegalArgumentException("Couldn't find any matching constructor for: " +
actionString);
}
}
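The parser above defines the format of the heavy.actions monkey property: a semicolon-separated list of ClassName(arg,...) entries, where each class is resolved in the org.apache.hadoop.hbase.chaos.actions package, arguments are matched to a constructor by count and converted by parameter type (float, long, int, or TableName), and the literal $table_name token is replaced with the monkey's table name. A hedged sketch of supplying such properties; the two actions and their values are only illustrative, chosen because both constructors also appear in SlowDeterministicMonkeyFactory below:

    import java.util.Properties;

    public class HeavyActionsExample {
      public static void main(String[] args) {
        Properties monkeyProps = new Properties();
        // RestartRandomRsAction takes a long sleep time; SplitAllRegionOfTableAction
        // takes a TableName, injected here through the $table_name placeholder.
        monkeyProps.setProperty("heavy.actions",
            "RestartRandomRsAction(60000);SplitAllRegionOfTableAction($table_name)");
        // These are the properties read via this.properties.getProperty(HEAVY_ACTIONS)
        // above; the integration tests typically load them from a -monkeyProps file.
      }
    }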

MonkeyFactory.java

@@ -79,6 +79,7 @@ public abstract class MonkeyFactory {
public static final String SERVER_AND_DEPENDENCIES_KILLING = "serverAndDependenciesKilling";
public static final String DISTRIBUTED_ISSUES = "distributedIssues";
public static final String DATA_ISSUES = "dataIssues";
+public static final String CONFIGURABLE_SLOW_DETERMINISTIC = "configurableSlowDeterministic";
public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap.<String,MonkeyFactory>builder()
.put(CALM, new CalmMonkeyFactory())
@@ -93,6 +94,7 @@ public abstract class MonkeyFactory {
.put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory())
.put(DISTRIBUTED_ISSUES, new DistributedIssuesMonkeyFactory())
.put(DATA_ISSUES, new DataIssuesMonkeyFactory())
+.put(CONFIGURABLE_SLOW_DETERMINISTIC, new ConfigurableSlowDeterministicMonkeyFactory())
.build();
public static MonkeyFactory getFactory(String factoryName) {

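With the registration above, the new monkey is selectable by the configurableSlowDeterministic name just like the existing factories. A minimal usage sketch; the surrounding setup that supplies the table name and monkey properties is omitted:

    import org.apache.hadoop.hbase.chaos.factories.MonkeyFactory;

    public class MonkeySelectionExample {
      public static void main(String[] args) {
        // Look up the new factory by its registered name (or, equivalently, by the
        // MonkeyFactory.CONFIGURABLE_SLOW_DETERMINISTIC constant added above).
        MonkeyFactory factory = MonkeyFactory.getFactory("configurableSlowDeterministic");
        // Once the factory has been given the table name and monkey properties
        // (as the integration test harness does), build() returns the ChaosMonkey.
      }
    }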
SlowDeterministicMonkeyFactory.java

@@ -74,6 +74,50 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
private long rollingBatchSuspendRSSleepTime;
private float rollingBatchSuspendtRSRatio;
+protected Action[] getLightWeightedActions(){
+  return new Action[] {
+    new CompactTableAction(tableName, compactTableRatio),
+    new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
+    new FlushTableAction(tableName),
+    new FlushRandomRegionOfTableAction(tableName),
+    new MoveRandomRegionOfTableAction(tableName)
+  };
+}
+protected Action[] getMidWeightedActions(){
+  return new Action[] {
+    new SplitRandomRegionOfTableAction(tableName),
+    new MergeRandomAdjacentRegionsOfTableAction(tableName),
+    new SnapshotTableAction(tableName),
+    new AddColumnAction(tableName),
+    new RemoveColumnAction(tableName, columnFamilies),
+    new ChangeEncodingAction(tableName),
+    new ChangeCompressionAction(tableName),
+    new ChangeBloomFilterAction(tableName),
+    new ChangeVersionsAction(tableName),
+    new ChangeSplitPolicyAction(tableName),
+  };
+}
+protected Action[] getHeavyWeightedActions() {
+  return new Action[] {
+    new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
+      tableName),
+    new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
+    new RestartRandomRsAction(restartRandomRSSleepTime),
+    new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
+    new RestartActiveMasterAction(restartActiveMasterSleepTime),
+    new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
+      rollingBatchRestartRSRatio),
+    new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
+    new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
+    new SplitAllRegionOfTableAction(tableName),
+    new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
+    new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
+      rollingBatchSuspendtRSRatio)
+  };
+}
@Override
public ChaosMonkey build() {
@@ -81,47 +125,15 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
// Actions such as compact/flush a table/region,
// move one region around. They are not so destructive,
// can be executed more frequently.
-Action[] actions1 = new Action[] {
-  new CompactTableAction(tableName, compactTableRatio),
-  new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
-  new FlushTableAction(tableName),
-  new FlushRandomRegionOfTableAction(tableName),
-  new MoveRandomRegionOfTableAction(tableName)
-};
+Action[] actions1 = getLightWeightedActions();
// Actions such as split/merge/snapshot.
// They should not cause data loss, or unreliability
// such as region stuck in transition.
-Action[] actions2 = new Action[] {
-  new SplitRandomRegionOfTableAction(tableName),
-  new MergeRandomAdjacentRegionsOfTableAction(tableName),
-  new SnapshotTableAction(tableName),
-  new AddColumnAction(tableName),
-  new RemoveColumnAction(tableName, columnFamilies),
-  new ChangeEncodingAction(tableName),
-  new ChangeCompressionAction(tableName),
-  new ChangeBloomFilterAction(tableName),
-  new ChangeVersionsAction(tableName),
-  new ChangeSplitPolicyAction(tableName),
-};
+Action[] actions2 = getMidWeightedActions();
// Destructive actions to mess things around.
-Action[] actions3 = new Action[] {
-  new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
-    tableName),
-  new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
-  new RestartRandomRsAction(restartRandomRSSleepTime),
-  new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
-  new RestartActiveMasterAction(restartActiveMasterSleepTime),
-  new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
-    rollingBatchRestartRSRatio),
-  new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
-  new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
-  new SplitAllRegionOfTableAction(tableName),
-  new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
-  new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
-    rollingBatchSuspendtRSRatio)
-};
+Action[] actions3 = getHeavyWeightedActions();
// Action to log more info for debugging
Action[] actions4 = new Action[] {