HBASE-26556 IT and Chaos Monkey improvements (#3932)
Signed-off-by: Josh Elser <elserj@apache.org>
Reviewed-by: Tak Lon (Stephen) Wu <taklwu@apache.org>
(cherry picked from commit a36d41af73)
This commit is contained in:
parent
cb3155459f
commit
5defd8c35f
|
@ -27,6 +27,7 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.hbase.HBaseClusterManager.CommandProvider.Operation;
|
||||
import org.apache.hadoop.hbase.util.Pair;
|
||||
import org.apache.hadoop.hbase.util.ReflectionUtils;
|
||||
import org.apache.hadoop.hbase.util.RetryCounter;
|
||||
import org.apache.hadoop.hbase.util.RetryCounter.RetryConfig;
|
||||
import org.apache.hadoop.hbase.util.RetryCounterFactory;
|
||||
|
@ -216,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
|
|||
}
|
||||
|
||||
// Builds a shell pipeline string: the output of findPidCommand(service) is piped into
// "xargs ... kill -s <signal>", with the signal name substituted into the format string.
// NOTE(review): this span is a rendered diff hunk, so two return statements appear below.
// Presumably the first is the pre-change line and the second (which adds "sudo") is its
// replacement -- confirm against the real file before relying on either.
public String signalCommand(ServiceType service, String signal) {
|
||||
return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
|
||||
return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -322,7 +323,10 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
|
|||
case ZOOKEEPER_SERVER:
|
||||
return new ZookeeperShellCommandProvider(getConf());
|
||||
default:
|
||||
return new HBaseShellCommandProvider(getConf());
|
||||
Class<? extends CommandProvider> provider = getConf()
|
||||
.getClass("hbase.it.clustermanager.hbase.command.provider",
|
||||
HBaseShellCommandProvider.class, CommandProvider.class);
|
||||
return ReflectionUtils.newInstance(provider, getConf());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.chaos.factories;
|
||||
|
||||
import java.lang.reflect.Constructor;
import java.util.Arrays;
import java.util.Locale;
import java.util.function.Function;
import java.util.regex.Matcher;

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.chaos.actions.Action;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class ConfigurableSlowDeterministicMonkeyFactory extends SlowDeterministicMonkeyFactory {
|
||||
|
||||
private static final Logger LOG =
|
||||
LoggerFactory.getLogger(ConfigurableSlowDeterministicMonkeyFactory.class);
|
||||
|
||||
final static String HEAVY_ACTIONS = "heavy.actions";
|
||||
final static String TABLE_PARAM = "\\$table_name";
|
||||
|
||||
public enum SupportedTypes {
|
||||
FLOAT(p->Float.parseFloat(p)),
|
||||
LONG(p-> Long.parseLong(p)),
|
||||
INT(p-> Integer.parseInt(p)),
|
||||
TABLENAME(p-> TableName.valueOf(p));
|
||||
|
||||
final Function<String,Object> converter;
|
||||
|
||||
SupportedTypes(Function<String,Object> converter){
|
||||
this.converter = converter;
|
||||
}
|
||||
|
||||
Object convert(String param){
|
||||
return converter.apply(param);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Action[] getHeavyWeightedActions() {
|
||||
String actions = this.properties.getProperty(HEAVY_ACTIONS);
|
||||
if(actions==null || actions.isEmpty()){
|
||||
return super.getHeavyWeightedActions();
|
||||
} else {
|
||||
try {
|
||||
String[] actionClasses = actions.split(";");
|
||||
Action[] heavyActions = new Action[actionClasses.length];
|
||||
for (int i = 0; i < actionClasses.length; i++) {
|
||||
heavyActions[i] = instantiateAction(actionClasses[i]);
|
||||
}
|
||||
LOG.info("Created actions {}", heavyActions);
|
||||
return heavyActions;
|
||||
} catch(Exception e) {
|
||||
LOG.error("Error trying to instantiate heavy actions. Returning null array.", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private Action instantiateAction(String actionString) throws Exception {
|
||||
final String packageName = "org.apache.hadoop.hbase.chaos.actions";
|
||||
String[] classAndParams = actionString.split("\\)")[0].split("\\(");
|
||||
String className = packageName + "." + classAndParams[0];
|
||||
String[] params = classAndParams[1].replaceAll(TABLE_PARAM,
|
||||
tableName.getNameAsString()).split(",");
|
||||
LOG.info("About to instantiate action class: {}; With constructor params: {}",
|
||||
className, params);
|
||||
Class<? extends Action> actionClass = (Class<? extends Action>)Class.forName(className);
|
||||
Constructor<? extends Action>[] constructors =
|
||||
(Constructor<? extends Action>[]) actionClass.getDeclaredConstructors();
|
||||
for(Constructor<? extends Action> c : constructors){
|
||||
if (c.getParameterCount() != params.length){
|
||||
continue;
|
||||
}
|
||||
Class[] paramTypes = c.getParameterTypes();
|
||||
Object[] constructorParams = new Object[paramTypes.length];
|
||||
for(int i=0; i<paramTypes.length; i++){
|
||||
constructorParams[i] = SupportedTypes.valueOf(paramTypes[i].getSimpleName().toUpperCase())
|
||||
.convert(params[i]);
|
||||
}
|
||||
return c.newInstance(constructorParams);
|
||||
}
|
||||
throw new IllegalArgumentException("Couldn't find any matching constructor for: " +
|
||||
actionString);
|
||||
}
|
||||
}
|
|
@ -79,6 +79,7 @@ public abstract class MonkeyFactory {
|
|||
public static final String SERVER_AND_DEPENDENCIES_KILLING = "serverAndDependenciesKilling";
|
||||
public static final String DISTRIBUTED_ISSUES = "distributedIssues";
|
||||
public static final String DATA_ISSUES = "dataIssues";
|
||||
public static final String CONFIGURABLE_SLOW_DETERMINISTIC = "configurableSlowDeterministic";
|
||||
|
||||
public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap.<String,MonkeyFactory>builder()
|
||||
.put(CALM, new CalmMonkeyFactory())
|
||||
|
@ -93,6 +94,7 @@ public abstract class MonkeyFactory {
|
|||
.put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory())
|
||||
.put(DISTRIBUTED_ISSUES, new DistributedIssuesMonkeyFactory())
|
||||
.put(DATA_ISSUES, new DataIssuesMonkeyFactory())
|
||||
.put(CONFIGURABLE_SLOW_DETERMINISTIC, new ConfigurableSlowDeterministicMonkeyFactory())
|
||||
.build();
|
||||
|
||||
public static MonkeyFactory getFactory(String factoryName) {
|
||||
|
|
|
@ -74,6 +74,50 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
|
|||
private long rollingBatchSuspendRSSleepTime;
|
||||
private float rollingBatchSuspendtRSRatio;
|
||||
|
||||
protected Action[] getLightWeightedActions(){
|
||||
return new Action[] {
|
||||
new CompactTableAction(tableName, compactTableRatio),
|
||||
new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
|
||||
new FlushTableAction(tableName),
|
||||
new FlushRandomRegionOfTableAction(tableName),
|
||||
new MoveRandomRegionOfTableAction(tableName)
|
||||
};
|
||||
}
|
||||
|
||||
protected Action[] getMidWeightedActions(){
|
||||
return new Action[] {
|
||||
new SplitRandomRegionOfTableAction(tableName),
|
||||
new MergeRandomAdjacentRegionsOfTableAction(tableName),
|
||||
new SnapshotTableAction(tableName),
|
||||
new AddColumnAction(tableName),
|
||||
new RemoveColumnAction(tableName, columnFamilies),
|
||||
new ChangeEncodingAction(tableName),
|
||||
new ChangeCompressionAction(tableName),
|
||||
new ChangeBloomFilterAction(tableName),
|
||||
new ChangeVersionsAction(tableName),
|
||||
new ChangeSplitPolicyAction(tableName),
|
||||
};
|
||||
}
|
||||
|
||||
protected Action[] getHeavyWeightedActions() {
|
||||
return new Action[] {
|
||||
new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
|
||||
tableName),
|
||||
new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
|
||||
new RestartRandomRsAction(restartRandomRSSleepTime),
|
||||
new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
|
||||
new RestartActiveMasterAction(restartActiveMasterSleepTime),
|
||||
new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
|
||||
rollingBatchRestartRSRatio),
|
||||
new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
|
||||
new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
|
||||
new SplitAllRegionOfTableAction(tableName),
|
||||
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
|
||||
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
|
||||
rollingBatchSuspendtRSRatio)
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public ChaosMonkey build() {
|
||||
|
||||
|
@ -81,47 +125,15 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
|
|||
// Actions such as compact/flush a table/region,
|
||||
// move one region around. They are not so destructive,
|
||||
// can be executed more frequently.
|
||||
Action[] actions1 = new Action[] {
|
||||
new CompactTableAction(tableName, compactTableRatio),
|
||||
new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
|
||||
new FlushTableAction(tableName),
|
||||
new FlushRandomRegionOfTableAction(tableName),
|
||||
new MoveRandomRegionOfTableAction(tableName)
|
||||
};
|
||||
Action[] actions1 = getLightWeightedActions();
|
||||
|
||||
// Actions such as split/merge/snapshot.
|
||||
// They should not cause data loss, or unreliability
|
||||
// such as region stuck in transition.
|
||||
Action[] actions2 = new Action[] {
|
||||
new SplitRandomRegionOfTableAction(tableName),
|
||||
new MergeRandomAdjacentRegionsOfTableAction(tableName),
|
||||
new SnapshotTableAction(tableName),
|
||||
new AddColumnAction(tableName),
|
||||
new RemoveColumnAction(tableName, columnFamilies),
|
||||
new ChangeEncodingAction(tableName),
|
||||
new ChangeCompressionAction(tableName),
|
||||
new ChangeBloomFilterAction(tableName),
|
||||
new ChangeVersionsAction(tableName),
|
||||
new ChangeSplitPolicyAction(tableName),
|
||||
};
|
||||
Action[] actions2 = getMidWeightedActions();
|
||||
|
||||
// Destructive actions to mess things around.
|
||||
Action[] actions3 = new Action[] {
|
||||
new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
|
||||
tableName),
|
||||
new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
|
||||
new RestartRandomRsAction(restartRandomRSSleepTime),
|
||||
new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
|
||||
new RestartActiveMasterAction(restartActiveMasterSleepTime),
|
||||
new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
|
||||
rollingBatchRestartRSRatio),
|
||||
new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
|
||||
new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
|
||||
new SplitAllRegionOfTableAction(tableName),
|
||||
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
|
||||
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
|
||||
rollingBatchSuspendtRSRatio)
|
||||
};
|
||||
Action[] actions3 = getHeavyWeightedActions();
|
||||
|
||||
// Action to log more info for debugging
|
||||
Action[] actions4 = new Action[] {
|
||||
|
|
Loading…
Reference in New Issue