HBASE-26556 IT and Chaos Monkey improvements (#3932)
Signed-off-by: Josh Elser <elserj@apache.org>
Reviewed-by: Tak Lon (Stephen) Wu <taklwu@apache.org>
(cherry picked from commit a36d41af73)
parent 2bfb743d79
commit 55fdff50ca
HBaseClusterManager.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.hbase.HBaseClusterManager.CommandProvider.Operation;
 import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.ReflectionUtils;
 import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.RetryCounter.RetryConfig;
 import org.apache.hadoop.hbase.util.RetryCounterFactory;
@@ -216,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
     }

     public String signalCommand(ServiceType service, String signal) {
-      return String.format("%s | xargs kill -s %s", findPidCommand(service), signal);
+      return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal);
     }
   }

@@ -322,7 +323,10 @@ public class HBaseClusterManager extends Configured implements ClusterManager {
       case ZOOKEEPER_SERVER:
         return new ZookeeperShellCommandProvider(getConf());
       default:
-        return new HBaseShellCommandProvider(getConf());
+        Class<? extends CommandProvider> provider = getConf()
+          .getClass("hbase.it.clustermanager.hbase.command.provider",
+            HBaseShellCommandProvider.class, CommandProvider.class);
+        return ReflectionUtils.newInstance(provider, getConf());
     }
   }

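Note: the last hunk above makes the fallback CommandProvider pluggable. The class named by hbase.it.clustermanager.hbase.command.provider is read from the configuration, must be assignable to CommandProvider, and is instantiated reflectively; it still defaults to HBaseShellCommandProvider. A minimal sketch of overriding it from test configuration (org.example.MyCommandProvider is a hypothetical implementation, not part of HBase):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class CommandProviderOverrideExample {
  public static Configuration withCustomProvider() {
    Configuration conf = HBaseConfiguration.create();
    // HBaseClusterManager resolves this class name via getConf().getClass(...) and
    // instantiates it with ReflectionUtils.newInstance; the class must extend
    // CommandProvider. org.example.MyCommandProvider is illustrative only.
    conf.set("hbase.it.clustermanager.hbase.command.provider",
        "org.example.MyCommandProvider");
    return conf;
  }
}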
ConfigurableSlowDeterministicMonkeyFactory.java (new file)
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.chaos.factories;
+
+import java.lang.reflect.Constructor;
+import java.util.function.Function;
+
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.chaos.actions.Action;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ConfigurableSlowDeterministicMonkeyFactory extends SlowDeterministicMonkeyFactory {
+
+  private static final Logger LOG =
+    LoggerFactory.getLogger(ConfigurableSlowDeterministicMonkeyFactory.class);
+
+  final static String HEAVY_ACTIONS = "heavy.actions";
+  final static String TABLE_PARAM = "\\$table_name";
+
+  public enum SupportedTypes {
+    FLOAT(p->Float.parseFloat(p)),
+    LONG(p-> Long.parseLong(p)),
+    INT(p-> Integer.parseInt(p)),
+    TABLENAME(p-> TableName.valueOf(p));
+
+    final Function<String,Object> converter;
+
+    SupportedTypes(Function<String,Object> converter){
+      this.converter = converter;
+    }
+
+    Object convert(String param){
+      return converter.apply(param);
+    }
+  }
+
+  @Override
+  protected Action[] getHeavyWeightedActions() {
+    String actions = this.properties.getProperty(HEAVY_ACTIONS);
+    if(actions==null || actions.isEmpty()){
+      return super.getHeavyWeightedActions();
+    } else {
+      try {
+        String[] actionClasses = actions.split(";");
+        Action[] heavyActions = new Action[actionClasses.length];
+        for (int i = 0; i < actionClasses.length; i++) {
+          heavyActions[i] = instantiateAction(actionClasses[i]);
+        }
+        LOG.info("Created actions {}", heavyActions);
+        return heavyActions;
+      } catch(Exception e) {
+        LOG.error("Error trying to instantiate heavy actions. Returning null array.", e);
+      }
+      return null;
+    }
+  }
+
+  private Action instantiateAction(String actionString) throws Exception {
+    final String packageName = "org.apache.hadoop.hbase.chaos.actions";
+    String[] classAndParams = actionString.split("\\)")[0].split("\\(");
+    String className = packageName + "." + classAndParams[0];
+    String[] params = classAndParams[1].replaceAll(TABLE_PARAM,
+      tableName.getNameAsString()).split(",");
+    LOG.info("About to instantiate action class: {}; With constructor params: {}",
+      className, params);
+    Class<? extends Action> actionClass = (Class<? extends Action>)Class.forName(className);
+    Constructor<? extends Action>[] constructors =
+      (Constructor<? extends Action>[]) actionClass.getDeclaredConstructors();
+    for(Constructor<? extends Action> c : constructors){
+      if (c.getParameterCount() != params.length){
+        continue;
+      }
+      Class[] paramTypes = c.getParameterTypes();
+      Object[] constructorParams = new Object[paramTypes.length];
+      for(int i=0; i<paramTypes.length; i++){
+        constructorParams[i] = SupportedTypes.valueOf(paramTypes[i].getSimpleName().toUpperCase())
+          .convert(params[i]);
+      }
+      return c.newInstance(constructorParams);
+    }
+    throw new IllegalArgumentException("Couldn't find any matching constructor for: " +
+      actionString);
+  }
+}
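Note: ConfigurableSlowDeterministicMonkeyFactory reads the heavy.actions property as a semicolon-separated list of ClassName(arg,...) entries. Each class is resolved under org.apache.hadoop.hbase.chaos.actions, $table_name is replaced with the monkey's table, and the arguments are matched positionally against a constructor with the same parameter count, each one converted as a float, long, int, or TableName. A minimal sketch of a property value that only uses constructors already exercised in this patch (the sleep time and ratio values are illustrative, not defaults):

import java.util.Properties;

public class HeavyActionsPropertyExample {
  public static Properties heavyActionsOnly() {
    Properties monkeyProps = new Properties();
    // Three heavy actions: RestartRandomRsAction(long), RollingBatchRestartRsAction(long, float)
    // and SplitAllRegionOfTableAction(TableName); $table_name is expanded by the factory.
    monkeyProps.setProperty("heavy.actions",
        "RestartRandomRsAction(60000);"
        + "RollingBatchRestartRsAction(5000,0.5);"
        + "SplitAllRegionOfTableAction($table_name)");
    return monkeyProps;
  }
}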
MonkeyFactory.java
@@ -79,6 +79,7 @@ public abstract class MonkeyFactory {
   public static final String SERVER_AND_DEPENDENCIES_KILLING = "serverAndDependenciesKilling";
   public static final String DISTRIBUTED_ISSUES = "distributedIssues";
   public static final String DATA_ISSUES = "dataIssues";
+  public static final String CONFIGURABLE_SLOW_DETERMINISTIC = "configurableSlowDeterministic";

   public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap.<String,MonkeyFactory>builder()
     .put(CALM, new CalmMonkeyFactory())
@@ -93,6 +94,7 @@ public abstract class MonkeyFactory {
     .put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory())
     .put(DISTRIBUTED_ISSUES, new DistributedIssuesMonkeyFactory())
     .put(DATA_ISSUES, new DataIssuesMonkeyFactory())
+    .put(CONFIGURABLE_SLOW_DETERMINISTIC, new ConfigurableSlowDeterministicMonkeyFactory())
     .build();

   public static MonkeyFactory getFactory(String factoryName) {
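Note: once registered under "configurableSlowDeterministic", the factory can be selected by name like any other monkey factory (for example through the integration tests' monkey option) or built directly. A minimal sketch of programmatic use; the fluent setters on MonkeyFactory (setUtil, setTableName, setColumnFamilies, setProperties) come from the existing base class and are not part of this diff, and the table and column family names below are placeholders:

import java.util.Collections;
import java.util.Properties;

import org.apache.hadoop.hbase.IntegrationTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.chaos.factories.MonkeyFactory;
import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;

public class ConfigurableMonkeyExample {
  public static ChaosMonkey startMonkey(IntegrationTestingUtility util,
      Properties monkeyProps) throws Exception {
    // Look up the newly registered factory and hand it the properties carrying
    // heavy.actions; the light and mid-weight tiers stay as in the parent factory.
    ChaosMonkey monkey = MonkeyFactory
        .getFactory(MonkeyFactory.CONFIGURABLE_SLOW_DETERMINISTIC)
        .setUtil(util)
        .setTableName(TableName.valueOf("test_table"))        // placeholder table
        .setColumnFamilies(Collections.singleton("test_cf"))  // placeholder family
        .setProperties(monkeyProps)
        .build();
    monkey.start();
    return monkey;
  }
}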
SlowDeterministicMonkeyFactory.java
@@ -74,6 +74,50 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
   private long rollingBatchSuspendRSSleepTime;
   private float rollingBatchSuspendtRSRatio;

+  protected Action[] getLightWeightedActions(){
+    return new Action[] {
+      new CompactTableAction(tableName, compactTableRatio),
+      new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
+      new FlushTableAction(tableName),
+      new FlushRandomRegionOfTableAction(tableName),
+      new MoveRandomRegionOfTableAction(tableName)
+    };
+  }
+
+  protected Action[] getMidWeightedActions(){
+    return new Action[] {
+      new SplitRandomRegionOfTableAction(tableName),
+      new MergeRandomAdjacentRegionsOfTableAction(tableName),
+      new SnapshotTableAction(tableName),
+      new AddColumnAction(tableName),
+      new RemoveColumnAction(tableName, columnFamilies),
+      new ChangeEncodingAction(tableName),
+      new ChangeCompressionAction(tableName),
+      new ChangeBloomFilterAction(tableName),
+      new ChangeVersionsAction(tableName),
+      new ChangeSplitPolicyAction(tableName),
+    };
+  }
+
+  protected Action[] getHeavyWeightedActions() {
+    return new Action[] {
+      new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
+        tableName),
+      new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
+      new RestartRandomRsAction(restartRandomRSSleepTime),
+      new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
+      new RestartActiveMasterAction(restartActiveMasterSleepTime),
+      new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
+        rollingBatchRestartRSRatio),
+      new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
+      new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
+      new SplitAllRegionOfTableAction(tableName),
+      new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
+      new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
+        rollingBatchSuspendtRSRatio)
+    };
+  }
+
   @Override
   public ChaosMonkey build() {

@@ -81,47 +125,15 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory {
     // Actions such as compact/flush a table/region,
     // move one region around. They are not so destructive,
     // can be executed more frequently.
-    Action[] actions1 = new Action[] {
-      new CompactTableAction(tableName, compactTableRatio),
-      new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio),
-      new FlushTableAction(tableName),
-      new FlushRandomRegionOfTableAction(tableName),
-      new MoveRandomRegionOfTableAction(tableName)
-    };
+    Action[] actions1 = getLightWeightedActions();

     // Actions such as split/merge/snapshot.
     // They should not cause data loss, or unreliability
     // such as region stuck in transition.
-    Action[] actions2 = new Action[] {
-      new SplitRandomRegionOfTableAction(tableName),
-      new MergeRandomAdjacentRegionsOfTableAction(tableName),
-      new SnapshotTableAction(tableName),
-      new AddColumnAction(tableName),
-      new RemoveColumnAction(tableName, columnFamilies),
-      new ChangeEncodingAction(tableName),
-      new ChangeCompressionAction(tableName),
-      new ChangeBloomFilterAction(tableName),
-      new ChangeVersionsAction(tableName),
-      new ChangeSplitPolicyAction(tableName),
-    };
+    Action[] actions2 = getMidWeightedActions();

     // Destructive actions to mess things around.
-    Action[] actions3 = new Action[] {
-      new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime,
-        tableName),
-      new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName),
-      new RestartRandomRsAction(restartRandomRSSleepTime),
-      new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio),
-      new RestartActiveMasterAction(restartActiveMasterSleepTime),
-      new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime,
-        rollingBatchRestartRSRatio),
-      new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime),
-      new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName),
-      new SplitAllRegionOfTableAction(tableName),
-      new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
-      new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
-        rollingBatchSuspendtRSRatio)
-    };
+    Action[] actions3 = getHeavyWeightedActions();

     // Action to log more info for debugging
     Action[] actions4 = new Action[] {
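Note: the refactoring above turns the three action tiers into overridable hooks, so a subclass can swap out a single tier without copying the rest of build(). A minimal sketch of a hypothetical subclass (not part of this patch) that narrows the mid-weight tier to region-shape and snapshot actions:

package org.apache.hadoop.hbase.chaos.factories;

import org.apache.hadoop.hbase.chaos.actions.Action;
import org.apache.hadoop.hbase.chaos.actions.MergeRandomAdjacentRegionsOfTableAction;
import org.apache.hadoop.hbase.chaos.actions.SnapshotTableAction;
import org.apache.hadoop.hbase.chaos.actions.SplitRandomRegionOfTableAction;

// Hypothetical example class: inherits the light and heavy tiers unchanged and
// overrides only the mid-weight tier consumed by SlowDeterministicMonkeyFactory.build().
public class RegionOnlySlowDeterministicMonkeyFactory extends SlowDeterministicMonkeyFactory {
  @Override
  protected Action[] getMidWeightedActions() {
    return new Action[] {
      new SplitRandomRegionOfTableAction(tableName),
      new MergeRandomAdjacentRegionsOfTableAction(tableName),
      new SnapshotTableAction(tableName)
    };
  }
}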