From a36d41af739159073a6f4e6143fe26d77760535b Mon Sep 17 00:00:00 2001 From: Wellington Ramos Chevreuil Date: Tue, 14 Dec 2021 21:22:28 +0000 Subject: [PATCH] HBASE-26556 IT and Chaos Monkey improvements (#3932) Signed-off-by: Josh Elser Reviewed-by: Tak Lon (Stephen) Wu --- .../hadoop/hbase/HBaseClusterManager.java | 8 +- ...gurableSlowDeterministicMonkeyFactory.java | 100 ++++++++++++++++++ .../hbase/chaos/factories/MonkeyFactory.java | 2 + .../SlowDeterministicMonkeyFactory.java | 82 ++++++++------ 4 files changed, 155 insertions(+), 37 deletions(-) create mode 100644 hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ConfigurableSlowDeterministicMonkeyFactory.java diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java index 122fad5a0a9..f8df7a14325 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/HBaseClusterManager.java @@ -27,6 +27,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.hbase.HBaseClusterManager.CommandProvider.Operation; import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.ReflectionUtils; import org.apache.hadoop.hbase.util.RetryCounter; import org.apache.hadoop.hbase.util.RetryCounter.RetryConfig; import org.apache.hadoop.hbase.util.RetryCounterFactory; @@ -216,7 +217,7 @@ public class HBaseClusterManager extends Configured implements ClusterManager { } public String signalCommand(ServiceType service, String signal) { - return String.format("%s | xargs kill -s %s", findPidCommand(service), signal); + return String.format("%s | xargs sudo kill -s %s", findPidCommand(service), signal); } } @@ -322,7 +323,10 @@ public class HBaseClusterManager extends Configured implements ClusterManager { case ZOOKEEPER_SERVER: return new ZookeeperShellCommandProvider(getConf()); default: - return new HBaseShellCommandProvider(getConf()); + Class provider = getConf() + .getClass("hbase.it.clustermanager.hbase.command.provider", + HBaseShellCommandProvider.class, CommandProvider.class); + return ReflectionUtils.newInstance(provider, getConf()); } } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ConfigurableSlowDeterministicMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ConfigurableSlowDeterministicMonkeyFactory.java new file mode 100644 index 00000000000..c8ee40c35ef --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ConfigurableSlowDeterministicMonkeyFactory.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.chaos.factories; + +import java.lang.reflect.Constructor; +import java.util.function.Function; + +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.chaos.actions.Action; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ConfigurableSlowDeterministicMonkeyFactory extends SlowDeterministicMonkeyFactory { + + private static final Logger LOG = + LoggerFactory.getLogger(ConfigurableSlowDeterministicMonkeyFactory.class); + + final static String HEAVY_ACTIONS = "heavy.actions"; + final static String TABLE_PARAM = "\\$table_name"; + + public enum SupportedTypes { + FLOAT(p->Float.parseFloat(p)), + LONG(p-> Long.parseLong(p)), + INT(p-> Integer.parseInt(p)), + TABLENAME(p-> TableName.valueOf(p)); + + final Function converter; + + SupportedTypes(Function converter){ + this.converter = converter; + } + + Object convert(String param){ + return converter.apply(param); + } + } + + @Override + protected Action[] getHeavyWeightedActions() { + String actions = this.properties.getProperty(HEAVY_ACTIONS); + if(actions==null || actions.isEmpty()){ + return super.getHeavyWeightedActions(); + } else { + try { + String[] actionClasses = actions.split(";"); + Action[] heavyActions = new Action[actionClasses.length]; + for (int i = 0; i < actionClasses.length; i++) { + heavyActions[i] = instantiateAction(actionClasses[i]); + } + LOG.info("Created actions {}", heavyActions); + return heavyActions; + } catch(Exception e) { + LOG.error("Error trying to instantiate heavy actions. Returning null array.", e); + } + return null; + } + } + + private Action instantiateAction(String actionString) throws Exception { + final String packageName = "org.apache.hadoop.hbase.chaos.actions"; + String[] classAndParams = actionString.split("\\)")[0].split("\\("); + String className = packageName + "." + classAndParams[0]; + String[] params = classAndParams[1].replaceAll(TABLE_PARAM, + tableName.getNameAsString()).split(","); + LOG.info("About to instantiate action class: {}; With constructor params: {}", + className, params); + Class actionClass = (Class)Class.forName(className); + Constructor[] constructors = + (Constructor[]) actionClass.getDeclaredConstructors(); + for(Constructor c : constructors){ + if (c.getParameterCount() != params.length){ + continue; + } + Class[] paramTypes = c.getParameterTypes(); + Object[] constructorParams = new Object[paramTypes.length]; + for(int i=0; i FACTORIES = ImmutableMap.builder() .put(CALM, new CalmMonkeyFactory()) @@ -93,6 +94,7 @@ public abstract class MonkeyFactory { .put(SERVER_AND_DEPENDENCIES_KILLING, new ServerAndDependenciesKillingMonkeyFactory()) .put(DISTRIBUTED_ISSUES, new DistributedIssuesMonkeyFactory()) .put(DATA_ISSUES, new DataIssuesMonkeyFactory()) + .put(CONFIGURABLE_SLOW_DETERMINISTIC, new ConfigurableSlowDeterministicMonkeyFactory()) .build(); public static MonkeyFactory getFactory(String factoryName) { diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java index 432fd8bf004..91c407564bf 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/SlowDeterministicMonkeyFactory.java @@ -72,53 +72,65 @@ public class SlowDeterministicMonkeyFactory extends MonkeyFactory { private long rollingBatchSuspendRSSleepTime; private float rollingBatchSuspendtRSRatio; + protected Action[] getLightWeightedActions(){ + return new Action[] { + new CompactTableAction(tableName, compactTableRatio), + new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio), + new FlushTableAction(tableName), + new FlushRandomRegionOfTableAction(tableName), + new MoveRandomRegionOfTableAction(tableName) + }; + } + + protected Action[] getMidWeightedActions(){ + return new Action[] { + new SplitRandomRegionOfTableAction(tableName), + new MergeRandomAdjacentRegionsOfTableAction(tableName), + new SnapshotTableAction(tableName), + new AddColumnAction(tableName), + new RemoveColumnAction(tableName, columnFamilies), + new ChangeEncodingAction(tableName), + new ChangeCompressionAction(tableName), + new ChangeBloomFilterAction(tableName), + new ChangeVersionsAction(tableName), + new ChangeSplitPolicyAction(tableName), + }; + } + + protected Action[] getHeavyWeightedActions() { + return new Action[] { + new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime, + tableName), + new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName), + new RestartRandomRsAction(restartRandomRSSleepTime), + new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio), + new RestartActiveMasterAction(restartActiveMasterSleepTime), + new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, + rollingBatchRestartRSRatio), + new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime), + new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName), + new SplitAllRegionOfTableAction(tableName), + new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), + new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, + rollingBatchSuspendtRSRatio) + }; + } + @Override public ChaosMonkey build() { loadProperties(); // Actions such as compact/flush a table/region, // move one region around. They are not so destructive, // can be executed more frequently. - Action[] actions1 = new Action[] { - new CompactTableAction(tableName, compactTableRatio), - new CompactRandomRegionOfTableAction(tableName, compactRandomRegionRatio), - new FlushTableAction(tableName), - new FlushRandomRegionOfTableAction(tableName), - new MoveRandomRegionOfTableAction(tableName) - }; + Action[] actions1 = getLightWeightedActions(); // Actions such as split/merge/snapshot. // They should not cause data loss, or unreliability // such as region stuck in transition. - Action[] actions2 = new Action[] { - new SplitRandomRegionOfTableAction(tableName), - new MergeRandomAdjacentRegionsOfTableAction(tableName), - new SnapshotTableAction(tableName), - new AddColumnAction(tableName), - new RemoveColumnAction(tableName, columnFamilies), - new ChangeEncodingAction(tableName), - new ChangeCompressionAction(tableName), - new ChangeBloomFilterAction(tableName), - new ChangeVersionsAction(tableName), - new ChangeSplitPolicyAction(tableName), - }; + Action[] actions2 = getMidWeightedActions(); // Destructive actions to mess things around. - Action[] actions3 = new Action[] { - new MoveRegionsOfTableAction(moveRegionsSleepTime, moveRegionsMaxTime, - tableName), - new MoveRandomRegionOfTableAction(moveRandomRegionSleepTime, tableName), - new RestartRandomRsAction(restartRandomRSSleepTime), - new BatchRestartRsAction(batchRestartRSSleepTime, batchRestartRSRatio), - new RestartActiveMasterAction(restartActiveMasterSleepTime), - new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, - rollingBatchRestartRSRatio), - new RestartRsHoldingMetaAction(restartRsHoldingMetaSleepTime), - new DecreaseMaxHFileSizeAction(decreaseHFileSizeSleepTime, tableName), - new SplitAllRegionOfTableAction(tableName), - new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime), - new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime, - rollingBatchSuspendtRSRatio) - }; + Action[] actions3 = getHeavyWeightedActions(); // Action to log more info for debugging Action[] actions4 = new Action[] {