HBASE-8882 Create an Integration Test to Test MTTR
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1501042 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
26d5bcf418
commit
631b20dfe3
|
@ -74,7 +74,7 @@ public class IntegrationTestRebalanceAndKillServersTargeted extends IngestIntegr
|
|||
private static final long WAIT_AFTER_BALANCE_MS = 5 * 1000;
|
||||
|
||||
@Override
|
||||
protected void perform() throws Exception {
|
||||
public void perform() throws Exception {
|
||||
ClusterStatus status = this.cluster.getClusterStatus();
|
||||
List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
|
||||
int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size());
|
||||
|
|
|
@ -0,0 +1,458 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hbase.mttr;
|
||||
|
||||
import com.google.common.base.Objects;
|
||||
import org.apache.commons.lang.RandomStringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.ClusterStatus;
|
||||
import org.apache.hadoop.hbase.HColumnDescriptor;
|
||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.IntegrationTestingUtility;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.ChaosMonkey;
|
||||
import org.apache.hadoop.hbase.util.LoadTestTool;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.Callable;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static junit.framework.Assert.assertEquals;
|
||||
|
||||
/**
|
||||
* Integration test that should benchmark how fast HBase can recover from failures. This test starts
|
||||
* different threads:
|
||||
* <ol>
|
||||
* <li>
|
||||
* Load Test Tool.<br/>
|
||||
* This runs so that all RegionServers will have some load and HLogs will be full.
|
||||
* </li>
|
||||
* <li>
|
||||
* Scan thread.<br/>
|
||||
* This thread runs a very short scan over and over again recording how log it takes to respond.
|
||||
* The longest response is assumed to be the time it took to recover.
|
||||
* </li>
|
||||
* <li>
|
||||
* Put thread.<br/>
|
||||
* This thread just like the scan thread except it does a very small put.
|
||||
* </li>
|
||||
* <li>
|
||||
* Admin thread. <br/>
|
||||
* This thread will continually go to the master to try and get the cluster status. Just like the
|
||||
* put and scan threads, the time to respond is recorded.
|
||||
* </li>
|
||||
* <li>
|
||||
* Chaos Monkey thread.<br/>
|
||||
* This thread runs a ChaosMonkey.Action.
|
||||
* </li>
|
||||
* </ol>
|
||||
* <p/>
|
||||
* The ChaosMonkey actions currently run are:
|
||||
* <ul>
|
||||
* <li>Restart the RegionServer holding meta.</li>
|
||||
* <li>Restart the RegionServer holding the table the scan and put threads are targeting.</li>
|
||||
* <li>Move the Regions of the table used by the scan and put threads.</li>
|
||||
* <li>Restart the master.</li>
|
||||
* </ul>
|
||||
* <p/>
|
||||
* At the end of the test a log line is output on the INFO level containing the timing data that was
|
||||
*/
|
||||
public class IntegrationTestMTTR {
|
||||
/**
|
||||
* Constants.
|
||||
*/
|
||||
private static final byte[] FAMILY = Bytes.toBytes("d");
|
||||
private static final Log LOG = LogFactory.getLog(IntegrationTestMTTR.class);
|
||||
private static final long SLEEP_TIME = 60 * 1000l;
|
||||
|
||||
/**
|
||||
* Configurable table names.
|
||||
*/
|
||||
private static String tableName;
|
||||
private static byte[] tableNameBytes;
|
||||
private static String loadTableName;
|
||||
|
||||
/**
|
||||
* Util to get at the cluster.
|
||||
*/
|
||||
private static IntegrationTestingUtility util;
|
||||
|
||||
/**
|
||||
* Executor for test threads.
|
||||
*/
|
||||
private static ExecutorService executorService;
|
||||
|
||||
/**
|
||||
* All of the chaos monkey actions used.
|
||||
*/
|
||||
private static ChaosMonkey.Action restartRSAction;
|
||||
private static ChaosMonkey.Action restartMetaAction;
|
||||
private static ChaosMonkey.Action moveRegionAction;
|
||||
private static ChaosMonkey.Action restartMasterAction;
|
||||
|
||||
/**
|
||||
* The load test tool used to create load and make sure that HLogs aren't empty.
|
||||
*/
|
||||
private static LoadTestTool loadTool;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUp() throws Exception {
|
||||
// Set up the integration test util
|
||||
if (util == null) {
|
||||
util = new IntegrationTestingUtility();
|
||||
}
|
||||
|
||||
// Make sure there are three servers.
|
||||
util.initializeCluster(3);
|
||||
|
||||
// Set up the load test tool.
|
||||
loadTool = new LoadTestTool();
|
||||
loadTool.setConf(util.getConfiguration());
|
||||
|
||||
// Create executor with enough threads to restart rs's,
|
||||
// run scans, puts, admin ops and load test tool.
|
||||
executorService = Executors.newFixedThreadPool(8);
|
||||
|
||||
// Set up the tables needed.
|
||||
setupTables();
|
||||
|
||||
// Set up the actions.
|
||||
setupActions();
|
||||
}
|
||||
|
||||
private static void setupActions() throws IOException {
|
||||
// Set up the action that will restart a region server holding a region from our table
|
||||
// because this table should only have one region we should be good.
|
||||
restartRSAction = new ChaosMonkey.RestartRsHoldingTable(SLEEP_TIME, tableName);
|
||||
|
||||
// Set up the action that will kill the region holding meta.
|
||||
restartMetaAction = new ChaosMonkey.RestartRsHoldingMeta(SLEEP_TIME);
|
||||
|
||||
// Set up the action that will move the regions of our table.
|
||||
moveRegionAction = new ChaosMonkey.MoveRegionsOfTable(SLEEP_TIME, tableName);
|
||||
|
||||
// Kill the master (No sleep time because there is only one master running at this time.)
|
||||
restartMasterAction = new ChaosMonkey.RestartActiveMaster(0l);
|
||||
|
||||
// Give the action the access to the cluster.
|
||||
ChaosMonkey.ActionContext actionContext = new ChaosMonkey.ActionContext(util);
|
||||
restartRSAction.init(actionContext);
|
||||
restartMetaAction.init(actionContext);
|
||||
moveRegionAction.init(actionContext);
|
||||
restartMasterAction.init(actionContext);
|
||||
}
|
||||
|
||||
private static void setupTables() throws IOException {
|
||||
// Get the table name.
|
||||
tableName = util.getConfiguration()
|
||||
.get("hbase.IntegrationTestMTTR.tableName", "IntegrationTestMTTR");
|
||||
tableNameBytes = Bytes.toBytes(tableName);
|
||||
|
||||
loadTableName = util.getConfiguration()
|
||||
.get("hbase.IntegrationTestMTTR.loadTableName", "IntegrationTestMTTRLoadTestTool");
|
||||
|
||||
if (util.getHBaseAdmin().tableExists(tableNameBytes)) {
|
||||
util.deleteTable(tableNameBytes);
|
||||
}
|
||||
|
||||
if (util.getHBaseAdmin().tableExists(loadTableName)) {
|
||||
util.deleteTable(loadTableName);
|
||||
}
|
||||
|
||||
// Create the table. If this fails then fail everything.
|
||||
HTableDescriptor tableDescriptor = new HTableDescriptor(tableNameBytes);
|
||||
|
||||
// Make the max file size huge so that splits don't happen during the test.
|
||||
tableDescriptor.setMaxFileSize(2 * 1024 * 1024 * 1024);
|
||||
|
||||
HColumnDescriptor descriptor = new HColumnDescriptor(FAMILY);
|
||||
descriptor.setMaxVersions(1);
|
||||
tableDescriptor.addFamily(descriptor);
|
||||
util.getHBaseAdmin().createTable(tableDescriptor);
|
||||
|
||||
// Setup the table for LoadTestTool
|
||||
int ret = loadTool.run(new String[]{"-tn", loadTableName, "-init_only"});
|
||||
assertEquals("Failed to initialize LoadTestTool", 0, ret);
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void after() throws IOException {
|
||||
// Clean everything up.
|
||||
util.restoreCluster();
|
||||
util = null;
|
||||
|
||||
// Stop the threads so that we know everything is complete.
|
||||
executorService.shutdown();
|
||||
executorService = null;
|
||||
|
||||
// Clean up the actions.
|
||||
moveRegionAction = null;
|
||||
restartMetaAction = null;
|
||||
restartRSAction = null;
|
||||
restartMasterAction = null;
|
||||
|
||||
loadTool = null;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRestartRsHoldingTable() throws Exception {
|
||||
run(new ActionCallable(restartRSAction), "RestartRsHoldingTable");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testKillRsHoldingMeta() throws Exception {
|
||||
run(new ActionCallable(restartMetaAction), "KillRsHoldingMeta");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMoveRegion() throws Exception {
|
||||
run(new ActionCallable(moveRegionAction), "MoveRegion");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRestartMaster() throws Exception {
|
||||
run(new ActionCallable(restartMasterAction), "RestartMaster");
|
||||
}
|
||||
|
||||
public void run(Callable<Boolean> monkeyCallable, String testName) throws Exception {
|
||||
int maxIters = util.getHBaseClusterInterface().isDistributedCluster() ? 10 : 3;
|
||||
|
||||
// Array to keep track of times.
|
||||
ArrayList<Long> timesPutMs = new ArrayList<Long>(maxIters);
|
||||
ArrayList<Long> timesScanMs = new ArrayList<Long>(maxIters);
|
||||
ArrayList<Long> timesAdminMs = new ArrayList<Long>(maxIters);
|
||||
long start = System.nanoTime();
|
||||
|
||||
// We're going to try this multiple times
|
||||
for (int fullIterations = 0; fullIterations < maxIters; fullIterations++) {
|
||||
// Create and start executing a callable that will kill the servers
|
||||
Future<Boolean> monkeyFuture = executorService.submit(monkeyCallable);
|
||||
|
||||
// Pass that future to the timing Callables.
|
||||
Future<Long> putFuture = executorService.submit(new PutCallable(monkeyFuture));
|
||||
Future<Long> scanFuture = executorService.submit(new ScanCallable(monkeyFuture));
|
||||
Future<Long> adminFuture = executorService.submit(new AdminCallable(monkeyFuture));
|
||||
|
||||
Future<Boolean> loadFuture = executorService.submit(new LoadCallable(monkeyFuture));
|
||||
|
||||
monkeyFuture.get();
|
||||
loadFuture.get();
|
||||
|
||||
// Get the values from the futures.
|
||||
long putTime = TimeUnit.MILLISECONDS.convert(putFuture.get(), TimeUnit.NANOSECONDS);
|
||||
long scanTime = TimeUnit.MILLISECONDS.convert(scanFuture.get(), TimeUnit.NANOSECONDS);
|
||||
long adminTime = TimeUnit.MILLISECONDS.convert(adminFuture.get(), TimeUnit.NANOSECONDS);
|
||||
|
||||
// Store the times to display later.
|
||||
timesPutMs.add(putTime);
|
||||
timesScanMs.add(scanTime);
|
||||
timesAdminMs.add(adminTime);
|
||||
|
||||
// Wait some time for everything to settle down.
|
||||
Thread.sleep(5000l);
|
||||
}
|
||||
|
||||
long runtimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - start, TimeUnit.NANOSECONDS);
|
||||
|
||||
Objects.ToStringHelper helper = Objects.toStringHelper("MTTRResults")
|
||||
.add("timesPutMs", timesPutMs)
|
||||
.add("timesScanMs", timesScanMs)
|
||||
.add("timesAdminMs", timesAdminMs)
|
||||
.add("totalRuntimeMs", runtimeMs)
|
||||
.add("name", testName);
|
||||
|
||||
// Log the info
|
||||
LOG.info(helper.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Base class for actions that need to record the time needed to recover from a failure.
|
||||
*/
|
||||
public abstract class TimingCallable implements Callable<Long> {
|
||||
protected final Future future;
|
||||
|
||||
public TimingCallable(Future f) {
|
||||
future = f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Long call() throws Exception {
|
||||
// TODO(eclark): Do more than just max. Full histogram support.
|
||||
long maxTime = 0;
|
||||
int numAfterDone = 0;
|
||||
// Keep trying until the rs is back up and we've gotten a put through
|
||||
while (numAfterDone < 10) {
|
||||
long start = System.nanoTime();
|
||||
try {
|
||||
boolean result = doAction();
|
||||
if (result && future.isDone()) {
|
||||
numAfterDone ++;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
numAfterDone = 0;
|
||||
}
|
||||
maxTime = Math.max(maxTime, System.nanoTime() - start);
|
||||
}
|
||||
return maxTime;
|
||||
}
|
||||
|
||||
protected abstract boolean doAction() throws Exception;
|
||||
}
|
||||
|
||||
/**
|
||||
* Callable that will keep putting small amounts of data into a table
|
||||
* until the future supplied returns. It keeps track of the max time.
|
||||
*/
|
||||
public class PutCallable extends TimingCallable {
|
||||
|
||||
private final HTable table;
|
||||
|
||||
public PutCallable(Future f) throws IOException {
|
||||
super(f);
|
||||
this.table = new HTable(util.getConfiguration(), tableNameBytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean doAction() throws Exception {
|
||||
Put p = new Put(Bytes.toBytes(RandomStringUtils.randomAlphanumeric(5)));
|
||||
p.add(FAMILY, Bytes.toBytes("\0"), Bytes.toBytes(RandomStringUtils.randomAscii(5)));
|
||||
table.put(p);
|
||||
table.flushCommits();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Callable that will keep scanning for small amounts of data until the
|
||||
* supplied future returns. Returns the max time taken to scan.
|
||||
*/
|
||||
public class ScanCallable extends TimingCallable {
|
||||
private final HTable table;
|
||||
|
||||
public ScanCallable(Future f) throws IOException {
|
||||
super(f);
|
||||
this.table = new HTable(util.getConfiguration(), tableNameBytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean doAction() throws Exception {
|
||||
ResultScanner rs = null;
|
||||
try {
|
||||
Scan s = new Scan();
|
||||
s.setBatch(2);
|
||||
s.addFamily(FAMILY);
|
||||
s.setFilter(new KeyOnlyFilter());
|
||||
s.setMaxVersions(1);
|
||||
|
||||
rs = table.getScanner(s);
|
||||
Result result = rs.next();
|
||||
return rs != null && result != null && result.size() > 0;
|
||||
} finally {
|
||||
if (rs != null) {
|
||||
rs.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Callable that will keep going to the master for cluster status. Returns the max time taken.
|
||||
*/
|
||||
public class AdminCallable extends TimingCallable {
|
||||
|
||||
public AdminCallable(Future f) throws IOException {
|
||||
super(f);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean doAction() throws Exception {
|
||||
HBaseAdmin admin = new HBaseAdmin(util.getConfiguration());
|
||||
ClusterStatus status = admin.getClusterStatus();
|
||||
return status != null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public class ActionCallable implements Callable<Boolean> {
|
||||
private final ChaosMonkey.Action action;
|
||||
|
||||
public ActionCallable(ChaosMonkey.Action action) {
|
||||
|
||||
this.action = action;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean call() throws Exception {
|
||||
this.action.perform();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Callable used to make sure the cluster has some load on it.
|
||||
* This callable uses LoadTest tool to
|
||||
*/
|
||||
public class LoadCallable implements Callable<Boolean> {
|
||||
|
||||
private final Future future;
|
||||
|
||||
public LoadCallable(Future f) {
|
||||
future = f;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Boolean call() throws Exception {
|
||||
int colsPerKey = 10;
|
||||
int recordSize = 500;
|
||||
int numServers = util.getHBaseClusterInterface().getInitialClusterStatus().getServersSize();
|
||||
int numKeys = numServers * 5000;
|
||||
int writeThreads = 10;
|
||||
|
||||
|
||||
// Loop until the chaos monkey future is done.
|
||||
// But always go in just in case some action completes quickly
|
||||
do {
|
||||
int ret = loadTool.run(new String[]{
|
||||
"-tn", loadTableName,
|
||||
"-write", String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads),
|
||||
"-num_keys", String.valueOf(numKeys),
|
||||
"-skip_init"
|
||||
});
|
||||
assertEquals("Load failed", 0, ret);
|
||||
} while (!future.isDone());
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -29,6 +29,7 @@ import java.util.Queue;
|
|||
import java.util.Random;
|
||||
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
import org.apache.commons.lang.math.RandomUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
@ -42,6 +43,7 @@ import org.apache.hadoop.hbase.ServerLoad;
|
|||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.Stoppable;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.util.ToolRunner;
|
||||
|
||||
|
@ -110,10 +112,10 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
|||
/**
|
||||
* Context for Action's
|
||||
*/
|
||||
private static class ActionContext {
|
||||
public static class ActionContext {
|
||||
private IntegrationTestingUtility util;
|
||||
|
||||
ActionContext(IntegrationTestingUtility util) {
|
||||
public ActionContext(IntegrationTestingUtility util) {
|
||||
this.util = util;
|
||||
}
|
||||
|
||||
|
@ -141,7 +143,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
|||
protected ServerName[] initialServers;
|
||||
protected Random random = new Random();
|
||||
|
||||
void init(ActionContext context) throws Exception {
|
||||
public void init(ActionContext context) throws IOException {
|
||||
this.context = context;
|
||||
cluster = context.getHBaseCluster();
|
||||
initialStatus = cluster.getInitialClusterStatus();
|
||||
|
@ -149,7 +151,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
|||
initialServers = regionServers.toArray(new ServerName[regionServers.size()]);
|
||||
}
|
||||
|
||||
protected void perform() throws Exception { };
|
||||
public void perform() throws Exception { };
|
||||
|
||||
// TODO: perhaps these methods should be elsewhere?
|
||||
/** Returns current region servers */
|
||||
|
@ -254,7 +256,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
|||
super(sleepTime);
|
||||
}
|
||||
@Override
|
||||
protected void perform() throws Exception {
|
||||
public void perform() throws Exception {
|
||||
LOG.info("Performing action: Restart active master");
|
||||
|
||||
ServerName master = cluster.getClusterStatus().getMaster();
|
||||
|
@ -268,7 +270,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void perform() throws Exception {
|
||||
public void perform() throws Exception {
|
||||
LOG.info("Performing action: Restart random region server");
|
||||
ServerName server = selectRandomItem(getCurrentServers());
|
||||
|
||||
|
@ -276,12 +278,12 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
|||
}
|
||||
}
|
||||
|
||||
public static class RestartRsHoldingMeta extends RestartRandomRs {
|
||||
public static class RestartRsHoldingMeta extends RestartActionBase {
|
||||
public RestartRsHoldingMeta(long sleepTime) {
|
||||
super(sleepTime);
|
||||
}
|
||||
@Override
|
||||
protected void perform() throws Exception {
|
||||
public void perform() throws Exception {
|
||||
LOG.info("Performing action: Restart region server holding META");
|
||||
ServerName server = cluster.getServerHoldingMeta();
|
||||
if (server == null) {
|
||||
|
@ -292,6 +294,62 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
|||
}
|
||||
}
|
||||
|
||||
public static class RestartRsHoldingTable extends RestartActionBase {
|
||||
|
||||
private final String tableName;
|
||||
|
||||
public RestartRsHoldingTable(long sleepTime, String tableName) {
|
||||
super(sleepTime);
|
||||
this.tableName = tableName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void perform() throws Exception {
|
||||
HTable table = null;
|
||||
try {
|
||||
Configuration conf = context.getHaseIntegrationTestingUtility().getConfiguration();
|
||||
table = new HTable(conf, tableName);
|
||||
} catch (IOException e) {
|
||||
LOG.debug("Error creating HTable used to get list of region locations.", e);
|
||||
return;
|
||||
}
|
||||
|
||||
Collection<ServerName> serverNames = table.getRegionLocations().values();
|
||||
ServerName[] nameArray = serverNames.toArray(new ServerName[serverNames.size()]);
|
||||
|
||||
restartRs(nameArray[random.nextInt(nameArray.length)], sleepTime);
|
||||
}
|
||||
}
|
||||
|
||||
public static class MoveRegionsOfTable extends Action {
|
||||
private final long sleepTime;
|
||||
private final byte[] tableNameBytes;
|
||||
|
||||
public MoveRegionsOfTable(long sleepTime, String tableName) {
|
||||
this.sleepTime = sleepTime;
|
||||
this.tableNameBytes = Bytes.toBytes(tableName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void perform() throws Exception {
|
||||
HBaseAdmin admin = this.context.getHaseIntegrationTestingUtility().getHBaseAdmin();
|
||||
|
||||
List<HRegionInfo> regions = admin.getTableRegions(tableNameBytes);
|
||||
Collection<ServerName> serversList = admin.getClusterStatus().getServers();
|
||||
ServerName[] servers = serversList.toArray(new ServerName[serversList.size()]);
|
||||
|
||||
for (HRegionInfo regionInfo:regions) {
|
||||
try {
|
||||
byte[] destServerName =
|
||||
Bytes.toBytes(servers[RandomUtils.nextInt(servers.length)].getServerName());
|
||||
admin.move(regionInfo.getRegionName(), destServerName);
|
||||
} catch (Exception e) {
|
||||
LOG.debug("Error moving region", e);
|
||||
}
|
||||
}
|
||||
Thread.sleep(sleepTime);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Restarts a ratio of the running regionservers at the same time
|
||||
*/
|
||||
|
@ -304,7 +362,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void perform() throws Exception {
|
||||
public void perform() throws Exception {
|
||||
LOG.info(String.format("Performing action: Batch restarting %d%% of region servers",
|
||||
(int)(ratio * 100)));
|
||||
List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
|
||||
|
@ -346,7 +404,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void perform() throws Exception {
|
||||
public void perform() throws Exception {
|
||||
LOG.info(String.format("Performing action: Rolling batch restarting %d%% of region servers",
|
||||
(int)(ratio * 100)));
|
||||
List<ServerName> selectedServers = selectRandomItems(getCurrentServers(), ratio);
|
||||
|
@ -394,7 +452,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void perform() throws Exception {
|
||||
public void perform() throws Exception {
|
||||
LOG.info("Unbalancing regions");
|
||||
ClusterStatus status = this.cluster.getClusterStatus();
|
||||
List<ServerName> victimServers = new LinkedList<ServerName>(status.getServers());
|
||||
|
@ -410,7 +468,7 @@ public class ChaosMonkey extends AbstractHBaseTool implements Stoppable {
|
|||
|
||||
public static class ForceBalancerAction extends Action {
|
||||
@Override
|
||||
protected void perform() throws Exception {
|
||||
public void perform() throws Exception {
|
||||
LOG.info("Balancing regions");
|
||||
forceBalancer();
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
@ -79,7 +80,7 @@ public abstract class AbstractHBaseTool implements Tool {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final int run(String[] args) throws Exception {
|
||||
public final int run(String[] args) throws IOException {
|
||||
if (conf == null) {
|
||||
LOG.error("Tool configuration is not initialized");
|
||||
throw new NullPointerException("conf");
|
||||
|
|
Loading…
Reference in New Issue