HBASE-13470 High level Integration test for master DDL operations (Sophia Feng)

Enis Soztutar 2015-06-16 22:22:35 -07:00
parent f469c3bd97
commit 1b5ad45b85
3 changed files with 900 additions and 0 deletions


@@ -0,0 +1,827 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.commons.lang.math.RandomUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.testclassification.IntegrationTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.HBaseFsck;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.hbck.HbckTestingUtil;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;
/**
*
* Integration test that verifies Procedure V2. <br/><br/>
*
* DDL operations should go through (roll forward or roll back) when the primary master is
* killed by ChaosMonkey (default MASTER_KILLING)<br/><br/>
*
* Multiple Worker threads are started, each randomly performing the following Actions in a loop:<br/>
* Actions generating and populating tables:
* <ul>
* <li>CreateTableAction</li>
* <li>DisableTableAction</li>
* <li>EnableTableAction</li>
* <li>DeleteTableAction</li>
* <li>AddRowAction</li>
* </ul>
* Actions performing DDL operations:
* <ul>
* <li>AddColumnFamilyAction</li>
* <li>AlterColumnFamilyVersionsAction</li>
* <li>AlterColumnFamilyEncodingAction</li>
* <li>DeleteColumnFamilyAction</li>
* </ul>
* <br/>
*
* The threads run for a configurable period of time (default 20 minutes) and are stopped at
* the end of the runtime. Verification is then performed against these checkpoints:
* <ol>
* <li>No Actions throw Exceptions.</li>
* <li>No inconsistencies are detected in hbck.</li>
* </ol>
*
* <p>
* This test should be run by the hbase user, since it invokes hbck at the end
* </p><p>
* Usage:
* hbase org.apache.hadoop.hbase.IntegrationTestDDLMasterFailover
* -Dhbase.IntegrationTestDDLMasterFailover.runtime=1200000
* -Dhbase.IntegrationTestDDLMasterFailover.numThreads=20
* -Dhbase.IntegrationTestDDLMasterFailover.numRegions=50 --monkey masterKilling
*/
@Category(IntegrationTests.class)
public class IntegrationTestDDLMasterFailover extends IntegrationTestBase {
private static final Log LOG = LogFactory.getLog(IntegrationTestDDLMasterFailover.class);
private static final int SERVER_COUNT = 1; // number of slaves for the smallest cluster
protected static final long DEFAULT_RUN_TIME = 20 * 60 * 1000;
protected static final int DEFAULT_NUM_THREADS = 20;
protected static final int DEFAULT_NUM_REGIONS = 50; // number of regions in pre-split tables
protected HBaseCluster cluster;
protected Connection connection;
/**
* A soft limit on how long we should run
*/
protected static final String RUN_TIME_KEY = "hbase.%s.runtime";
protected static final String NUM_THREADS_KEY = "hbase.%s.numThreads";
protected static final String NUM_REGIONS_KEY = "hbase.%s.numRegions";
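// %s in the keys above expands to the test class' simple name, e.g.
// hbase.IntegrationTestDDLMasterFailover.runtime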
protected AtomicBoolean running = new AtomicBoolean(true);
protected AtomicBoolean create_table = new AtomicBoolean(true);
protected int numThreads, numRegions;
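// Expected table states, keyed by table name; Actions move tables between these maps and
// verifyTables() asserts that the cluster state agrees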
ConcurrentHashMap<TableName, HTableDescriptor> enabledTables =
new ConcurrentHashMap<TableName, HTableDescriptor>();
ConcurrentHashMap<TableName, HTableDescriptor> disabledTables =
new ConcurrentHashMap<TableName, HTableDescriptor>();
ConcurrentHashMap<TableName, HTableDescriptor> deletedTables =
new ConcurrentHashMap<TableName, HTableDescriptor>();
@Override
public void setUpCluster() throws Exception {
util = getTestingUtil(getConf());
LOG.debug("Initializing/checking cluster has " + SERVER_COUNT + " servers");
util.initializeCluster(getMinServerCount());
LOG.debug("Done initializing/checking cluster");
cluster = util.getHBaseClusterInterface();
}
@Override
public void cleanUpCluster() throws Exception {
Admin admin = util.getHBaseAdmin();
admin.disableTables("ittable-\\d+");
admin.deleteTables("ittable-\\d+");
Connection connection = getConnection();
connection.close();
super.cleanUpCluster();
}
protected int getMinServerCount() {
return SERVER_COUNT;
}
protected synchronized void setConnection(Connection connection){
this.connection = connection;
}
protected synchronized Connection getConnection(){
if (this.connection == null) {
try {
Connection connection = ConnectionFactory.createConnection(getConf());
setConnection(connection);
} catch (IOException e) {
LOG.fatal("Failed to establish connection.", e);
}
}
return connection;
}
protected void verifyTables() throws IOException{
Connection connection = getConnection();
Admin admin = connection.getAdmin();
// iterating concurrent map
for (TableName tableName : enabledTables.keySet()){
Assert.assertTrue("Table: " + tableName + " in enabledTables is not enabled",
admin.isTableEnabled(tableName));
}
for (TableName tableName : disabledTables.keySet()){
Assert.assertTrue("Table: " + tableName + " in disabledTables is not disabled",
admin.isTableDisabled(tableName));
}
for (TableName tableName : deletedTables.keySet()){
Assert.assertFalse("Table: " + tableName + " in deletedTables is not deleted",
admin.tableExists(tableName));
}
admin.close();
}
@Test
public void testAsUnitTest() throws Exception {
runTest();
}
@Override
public int runTestFromCommandLine() throws Exception {
int ret = runTest();
return ret;
}
private abstract class MasterAction{
Connection connection = getConnection();
abstract void perform() throws IOException;
}
private abstract class TableAction extends MasterAction{
// TableAction implements selectTable(), shared by multiple table Actions
protected HTableDescriptor selectTable(ConcurrentHashMap<TableName, HTableDescriptor> tableMap)
{
// randomly select table from tableMap
if (tableMap.isEmpty()){
return null;
}
// synchronization to prevent removal from multiple threads
synchronized (tableMap){
ArrayList<TableName> tableList = new ArrayList<TableName>(tableMap.keySet());
TableName randomKey = tableList.get(RandomUtils.nextInt(tableList.size()));
HTableDescriptor randomHtd = tableMap.get(randomKey);
// remove from tableMap
tableMap.remove(randomKey);
return randomHtd;
}
}
}
private class CreateTableAction extends TableAction {
@Override
void perform() throws IOException {
Admin admin = connection.getAdmin();
try {
HTableDescriptor htd = createTableDesc();
TableName tableName = htd.getTableName();
if (admin.tableExists(tableName)) {
return;
}
String numRegionKey = String.format(NUM_REGIONS_KEY, IntegrationTestDDLMasterFailover.class.getSimpleName());
numRegions = getConf().getInt(numRegionKey, DEFAULT_NUM_REGIONS);
byte[] startKey = Bytes.toBytes("row-0000000000");
byte[] endKey = Bytes.toBytes("row-" + Integer.MAX_VALUE);
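// pre-split the table into numRegions regions between the fixed start and end row keys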
LOG.info("Creating table:" + htd);
admin.createTable(htd, startKey, endKey, numRegions);
Assert.assertTrue("Table: " + htd + " was not created", admin.tableExists(tableName));
HTableDescriptor freshTableDesc = admin.getTableDescriptor(tableName);
enabledTables.put(tableName, freshTableDesc);
LOG.info("Created table:" + freshTableDesc);
} catch (Exception e){
LOG.warn("Caught exception in action: " + this.getClass());
// TODO workaround
// If master failover happens during CREATE_TABLE, the client RPC retry may get a
// TableExistsException; ignore it for now until there is a better resolution
if (e instanceof TableExistsException) {
LOG.warn("Caught TableExistsException in action: " + this.getClass(), e);
} else {
throw e;
}
} finally {
admin.close();
}
verifyTables();
}
private HTableDescriptor createTableDesc() {
String tableName = "ittable-" + String.format("%010d",
RandomUtils.nextInt(Integer.MAX_VALUE));
String familyName = "cf-" + Math.abs(RandomUtils.nextInt());
HTableDescriptor htd = new HTableDescriptor(TableName.valueOf(tableName));
// add random column family
htd.addFamily(new HColumnDescriptor(familyName));
return htd;
}
}
private class DisableTableAction extends TableAction {
@Override
void perform() throws IOException {
HTableDescriptor selected = selectTable(enabledTables);
if (selected == null) {
return;
}
Admin admin = connection.getAdmin();
try {
TableName tableName = selected.getTableName();
LOG.info("Disabling table :" + selected);
admin.disableTable(tableName);
Assert.assertTrue("Table: " + selected + " was not disabled",
admin.isTableDisabled(tableName));
HTableDescriptor freshTableDesc = admin.getTableDescriptor(tableName);
disabledTables.put(tableName, freshTableDesc);
LOG.info("Disabled table :" + freshTableDesc);
} catch (Exception e){
LOG.warn("Caught exception in action: " + this.getClass());
// TODO workaround
// Relax the check for TableNotDisabledException/TableNotEnabledException thrown by sync
// operations:
// 1) when enable/disable starts, the table state is changed to ENABLING/DISABLING (a ZK node
// in 1.x), and is changed again to ENABLED/DISABLED once the operation completes
// 2) if master failover happens in the middle of the enable/disable operation, the new
// master will try to recover the tables in ENABLING/DISABLING state, as programmed in
// AssignmentManager#recoverTableInEnablingState() and
// AssignmentManager#recoverTableInDisablingState()
// 3) after the new master initialization completes, the procedure tries to re-do the
// enable/disable operation, which was already done. Ignore these exceptions until the
// behavior of AssignmentManager changes in the presence of Procedure V2
if (e instanceof TableNotEnabledException) {
LOG.warn("Caught TableNotEnabledException in action: " + this.getClass());
e.printStackTrace();
} else {
throw e;
}
} finally {
admin.close();
}
verifyTables();
}
}
private class EnableTableAction extends TableAction {
@Override
void perform() throws IOException {
HTableDescriptor selected = selectTable(disabledTables);
if (selected == null ) {
return;
}
Admin admin = connection.getAdmin();
try {
TableName tableName = selected.getTableName();
LOG.info("Enabling table :" + selected);
admin.enableTable(tableName);
Assert.assertTrue("Table: " + selected + " was not enabled",
admin.isTableEnabled(tableName));
HTableDescriptor freshTableDesc = admin.getTableDescriptor(tableName);
enabledTables.put(tableName, freshTableDesc);
LOG.info("Enabled table :" + freshTableDesc);
} catch (Exception e){
LOG.warn("Caught exception in action: " + this.getClass());
// TODO workaround
// Relax the check for TableNotDisabledException/TableNotEnabledException thrown by sync
// operations:
// 1) when enable/disable starts, the table state is changed to ENABLING/DISABLING (a ZK node
// in 1.x), and is changed again to ENABLED/DISABLED once the operation completes
// 2) if master failover happens in the middle of the enable/disable operation, the new
// master will try to recover the tables in ENABLING/DISABLING state, as programmed in
// AssignmentManager#recoverTableInEnablingState() and
// AssignmentManager#recoverTableInDisablingState()
// 3) after the new master initialization completes, the procedure tries to re-do the
// enable/disable operation, which was already done. Ignore these exceptions until the
// behavior of AssignmentManager changes in the presence of Procedure V2
if (e instanceof TableNotDisabledException) {
LOG.warn("Caught TableNotDisabledException in action: " + this.getClass());
e.printStackTrace();
} else {
throw e;
}
} finally {
admin.close();
}
verifyTables();
}
}
private class DeleteTableAction extends TableAction {
@Override
void perform() throws IOException {
HTableDescriptor selected = selectTable(disabledTables);
if (selected == null) {
return;
}
Admin admin = connection.getAdmin();
try {
TableName tableName = selected.getTableName();
LOG.info("Deleting table :" + selected);
admin.deleteTable(tableName);
Assert.assertFalse("Table: " + selected + " was not deleted",
admin.tableExists(tableName));
deletedTables.put(tableName, selected);
LOG.info("Deleted table :" + selected);
} catch (Exception e){
LOG.warn("Caught exception in action: " + this.getClass());
// TODO workaround
// If master failover happens during DELETE_TABLE, the client RPC retry may get a
// TableNotFoundException; ignore it for now until there is a better resolution
if (e instanceof TableNotFoundException) {
LOG.warn("Caught TableNotFoundException in action: " + this.getClass());
e.printStackTrace();
} else {
throw e;
}
} finally {
admin.close();
}
verifyTables();
}
}
private abstract class ColumnAction extends TableAction{
// ColumnAction implements selectFamily(), shared by multiple family Actions
protected HColumnDescriptor selectFamily(HTableDescriptor htd) {
if (htd == null) {
return null;
}
HColumnDescriptor[] families = htd.getColumnFamilies();
if (families.length == 0){
LOG.info("No column families in table: " + htd);
return null;
}
HColumnDescriptor randomCfd = families[RandomUtils.nextInt(families.length)];
return randomCfd;
}
}
private class AddColumnFamilyAction extends ColumnAction {
@Override
void perform() throws IOException {
HTableDescriptor selected = selectTable(disabledTables);
if (selected == null) {
return;
}
Admin admin = connection.getAdmin();
try {
HColumnDescriptor cfd = createFamilyDesc();
if (selected.hasFamily(cfd.getName())){
LOG.info(Bytes.toString(cfd.getName()) + " already exists in table "
+ selected.getTableName());
return;
}
TableName tableName = selected.getTableName();
LOG.info("Adding column family: " + cfd + " to table: " + tableName);
admin.addColumn(tableName, cfd);
// assertion
HTableDescriptor freshTableDesc = admin.getTableDescriptor(tableName);
Assert.assertTrue("Column family: " + cfd + " was not added",
freshTableDesc.hasFamily(cfd.getName()));
LOG.info("Added column family: " + cfd + " to table: " + tableName);
disabledTables.put(tableName, freshTableDesc);
} catch (Exception e){
LOG.warn("Caught exception in action: " + this.getClass());
// TODO HBASE-13415
// Relax the check for InvalidFamilyOperationException thrown by async operations until
// HBASE-13415 is complete: when failover happens, multiple proc ids may be created from the
// same request; once one procedure succeeds, the others complain that the family already
// exists
if (e instanceof InvalidFamilyOperationException) {
LOG.warn("Caught InvalidFamilyOperationException in action: " + this.getClass());
e.printStackTrace();
} else {
throw e;
}
} finally {
admin.close();
}
verifyTables();
}
private HColumnDescriptor createFamilyDesc() {
String familyName = "cf-" + String.format("%010d", RandomUtils.nextInt(Integer.MAX_VALUE));
HColumnDescriptor cfd = new HColumnDescriptor(familyName);
return cfd;
}
}
private class AlterFamilyVersionsAction extends ColumnAction {
@Override
void perform() throws IOException {
HTableDescriptor selected = selectTable(disabledTables);
if (selected == null) {
return;
}
HColumnDescriptor columnDesc = selectFamily(selected);
if (columnDesc == null){
return;
}
Admin admin = connection.getAdmin();
int versions = RandomUtils.nextInt(10) + 3;
try {
TableName tableName = selected.getTableName();
LOG.info("Altering versions of column family: " + columnDesc + " to: " + versions +
" in table: " + tableName);
columnDesc.setMinVersions(versions);
columnDesc.setMaxVersions(versions);
admin.modifyTable(tableName, selected);
// assertion
HTableDescriptor freshTableDesc = admin.getTableDescriptor(tableName);
HColumnDescriptor freshColumnDesc = freshTableDesc.getFamily(columnDesc.getName());
Assert.assertEquals("Column family: " + columnDesc + " was not altered",
freshColumnDesc.getMaxVersions(), versions);
Assert.assertEquals("Column family: " + freshColumnDesc + " was not altered",
freshColumnDesc.getMinVersions(), versions);
LOG.info("Altered versions of column family: " + columnDesc + " to: " + versions +
" in table: " + tableName);
disabledTables.put(tableName, freshTableDesc);
} catch (Exception e) {
LOG.warn("Caught exception in action: " + this.getClass());
throw e;
} finally {
admin.close();
}
verifyTables();
}
}
private class AlterFamilyEncodingAction extends ColumnAction {
@Override
void perform() throws IOException {
HTableDescriptor selected = selectTable(disabledTables);
if (selected == null) {
return;
}
HColumnDescriptor columnDesc = selectFamily(selected);
if (columnDesc == null){
return;
}
Admin admin = connection.getAdmin();
try {
TableName tableName = selected.getTableName();
// possible DataBlockEncoding ids
int[] possibleIds = {0, 2, 3, 4, 6};
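// assumed id mapping in this HBase version: 0=NONE, 2=PREFIX, 3=DIFF, 4=FAST_DIFF, 6=PREFIX_TREE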
short id = (short) possibleIds[RandomUtils.nextInt(possibleIds.length)];
LOG.info("Altering encoding of column family: " + columnDesc + " to: " + id +
" in table: " + tableName);
columnDesc.setDataBlockEncoding(DataBlockEncoding.getEncodingById(id));
admin.modifyTable(tableName, selected);
// assertion
HTableDescriptor freshTableDesc = admin.getTableDescriptor(tableName);
HColumnDescriptor freshColumnDesc = freshTableDesc.getFamily(columnDesc.getName());
Assert.assertEquals("Encoding of column family: " + columnDesc + " was not altered",
freshColumnDesc.getDataBlockEncoding().getId(), id);
LOG.info("Altered encoding of column family: " + freshColumnDesc + " to: " + id +
" in table: " + tableName);
disabledTables.put(tableName, freshTableDesc);
} catch (Exception e) {
LOG.warn("Caught exception in action: " + this.getClass());
throw e;
} finally {
admin.close();
}
verifyTables();
}
}
private class DeleteColumnFamilyAction extends ColumnAction {
@Override
void perform() throws IOException {
HTableDescriptor selected = selectTable(disabledTables);
HColumnDescriptor cfd = selectFamily(selected);
if (selected == null || cfd == null) {
return;
}
Admin admin = connection.getAdmin();
try {
if (selected.getColumnFamilies().length < 2) {
LOG.info("No enough column families to delete in table " + selected.getTableName());
return;
}
TableName tableName = selected.getTableName();
LOG.info("Deleting column family: " + cfd + " from table: " + tableName);
admin.deleteColumn(tableName, cfd.getName());
// assertion
HTableDescriptor freshTableDesc = admin.getTableDescriptor(tableName);
Assert.assertFalse("Column family: " + cfd + " was not added",
freshTableDesc.hasFamily(cfd.getName()));
LOG.info("Deleted column family: " + cfd + " from table: " + tableName);
disabledTables.put(tableName, freshTableDesc);
} catch (Exception e) {
LOG.warn("Caught exception in action: " + this.getClass());
// TODO HBASE-13415
// Relax the check for InvalidFamilyOperationException thrown by async operations until
// HBASE-13415 is complete: when failover happens, multiple proc ids may be created from the
// same request; once one procedure succeeds, the others complain that the family does not
// exist
if (e instanceof InvalidFamilyOperationException) {
LOG.warn("Caught InvalidFamilyOperationException in action: " + this.getClass());
e.printStackTrace();
} else {
throw e;
}
} finally {
admin.close();
}
verifyTables();
}
}
private class AddRowAction extends ColumnAction {
// populate tables
@Override
void perform() throws IOException {
HTableDescriptor selected = selectTable(enabledTables);
if (selected == null ) {
return;
}
Admin admin = connection.getAdmin();
TableName tableName = selected.getTableName();
try (Table table = connection.getTable(tableName)){
ArrayList<HRegionInfo> regionInfos = new ArrayList<HRegionInfo>(admin.getTableRegions(
selected.getTableName()));
int numRegions = regionInfos.size();
// average number of rows to be added per action to each region
int averageRows = 1;
int numRows = averageRows * numRegions;
LOG.info("Adding " + numRows + " rows to table: " + selected);
for (int i = 0; i < numRows; i++){
// nextInt(Integer.MAX_VALUE) returns non-negative numbers only
byte[] rowKey = Bytes.toBytes(
"row-" + String.format("%010d", RandomUtils.nextInt(Integer.MAX_VALUE)));
HColumnDescriptor cfd = selectFamily(selected);
if (cfd == null){
return;
}
byte[] family = cfd.getName();
byte[] qualifier = Bytes.toBytes("col-" + RandomUtils.nextInt(Integer.MAX_VALUE) % 10);
byte[] value = Bytes.toBytes("val-" + RandomStringUtils.randomAlphanumeric(10));
Put put = new Put(rowKey);
put.addColumn(family, qualifier, value);
table.put(put);
}
HTableDescriptor freshTableDesc = admin.getTableDescriptor(tableName);
enabledTables.put(tableName, freshTableDesc);
LOG.info("Added " + numRows + " rows to table: " + selected);
} catch (Exception e) {
LOG.warn("Caught exception in action: " + this.getClass());
throw e;
} finally {
admin.close();
}
verifyTables();
}
}
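// The pool of operations from which each Worker randomly draws on every loop iteration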
private enum ACTION {
CREATE_TABLE,
DISABLE_TABLE,
ENABLE_TABLE,
DELETE_TABLE,
ADD_COLUMNFAMILY,
DELETE_COLUMNFAMILY,
ALTER_FAMILYVERSIONS,
ALTER_FAMILYENCODING,
ADD_ROW
}
private class Worker extends Thread {
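// The first exception thrown by an Action in this worker and the Action being performed
// when it was thrown; both are surfaced through checkException() after the run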
private Exception savedException;
private ACTION action;
@Override
public void run() {
while (running.get()) {
// select random action
ACTION selectedAction = ACTION.values()[RandomUtils.nextInt(ACTION.values().length)];
this.action = selectedAction;
LOG.info("Performing Action: " + selectedAction);
try {
switch (selectedAction) {
case CREATE_TABLE:
// stop creating new tables in the later stage of the test to avoid too many empty
// tables
if (create_table.get()) {
new CreateTableAction().perform();
}
break;
case ADD_ROW:
new AddRowAction().perform();
break;
case DISABLE_TABLE:
new DisableTableAction().perform();
break;
case ENABLE_TABLE:
new EnableTableAction().perform();
break;
case DELETE_TABLE:
// reduce probability of deleting table to 20%
if (RandomUtils.nextInt(100) < 20) {
new DeleteTableAction().perform();
}
break;
case ADD_COLUMNFAMILY:
new AddColumnFamilyAction().perform();
break;
case DELETE_COLUMNFAMILY:
// reduce probability of deleting column family to 20%
if (RandomUtils.nextInt(100) < 20) {
new DeleteColumnFamilyAction().perform();
}
break;
case ALTER_FAMILYVERSIONS:
new AlterFamilyVersionsAction().perform();
break;
case ALTER_FAMILYENCODING:
new AlterFamilyEncodingAction().perform();
break;
}
} catch (Exception ex) {
this.savedException = ex;
return;
}
}
LOG.info(this.getName() + " stopped");
}
public Exception getSavedException(){
return this.savedException;
}
public ACTION getAction(){
return this.action;
}
}
private void checkException(List<Worker> workers){
if (workers == null || workers.isEmpty()) {
return;
}
for (Worker worker : workers){
Exception e = worker.getSavedException();
if (e != null) {
LOG.error("Found exception in thread: " + worker.getName());
e.printStackTrace();
}
Assert.assertNull("Action failed: " + worker.getAction() + " in thread: "
+ worker.getName(), e);
}
}
private int runTest() throws Exception {
LOG.info("Starting the test");
String runtimeKey = String.format(RUN_TIME_KEY, this.getClass().getSimpleName());
long runtime = util.getConfiguration().getLong(runtimeKey, DEFAULT_RUN_TIME);
String numThreadKey = String.format(NUM_THREADS_KEY, this.getClass().getSimpleName());
numThreads = util.getConfiguration().getInt(numThreadKey, DEFAULT_NUM_THREADS);
ArrayList<Worker> workers = new ArrayList<>();
for (int i = 0; i < numThreads; i++) {
checkException(workers);
Worker worker = new Worker();
LOG.info("Launching worker thread " + worker.getName());
workers.add(worker);
worker.start();
}
Threads.sleep(runtime / 2);
LOG.info("Stopping creating new tables");
create_table.set(false);
Threads.sleep(runtime / 2);
LOG.info("Runtime is up");
running.set(false);
checkException(workers);
for (Worker worker : workers) {
worker.join();
}
LOG.info("All Worker threads stopped");
// verify
LOG.info("Verify actions of all threads succeeded");
checkException(workers);
LOG.info("Verify states of all tables");
verifyTables();
// RUN HBCK
HBaseFsck hbck = null;
try {
LOG.info("Running hbck");
hbck = HbckTestingUtil.doFsck(util.getConfiguration(), false);
HbckTestingUtil.assertNoErrors(hbck);
LOG.info("Finished hbck");
} finally {
if (hbck != null) {
hbck.close();
}
}
return 0;
}
@Override
public TableName getTablename() {
return null;
}
@Override
protected Set<String> getColumnFamilies() {
return null;
}
public static void main(String[] args) throws Exception {
Configuration conf = HBaseConfiguration.create();
IntegrationTestingUtility.setUseDistributedCluster(conf);
IntegrationTestDDLMasterFailover masterFailover = new IntegrationTestDDLMasterFailover();
Connection connection = null;
int ret = 1;
try {
// Initialize connection once, then pass to Actions
LOG.debug("Setting up connection ...");
connection = ConnectionFactory.createConnection(conf);
masterFailover.setConnection(connection);
ret = ToolRunner.run(conf, masterFailover, args);
} catch (IOException e){
LOG.fatal("Failed to establish connection. Aborting test ...", e);
} finally {
connection = masterFailover.getConnection();
if (connection != null){
connection.close();
}
System.exit(ret);
}
}
}


@@ -0,0 +1,71 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.chaos.factories;
import org.apache.hadoop.hbase.chaos.actions.Action;
import org.apache.hadoop.hbase.chaos.actions.DumpClusterStatusAction;
import org.apache.hadoop.hbase.chaos.actions.RestartActiveMasterAction;
import org.apache.hadoop.hbase.chaos.monkies.ChaosMonkey;
import org.apache.hadoop.hbase.chaos.monkies.PolicyBasedChaosMonkey;
import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
/**
* A chaos monkey that periodically kills the active master. Can be run against a
* single-master or multi-master setup.
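* Selected on the command line via {@code --monkey masterKilling}.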
*/
public class MasterKillingMonkeyFactory extends MonkeyFactory {
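// Periods (in ms) for the destructive and status-dump policies, plus the sleep time for
// RestartActiveMasterAction; all three can be overridden through monkey properties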
private long action1Period;
private long action2Period;
private long restartActiveMasterSleepTime;
@Override
public ChaosMonkey build() {
loadProperties();
// Destructive actions to mess things around.
Action[] actions1 = new Action[] {
new RestartActiveMasterAction(restartActiveMasterSleepTime),
};
// Action to log more info for debugging
Action[] actions2 = new Action[] {
new DumpClusterStatusAction()
};
return new PolicyBasedChaosMonkey(util,
new PeriodicRandomActionPolicy(action1Period, actions1),
new PeriodicRandomActionPolicy(action2Period, actions2));
}
private void loadProperties() {
action1Period = Long.parseLong(this.properties.getProperty(
MonkeyConstants.PERIODIC_ACTION1_PERIOD,
MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
action2Period = Long.parseLong(this.properties.getProperty(
MonkeyConstants.PERIODIC_ACTION2_PERIOD,
MonkeyConstants.DEFAULT_PERIODIC_ACTION2_PERIOD + ""));
restartActiveMasterSleepTime = Long.parseLong(this.properties.getProperty(
MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
}
}


@@ -69,6 +69,7 @@ public abstract class MonkeyFactory {
public static final String SERVER_KILLING = "serverKilling";
public static final String STRESS_AM = "stressAM";
public static final String NO_KILL = "noKill";
public static final String MASTER_KILLING = "masterKilling";
public static Map<String, MonkeyFactory> FACTORIES = ImmutableMap.<String,MonkeyFactory>builder()
.put(CALM, new CalmMonkeyFactory())
@@ -77,6 +78,7 @@ public abstract class MonkeyFactory {
.put(SERVER_KILLING, new ServerKillingMonkeyFactory())
.put(STRESS_AM, new StressAssignmentManagerMonkeyFactory())
.put(NO_KILL, new NoKillMonkeyFactory())
.put(MASTER_KILLING, new MasterKillingMonkeyFactory())
.build();
public static MonkeyFactory getFactory(String factoryName) {