diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java index 1ac14589f81..0035c2c4c72 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/actions/UnbalanceKillAndRebalanceAction.java @@ -41,12 +41,15 @@ public class UnbalanceKillAndRebalanceAction extends Action { private long waitForUnbalanceMilliSec; private long waitForKillsMilliSec; private long waitAfterBalanceMilliSec; + private boolean killMetaRs; - public UnbalanceKillAndRebalanceAction(long waitUnbalance, long waitKill, long waitAfterBalance) { + public UnbalanceKillAndRebalanceAction(long waitUnbalance, long waitKill, long waitAfterBalance, + boolean killMetaRs) { super(); waitForUnbalanceMilliSec = waitUnbalance; waitForKillsMilliSec = waitKill; waitAfterBalanceMilliSec = waitAfterBalance; + this.killMetaRs = killMetaRs; } @Override @@ -57,7 +60,9 @@ public class UnbalanceKillAndRebalanceAction extends Action { int liveCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_LIVE * victimServers.size()); int deadCount = (int)Math.ceil(FRC_SERVERS_THAT_HOARD_AND_DIE * victimServers.size()); - Assert.assertTrue((liveCount + deadCount) < victimServers.size()); + Assert.assertTrue( + "There are not enough victim servers: " + victimServers.size(), + liveCount + deadCount < victimServers.size()); List targetServers = new ArrayList(liveCount); for (int i = 0; i < liveCount + deadCount; ++i) { int victimIx = RandomUtils.nextInt(victimServers.size()); @@ -65,14 +70,23 @@ public class UnbalanceKillAndRebalanceAction extends Action { } unbalanceRegions(status, victimServers, targetServers, HOARD_FRC_OF_REGIONS); Thread.sleep(waitForUnbalanceMilliSec); - for (int i = 0; i < liveCount; ++i) { + ServerName metaServer = cluster.getServerHoldingMeta(); + for (ServerName targetServer: targetServers) { // Don't keep killing servers if we're // trying to stop the monkey. if (context.isStopping()) { break; } - killRs(targetServers.get(i)); - killedServers.add(targetServers.get(i)); + if (killedServers.size() >= liveCount) { + break; + } + + if (!killMetaRs && targetServer.equals(metaServer)) { + LOG.info("Not killing server because it holds hbase:meta."); + } else { + killRs(targetServer); + killedServers.add(targetServer); + } } Thread.sleep(waitForKillsMilliSec); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java index 49f3b554f77..452d903f101 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java @@ -39,6 +39,7 @@ public interface MonkeyConstants { String UNBALANCE_WAIT_FOR_UNBALANCE_MS = "unbalance.action.wait.period"; String UNBALANCE_WAIT_FOR_KILLS_MS = "unbalance.action.kill.period"; String UNBALANCE_WAIT_AFTER_BALANCE_MS = "unbalance.action.wait.after.period"; + String UNBALANCE_KILL_META_RS = "unbalance.action.kill.meta.rs"; String DECREASE_HFILE_SIZE_SLEEP_TIME = "decrease.hfile.size.sleep.time"; long DEFAULT_PERIODIC_ACTION1_PERIOD = 60 * 1000; @@ -61,5 +62,6 @@ public interface MonkeyConstants { long DEFAULT_UNBALANCE_WAIT_FOR_UNBALANCE_MS = 2 * 1000; long DEFAULT_UNBALANCE_WAIT_FOR_KILLS_MS = 2 * 1000; long DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS = 5 * 1000; + boolean DEFAULT_UNBALANCE_KILL_META_RS = true; long DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME = 30 * 1000; } diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java index f4ea4351d84..28ab7a03bed 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/UnbalanceMonkeyFactory.java @@ -31,13 +31,14 @@ public class UnbalanceMonkeyFactory extends MonkeyFactory { private long waitForUnbalanceMilliSec; private long waitForKillMilliSec; private long waitAfterBalanceMilliSec; + private boolean killMetaRs; @Override public ChaosMonkey build() { loadProperties(); Policy chaosPolicy = new PeriodicRandomActionPolicy(chaosEveryMilliSec, new UnbalanceKillAndRebalanceAction(waitForUnbalanceMilliSec, waitForKillMilliSec, - waitAfterBalanceMilliSec)); + waitAfterBalanceMilliSec, killMetaRs)); return new PolicyBasedChaosMonkey(util, chaosPolicy); } @@ -55,5 +56,8 @@ public class UnbalanceMonkeyFactory extends MonkeyFactory { waitAfterBalanceMilliSec = Long.parseLong(this.properties.getProperty( MonkeyConstants.UNBALANCE_WAIT_AFTER_BALANCE_MS, MonkeyConstants.DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS + "")); + killMetaRs = Boolean.parseBoolean(this.properties.getProperty( + MonkeyConstants.UNBALANCE_KILL_META_RS, + MonkeyConstants.DEFAULT_UNBALANCE_KILL_META_RS + "")); } }