From 7ebc6170266df0154b95369eb261dc112be63042 Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Mon, 29 Jun 2020 16:35:26 -0700 Subject: [PATCH] HBASE-24658 Update PolicyBasedChaosMonkey to handle uncaught exceptions Running `ServerKillingChaosMonkey` via `RESTApiClusterManager` for any duration of time slowly leaks region servers. I see failures on the RESTApi side go unreported on the ChaosMonkey side. It seems like `RuntimeException`s are being thrown and lost. `PolicyBasedChaosMonkey` uses a primitive means of thread management anyway. Update to use a thread pool, thread groups, and an uncaughtExceptionHandler. Signed-off-by: Bharath Vissapragada Signed-off-by: Viraj Jasani --- .../chaos/monkies/PolicyBasedChaosMonkey.java | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java index dc2ac13c396..e3e2e49ac26 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/monkies/PolicyBasedChaosMonkey.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -22,14 +22,18 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.Objects; import java.util.Properties; - +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; import org.apache.commons.lang3.RandomUtils; import org.apache.hadoop.hbase.IntegrationTestingUtility; import org.apache.hadoop.hbase.chaos.policies.Policy; import org.apache.hadoop.hbase.util.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; /** * Chaos monkey that given multiple policies will run actions against the cluster. @@ -38,7 +42,6 @@ public class PolicyBasedChaosMonkey extends ChaosMonkey { private static final Logger LOG = LoggerFactory.getLogger(PolicyBasedChaosMonkey.class); private static final long ONE_SEC = 1000; - private static final long FIVE_SEC = 5 * ONE_SEC; private static final long ONE_MIN = 60 * ONE_SEC; public static final long TIMEOUT = ONE_MIN; @@ -46,6 +49,9 @@ public class PolicyBasedChaosMonkey extends ChaosMonkey { final IntegrationTestingUtility util; final Properties monkeyProps; + private final Policy[] policies; + private final ExecutorService monkeyThreadPool; + /** * Construct a new ChaosMonkey * @param util the HBaseIntegrationTestingUtility already configured @@ -60,19 +66,30 @@ public class PolicyBasedChaosMonkey extends ChaosMonkey { } public PolicyBasedChaosMonkey(Properties monkeyProps, IntegrationTestingUtility util, - Policy... policies) { - this.monkeyProps = monkeyProps; - this.util = util; - this.policies = policies; + Collection policies) { + this(monkeyProps, util, policies.toArray(new Policy[0])); } public PolicyBasedChaosMonkey(Properties monkeyProps, IntegrationTestingUtility util, - Collection policies) { + Policy... policies) { this.monkeyProps = monkeyProps; - this.util = util; - this.policies = policies.toArray(new Policy[policies.size()]); + this.util = Objects.requireNonNull(util); + this.policies = Objects.requireNonNull(policies); + if (policies.length == 0) { + throw new IllegalArgumentException("policies may not be empty"); + } + this.monkeyThreadPool = buildMonkeyThreadPool(policies.length); } + private static ExecutorService buildMonkeyThreadPool(final int size) { + return Executors.newFixedThreadPool(size, new ThreadFactoryBuilder() + .setDaemon(false) + .setNameFormat("ChaosMonkey-%d") + .setUncaughtExceptionHandler((t, e) -> { + throw new RuntimeException(e); + }) + .build()); + } /** Selects a random item from the given items */ public static T selectRandomItem(T[] items) { @@ -114,27 +131,20 @@ public class PolicyBasedChaosMonkey extends ChaosMonkey { return originalItems.subList(startIndex, startIndex + selectedNumber); } - private Policy[] policies; - private Thread[] monkeyThreads; - @Override public void start() throws Exception { - monkeyThreads = new Thread[policies.length]; - Policy.PolicyContext context = new Policy.PolicyContext(monkeyProps, this.util); - for (int i=0; i