diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java index 05a0c2cf649..ae679c6967f 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java @@ -217,6 +217,15 @@ public class ProcedureExecutor { */ private TimeoutExecutorThread timeoutExecutor; + /** + * WorkerMonitor check for stuck workers and new worker thread when necessary, for example if + * there is no worker to assign meta, it will new worker thread for it, so it is very important. + * TimeoutExecutor execute many tasks like DeadServerMetricRegionChore RegionInTransitionChore + * and so on, some tasks may execute for a long time so will block other tasks like + * WorkerMonitor, so use a dedicated thread for executing WorkerMonitor. + */ + private TimeoutExecutorThread workerMonitorExecutor; + private int corePoolSize; private int maxPoolSize; @@ -560,7 +569,8 @@ public class ProcedureExecutor { corePoolSize, maxPoolSize); this.threadGroup = new ThreadGroup("PEWorkerGroup"); - this.timeoutExecutor = new TimeoutExecutorThread<>(this, threadGroup); + this.timeoutExecutor = new TimeoutExecutorThread<>(this, threadGroup, "ProcExecTimeout"); + this.workerMonitorExecutor = new TimeoutExecutorThread<>(this, threadGroup, "WorkerMonitor"); // Create the workers workerId.set(0); @@ -604,12 +614,13 @@ public class ProcedureExecutor { // Start the executors. Here we must have the lastProcId set. LOG.trace("Start workers {}", workerThreads.size()); timeoutExecutor.start(); + workerMonitorExecutor.start(); for (WorkerThread worker: workerThreads) { worker.start(); } // Internal chores - timeoutExecutor.add(new WorkerMonitor()); + workerMonitorExecutor.add(new WorkerMonitor()); // Add completed cleaner chore addChore(new CompletedProcedureCleaner<>(conf, store, procExecutionLock, completed, @@ -624,6 +635,7 @@ public class ProcedureExecutor { LOG.info("Stopping"); scheduler.stop(); timeoutExecutor.sendStopSignal(); + workerMonitorExecutor.sendStopSignal(); } @VisibleForTesting @@ -632,6 +644,8 @@ public class ProcedureExecutor { // stop the timeout executor timeoutExecutor.awaitTermination(); + // stop the work monitor executor + workerMonitorExecutor.awaitTermination(); // stop the worker threads for (WorkerThread worker: workerThreads) { diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java index 30a52d4fda2..1e796d9ba5a 100644 --- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java +++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/TimeoutExecutorThread.java @@ -37,8 +37,9 @@ class TimeoutExecutorThread extends StoppableThread { private final DelayQueue queue = new DelayQueue<>(); - public TimeoutExecutorThread(ProcedureExecutor executor, ThreadGroup group) { - super(group, "ProcExecTimeout"); + public TimeoutExecutorThread(ProcedureExecutor executor, ThreadGroup group, + String name) { + super(group, name); setDaemon(true); this.executor = executor; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestProcedureAdmin.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestProcedureAdmin.java index c54aea4c869..bd0be29e5e3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestProcedureAdmin.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestProcedureAdmin.java @@ -57,7 +57,6 @@ public class TestProcedureAdmin { protected static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); - private static void setupConf(Configuration conf) { conf.setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); }