HBASE-23151 Backport HBASE-23083 (Collect Executor status info periodically and report to metrics system) to branch-1
Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
parent
12d45f8973
commit
425d84dc14
|
@ -946,7 +946,7 @@ public final class HConstants {
|
||||||
/**
|
/**
|
||||||
* Pattern that matches a coprocessor specification. Form is:
|
* Pattern that matches a coprocessor specification. Form is:
|
||||||
* <code>
|
* <code>
|
||||||
*<coprocessor jar file location> '|' <<class name> ['|' <priority> ['|' <arguments>]]
|
*<coprocessor jar file location> '|' <class name> ['|' <priority> ['|' <arguments>]]
|
||||||
* </code>
|
* </code>
|
||||||
* ...where arguments are <code><KEY> '=' <VALUE> [,...]</code>
|
* ...where arguments are <code><KEY> '=' <VALUE> [,...]</code>
|
||||||
* <p>For example: <code>hdfs:///foo.jar|com.foo.FooRegionObserver|1001|arg1=1,arg2=2</code>
|
* <p>For example: <code>hdfs:///foo.jar|com.foo.FooRegionObserver|1001|arg1=1,arg2=2</code>
|
||||||
|
@ -1137,6 +1137,9 @@ public final class HConstants {
|
||||||
"hbase.node.health.failure.threshold";
|
"hbase.node.health.failure.threshold";
|
||||||
public static final int DEFAULT_HEALTH_FAILURE_THRESHOLD = 3;
|
public static final int DEFAULT_HEALTH_FAILURE_THRESHOLD = 3;
|
||||||
|
|
||||||
|
public static final String EXECUTOR_STATUS_COLLECT_ENABLED =
|
||||||
|
"hbase.executors.status.collect.enabled";
|
||||||
|
public static final boolean DEFAULT_EXECUTOR_STATUS_COLLECT_ENABLED = true;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Setting to activate, or not, the publication of the status by the master. Default
|
* Setting to activate, or not, the publication of the status by the master. Default
|
||||||
|
|
|
@ -0,0 +1,86 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.hbase.executor.ExecutorService;
|
||||||
|
import org.apache.hadoop.hbase.executor.ExecutorService.ExecutorStatus;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceImpl;
|
||||||
|
import org.apache.hadoop.hbase.util.Pair;
|
||||||
|
import org.apache.hadoop.metrics2.lib.DynamicMetricsRegistry;
|
||||||
|
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
|
||||||
|
import org.apache.hadoop.util.StringUtils;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The Class ExecutorStatusChore for collect Executor status info periodically
|
||||||
|
* and report to metrics system
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
public class ExecutorStatusChore extends ScheduledChore {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(HealthCheckChore.class);
|
||||||
|
public static final String WAKE_FREQ = "hbase.executors.status.collect.period";
|
||||||
|
public static final int DEFAULT_WAKE_FREQ = 60000;
|
||||||
|
private ExecutorService service;
|
||||||
|
private DynamicMetricsRegistry metricsRegistry;
|
||||||
|
|
||||||
|
public ExecutorStatusChore(int sleepTime, Stoppable stopper, ExecutorService service,
|
||||||
|
MetricsRegionServerSource metrics) {
|
||||||
|
super("ExecutorStatusChore", stopper, sleepTime);
|
||||||
|
LOG.info("ExecutorStatusChore runs every {} ", StringUtils.formatTime(sleepTime));
|
||||||
|
this.service = service;
|
||||||
|
this.metricsRegistry = ((MetricsRegionServerSourceImpl) metrics).getMetricsRegistry();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void chore() {
|
||||||
|
try{
|
||||||
|
// thread pool monitor
|
||||||
|
Map<String, ExecutorStatus> statuses = service.getAllExecutorStatuses();
|
||||||
|
for (Map.Entry<String, ExecutorStatus> statusEntry : statuses.entrySet()) {
|
||||||
|
String name = statusEntry.getKey();
|
||||||
|
// Executor's name is generate by ExecutorType#getExecutorName
|
||||||
|
// include ExecutorType & Servername(split by '-'), here we only need the ExecutorType
|
||||||
|
String poolName = name.split("-")[0];
|
||||||
|
ExecutorStatus status = statusEntry.getValue();
|
||||||
|
MutableGaugeLong queued = metricsRegistry.getGauge(poolName + "_queued", 0L);
|
||||||
|
MutableGaugeLong running = metricsRegistry.getGauge(poolName + "_running", 0L);
|
||||||
|
int queueSize = status.getQueuedEvents().size();
|
||||||
|
int runningSize = status.getRunning().size();
|
||||||
|
if (queueSize > 0) {
|
||||||
|
LOG.warn("{}'s size info, queued: {}, running: {}", poolName, queueSize, runningSize);
|
||||||
|
}
|
||||||
|
queued.set(queueSize);
|
||||||
|
running.set(runningSize);
|
||||||
|
}
|
||||||
|
} catch(Throwable e) {
|
||||||
|
LOG.error(e.getMessage(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public Pair<Long, Long> getExecutorStatus(String poolName) {
|
||||||
|
MutableGaugeLong running = metricsRegistry.getGauge(poolName + "_running", 0L);
|
||||||
|
MutableGaugeLong queued = metricsRegistry.getGauge(poolName + "_queued", 0L);
|
||||||
|
return new Pair<Long, Long>(running.value(), queued.value());
|
||||||
|
}
|
||||||
|
}
|
|
@ -320,6 +320,14 @@ public class ExecutorService {
|
||||||
this.running = running;
|
this.running = running;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the queued events held by this status object.
 * The {@code queuedEvents} field is returned directly, not a copy.
 *
 * @return the list of queued event handlers
 */
public List<EventHandler> getQueuedEvents() {
|
||||||
|
return queuedEvents;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the running event statuses held by this status object.
 * The {@code running} field is returned directly, not a copy.
 *
 * @return the list of running event statuses
 */
public List<RunningEventStatus> getRunning() {
|
||||||
|
return running;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Dump a textual representation of the executor's status
|
* Dump a textual representation of the executor's status
|
||||||
* to the given writer.
|
* to the given writer.
|
||||||
|
|
|
@ -79,6 +79,7 @@ import org.apache.hadoop.hbase.ChoreService;
|
||||||
import org.apache.hadoop.hbase.ClockOutOfSyncException;
|
import org.apache.hadoop.hbase.ClockOutOfSyncException;
|
||||||
import org.apache.hadoop.hbase.CoordinatedStateManager;
|
import org.apache.hadoop.hbase.CoordinatedStateManager;
|
||||||
import org.apache.hadoop.hbase.CoordinatedStateManagerFactory;
|
import org.apache.hadoop.hbase.CoordinatedStateManagerFactory;
|
||||||
|
import org.apache.hadoop.hbase.ExecutorStatusChore;
|
||||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||||
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
|
||||||
import org.apache.hadoop.hbase.HConstants;
|
import org.apache.hadoop.hbase.HConstants;
|
||||||
|
@ -423,6 +424,9 @@ public class HRegionServer extends HasThread implements
|
||||||
/** The health check chore. */
|
/** The health check chore. */
|
||||||
private HealthCheckChore healthCheckChore;
|
private HealthCheckChore healthCheckChore;
|
||||||
|
|
||||||
|
/** The Executor status collect chore. */
|
||||||
|
private ExecutorStatusChore executorStatusChore;
|
||||||
|
|
||||||
/** The nonce manager chore. */
|
/** The nonce manager chore. */
|
||||||
private ScheduledChore nonceManagerChore;
|
private ScheduledChore nonceManagerChore;
|
||||||
|
|
||||||
|
@ -1500,6 +1504,16 @@ public class HRegionServer extends HasThread implements
|
||||||
pauseMonitor.start();
|
pauseMonitor.start();
|
||||||
|
|
||||||
startServiceThreads();
|
startServiceThreads();
|
||||||
|
// start Executor status collect thread. can't do this in preRegistrationInitialization
|
||||||
|
// since MetricsRegionServer has not been instantiated
|
||||||
|
if (this.conf.getBoolean(HConstants.EXECUTOR_STATUS_COLLECT_ENABLED,
|
||||||
|
HConstants.DEFAULT_EXECUTOR_STATUS_COLLECT_ENABLED)) {
|
||||||
|
int sleepTime = this.conf.getInt(ExecutorStatusChore.WAKE_FREQ,
|
||||||
|
ExecutorStatusChore.DEFAULT_WAKE_FREQ);
|
||||||
|
executorStatusChore = new ExecutorStatusChore(sleepTime, this, this.getExecutorService(),
|
||||||
|
this.getRegionServerMetrics().getMetricsSource());
|
||||||
|
}
|
||||||
|
|
||||||
startHeapMemoryManager();
|
startHeapMemoryManager();
|
||||||
LOG.info("Serving as " + this.serverName +
|
LOG.info("Serving as " + this.serverName +
|
||||||
", RpcServer on " + rpcServices.isa +
|
", RpcServer on " + rpcServices.isa +
|
||||||
|
@ -1854,13 +1868,27 @@ public class HRegionServer extends HasThread implements
|
||||||
if (this.cacheFlusher != null) {
|
if (this.cacheFlusher != null) {
|
||||||
this.cacheFlusher.start(uncaughtExceptionHandler);
|
this.cacheFlusher.start(uncaughtExceptionHandler);
|
||||||
}
|
}
|
||||||
|
if (this.compactionChecker != null) {
|
||||||
if (this.compactionChecker != null) choreService.scheduleChore(compactionChecker);
|
choreService.scheduleChore(compactionChecker);
|
||||||
if (this.periodicFlusher != null) choreService.scheduleChore(periodicFlusher);
|
}
|
||||||
if (this.healthCheckChore != null) choreService.scheduleChore(healthCheckChore);
|
if (this.periodicFlusher != null) {
|
||||||
if (this.nonceManagerChore != null) choreService.scheduleChore(nonceManagerChore);
|
choreService.scheduleChore(periodicFlusher);
|
||||||
if (this.storefileRefresher != null) choreService.scheduleChore(storefileRefresher);
|
}
|
||||||
if (this.movedRegionsCleaner != null) choreService.scheduleChore(movedRegionsCleaner);
|
if (this.healthCheckChore != null) {
|
||||||
|
choreService.scheduleChore(healthCheckChore);
|
||||||
|
}
|
||||||
|
if (this.executorStatusChore != null) {
|
||||||
|
choreService.scheduleChore(executorStatusChore);
|
||||||
|
}
|
||||||
|
if (this.nonceManagerChore != null) {
|
||||||
|
choreService.scheduleChore(nonceManagerChore);
|
||||||
|
}
|
||||||
|
if (this.storefileRefresher != null) {
|
||||||
|
choreService.scheduleChore(storefileRefresher);
|
||||||
|
}
|
||||||
|
if (this.movedRegionsCleaner != null) {
|
||||||
|
choreService.scheduleChore(movedRegionsCleaner);
|
||||||
|
}
|
||||||
|
|
||||||
// Leases is not a Thread. Internally it runs a daemon thread. If it gets
|
// Leases is not a Thread. Internally it runs a daemon thread. If it gets
|
||||||
// an unhandled exception, it will just exit.
|
// an unhandled exception, it will just exit.
|
||||||
|
@ -2297,6 +2325,7 @@ public class HRegionServer extends HasThread implements
|
||||||
choreService.cancelChore(compactionChecker);
|
choreService.cancelChore(compactionChecker);
|
||||||
choreService.cancelChore(periodicFlusher);
|
choreService.cancelChore(periodicFlusher);
|
||||||
choreService.cancelChore(healthCheckChore);
|
choreService.cancelChore(healthCheckChore);
|
||||||
|
choreService.cancelChore(executorStatusChore);
|
||||||
choreService.cancelChore(storefileRefresher);
|
choreService.cancelChore(storefileRefresher);
|
||||||
choreService.cancelChore(movedRegionsCleaner);
|
choreService.cancelChore(movedRegionsCleaner);
|
||||||
// clean up the remaining scheduled chores (in case we missed out any)
|
// clean up the remaining scheduled chores (in case we missed out any)
|
||||||
|
|
|
@ -0,0 +1,97 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.when;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import org.apache.hadoop.hbase.executor.EventType;
|
||||||
|
import org.apache.hadoop.hbase.executor.ExecutorService;
|
||||||
|
import org.apache.hadoop.hbase.executor.ExecutorType;
|
||||||
|
import org.apache.hadoop.hbase.executor.TestExecutorService.TestEventHandler;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceFactory;
|
||||||
|
import org.apache.hadoop.hbase.regionserver.MetricsRegionServerSourceImpl;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MiscTests;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.SmallTests;
|
||||||
|
import org.apache.hadoop.hbase.util.Pair;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@Category({MiscTests.class, SmallTests.class})
|
||||||
|
public class TestExecutorStatusChore {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(TestExecutorStatusChore.class);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMetricsCollect() throws Exception {
|
||||||
|
int maxThreads = 5;
|
||||||
|
int maxTries = 10;
|
||||||
|
int sleepInterval = 10;
|
||||||
|
|
||||||
|
Server mockedServer = mock(Server.class);
|
||||||
|
when(mockedServer.getConfiguration()).thenReturn(HBaseConfiguration.create());
|
||||||
|
|
||||||
|
// Start an executor service pool with max 5 threads
|
||||||
|
ExecutorService executorService = new ExecutorService("unit_test");
|
||||||
|
executorService.startExecutorService(
|
||||||
|
ExecutorType.RS_PARALLEL_SEEK, maxThreads);
|
||||||
|
|
||||||
|
MetricsRegionServerSource serverSource = CompatibilitySingletonFactory
|
||||||
|
.getInstance(MetricsRegionServerSourceFactory.class).createServer(null);
|
||||||
|
assertTrue(serverSource instanceof MetricsRegionServerSourceImpl);
|
||||||
|
|
||||||
|
ExecutorStatusChore statusChore = new ExecutorStatusChore(60000,
|
||||||
|
mockedServer, executorService, serverSource);
|
||||||
|
|
||||||
|
AtomicBoolean lock = new AtomicBoolean(true);
|
||||||
|
AtomicInteger counter = new AtomicInteger(0);
|
||||||
|
|
||||||
|
for (int i = 0; i < maxThreads + 1; i++) {
|
||||||
|
executorService.submit(new TestEventHandler(mockedServer,
|
||||||
|
EventType.RS_PARALLEL_SEEK, lock, counter));
|
||||||
|
}
|
||||||
|
|
||||||
|
// The TestEventHandler will increment counter when it starts.
|
||||||
|
int tries = 0;
|
||||||
|
while (counter.get() < maxThreads && tries < maxTries) {
|
||||||
|
LOG.info("Waiting for all event handlers to start...");
|
||||||
|
Thread.sleep(sleepInterval);
|
||||||
|
tries++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assert that pool is at max threads.
|
||||||
|
assertEquals(maxThreads, counter.get());
|
||||||
|
|
||||||
|
statusChore.chore();
|
||||||
|
Pair<Long, Long> executorStatus = statusChore.getExecutorStatus("RS_PARALLEL_SEEK");
|
||||||
|
assertEquals(maxThreads, executorStatus.getFirst().intValue()); // running
|
||||||
|
assertEquals(1, executorStatus.getSecond().intValue()); // pending
|
||||||
|
|
||||||
|
// Now interrupt the running Executor
|
||||||
|
synchronized (lock) {
|
||||||
|
lock.set(false);
|
||||||
|
lock.notifyAll();
|
||||||
|
}
|
||||||
|
executorService.shutdown();
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue