HBASE-14274 Deadlock in region metrics on shutdown: MetricsRegionSourceImpl vs MetricsRegionAggregateSourceImpl
Signed-off-by: stack <stack@apache.org>
This commit is contained in:
parent
ba7ea0b524
commit
bcef28eefa
|
@ -18,10 +18,10 @@
|
|||
|
||||
package org.apache.hadoop.hbase.regionserver;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
||||
import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.hbase.metrics.BaseSourceImpl;
|
||||
|
@ -34,12 +34,11 @@ import org.apache.hadoop.metrics2.lib.MetricsExecutorImpl;
|
|||
@InterfaceAudience.Private
|
||||
public class MetricsRegionAggregateSourceImpl extends BaseSourceImpl
|
||||
implements MetricsRegionAggregateSource {
|
||||
// lock to guard against concurrent access to regionSources
|
||||
private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
|
||||
|
||||
private final MetricsExecutorImpl executor = new MetricsExecutorImpl();
|
||||
|
||||
private final HashSet<MetricsRegionSource> regionSources =
|
||||
new HashSet<MetricsRegionSource>();
|
||||
private final Set<MetricsRegionSource> regionSources =
|
||||
Collections.newSetFromMap(new ConcurrentHashMap<MetricsRegionSource, Boolean>());
|
||||
|
||||
public MetricsRegionAggregateSourceImpl() {
|
||||
this(METRICS_NAME, METRICS_DESCRIPTION, METRICS_CONTEXT, METRICS_JMX_CONTEXT);
|
||||
|
@ -62,36 +61,18 @@ public class MetricsRegionAggregateSourceImpl extends BaseSourceImpl
|
|||
|
||||
@Override
|
||||
public void register(MetricsRegionSource source) {
|
||||
Lock l = lock.writeLock();
|
||||
l.lock();
|
||||
try {
|
||||
regionSources.add(source);
|
||||
clearCache();
|
||||
} finally {
|
||||
l.unlock();
|
||||
}
|
||||
regionSources.add(source);
|
||||
clearCache();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deregister(MetricsRegionSource toRemove) {
|
||||
Lock l = lock.writeLock();
|
||||
l.lock();
|
||||
try {
|
||||
regionSources.remove(toRemove);
|
||||
clearCache();
|
||||
} finally {
|
||||
l.unlock();
|
||||
}
|
||||
regionSources.remove(toRemove);
|
||||
clearCache();
|
||||
}
|
||||
|
||||
private synchronized void clearCache() {
|
||||
JmxCacheBuster.clearJmxCache(true);
|
||||
executor.getExecutor().schedule(new Runnable() {
|
||||
@Override
|
||||
public void run() {
|
||||
JmxCacheBuster.clearJmxCache();
|
||||
}
|
||||
}, 5, TimeUnit.MINUTES);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -107,18 +88,12 @@ public class MetricsRegionAggregateSourceImpl extends BaseSourceImpl
|
|||
MetricsRecordBuilder mrb = collector.addRecord(metricsName);
|
||||
|
||||
if (regionSources != null) {
|
||||
Lock l = lock.readLock();
|
||||
l.lock();
|
||||
try {
|
||||
for (MetricsRegionSource regionMetricSource : regionSources) {
|
||||
if (regionMetricSource instanceof MetricsRegionSourceImpl) {
|
||||
((MetricsRegionSourceImpl) regionMetricSource).snapshot(mrb, all);
|
||||
}
|
||||
for (MetricsRegionSource regionMetricSource : regionSources) {
|
||||
if (regionMetricSource instanceof MetricsRegionSourceImpl) {
|
||||
((MetricsRegionSourceImpl) regionMetricSource).snapshot(mrb, all);
|
||||
}
|
||||
mrb.addGauge(Interns.info(NUM_REGIONS, NUMBER_OF_REGIONS_DESC), regionSources.size());
|
||||
} finally {
|
||||
l.unlock();
|
||||
}
|
||||
mrb.addGauge(Interns.info(NUM_REGIONS, NUMBER_OF_REGIONS_DESC), regionSources.size());
|
||||
metricsRegistry.snapshot(mrb, all);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
package org.apache.hadoop.hbase.regionserver;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReadWriteLock;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
@ -39,12 +40,7 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
|
|||
|
||||
private static final Log LOG = LogFactory.getLog(MetricsRegionSourceImpl.class);
|
||||
|
||||
private boolean closed = false;
|
||||
|
||||
// lock to ensure that lock and pushing metrics can't race.
|
||||
// When changing or acting on the closed boolean this lock must be held.
|
||||
// The write lock must be held when changing closed.
|
||||
private final ReadWriteLock readWriteLock = new ReentrantReadWriteLock(false);
|
||||
private AtomicBoolean closed = new AtomicBoolean(false);
|
||||
|
||||
// Non-final so that we can null out the wrapper
|
||||
// This is just paranoia. We really really don't want to
|
||||
|
@ -109,16 +105,20 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
|
|||
|
||||
@Override
|
||||
public void close() {
|
||||
Lock lock = readWriteLock.writeLock();
|
||||
lock.lock();
|
||||
try {
|
||||
if (closed) {
|
||||
return;
|
||||
}
|
||||
boolean wasClosed = closed.getAndSet(false);
|
||||
|
||||
closed = true;
|
||||
agg.deregister(this);
|
||||
// Has someone else already closed this for us?
|
||||
if (wasClosed) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Before removing the metrics remove this region from the aggregate region bean.
|
||||
// This should mean that it's unlikely that snapshot and close happen at the same time.
|
||||
agg.deregister(this);
|
||||
|
||||
// While it's un-likely that snapshot and close happen at the same time it's still possible.
|
||||
// So grab the lock to ensure that all calls to snapshot are done before we remove the metrics
|
||||
synchronized (this) {
|
||||
if (LOG.isTraceEnabled()) {
|
||||
LOG.trace("Removing region Metrics: " + regionWrapper.getRegionName());
|
||||
}
|
||||
|
@ -133,10 +133,6 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
|
|||
registry.removeHistogramMetrics(regionScanNextKey);
|
||||
|
||||
regionWrapper = null;
|
||||
|
||||
JmxCacheBuster.clearJmxCache();
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -186,13 +182,23 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
|
|||
}
|
||||
|
||||
void snapshot(MetricsRecordBuilder mrb, boolean ignored) {
|
||||
Lock lock = readWriteLock.readLock();
|
||||
|
||||
// Grab the read lock.
|
||||
// This ensures that
|
||||
lock.lock();
|
||||
try {
|
||||
if (closed) {
|
||||
// If there is a close that started be double extra sure
|
||||
// that we're not getting any locks and not putting data
|
||||
// into the metrics that should be removed. So early out
|
||||
// before even getting the lock.
|
||||
if (closed.get()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Grab the read
|
||||
// This ensures that removes of the metrics
|
||||
// can't happen while we are putting them back in.
|
||||
synchronized (this) {
|
||||
|
||||
// It's possible that a close happened between checking
|
||||
// the closed variable and getting the lock.
|
||||
if (closed.get()) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -254,8 +260,6 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
|
|||
MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS_DESC + "99th percentile: "), ds
|
||||
.getPercentile(99d) / 1000);
|
||||
}
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue