HBASE-14274 Deadlock in region metrics on shutdown: MetricsRegionSourceImpl vs MetricsRegionAggregateSourceImpl

Signed-off-by: stack <stack@apache.org>
This commit is contained in:
Elliott Clark 2015-08-20 13:32:39 -07:00 committed by stack
parent ba7ea0b524
commit bcef28eefa
2 changed files with 44 additions and 65 deletions

View File

@ -18,10 +18,10 @@
package org.apache.hadoop.hbase.regionserver;
import java.util.HashSet;
import java.util.Collections;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.metrics.BaseSourceImpl;
@ -34,12 +34,11 @@ import org.apache.hadoop.metrics2.lib.MetricsExecutorImpl;
@InterfaceAudience.Private
public class MetricsRegionAggregateSourceImpl extends BaseSourceImpl
implements MetricsRegionAggregateSource {
// lock to guard against concurrent access to regionSources
private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
private final MetricsExecutorImpl executor = new MetricsExecutorImpl();
private final HashSet<MetricsRegionSource> regionSources =
new HashSet<MetricsRegionSource>();
private final Set<MetricsRegionSource> regionSources =
Collections.newSetFromMap(new ConcurrentHashMap<MetricsRegionSource, Boolean>());
public MetricsRegionAggregateSourceImpl() {
this(METRICS_NAME, METRICS_DESCRIPTION, METRICS_CONTEXT, METRICS_JMX_CONTEXT);
@ -62,36 +61,18 @@ public class MetricsRegionAggregateSourceImpl extends BaseSourceImpl
@Override
public void register(MetricsRegionSource source) {
Lock l = lock.writeLock();
l.lock();
try {
regionSources.add(source);
clearCache();
} finally {
l.unlock();
}
regionSources.add(source);
clearCache();
}
@Override
public void deregister(MetricsRegionSource toRemove) {
Lock l = lock.writeLock();
l.lock();
try {
regionSources.remove(toRemove);
clearCache();
} finally {
l.unlock();
}
regionSources.remove(toRemove);
clearCache();
}
private synchronized void clearCache() {
JmxCacheBuster.clearJmxCache(true);
executor.getExecutor().schedule(new Runnable() {
@Override
public void run() {
JmxCacheBuster.clearJmxCache();
}
}, 5, TimeUnit.MINUTES);
}
/**
@ -107,18 +88,12 @@ public class MetricsRegionAggregateSourceImpl extends BaseSourceImpl
MetricsRecordBuilder mrb = collector.addRecord(metricsName);
if (regionSources != null) {
Lock l = lock.readLock();
l.lock();
try {
for (MetricsRegionSource regionMetricSource : regionSources) {
if (regionMetricSource instanceof MetricsRegionSourceImpl) {
((MetricsRegionSourceImpl) regionMetricSource).snapshot(mrb, all);
}
for (MetricsRegionSource regionMetricSource : regionSources) {
if (regionMetricSource instanceof MetricsRegionSourceImpl) {
((MetricsRegionSourceImpl) regionMetricSource).snapshot(mrb, all);
}
mrb.addGauge(Interns.info(NUM_REGIONS, NUMBER_OF_REGIONS_DESC), regionSources.size());
} finally {
l.unlock();
}
mrb.addGauge(Interns.info(NUM_REGIONS, NUMBER_OF_REGIONS_DESC), regionSources.size());
metricsRegistry.snapshot(mrb, all);
}
}

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.hbase.regionserver;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
@ -39,12 +40,7 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
private static final Log LOG = LogFactory.getLog(MetricsRegionSourceImpl.class);
private boolean closed = false;
// lock to ensure that lock and pushing metrics can't race.
// When changing or acting on the closed boolean this lock must be held.
// The write lock must be held when changing closed.
private final ReadWriteLock readWriteLock = new ReentrantReadWriteLock(false);
private AtomicBoolean closed = new AtomicBoolean(false);
// Non-final so that we can null out the wrapper
// This is just paranoia. We really really don't want to
@ -109,16 +105,20 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
@Override
public void close() {
Lock lock = readWriteLock.writeLock();
lock.lock();
try {
if (closed) {
return;
}
boolean wasClosed = closed.getAndSet(false);
closed = true;
agg.deregister(this);
// Has someone else already closed this for us?
if (wasClosed) {
return;
}
// Before removing the metrics remove this region from the aggregate region bean.
// This should mean that it's unlikely that snapshot and close happen at the same time.
agg.deregister(this);
// While it's un-likely that snapshot and close happen at the same time it's still possible.
// So grab the lock to ensure that all calls to snapshot are done before we remove the metrics
synchronized (this) {
if (LOG.isTraceEnabled()) {
LOG.trace("Removing region Metrics: " + regionWrapper.getRegionName());
}
@ -133,10 +133,6 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
registry.removeHistogramMetrics(regionScanNextKey);
regionWrapper = null;
JmxCacheBuster.clearJmxCache();
} finally {
lock.unlock();
}
}
@ -186,13 +182,23 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
}
void snapshot(MetricsRecordBuilder mrb, boolean ignored) {
Lock lock = readWriteLock.readLock();
// Grab the read lock.
// This ensures that
lock.lock();
try {
if (closed) {
// If there is a close that started be double extra sure
// that we're not getting any locks and not putting data
// into the metrics that should be removed. So early out
// before even getting the lock.
if (closed.get()) {
return;
}
// Grab the read
// This ensures that removes of the metrics
// can't happen while we are putting them back in.
synchronized (this) {
// It's possible that a close happened between checking
// the closed variable and getting the lock.
if (closed.get()) {
return;
}
@ -254,8 +260,6 @@ public class MetricsRegionSourceImpl implements MetricsRegionSource {
MetricsRegionSource.COPROCESSOR_EXECUTION_STATISTICS_DESC + "99th percentile: "), ds
.getPercentile(99d) / 1000);
}
} finally {
lock.unlock();
}
}