HBASE-15390 Unnecessary MetaCache evictions cause elevated number of requests to meta
This commit is contained in:
parent
3bf0945a11
commit
3adcc750e3
|
@ -30,4 +30,8 @@ public class CallQueueTooBigException extends IOException {
|
||||||
public CallQueueTooBigException() {
|
public CallQueueTooBigException() {
|
||||||
super();
|
super();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public CallQueueTooBigException(String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -1179,7 +1179,7 @@ class AsyncProcess {
|
||||||
Retry canRetry = errorsByServer.canTryMore(numAttempt)
|
Retry canRetry = errorsByServer.canTryMore(numAttempt)
|
||||||
? Retry.YES : Retry.NO_RETRIES_EXHAUSTED;
|
? Retry.YES : Retry.NO_RETRIES_EXHAUSTED;
|
||||||
|
|
||||||
if (tableName == null) {
|
if (tableName == null && ClientExceptionsUtil.isMetaClearingException(t)) {
|
||||||
// tableName is null when we made a cross-table RPC call.
|
// tableName is null when we made a cross-table RPC call.
|
||||||
connection.clearCaches(server);
|
connection.clearCaches(server);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1860,6 +1860,9 @@ class ConnectionImplementation implements ClusterConnection, Closeable {
|
||||||
|
|
||||||
if (regionName == null) {
|
if (regionName == null) {
|
||||||
// we do not know which region, so just remove the cache entry for the row and server
|
// we do not know which region, so just remove the cache entry for the row and server
|
||||||
|
if (metrics != null) {
|
||||||
|
metrics.incrCacheDroppingExceptions(exception);
|
||||||
|
}
|
||||||
metaCache.clearCache(tableName, rowkey, source);
|
metaCache.clearCache(tableName, rowkey, source);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1899,6 +1902,10 @@ class ConnectionImplementation implements ClusterConnection, Closeable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (metrics != null) {
|
||||||
|
metrics.incrCacheDroppingExceptions(exception);
|
||||||
|
}
|
||||||
|
|
||||||
// If we're here, it means that we cannot be sure about the location, so we remove it from
|
// If we're here, it means that we cannot be sure about the location, so we remove it from
|
||||||
// the cache. Do not send the source because source can be a new server in the same host:port
|
// the cache. Do not send the source because source can be a new server in the same host:port
|
||||||
metaCache.clearCache(regionInfo);
|
metaCache.clearCache(regionInfo);
|
||||||
|
|
|
@ -278,8 +278,13 @@ public class MetaCache {
|
||||||
}
|
}
|
||||||
this.cachedServers.remove(serverName);
|
this.cachedServers.remove(serverName);
|
||||||
}
|
}
|
||||||
if (deletedSomething && LOG.isTraceEnabled()) {
|
if (deletedSomething) {
|
||||||
LOG.trace("Removed all cached region locations that map to " + serverName);
|
if (metrics != null) {
|
||||||
|
metrics.incrMetaCacheNumClearServer();
|
||||||
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Removed all cached region locations that map to " + serverName);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -293,34 +298,6 @@ public class MetaCache {
|
||||||
this.cachedRegionLocations.remove(tableName);
|
this.cachedRegionLocations.remove(tableName);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Delete a cached location, no matter what it is. Called when we were told to not use cache.
|
|
||||||
* @param tableName tableName
|
|
||||||
* @param row
|
|
||||||
*/
|
|
||||||
public void clearCache(final TableName tableName, final byte [] row, int replicaId) {
|
|
||||||
ConcurrentMap<byte[], RegionLocations> tableLocations = getTableLocations(tableName);
|
|
||||||
|
|
||||||
boolean removed = false;
|
|
||||||
RegionLocations regionLocations = getCachedLocation(tableName, row);
|
|
||||||
if (regionLocations != null) {
|
|
||||||
HRegionLocation toBeRemoved = regionLocations.getRegionLocation(replicaId);
|
|
||||||
RegionLocations updatedLocations = regionLocations.remove(replicaId);
|
|
||||||
if (updatedLocations != regionLocations) {
|
|
||||||
byte[] startKey = regionLocations.getRegionLocation().getRegionInfo().getStartKey();
|
|
||||||
if (updatedLocations.isEmpty()) {
|
|
||||||
removed = tableLocations.remove(startKey, regionLocations);
|
|
||||||
} else {
|
|
||||||
removed = tableLocations.replace(startKey, regionLocations, updatedLocations);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (removed && LOG.isTraceEnabled() && toBeRemoved != null) {
|
|
||||||
LOG.trace("Removed " + toBeRemoved + " from cache");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Delete a cached location, no matter what it is. Called when we were told to not use cache.
|
* Delete a cached location, no matter what it is. Called when we were told to not use cache.
|
||||||
* @param tableName tableName
|
* @param tableName tableName
|
||||||
|
@ -333,8 +310,13 @@ public class MetaCache {
|
||||||
if (regionLocations != null) {
|
if (regionLocations != null) {
|
||||||
byte[] startKey = regionLocations.getRegionLocation().getRegionInfo().getStartKey();
|
byte[] startKey = regionLocations.getRegionLocation().getRegionInfo().getStartKey();
|
||||||
boolean removed = tableLocations.remove(startKey, regionLocations);
|
boolean removed = tableLocations.remove(startKey, regionLocations);
|
||||||
if (removed && LOG.isTraceEnabled()) {
|
if (removed) {
|
||||||
LOG.trace("Removed " + regionLocations + " from cache");
|
if (metrics != null) {
|
||||||
|
metrics.incrMetaCacheNumClearRegion();
|
||||||
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Removed " + regionLocations + " from cache");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -356,9 +338,14 @@ public class MetaCache {
|
||||||
} else {
|
} else {
|
||||||
removed = tableLocations.replace(startKey, regionLocations, updatedLocations);
|
removed = tableLocations.replace(startKey, regionLocations, updatedLocations);
|
||||||
}
|
}
|
||||||
if (removed && LOG.isTraceEnabled()) {
|
if (removed) {
|
||||||
LOG.trace("Removed locations of table: " + tableName + " ,row: " + Bytes.toString(row)
|
if (metrics != null) {
|
||||||
+ " mapping to server: " + serverName + " from cache");
|
metrics.incrMetaCacheNumClearRegion();
|
||||||
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Removed locations of table: " + tableName + " ,row: " + Bytes.toString(row)
|
||||||
|
+ " mapping to server: " + serverName + " from cache");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -375,15 +362,20 @@ public class MetaCache {
|
||||||
HRegionLocation oldLocation = regionLocations.getRegionLocation(hri.getReplicaId());
|
HRegionLocation oldLocation = regionLocations.getRegionLocation(hri.getReplicaId());
|
||||||
if (oldLocation == null) return;
|
if (oldLocation == null) return;
|
||||||
RegionLocations updatedLocations = regionLocations.remove(oldLocation);
|
RegionLocations updatedLocations = regionLocations.remove(oldLocation);
|
||||||
boolean removed = false;
|
boolean removed;
|
||||||
if (updatedLocations != regionLocations) {
|
if (updatedLocations != regionLocations) {
|
||||||
if (updatedLocations.isEmpty()) {
|
if (updatedLocations.isEmpty()) {
|
||||||
removed = tableLocations.remove(hri.getStartKey(), regionLocations);
|
removed = tableLocations.remove(hri.getStartKey(), regionLocations);
|
||||||
} else {
|
} else {
|
||||||
removed = tableLocations.replace(hri.getStartKey(), regionLocations, updatedLocations);
|
removed = tableLocations.replace(hri.getStartKey(), regionLocations, updatedLocations);
|
||||||
}
|
}
|
||||||
if (removed && LOG.isTraceEnabled()) {
|
if (removed) {
|
||||||
LOG.trace("Removed " + oldLocation + " from cache");
|
if (metrics != null) {
|
||||||
|
metrics.incrMetaCacheNumClearRegion();
|
||||||
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Removed " + oldLocation + " from cache");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -398,7 +390,7 @@ public class MetaCache {
|
||||||
RegionLocations regionLocations = tableLocations.get(location.getRegionInfo().getStartKey());
|
RegionLocations regionLocations = tableLocations.get(location.getRegionInfo().getStartKey());
|
||||||
if (regionLocations != null) {
|
if (regionLocations != null) {
|
||||||
RegionLocations updatedLocations = regionLocations.remove(location);
|
RegionLocations updatedLocations = regionLocations.remove(location);
|
||||||
boolean removed = false;
|
boolean removed;
|
||||||
if (updatedLocations != regionLocations) {
|
if (updatedLocations != regionLocations) {
|
||||||
if (updatedLocations.isEmpty()) {
|
if (updatedLocations.isEmpty()) {
|
||||||
removed = tableLocations.remove(location.getRegionInfo().getStartKey(), regionLocations);
|
removed = tableLocations.remove(location.getRegionInfo().getStartKey(), regionLocations);
|
||||||
|
@ -406,8 +398,13 @@ public class MetaCache {
|
||||||
removed = tableLocations.replace(location.getRegionInfo().getStartKey(), regionLocations,
|
removed = tableLocations.replace(location.getRegionInfo().getStartKey(), regionLocations,
|
||||||
updatedLocations);
|
updatedLocations);
|
||||||
}
|
}
|
||||||
if (removed && LOG.isTraceEnabled()) {
|
if (removed) {
|
||||||
LOG.trace("Removed " + location + " from cache");
|
if (metrics != null) {
|
||||||
|
metrics.incrMetaCacheNumClearRegion();
|
||||||
|
}
|
||||||
|
if (LOG.isTraceEnabled()) {
|
||||||
|
LOG.trace("Removed " + location + " from cache");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,6 +62,7 @@ public class MetricsConnection implements StatisticTrackable {
|
||||||
private static final String RESP_BASE = "rpcCallResponseSizeBytes_";
|
private static final String RESP_BASE = "rpcCallResponseSizeBytes_";
|
||||||
private static final String MEMLOAD_BASE = "memstoreLoad_";
|
private static final String MEMLOAD_BASE = "memstoreLoad_";
|
||||||
private static final String HEAP_BASE = "heapOccupancy_";
|
private static final String HEAP_BASE = "heapOccupancy_";
|
||||||
|
private static final String CACHE_BASE = "cacheDroppingExceptions_";
|
||||||
private static final String CLIENT_SVC = ClientService.getDescriptor().getName();
|
private static final String CLIENT_SVC = ClientService.getDescriptor().getName();
|
||||||
|
|
||||||
/** A container class for collecting details about the RPC call as it percolates. */
|
/** A container class for collecting details about the RPC call as it percolates. */
|
||||||
|
@ -263,6 +264,12 @@ public class MetricsConnection implements StatisticTrackable {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
private final NewMetric<Counter> counterFactory = new NewMetric<Counter>() {
|
||||||
|
@Override public Counter newMetric(Class<?> clazz, String name, String scope) {
|
||||||
|
return registry.counter(name(clazz, name, scope));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// static metrics
|
// static metrics
|
||||||
|
|
||||||
@VisibleForTesting protected final Counter metaCacheHits;
|
@VisibleForTesting protected final Counter metaCacheHits;
|
||||||
|
@ -275,6 +282,8 @@ public class MetricsConnection implements StatisticTrackable {
|
||||||
@VisibleForTesting protected final CallTracker putTracker;
|
@VisibleForTesting protected final CallTracker putTracker;
|
||||||
@VisibleForTesting protected final CallTracker multiTracker;
|
@VisibleForTesting protected final CallTracker multiTracker;
|
||||||
@VisibleForTesting protected final RunnerStats runnerStats;
|
@VisibleForTesting protected final RunnerStats runnerStats;
|
||||||
|
private final Counter metaCacheNumClearServer;
|
||||||
|
private final Counter metaCacheNumClearRegion;
|
||||||
|
|
||||||
// dynamic metrics
|
// dynamic metrics
|
||||||
|
|
||||||
|
@ -286,6 +295,8 @@ public class MetricsConnection implements StatisticTrackable {
|
||||||
@VisibleForTesting protected final ConcurrentMap<String, Histogram> rpcHistograms =
|
@VisibleForTesting protected final ConcurrentMap<String, Histogram> rpcHistograms =
|
||||||
new ConcurrentHashMap<>(CAPACITY * 2 /* tracking both request and response sizes */,
|
new ConcurrentHashMap<>(CAPACITY * 2 /* tracking both request and response sizes */,
|
||||||
LOAD_FACTOR, CONCURRENCY_LEVEL);
|
LOAD_FACTOR, CONCURRENCY_LEVEL);
|
||||||
|
private final ConcurrentMap<String, Counter> cacheDroppingExceptions =
|
||||||
|
new ConcurrentHashMap<>(CAPACITY, LOAD_FACTOR, CONCURRENCY_LEVEL);
|
||||||
|
|
||||||
public MetricsConnection(final ConnectionImplementation conn) {
|
public MetricsConnection(final ConnectionImplementation conn) {
|
||||||
this.scope = conn.toString();
|
this.scope = conn.toString();
|
||||||
|
@ -309,6 +320,10 @@ public class MetricsConnection implements StatisticTrackable {
|
||||||
});
|
});
|
||||||
this.metaCacheHits = registry.counter(name(this.getClass(), "metaCacheHits", scope));
|
this.metaCacheHits = registry.counter(name(this.getClass(), "metaCacheHits", scope));
|
||||||
this.metaCacheMisses = registry.counter(name(this.getClass(), "metaCacheMisses", scope));
|
this.metaCacheMisses = registry.counter(name(this.getClass(), "metaCacheMisses", scope));
|
||||||
|
this.metaCacheNumClearServer = registry.counter(name(this.getClass(),
|
||||||
|
"metaCacheNumClearServer", scope));
|
||||||
|
this.metaCacheNumClearRegion = registry.counter(name(this.getClass(),
|
||||||
|
"metaCacheNumClearRegion", scope));
|
||||||
this.getTracker = new CallTracker(this.registry, "Get", scope);
|
this.getTracker = new CallTracker(this.registry, "Get", scope);
|
||||||
this.scanTracker = new CallTracker(this.registry, "Scan", scope);
|
this.scanTracker = new CallTracker(this.registry, "Scan", scope);
|
||||||
this.appendTracker = new CallTracker(this.registry, "Mutate", "Append", scope);
|
this.appendTracker = new CallTracker(this.registry, "Mutate", "Append", scope);
|
||||||
|
@ -342,6 +357,16 @@ public class MetricsConnection implements StatisticTrackable {
|
||||||
metaCacheMisses.inc();
|
metaCacheMisses.inc();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Increment the number of meta cache drops requested for an entire RegionServer. */
|
||||||
|
public void incrMetaCacheNumClearServer() {
|
||||||
|
metaCacheNumClearServer.inc();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Increment the number of meta cache drops requested for an individual region. */
|
||||||
|
public void incrMetaCacheNumClearRegion() {
|
||||||
|
metaCacheNumClearRegion.inc();
|
||||||
|
}
|
||||||
|
|
||||||
/** Increment the number of normal runner counts. */
|
/** Increment the number of normal runner counts. */
|
||||||
public void incrNormalRunners() {
|
public void incrNormalRunners() {
|
||||||
this.runnerStats.incrNormalRunners();
|
this.runnerStats.incrNormalRunners();
|
||||||
|
@ -364,7 +389,8 @@ public class MetricsConnection implements StatisticTrackable {
|
||||||
T t = map.get(key);
|
T t = map.get(key);
|
||||||
if (t == null) {
|
if (t == null) {
|
||||||
t = factory.newMetric(this.getClass(), key, scope);
|
t = factory.newMetric(this.getClass(), key, scope);
|
||||||
map.putIfAbsent(key, t);
|
T tmp = map.putIfAbsent(key, t);
|
||||||
|
t = (tmp == null) ? t : tmp;
|
||||||
}
|
}
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
@ -436,4 +462,9 @@ public class MetricsConnection implements StatisticTrackable {
|
||||||
// Fallback to dynamic registry lookup for DDL methods.
|
// Fallback to dynamic registry lookup for DDL methods.
|
||||||
updateRpcGeneric(method, stats);
|
updateRpcGeneric(method, stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void incrCacheDroppingExceptions(Object exception) {
|
||||||
|
getMetric(CACHE_BASE + exception.getClass().getSimpleName(),
|
||||||
|
cacheDroppingExceptions, counterFactory).inc();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,6 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.hbase.classification.InterfaceStability;
|
import org.apache.hadoop.hbase.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.hbase.ipc.CallTimeoutException;
|
import org.apache.hadoop.hbase.ipc.CallTimeoutException;
|
||||||
import org.apache.hadoop.hbase.ipc.FailedServerException;
|
import org.apache.hadoop.hbase.ipc.FailedServerException;
|
||||||
import org.apache.hadoop.hbase.ipc.RemoteWithExtrasException;
|
|
||||||
import org.apache.hadoop.hbase.quotas.ThrottlingException;
|
import org.apache.hadoop.hbase.quotas.ThrottlingException;
|
||||||
import org.apache.hadoop.ipc.RemoteException;
|
import org.apache.hadoop.ipc.RemoteException;
|
||||||
|
|
||||||
|
@ -62,7 +61,7 @@ public final class ClientExceptionsUtil {
|
||||||
return (cur instanceof RegionMovedException || cur instanceof RegionOpeningException
|
return (cur instanceof RegionMovedException || cur instanceof RegionOpeningException
|
||||||
|| cur instanceof RegionTooBusyException || cur instanceof ThrottlingException
|
|| cur instanceof RegionTooBusyException || cur instanceof ThrottlingException
|
||||||
|| cur instanceof MultiActionResultTooLarge || cur instanceof RetryImmediatelyException
|
|| cur instanceof MultiActionResultTooLarge || cur instanceof RetryImmediatelyException
|
||||||
|| isCallQueueTooBigException(cur) || cur instanceof NotServingRegionException);
|
|| cur instanceof CallQueueTooBigException || cur instanceof NotServingRegionException);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -86,12 +85,8 @@ public final class ClientExceptionsUtil {
|
||||||
}
|
}
|
||||||
if (cur instanceof RemoteException) {
|
if (cur instanceof RemoteException) {
|
||||||
RemoteException re = (RemoteException) cur;
|
RemoteException re = (RemoteException) cur;
|
||||||
cur = re.unwrapRemoteException(
|
cur = re.unwrapRemoteException();
|
||||||
RegionOpeningException.class, RegionMovedException.class,
|
|
||||||
RegionTooBusyException.class);
|
|
||||||
if (cur == null) {
|
|
||||||
cur = re.unwrapRemoteException();
|
|
||||||
}
|
|
||||||
// unwrapRemoteException can return the exception given as a parameter when it cannot
|
// unwrapRemoteException can return the exception given as a parameter when it cannot
|
||||||
// unwrap it. In this case, there is no need to look further
|
// unwrap it. In this case, there is no need to look further
|
||||||
// noinspection ObjectEquality
|
// noinspection ObjectEquality
|
||||||
|
@ -109,21 +104,14 @@ public final class ClientExceptionsUtil {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks if the exception is CallQueueTooBig exception, or tries to unwrap
|
* Checks if the exception is CallQueueTooBig exception (maybe wrapped
|
||||||
* {@link RemoteWithExtrasException} to see if we've got {@link CallQueueTooBigException}.
|
* into some RemoteException).
|
||||||
* @param t exception to check
|
* @param t exception to check
|
||||||
* @return true if it's a CQTBE, false otherwise
|
* @return true if it's a CQTBE, false otherwise
|
||||||
*/
|
*/
|
||||||
public static boolean isCallQueueTooBigException(Throwable t) {
|
public static boolean isCallQueueTooBigException(Throwable t) {
|
||||||
if (t instanceof CallQueueTooBigException) {
|
t = findException(t);
|
||||||
return true;
|
return (t instanceof CallQueueTooBigException);
|
||||||
}
|
|
||||||
if (t instanceof RemoteWithExtrasException) {
|
|
||||||
return CallQueueTooBigException.class.getName().equals(
|
|
||||||
((RemoteWithExtrasException) t).getClassName().trim());
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue