HBASE-15390 Unnecessary MetaCache evictions cause elevated number of requests to meta

This commit is contained in:
Mikhail Antonov 2016-03-16 12:48:10 -07:00
parent 3bf0945a11
commit 3adcc750e3
6 changed files with 89 additions and 62 deletions

View File

@ -30,4 +30,8 @@ public class CallQueueTooBigException extends IOException {
public CallQueueTooBigException() { public CallQueueTooBigException() {
super(); super();
} }
public CallQueueTooBigException(String message) {
super(message);
}
} }

View File

@ -1179,7 +1179,7 @@ class AsyncProcess {
Retry canRetry = errorsByServer.canTryMore(numAttempt) Retry canRetry = errorsByServer.canTryMore(numAttempt)
? Retry.YES : Retry.NO_RETRIES_EXHAUSTED; ? Retry.YES : Retry.NO_RETRIES_EXHAUSTED;
if (tableName == null) { if (tableName == null && ClientExceptionsUtil.isMetaClearingException(t)) {
// tableName is null when we made a cross-table RPC call. // tableName is null when we made a cross-table RPC call.
connection.clearCaches(server); connection.clearCaches(server);
} }

View File

@ -1860,6 +1860,9 @@ class ConnectionImplementation implements ClusterConnection, Closeable {
if (regionName == null) { if (regionName == null) {
// we do not know which region, so just remove the cache entry for the row and server // we do not know which region, so just remove the cache entry for the row and server
if (metrics != null) {
metrics.incrCacheDroppingExceptions(exception);
}
metaCache.clearCache(tableName, rowkey, source); metaCache.clearCache(tableName, rowkey, source);
return; return;
} }
@ -1899,6 +1902,10 @@ class ConnectionImplementation implements ClusterConnection, Closeable {
} }
} }
if (metrics != null) {
metrics.incrCacheDroppingExceptions(exception);
}
// If we're here, it means that can cannot be sure about the location, so we remove it from // If we're here, it means that can cannot be sure about the location, so we remove it from
// the cache. Do not send the source because source can be a new server in the same host:port // the cache. Do not send the source because source can be a new server in the same host:port
metaCache.clearCache(regionInfo); metaCache.clearCache(regionInfo);

View File

@ -278,8 +278,13 @@ public class MetaCache {
} }
this.cachedServers.remove(serverName); this.cachedServers.remove(serverName);
} }
if (deletedSomething && LOG.isTraceEnabled()) { if (deletedSomething) {
LOG.trace("Removed all cached region locations that map to " + serverName); if (metrics != null) {
metrics.incrMetaCacheNumClearServer();
}
if (LOG.isTraceEnabled()) {
LOG.trace("Removed all cached region locations that map to " + serverName);
}
} }
} }
@ -293,34 +298,6 @@ public class MetaCache {
this.cachedRegionLocations.remove(tableName); this.cachedRegionLocations.remove(tableName);
} }
/**
* Delete a cached location, no matter what it is. Called when we were told to not use cache.
* @param tableName tableName
* @param row
*/
public void clearCache(final TableName tableName, final byte [] row, int replicaId) {
ConcurrentMap<byte[], RegionLocations> tableLocations = getTableLocations(tableName);
boolean removed = false;
RegionLocations regionLocations = getCachedLocation(tableName, row);
if (regionLocations != null) {
HRegionLocation toBeRemoved = regionLocations.getRegionLocation(replicaId);
RegionLocations updatedLocations = regionLocations.remove(replicaId);
if (updatedLocations != regionLocations) {
byte[] startKey = regionLocations.getRegionLocation().getRegionInfo().getStartKey();
if (updatedLocations.isEmpty()) {
removed = tableLocations.remove(startKey, regionLocations);
} else {
removed = tableLocations.replace(startKey, regionLocations, updatedLocations);
}
}
if (removed && LOG.isTraceEnabled() && toBeRemoved != null) {
LOG.trace("Removed " + toBeRemoved + " from cache");
}
}
}
/** /**
* Delete a cached location, no matter what it is. Called when we were told to not use cache. * Delete a cached location, no matter what it is. Called when we were told to not use cache.
* @param tableName tableName * @param tableName tableName
@ -333,8 +310,13 @@ public class MetaCache {
if (regionLocations != null) { if (regionLocations != null) {
byte[] startKey = regionLocations.getRegionLocation().getRegionInfo().getStartKey(); byte[] startKey = regionLocations.getRegionLocation().getRegionInfo().getStartKey();
boolean removed = tableLocations.remove(startKey, regionLocations); boolean removed = tableLocations.remove(startKey, regionLocations);
if (removed && LOG.isTraceEnabled()) { if (removed) {
LOG.trace("Removed " + regionLocations + " from cache"); if (metrics != null) {
metrics.incrMetaCacheNumClearRegion();
}
if (LOG.isTraceEnabled()) {
LOG.trace("Removed " + regionLocations + " from cache");
}
} }
} }
} }
@ -356,9 +338,14 @@ public class MetaCache {
} else { } else {
removed = tableLocations.replace(startKey, regionLocations, updatedLocations); removed = tableLocations.replace(startKey, regionLocations, updatedLocations);
} }
if (removed && LOG.isTraceEnabled()) { if (removed) {
LOG.trace("Removed locations of table: " + tableName + " ,row: " + Bytes.toString(row) if (metrics != null) {
+ " mapping to server: " + serverName + " from cache"); metrics.incrMetaCacheNumClearRegion();
}
if (LOG.isTraceEnabled()) {
LOG.trace("Removed locations of table: " + tableName + " ,row: " + Bytes.toString(row)
+ " mapping to server: " + serverName + " from cache");
}
} }
} }
} }
@ -375,15 +362,20 @@ public class MetaCache {
HRegionLocation oldLocation = regionLocations.getRegionLocation(hri.getReplicaId()); HRegionLocation oldLocation = regionLocations.getRegionLocation(hri.getReplicaId());
if (oldLocation == null) return; if (oldLocation == null) return;
RegionLocations updatedLocations = regionLocations.remove(oldLocation); RegionLocations updatedLocations = regionLocations.remove(oldLocation);
boolean removed = false; boolean removed;
if (updatedLocations != regionLocations) { if (updatedLocations != regionLocations) {
if (updatedLocations.isEmpty()) { if (updatedLocations.isEmpty()) {
removed = tableLocations.remove(hri.getStartKey(), regionLocations); removed = tableLocations.remove(hri.getStartKey(), regionLocations);
} else { } else {
removed = tableLocations.replace(hri.getStartKey(), regionLocations, updatedLocations); removed = tableLocations.replace(hri.getStartKey(), regionLocations, updatedLocations);
} }
if (removed && LOG.isTraceEnabled()) { if (removed) {
LOG.trace("Removed " + oldLocation + " from cache"); if (metrics != null) {
metrics.incrMetaCacheNumClearRegion();
}
if (LOG.isTraceEnabled()) {
LOG.trace("Removed " + oldLocation + " from cache");
}
} }
} }
} }
@ -398,7 +390,7 @@ public class MetaCache {
RegionLocations regionLocations = tableLocations.get(location.getRegionInfo().getStartKey()); RegionLocations regionLocations = tableLocations.get(location.getRegionInfo().getStartKey());
if (regionLocations != null) { if (regionLocations != null) {
RegionLocations updatedLocations = regionLocations.remove(location); RegionLocations updatedLocations = regionLocations.remove(location);
boolean removed = false; boolean removed;
if (updatedLocations != regionLocations) { if (updatedLocations != regionLocations) {
if (updatedLocations.isEmpty()) { if (updatedLocations.isEmpty()) {
removed = tableLocations.remove(location.getRegionInfo().getStartKey(), regionLocations); removed = tableLocations.remove(location.getRegionInfo().getStartKey(), regionLocations);
@ -406,8 +398,13 @@ public class MetaCache {
removed = tableLocations.replace(location.getRegionInfo().getStartKey(), regionLocations, removed = tableLocations.replace(location.getRegionInfo().getStartKey(), regionLocations,
updatedLocations); updatedLocations);
} }
if (removed && LOG.isTraceEnabled()) { if (removed) {
LOG.trace("Removed " + location + " from cache"); if (metrics != null) {
metrics.incrMetaCacheNumClearRegion();
}
if (LOG.isTraceEnabled()) {
LOG.trace("Removed " + location + " from cache");
}
} }
} }
} }

View File

@ -62,6 +62,7 @@ public class MetricsConnection implements StatisticTrackable {
private static final String RESP_BASE = "rpcCallResponseSizeBytes_"; private static final String RESP_BASE = "rpcCallResponseSizeBytes_";
private static final String MEMLOAD_BASE = "memstoreLoad_"; private static final String MEMLOAD_BASE = "memstoreLoad_";
private static final String HEAP_BASE = "heapOccupancy_"; private static final String HEAP_BASE = "heapOccupancy_";
private static final String CACHE_BASE = "cacheDroppingExceptions_";
private static final String CLIENT_SVC = ClientService.getDescriptor().getName(); private static final String CLIENT_SVC = ClientService.getDescriptor().getName();
/** A container class for collecting details about the RPC call as it percolates. */ /** A container class for collecting details about the RPC call as it percolates. */
@ -263,6 +264,12 @@ public class MetricsConnection implements StatisticTrackable {
} }
}; };
private final NewMetric<Counter> counterFactory = new NewMetric<Counter>() {
@Override public Counter newMetric(Class<?> clazz, String name, String scope) {
return registry.counter(name(clazz, name, scope));
}
};
// static metrics // static metrics
@VisibleForTesting protected final Counter metaCacheHits; @VisibleForTesting protected final Counter metaCacheHits;
@ -275,6 +282,8 @@ public class MetricsConnection implements StatisticTrackable {
@VisibleForTesting protected final CallTracker putTracker; @VisibleForTesting protected final CallTracker putTracker;
@VisibleForTesting protected final CallTracker multiTracker; @VisibleForTesting protected final CallTracker multiTracker;
@VisibleForTesting protected final RunnerStats runnerStats; @VisibleForTesting protected final RunnerStats runnerStats;
private final Counter metaCacheNumClearServer;
private final Counter metaCacheNumClearRegion;
// dynamic metrics // dynamic metrics
@ -286,6 +295,8 @@ public class MetricsConnection implements StatisticTrackable {
@VisibleForTesting protected final ConcurrentMap<String, Histogram> rpcHistograms = @VisibleForTesting protected final ConcurrentMap<String, Histogram> rpcHistograms =
new ConcurrentHashMap<>(CAPACITY * 2 /* tracking both request and response sizes */, new ConcurrentHashMap<>(CAPACITY * 2 /* tracking both request and response sizes */,
LOAD_FACTOR, CONCURRENCY_LEVEL); LOAD_FACTOR, CONCURRENCY_LEVEL);
private final ConcurrentMap<String, Counter> cacheDroppingExceptions =
new ConcurrentHashMap<>(CAPACITY, LOAD_FACTOR, CONCURRENCY_LEVEL);
public MetricsConnection(final ConnectionImplementation conn) { public MetricsConnection(final ConnectionImplementation conn) {
this.scope = conn.toString(); this.scope = conn.toString();
@ -309,6 +320,10 @@ public class MetricsConnection implements StatisticTrackable {
}); });
this.metaCacheHits = registry.counter(name(this.getClass(), "metaCacheHits", scope)); this.metaCacheHits = registry.counter(name(this.getClass(), "metaCacheHits", scope));
this.metaCacheMisses = registry.counter(name(this.getClass(), "metaCacheMisses", scope)); this.metaCacheMisses = registry.counter(name(this.getClass(), "metaCacheMisses", scope));
this.metaCacheNumClearServer = registry.counter(name(this.getClass(),
"metaCacheNumClearServer", scope));
this.metaCacheNumClearRegion = registry.counter(name(this.getClass(),
"metaCacheNumClearRegion", scope));
this.getTracker = new CallTracker(this.registry, "Get", scope); this.getTracker = new CallTracker(this.registry, "Get", scope);
this.scanTracker = new CallTracker(this.registry, "Scan", scope); this.scanTracker = new CallTracker(this.registry, "Scan", scope);
this.appendTracker = new CallTracker(this.registry, "Mutate", "Append", scope); this.appendTracker = new CallTracker(this.registry, "Mutate", "Append", scope);
@ -342,6 +357,16 @@ public class MetricsConnection implements StatisticTrackable {
metaCacheMisses.inc(); metaCacheMisses.inc();
} }
/** Increment the number of meta cache drops requested for entire RegionServer. */
public void incrMetaCacheNumClearServer() {
metaCacheNumClearServer.inc();
}
/** Increment the number of meta cache drops requested for individual region. */
public void incrMetaCacheNumClearRegion() {
metaCacheNumClearRegion.inc();
}
/** Increment the number of normal runner counts. */ /** Increment the number of normal runner counts. */
public void incrNormalRunners() { public void incrNormalRunners() {
this.runnerStats.incrNormalRunners(); this.runnerStats.incrNormalRunners();
@ -364,7 +389,8 @@ public class MetricsConnection implements StatisticTrackable {
T t = map.get(key); T t = map.get(key);
if (t == null) { if (t == null) {
t = factory.newMetric(this.getClass(), key, scope); t = factory.newMetric(this.getClass(), key, scope);
map.putIfAbsent(key, t); T tmp = map.putIfAbsent(key, t);
t = (tmp == null) ? t : tmp;
} }
return t; return t;
} }
@ -436,4 +462,9 @@ public class MetricsConnection implements StatisticTrackable {
// Fallback to dynamic registry lookup for DDL methods. // Fallback to dynamic registry lookup for DDL methods.
updateRpcGeneric(method, stats); updateRpcGeneric(method, stats);
} }
public void incrCacheDroppingExceptions(Object exception) {
getMetric(CACHE_BASE + exception.getClass().getSimpleName(),
cacheDroppingExceptions, counterFactory).inc();
}
} }

View File

@ -38,7 +38,6 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.ipc.CallTimeoutException; import org.apache.hadoop.hbase.ipc.CallTimeoutException;
import org.apache.hadoop.hbase.ipc.FailedServerException; import org.apache.hadoop.hbase.ipc.FailedServerException;
import org.apache.hadoop.hbase.ipc.RemoteWithExtrasException;
import org.apache.hadoop.hbase.quotas.ThrottlingException; import org.apache.hadoop.hbase.quotas.ThrottlingException;
import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.ipc.RemoteException;
@ -62,7 +61,7 @@ public final class ClientExceptionsUtil {
return (cur instanceof RegionMovedException || cur instanceof RegionOpeningException return (cur instanceof RegionMovedException || cur instanceof RegionOpeningException
|| cur instanceof RegionTooBusyException || cur instanceof ThrottlingException || cur instanceof RegionTooBusyException || cur instanceof ThrottlingException
|| cur instanceof MultiActionResultTooLarge || cur instanceof RetryImmediatelyException || cur instanceof MultiActionResultTooLarge || cur instanceof RetryImmediatelyException
|| isCallQueueTooBigException(cur) || cur instanceof NotServingRegionException); || cur instanceof CallQueueTooBigException || cur instanceof NotServingRegionException);
} }
@ -86,12 +85,8 @@ public final class ClientExceptionsUtil {
} }
if (cur instanceof RemoteException) { if (cur instanceof RemoteException) {
RemoteException re = (RemoteException) cur; RemoteException re = (RemoteException) cur;
cur = re.unwrapRemoteException( cur = re.unwrapRemoteException();
RegionOpeningException.class, RegionMovedException.class,
RegionTooBusyException.class);
if (cur == null) {
cur = re.unwrapRemoteException();
}
// unwrapRemoteException can return the exception given as a parameter when it cannot // unwrapRemoteException can return the exception given as a parameter when it cannot
// unwrap it. In this case, there is no need to look further // unwrap it. In this case, there is no need to look further
// noinspection ObjectEquality // noinspection ObjectEquality
@ -109,21 +104,14 @@ public final class ClientExceptionsUtil {
} }
/** /**
* Checks if the exception is CallQueueTooBig exception, or tries to unwrap * Checks if the exception is CallQueueTooBig exception (maybe wrapped
* {@link RemoteWithExtrasException} to see if we've got {@link CallQueueTooBigException}. * into some RemoteException).
* @param t exception to check * @param t exception to check
* @return true if it's a CQTBE, false otherwise * @return true if it's a CQTBE, false otherwise
*/ */
public static boolean isCallQueueTooBigException(Throwable t) { public static boolean isCallQueueTooBigException(Throwable t) {
if (t instanceof CallQueueTooBigException) { t = findException(t);
return true; return (t instanceof CallQueueTooBigException);
}
if (t instanceof RemoteWithExtrasException) {
return CallQueueTooBigException.class.getName().equals(
((RemoteWithExtrasException) t).getClassName().trim());
} else {
return false;
}
} }
/** /**