HBASE-26945 Quotas cause too much load on meta for large clusters (#4576)

Signed-off-by: Xiaolin Ha <haxiaolin@apache.org>
This commit is contained in:
Bryan Beaudreault 2022-06-23 16:47:09 -04:00
parent 2b7347f23c
commit b379d8e2cf
1 changed file with 17 additions and 11 deletions

View File

@ -27,12 +27,13 @@ import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ClusterMetrics.Option; import org.apache.hadoop.hbase.ClusterMetrics.Option;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.ScheduledChore; import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.Stoppable; import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.RegionStatesCount;
import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.RegionServerServices; import org.apache.hadoop.hbase.regionserver.RegionServerServices;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@ -352,24 +353,29 @@ public class QuotaCache implements Stoppable {
*/ */
private void updateQuotaFactors() { private void updateQuotaFactors() {
// Update machine quota factor // Update machine quota factor
ClusterMetrics clusterMetrics;
try { try {
int rsSize = rsServices.getConnection().getAdmin() clusterMetrics = rsServices.getConnection().getAdmin()
.getClusterMetrics(EnumSet.of(Option.SERVERS_NAME)).getServersName().size(); .getClusterMetrics(EnumSet.of(Option.SERVERS_NAME, Option.TABLE_TO_REGIONS_COUNT));
} catch (IOException e) {
LOG.warn("Failed to get cluster metrics needed for updating quotas", e);
return;
}
int rsSize = clusterMetrics.getServersName().size();
if (rsSize != 0) { if (rsSize != 0) {
// TODO if use rs group, the cluster limit should be shared by the rs group // TODO if use rs group, the cluster limit should be shared by the rs group
machineQuotaFactor = 1.0 / rsSize; machineQuotaFactor = 1.0 / rsSize;
} }
} catch (IOException e) {
LOG.warn("Get live region servers failed", e); Map<TableName, RegionStatesCount> tableRegionStatesCount =
} clusterMetrics.getTableRegionStatesCount();
// Update table machine quota factors // Update table machine quota factors
for (TableName tableName : tableQuotaCache.keySet()) { for (TableName tableName : tableQuotaCache.keySet()) {
double factor = 1; double factor = 1;
try { try {
long regionSize = long regionSize = tableRegionStatesCount.get(tableName).getOpenRegions();
MetaTableAccessor.getTableRegions(rsServices.getConnection(), tableName, true).stream()
.filter(regionInfo -> !regionInfo.isOffline()).count();
if (regionSize == 0) { if (regionSize == 0) {
factor = 0; factor = 0;
} else { } else {