HBASE-26163 Better logging in RSGroupInfoManagerImpl (#3610)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
Reviewed-by: Bharath Vissapragada <bharathv@apache.org>
This commit is contained in:
caroliney14 2021-09-07 07:20:09 -07:00 committed by GitHub
parent b7c6a0b637
commit 03b6f2d3f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 6 deletions

View File

@ -26,6 +26,8 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.HBaseIOException;
@ -129,6 +131,7 @@ public class RSGroupBasedLoadBalancer implements LoadBalancer {
try {
// For each rsgroup
for (RSGroupInfo rsgroup : rsGroupInfoManager.listRSGroups()) {
LOG.debug("Balancing RSGroup={}", rsgroup.getName());
Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfTablesInGroup = new HashMap<>();
for (Map.Entry<TableName, Map<ServerName, List<RegionInfo>>> entry : correctedLoadOfAllTable
.entrySet()) {
@ -234,6 +237,11 @@ public class RSGroupBasedLoadBalancer implements LoadBalancer {
if (!fallbackRegions.isEmpty()) {
List<ServerName> candidates = null;
if (isFallbackEnabled()) {
if (LOG.isDebugEnabled()) {
LOG.debug("Falling back {} regions to servers outside their RSGroup. Regions: {}",
fallbackRegions.size(), fallbackRegions.stream()
.map(RegionInfo::getRegionNameAsString).collect(Collectors.toSet()));
}
candidates = getFallBackCandidates(servers);
}
candidates = (candidates == null || candidates.isEmpty()) ?

View File

@ -259,6 +259,9 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
// do not need to persist, as we do not persist default group.
resetRSGroupMap(newGroupMap);
LOG.info("Updated default servers, {} servers", newDefaultGroupInfo.getServers().size());
if (LOG.isDebugEnabled()) {
LOG.debug("New default servers list: {}", newDefaultGroupInfo.getServers());
}
}
private synchronized void init() throws IOException {
@ -300,6 +303,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
Map<String, RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
newGroupMap.put(rsGroupInfo.getName(), rsGroupInfo);
flushConfig(newGroupMap);
LOG.info("Add group {} done.", rsGroupInfo.getName());
}
private RSGroupInfo getRSGroupInfo(final String groupName) throws ConstraintException {
@ -333,7 +337,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
if (onlineServers != null) {
if (!onlineServers.contains(el)) {
if (LOG.isDebugEnabled()) {
LOG.debug("Dropping " + el + " during move-to-default rsgroup because not online");
LOG.debug("Dropping " + el + " during move-to-default RSGroup because not online");
}
continue;
}
@ -375,8 +379,8 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
for (TableDescriptor td : masterServices.getTableDescriptors().getAll().values()) {
if (td.getRegionServerGroup().map(groupName::equals).orElse(false)) {
throw new ConstraintException("RSGroup " + groupName + " is already referenced by " +
td.getTableName() + "; you must remove all the tables from the rsgroup before " +
"the rsgroup can be removed.");
td.getTableName() + "; you must remove all the tables from the RSGroup before " +
"the RSGroup can be removed.");
}
}
for (NamespaceDescriptor ns : masterServices.getClusterSchema().getNamespaces()) {
@ -394,6 +398,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
Map<String, RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
newGroupMap.remove(groupName);
flushConfig(newGroupMap);
LOG.info("Remove group {} done", groupName);
}
@Override
@ -641,6 +646,8 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
return;
}
LOG.debug("Offline mode, cannot persist to {}", RSGROUP_TABLE_NAME);
Map<String, RSGroupInfo> oldGroupMap = Maps.newHashMap(holder.groupName2Group);
RSGroupInfo oldDefaultGroup = oldGroupMap.remove(RSGroupInfo.DEFAULT_GROUP);
RSGroupInfo newDefaultGroup = newGroupMap.remove(RSGroupInfo.DEFAULT_GROUP);
@ -657,6 +664,8 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
// according to the inputted newGroupMap (an updated copy of rsGroupMap)
this.holder = new RSGroupInfoHolder(newGroupMap);
LOG.debug("New RSGroup map: {}", newGroupMap);
// Do not need to update tableMap
// because only the update on servers in default group is allowed above,
// or IOException will be thrown
@ -664,15 +673,18 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
}
/* For online mode, persist to hbase:rsgroup and Zookeeper */
LOG.debug("Online mode, persisting to {} and ZK", RSGROUP_TABLE_NAME);
flushConfigTable(newGroupMap);
// Make changes visible after having been persisted to the source of truth
resetRSGroupMap(newGroupMap);
saveRSGroupMapToZK(newGroupMap);
updateCacheOfRSGroups(newGroupMap.keySet());
LOG.info("Flush config done, new RSGroup map: {}", newGroupMap);
}
private void saveRSGroupMapToZK(Map<String, RSGroupInfo> newGroupMap) throws IOException {
LOG.debug("Saving RSGroup info to ZK");
try {
String groupBasePath =
ZNodePaths.joinZNode(watcher.getZNodePaths().baseZNode, RS_GROUP_ZNODE);
@ -802,6 +814,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
if (optProcId.isPresent()) {
procId = optProcId.getAsLong();
} else {
LOG.debug("Creating group table {}", RSGROUP_TABLE_NAME);
procId = masterServices.createSystemTable(RSGROUP_TABLE_DESC);
}
// wait for region to be online
@ -851,9 +864,11 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
}
MutateRowsRequest request = builder.build();
AsyncTable<?> table = conn.getTable(RSGROUP_TABLE_NAME);
LOG.debug("Multimutating {} with {} mutations", RSGROUP_TABLE_NAME, mutations.size());
FutureUtils.get(table.<MultiRowMutationService, MutateRowsResponse> coprocessorService(
MultiRowMutationService::newStub,
(stub, controller, done) -> stub.mutateRows(controller, request, done), ROW_KEY));
LOG.info("Multimutating {} with {} mutations done", RSGROUP_TABLE_NAME, mutations.size());
}
private void checkGroupName(String groupName) throws ConstraintException {
@ -873,7 +888,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
* @param servers servers to remove
*/
private void checkForDeadOrOnlineServers(Set<Address> servers) throws IOException {
// This uglyness is because we only have Address, not ServerName.
// This ugliness is because we only have Address, not ServerName.
Set<Address> onlineServers = new HashSet<>();
List<ServerName> drainingServers = masterServices.getServerManager().getDrainingServersList();
for (ServerName server : masterServices.getServerManager().getOnlineServers().keySet()) {
@ -1003,7 +1018,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
// Get regions that are associated with this server and filter regions by group tables.
for (RegionInfo region : getRegionsInfo.apply((T) owner.getAddress())) {
if (!validation.apply(region)) {
LOG.info("Moving region {}, which do not belong to RSGroup {}",
LOG.info("Moving region {}, which does not belong to RSGroup {}",
region.getShortNameToLog(), targetGroupName);
// Move region back to source RSGroup servers
ServerName dest =
@ -1197,6 +1212,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
}
private void moveTablesAndWait(Set<TableName> tables, String targetGroup) throws IOException {
LOG.debug("Moving {} tables to target group {}", tables.size(), targetGroup);
List<Long> procIds = new ArrayList<Long>();
for (TableName tableName : tables) {
TableDescriptor oldTd = masterServices.getTableDescriptors().get(tableName);
@ -1216,6 +1232,10 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
ProcedureSyncWait.waitForProcedureToCompleteIOE(masterServices.getMasterProcedureExecutor(),
proc, Long.MAX_VALUE);
}
LOG.info("Move tables done: moved {} tables to {}", tables.size(), targetGroup);
if (LOG.isDebugEnabled()) {
LOG.debug("Tables moved to {}: {}", targetGroup, tables);
}
}
@Override
@ -1281,7 +1301,12 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
Set<Address> movedServers = moveServers(servers, srcGrp.getName(),
targetGroupName);
moveServerRegionsFromGroup(movedServers, srcGrp.getServers(), targetGroupName, srcGrp.getName());
LOG.info("Move servers done: {} => {}", srcGrp.getName(), targetGroupName);
LOG.info("Move servers done: moved {} servers from {} to {}", movedServers.size(),
srcGrp.getName(), targetGroupName);
if (LOG.isDebugEnabled()) {
LOG.debug("Servers moved from {} to {}: {}", srcGrp.getName(), targetGroupName,
movedServers);
}
}
}
@ -1315,6 +1340,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
.map(TableDescriptor::getTableName)
.collect(Collectors.toSet());
setRSGroup(updateTables, newName);
LOG.info("Rename RSGroup done: {} => {}", oldName, newName);
}
@Override