HBASE-26163 Better logging in RSGroupInfoManagerImpl (#3610)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
Reviewed-by: Bharath Vissapragada <bharathv@apache.org>
This commit is contained in:
parent
b7c6a0b637
commit
03b6f2d3f2
|
@ -26,6 +26,8 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.hbase.ClusterMetrics;
|
import org.apache.hadoop.hbase.ClusterMetrics;
|
||||||
import org.apache.hadoop.hbase.HBaseIOException;
|
import org.apache.hadoop.hbase.HBaseIOException;
|
||||||
|
@ -129,6 +131,7 @@ public class RSGroupBasedLoadBalancer implements LoadBalancer {
|
||||||
try {
|
try {
|
||||||
// For each rsgroup
|
// For each rsgroup
|
||||||
for (RSGroupInfo rsgroup : rsGroupInfoManager.listRSGroups()) {
|
for (RSGroupInfo rsgroup : rsGroupInfoManager.listRSGroups()) {
|
||||||
|
LOG.debug("Balancing RSGroup={}", rsgroup.getName());
|
||||||
Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfTablesInGroup = new HashMap<>();
|
Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfTablesInGroup = new HashMap<>();
|
||||||
for (Map.Entry<TableName, Map<ServerName, List<RegionInfo>>> entry : correctedLoadOfAllTable
|
for (Map.Entry<TableName, Map<ServerName, List<RegionInfo>>> entry : correctedLoadOfAllTable
|
||||||
.entrySet()) {
|
.entrySet()) {
|
||||||
|
@ -234,6 +237,11 @@ public class RSGroupBasedLoadBalancer implements LoadBalancer {
|
||||||
if (!fallbackRegions.isEmpty()) {
|
if (!fallbackRegions.isEmpty()) {
|
||||||
List<ServerName> candidates = null;
|
List<ServerName> candidates = null;
|
||||||
if (isFallbackEnabled()) {
|
if (isFallbackEnabled()) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Falling back {} regions to servers outside their RSGroup. Regions: {}",
|
||||||
|
fallbackRegions.size(), fallbackRegions.stream()
|
||||||
|
.map(RegionInfo::getRegionNameAsString).collect(Collectors.toSet()));
|
||||||
|
}
|
||||||
candidates = getFallBackCandidates(servers);
|
candidates = getFallBackCandidates(servers);
|
||||||
}
|
}
|
||||||
candidates = (candidates == null || candidates.isEmpty()) ?
|
candidates = (candidates == null || candidates.isEmpty()) ?
|
||||||
|
|
|
@ -259,6 +259,9 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
// do not need to persist, as we do not persist default group.
|
// do not need to persist, as we do not persist default group.
|
||||||
resetRSGroupMap(newGroupMap);
|
resetRSGroupMap(newGroupMap);
|
||||||
LOG.info("Updated default servers, {} servers", newDefaultGroupInfo.getServers().size());
|
LOG.info("Updated default servers, {} servers", newDefaultGroupInfo.getServers().size());
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("New default servers list: {}", newDefaultGroupInfo.getServers());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized void init() throws IOException {
|
private synchronized void init() throws IOException {
|
||||||
|
@ -300,6 +303,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
Map<String, RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
|
Map<String, RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
|
||||||
newGroupMap.put(rsGroupInfo.getName(), rsGroupInfo);
|
newGroupMap.put(rsGroupInfo.getName(), rsGroupInfo);
|
||||||
flushConfig(newGroupMap);
|
flushConfig(newGroupMap);
|
||||||
|
LOG.info("Add group {} done.", rsGroupInfo.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
private RSGroupInfo getRSGroupInfo(final String groupName) throws ConstraintException {
|
private RSGroupInfo getRSGroupInfo(final String groupName) throws ConstraintException {
|
||||||
|
@ -333,7 +337,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
if (onlineServers != null) {
|
if (onlineServers != null) {
|
||||||
if (!onlineServers.contains(el)) {
|
if (!onlineServers.contains(el)) {
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Dropping " + el + " during move-to-default rsgroup because not online");
|
LOG.debug("Dropping " + el + " during move-to-default RSGroup because not online");
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -375,8 +379,8 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
for (TableDescriptor td : masterServices.getTableDescriptors().getAll().values()) {
|
for (TableDescriptor td : masterServices.getTableDescriptors().getAll().values()) {
|
||||||
if (td.getRegionServerGroup().map(groupName::equals).orElse(false)) {
|
if (td.getRegionServerGroup().map(groupName::equals).orElse(false)) {
|
||||||
throw new ConstraintException("RSGroup " + groupName + " is already referenced by " +
|
throw new ConstraintException("RSGroup " + groupName + " is already referenced by " +
|
||||||
td.getTableName() + "; you must remove all the tables from the rsgroup before " +
|
td.getTableName() + "; you must remove all the tables from the RSGroup before " +
|
||||||
"the rsgroup can be removed.");
|
"the RSGroup can be removed.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (NamespaceDescriptor ns : masterServices.getClusterSchema().getNamespaces()) {
|
for (NamespaceDescriptor ns : masterServices.getClusterSchema().getNamespaces()) {
|
||||||
|
@ -394,6 +398,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
Map<String, RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
|
Map<String, RSGroupInfo> newGroupMap = Maps.newHashMap(rsGroupMap);
|
||||||
newGroupMap.remove(groupName);
|
newGroupMap.remove(groupName);
|
||||||
flushConfig(newGroupMap);
|
flushConfig(newGroupMap);
|
||||||
|
LOG.info("Remove group {} done", groupName);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -641,6 +646,8 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
LOG.debug("Offline mode, cannot persist to {}", RSGROUP_TABLE_NAME);
|
||||||
|
|
||||||
Map<String, RSGroupInfo> oldGroupMap = Maps.newHashMap(holder.groupName2Group);
|
Map<String, RSGroupInfo> oldGroupMap = Maps.newHashMap(holder.groupName2Group);
|
||||||
RSGroupInfo oldDefaultGroup = oldGroupMap.remove(RSGroupInfo.DEFAULT_GROUP);
|
RSGroupInfo oldDefaultGroup = oldGroupMap.remove(RSGroupInfo.DEFAULT_GROUP);
|
||||||
RSGroupInfo newDefaultGroup = newGroupMap.remove(RSGroupInfo.DEFAULT_GROUP);
|
RSGroupInfo newDefaultGroup = newGroupMap.remove(RSGroupInfo.DEFAULT_GROUP);
|
||||||
|
@ -657,6 +664,8 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
// according to the inputted newGroupMap (an updated copy of rsGroupMap)
|
// according to the inputted newGroupMap (an updated copy of rsGroupMap)
|
||||||
this.holder = new RSGroupInfoHolder(newGroupMap);
|
this.holder = new RSGroupInfoHolder(newGroupMap);
|
||||||
|
|
||||||
|
LOG.debug("New RSGroup map: {}", newGroupMap);
|
||||||
|
|
||||||
// Do not need to update tableMap
|
// Do not need to update tableMap
|
||||||
// because only the update on servers in default group is allowed above,
|
// because only the update on servers in default group is allowed above,
|
||||||
// or IOException will be thrown
|
// or IOException will be thrown
|
||||||
|
@ -664,15 +673,18 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For online mode, persist to hbase:rsgroup and Zookeeper */
|
/* For online mode, persist to hbase:rsgroup and Zookeeper */
|
||||||
|
LOG.debug("Online mode, persisting to {} and ZK", RSGROUP_TABLE_NAME);
|
||||||
flushConfigTable(newGroupMap);
|
flushConfigTable(newGroupMap);
|
||||||
|
|
||||||
// Make changes visible after having been persisted to the source of truth
|
// Make changes visible after having been persisted to the source of truth
|
||||||
resetRSGroupMap(newGroupMap);
|
resetRSGroupMap(newGroupMap);
|
||||||
saveRSGroupMapToZK(newGroupMap);
|
saveRSGroupMapToZK(newGroupMap);
|
||||||
updateCacheOfRSGroups(newGroupMap.keySet());
|
updateCacheOfRSGroups(newGroupMap.keySet());
|
||||||
|
LOG.info("Flush config done, new RSGroup map: {}", newGroupMap);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void saveRSGroupMapToZK(Map<String, RSGroupInfo> newGroupMap) throws IOException {
|
private void saveRSGroupMapToZK(Map<String, RSGroupInfo> newGroupMap) throws IOException {
|
||||||
|
LOG.debug("Saving RSGroup info to ZK");
|
||||||
try {
|
try {
|
||||||
String groupBasePath =
|
String groupBasePath =
|
||||||
ZNodePaths.joinZNode(watcher.getZNodePaths().baseZNode, RS_GROUP_ZNODE);
|
ZNodePaths.joinZNode(watcher.getZNodePaths().baseZNode, RS_GROUP_ZNODE);
|
||||||
|
@ -802,6 +814,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
if (optProcId.isPresent()) {
|
if (optProcId.isPresent()) {
|
||||||
procId = optProcId.getAsLong();
|
procId = optProcId.getAsLong();
|
||||||
} else {
|
} else {
|
||||||
|
LOG.debug("Creating group table {}", RSGROUP_TABLE_NAME);
|
||||||
procId = masterServices.createSystemTable(RSGROUP_TABLE_DESC);
|
procId = masterServices.createSystemTable(RSGROUP_TABLE_DESC);
|
||||||
}
|
}
|
||||||
// wait for region to be online
|
// wait for region to be online
|
||||||
|
@ -851,9 +864,11 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
}
|
}
|
||||||
MutateRowsRequest request = builder.build();
|
MutateRowsRequest request = builder.build();
|
||||||
AsyncTable<?> table = conn.getTable(RSGROUP_TABLE_NAME);
|
AsyncTable<?> table = conn.getTable(RSGROUP_TABLE_NAME);
|
||||||
|
LOG.debug("Multimutating {} with {} mutations", RSGROUP_TABLE_NAME, mutations.size());
|
||||||
FutureUtils.get(table.<MultiRowMutationService, MutateRowsResponse> coprocessorService(
|
FutureUtils.get(table.<MultiRowMutationService, MutateRowsResponse> coprocessorService(
|
||||||
MultiRowMutationService::newStub,
|
MultiRowMutationService::newStub,
|
||||||
(stub, controller, done) -> stub.mutateRows(controller, request, done), ROW_KEY));
|
(stub, controller, done) -> stub.mutateRows(controller, request, done), ROW_KEY));
|
||||||
|
LOG.info("Multimutating {} with {} mutations done", RSGROUP_TABLE_NAME, mutations.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkGroupName(String groupName) throws ConstraintException {
|
private void checkGroupName(String groupName) throws ConstraintException {
|
||||||
|
@ -873,7 +888,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
* @param servers servers to remove
|
* @param servers servers to remove
|
||||||
*/
|
*/
|
||||||
private void checkForDeadOrOnlineServers(Set<Address> servers) throws IOException {
|
private void checkForDeadOrOnlineServers(Set<Address> servers) throws IOException {
|
||||||
// This uglyness is because we only have Address, not ServerName.
|
// This ugliness is because we only have Address, not ServerName.
|
||||||
Set<Address> onlineServers = new HashSet<>();
|
Set<Address> onlineServers = new HashSet<>();
|
||||||
List<ServerName> drainingServers = masterServices.getServerManager().getDrainingServersList();
|
List<ServerName> drainingServers = masterServices.getServerManager().getDrainingServersList();
|
||||||
for (ServerName server : masterServices.getServerManager().getOnlineServers().keySet()) {
|
for (ServerName server : masterServices.getServerManager().getOnlineServers().keySet()) {
|
||||||
|
@ -1003,7 +1018,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
// Get regions that are associated with this server and filter regions by group tables.
|
// Get regions that are associated with this server and filter regions by group tables.
|
||||||
for (RegionInfo region : getRegionsInfo.apply((T) owner.getAddress())) {
|
for (RegionInfo region : getRegionsInfo.apply((T) owner.getAddress())) {
|
||||||
if (!validation.apply(region)) {
|
if (!validation.apply(region)) {
|
||||||
LOG.info("Moving region {}, which do not belong to RSGroup {}",
|
LOG.info("Moving region {}, which does not belong to RSGroup {}",
|
||||||
region.getShortNameToLog(), targetGroupName);
|
region.getShortNameToLog(), targetGroupName);
|
||||||
// Move region back to source RSGroup servers
|
// Move region back to source RSGroup servers
|
||||||
ServerName dest =
|
ServerName dest =
|
||||||
|
@ -1197,6 +1212,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void moveTablesAndWait(Set<TableName> tables, String targetGroup) throws IOException {
|
private void moveTablesAndWait(Set<TableName> tables, String targetGroup) throws IOException {
|
||||||
|
LOG.debug("Moving {} tables to target group {}", tables.size(), targetGroup);
|
||||||
List<Long> procIds = new ArrayList<Long>();
|
List<Long> procIds = new ArrayList<Long>();
|
||||||
for (TableName tableName : tables) {
|
for (TableName tableName : tables) {
|
||||||
TableDescriptor oldTd = masterServices.getTableDescriptors().get(tableName);
|
TableDescriptor oldTd = masterServices.getTableDescriptors().get(tableName);
|
||||||
|
@ -1216,6 +1232,10 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
ProcedureSyncWait.waitForProcedureToCompleteIOE(masterServices.getMasterProcedureExecutor(),
|
ProcedureSyncWait.waitForProcedureToCompleteIOE(masterServices.getMasterProcedureExecutor(),
|
||||||
proc, Long.MAX_VALUE);
|
proc, Long.MAX_VALUE);
|
||||||
}
|
}
|
||||||
|
LOG.info("Move tables done: moved {} tables to {}", tables.size(), targetGroup);
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Tables moved to {}: {}", targetGroup, tables);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -1281,7 +1301,12 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
Set<Address> movedServers = moveServers(servers, srcGrp.getName(),
|
Set<Address> movedServers = moveServers(servers, srcGrp.getName(),
|
||||||
targetGroupName);
|
targetGroupName);
|
||||||
moveServerRegionsFromGroup(movedServers, srcGrp.getServers(), targetGroupName, srcGrp.getName());
|
moveServerRegionsFromGroup(movedServers, srcGrp.getServers(), targetGroupName, srcGrp.getName());
|
||||||
LOG.info("Move servers done: {} => {}", srcGrp.getName(), targetGroupName);
|
LOG.info("Move servers done: moved {} servers from {} to {}", movedServers.size(),
|
||||||
|
srcGrp.getName(), targetGroupName);
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Servers moved from {} to {}: {}", srcGrp.getName(), targetGroupName,
|
||||||
|
movedServers);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1315,6 +1340,7 @@ final class RSGroupInfoManagerImpl implements RSGroupInfoManager {
|
||||||
.map(TableDescriptor::getTableName)
|
.map(TableDescriptor::getTableName)
|
||||||
.collect(Collectors.toSet());
|
.collect(Collectors.toSet());
|
||||||
setRSGroup(updateTables, newName);
|
setRSGroup(updateTables, newName);
|
||||||
|
LOG.info("Rename RSGroup done: {} => {}", oldName, newName);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue