HBASE-20548 Master fails to startup on large clusters, refreshing block distribution

Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
Thiruvel Thirumoolan 2018-05-24 01:01:54 -07:00 committed by Andrew Purtell
parent 554d513f50
commit 1fbce10ff4
4 changed files with 38 additions and 9 deletions

View File

@ -466,4 +466,9 @@ public class RSGroupBasedLoadBalancer implements RSGroupableBalancer {
public void setRsGroupInfoManager(RSGroupInfoManager rsGroupInfoManager) { public void setRsGroupInfoManager(RSGroupInfoManager rsGroupInfoManager) {
this.rsGroupInfoManager = rsGroupInfoManager; this.rsGroupInfoManager = rsGroupInfoManager;
} }
/**
 * Performs post-master-startup initialization for this balancer by forwarding the call to
 * {@code internalBalancer}; no other work is done here.
 */
// NOTE(review): assumes internalBalancer has already been set when this is invoked — confirm.
@Override
public void postMasterStartupInitialize() {
this.internalBalancer.postMasterStartupInitialize();
}
} }

View File

@ -1003,6 +1003,17 @@ public class HMaster extends HRegionServer implements MasterServices {
} }
zombieDetector.interrupt(); zombieDetector.interrupt();
/*
* After the master has started up, do the balancer's post-startup initialization. Since this
* runs in the activeMasterManager thread, it should be fine.
*/
long start = System.currentTimeMillis();
this.balancer.postMasterStartupInitialize();
if (LOG.isDebugEnabled()) {
LOG.debug("Balancer post startup initialization complete, took " + (
(System.currentTimeMillis() - start) / 1000) + " seconds");
}
} }
/** /**

View File

@ -159,6 +159,11 @@ public interface LoadBalancer extends Configurable, Stoppable, ConfigurationObse
@Override @Override
void onConfigurationChange(Configuration conf); void onConfigurationChange(Configuration conf);
/**
 * Hook that lets a balancer perform any initialization it needs to do once the Master has
 * started up.
 */
void postMasterStartupInitialize();
/** /**
* @return true if Master carries regions * @return true if Master carries regions
*/ */

View File

@ -1151,6 +1151,19 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
} }
} }
/**
 * Refreshes the region finder for every region currently present in the assignment manager's
 * region-assignment map, waiting for the refresh via {@code refreshAndWait}.
 * <p>
 * Does nothing when either {@code services} or {@code regionFinder} is unset. Any exception
 * raised while collecting the regions or refreshing is logged at WARN level and otherwise
 * ignored, so a failed refresh never propagates to the caller.
 */
@Override
public void postMasterStartupInitialize() {
  // Nothing to refresh without both the master services and a region finder.
  if (services == null || regionFinder == null) {
    return;
  }
  try {
    Set<RegionInfo> knownRegions =
        services.getAssignmentManager().getRegionStates().getRegionAssignments().keySet();
    regionFinder.refreshAndWait(knownRegions);
  } catch (Exception e) {
    // Best effort: a failed refresh is reported but deliberately not rethrown.
    LOG.warn("Refreshing region HDFS Block dist failed with exception, ignoring", e);
  }
}
public void setRackManager(RackManager rackManager) { public void setRackManager(RackManager rackManager) {
this.rackManager = rackManager; this.rackManager = rackManager;
} }
@ -1249,7 +1262,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
return assignments; return assignments;
} }
Cluster cluster = createCluster(servers, regions, false); Cluster cluster = createCluster(servers, regions);
List<RegionInfo> unassignedRegions = new ArrayList<>(); List<RegionInfo> unassignedRegions = new ArrayList<>();
roundRobinAssignment(cluster, regions, unassignedRegions, roundRobinAssignment(cluster, regions, unassignedRegions,
@ -1288,11 +1301,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
return assignments; return assignments;
} }
protected Cluster createCluster(List<ServerName> servers, protected Cluster createCluster(List<ServerName> servers, Collection<RegionInfo> regions) {
Collection<RegionInfo> regions, boolean forceRefresh) {
if (forceRefresh && useRegionFinder) {
regionFinder.refreshAndWait(regions);
}
// Get the snapshot of the current assignments for the regions in question, and then create // Get the snapshot of the current assignments for the regions in question, and then create
// a cluster out of it. Note that we might have replicas already assigned to some servers // a cluster out of it. Note that we might have replicas already assigned to some servers
// earlier. So we want to get the snapshot to see those assignments, but this will only contain // earlier. So we want to get the snapshot to see those assignments, but this will only contain
@ -1346,7 +1355,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
final List<ServerName> finalServers = idleServers.isEmpty() ? final List<ServerName> finalServers = idleServers.isEmpty() ?
servers : idleServers; servers : idleServers;
List<RegionInfo> regions = Lists.newArrayList(regionInfo); List<RegionInfo> regions = Lists.newArrayList(regionInfo);
Cluster cluster = createCluster(finalServers, regions, false); Cluster cluster = createCluster(finalServers, regions);
return randomAssignment(cluster, regionInfo, finalServers); return randomAssignment(cluster, regionInfo, finalServers);
} }
@ -1419,8 +1428,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
int numRandomAssignments = 0; int numRandomAssignments = 0;
int numRetainedAssigments = 0; int numRetainedAssigments = 0;
Cluster cluster = createCluster(servers, regions.keySet(), true);
for (Map.Entry<RegionInfo, ServerName> entry : regions.entrySet()) { for (Map.Entry<RegionInfo, ServerName> entry : regions.entrySet()) {
RegionInfo region = entry.getKey(); RegionInfo region = entry.getKey();
ServerName oldServerName = entry.getValue(); ServerName oldServerName = entry.getValue();
@ -1463,6 +1470,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
// If servers from prior assignment aren't present, then lets do randomAssignment on regions. // If servers from prior assignment aren't present, then lets do randomAssignment on regions.
if (randomAssignRegions.size() > 0) { if (randomAssignRegions.size() > 0) {
Cluster cluster = createCluster(servers, regions.keySet());
for (Map.Entry<ServerName, List<RegionInfo>> entry : assignments.entrySet()) { for (Map.Entry<ServerName, List<RegionInfo>> entry : assignments.entrySet()) {
ServerName sn = entry.getKey(); ServerName sn = entry.getKey();
for (RegionInfo region : entry.getValue()) { for (RegionInfo region : entry.getValue()) {