HBASE-16941: FavoredNodes - Split/Merge code paths

Signed-off-by: Francis Liu <toffer@apache.org>
This commit is contained in:
Thiruvel Thirumoolan 2016-12-05 16:03:00 -08:00 committed by Francis Liu
parent 75567f828c
commit c1293cc91e
20 changed files with 1565 additions and 225 deletions

View File

@ -98,7 +98,7 @@ import com.google.common.net.InetAddresses;
private byte [] bytes;
public static final List<ServerName> EMPTY_SERVER_LIST = new ArrayList<ServerName>(0);
private ServerName(final String hostname, final int port, final long startcode) {
protected ServerName(final String hostname, final int port, final long startcode) {
// Drop the domain is there is one; no need of it in a local cluster. With it, we get long
// unwieldy names.
this.hostnameOnly = hostname;

View File

@ -17,7 +17,7 @@
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.balancer;
package org.apache.hadoop.hbase.favored;
import java.io.IOException;
import java.util.ArrayList;
@ -29,9 +29,11 @@ import java.util.Map.Entry;
import java.util.Random;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.MetaTableAccessor;
@ -49,14 +51,16 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.FavoredNode
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.protobuf.InvalidProtocolBufferException;
/**
* Helper class for {@link FavoredNodeLoadBalancer} that has all the intelligence
* for racks, meta scans, etc. Instantiated by the {@link FavoredNodeLoadBalancer}
* when needed (from within calls like
* {@link FavoredNodeLoadBalancer#randomAssignment(HRegionInfo, List)}).
*
* Helper class for {@link FavoredNodeLoadBalancer} that has all the intelligence for racks,
* meta scans, etc. Instantiated by the {@link FavoredNodeLoadBalancer} when needed (from
* within calls like {@link FavoredNodeLoadBalancer#randomAssignment(HRegionInfo, List)}).
* All updates to favored nodes should only be done from {@link FavoredNodesManager} and not
* through this helper class (except for tests).
*/
@InterfaceAudience.Private
public class FavoredNodeAssignmentHelper {
@ -64,11 +68,14 @@ public class FavoredNodeAssignmentHelper {
private RackManager rackManager;
private Map<String, List<ServerName>> rackToRegionServerMap;
private List<String> uniqueRackList;
private Map<ServerName, String> regionServerToRackMap;
// This map serves as a cache for rack to sn lookups. The num of
// region server entries might not match with that is in servers.
private Map<String, String> regionServerToRackMap;
private Random random;
private List<ServerName> servers;
public static final byte [] FAVOREDNODES_QUALIFIER = Bytes.toBytes("fn");
public final static short FAVORED_NODES_NUM = 3;
public final static short MAX_ATTEMPTS_FN_GENERATION = 10;
public FavoredNodeAssignmentHelper(final List<ServerName> servers, Configuration conf) {
this(servers, new RackManager(conf));
@ -79,11 +86,33 @@ public class FavoredNodeAssignmentHelper {
this.servers = servers;
this.rackManager = rackManager;
this.rackToRegionServerMap = new HashMap<String, List<ServerName>>();
this.regionServerToRackMap = new HashMap<ServerName, String>();
this.regionServerToRackMap = new HashMap<String, String>();
this.uniqueRackList = new ArrayList<String>();
this.random = new Random();
}
// Always initialize() when FavoredNodeAssignmentHelper is constructed.
public void initialize() {
for (ServerName sn : this.servers) {
String rackName = getRackOfServer(sn);
List<ServerName> serverList = this.rackToRegionServerMap.get(rackName);
if (serverList == null) {
serverList = Lists.newArrayList();
// Add the current rack to the unique rack list
this.uniqueRackList.add(rackName);
this.rackToRegionServerMap.put(rackName, serverList);
}
for (ServerName serverName : serverList) {
if (ServerName.isSameHostnameAndPort(sn, serverName)) {
// The server is already present, ignore.
break;
}
}
serverList.add((sn));
this.regionServerToRackMap.put(sn.getHostname(), rackName);
}
}
/**
* Update meta table with favored nodes info
* @param regionToFavoredNodes map of HRegionInfo's to their favored nodes
@ -148,8 +177,8 @@ public class FavoredNodeAssignmentHelper {
byte[] favoredNodes = getFavoredNodes(favoredNodeList);
put.addImmutable(HConstants.CATALOG_FAMILY, FAVOREDNODES_QUALIFIER,
EnvironmentEdgeManager.currentTime(), favoredNodes);
LOG.info("Create the region " + regionInfo.getRegionNameAsString() +
" with favored nodes " + Bytes.toString(favoredNodes));
LOG.debug("Create the region " + regionInfo.getRegionNameAsString() +
" with favored nodes " + favoredNodeList);
}
return put;
}
@ -180,7 +209,7 @@ public class FavoredNodeAssignmentHelper {
HBaseProtos.ServerName.Builder b = HBaseProtos.ServerName.newBuilder();
b.setHostName(s.getHostname());
b.setPort(s.getPort());
b.setStartCode(s.getStartcode());
b.setStartCode(ServerName.NON_STARTCODE);
f.addFavoredNode(b.build());
}
return f.build().toByteArray();
@ -218,7 +247,7 @@ public class FavoredNodeAssignmentHelper {
numIterations++;
// Get the server list for the current rack
currentServerList = rackToRegionServerMap.get(rackName);
if (serverIndex >= currentServerList.size()) { //not enough machines in this rack
if (numIterations % rackList.size() == 0) {
if (++serverIndex >= maxRackSize) serverIndex = 0;
@ -234,12 +263,14 @@ public class FavoredNodeAssignmentHelper {
// Place the current region with the current primary region server
primaryRSMap.put(regionInfo, currentServer);
List<HRegionInfo> regionsForServer = assignmentMap.get(currentServer);
if (regionsForServer == null) {
regionsForServer = new ArrayList<HRegionInfo>();
assignmentMap.put(currentServer, regionsForServer);
if (assignmentMap != null) {
List<HRegionInfo> regionsForServer = assignmentMap.get(currentServer);
if (regionsForServer == null) {
regionsForServer = new ArrayList<HRegionInfo>();
assignmentMap.put(currentServer, regionsForServer);
}
regionsForServer.add(regionInfo);
}
regionsForServer.add(regionInfo);
// Set the next processing index
if (numIterations % rackList.size() == 0) {
@ -263,7 +294,7 @@ public class FavoredNodeAssignmentHelper {
// Create the secondary and tertiary region server pair object.
ServerName[] favoredNodes;
// Get the rack for the primary region server
String primaryRack = rackManager.getRack(primaryRS);
String primaryRack = getRackOfServer(primaryRS);
if (getTotalNumberOfRacks() == 1) {
favoredNodes = singleRackCase(regionInfo, primaryRS, primaryRack);
@ -301,8 +332,8 @@ public class FavoredNodeAssignmentHelper {
/**
* For regions that share the primary, avoid placing the secondary and tertiary
* on a same RS. Used for generating new assignments for the
* primary/secondary/tertiary RegionServers
* on a same RS. Used for generating new assignments for the
* primary/secondary/tertiary RegionServers
* @param primaryRSMap
* @return the map of regions to the servers the region-files should be hosted on
*/
@ -319,14 +350,14 @@ public class FavoredNodeAssignmentHelper {
ServerName primaryRS = entry.getValue();
try {
// Get the rack for the primary region server
String primaryRack = rackManager.getRack(primaryRS);
String primaryRack = getRackOfServer(primaryRS);
ServerName[] favoredNodes = null;
if (getTotalNumberOfRacks() == 1) {
// Single rack case: have to pick the secondary and tertiary
// from the same rack
favoredNodes = singleRackCase(regionInfo, primaryRS, primaryRack);
} else {
favoredNodes = multiRackCaseWithRestrictions(serverToPrimaries,
favoredNodes = multiRackCaseWithRestrictions(serverToPrimaries,
secondaryAndTertiaryMap, primaryRack, primaryRS, regionInfo);
}
if (favoredNodes != null) {
@ -370,10 +401,10 @@ public class FavoredNodeAssignmentHelper {
for (HRegionInfo primary : primaries) {
secondaryAndTertiary = secondaryAndTertiaryMap.get(primary);
if (secondaryAndTertiary != null) {
if (regionServerToRackMap.get(secondaryAndTertiary[0]).equals(secondaryRack)) {
if (getRackOfServer(secondaryAndTertiary[0]).equals(secondaryRack)) {
skipServerSet.add(secondaryAndTertiary[0]);
}
if (regionServerToRackMap.get(secondaryAndTertiary[1]).equals(secondaryRack)) {
if (getRackOfServer(secondaryAndTertiary[1]).equals(secondaryRack)) {
skipServerSet.add(secondaryAndTertiary[1]);
}
}
@ -440,7 +471,7 @@ public class FavoredNodeAssignmentHelper {
// Single rack case: have to pick the secondary and tertiary
// from the same rack
List<ServerName> serverList = getServersFromRack(primaryRack);
if (serverList.size() <= 2) {
if ((serverList == null) || (serverList.size() <= 2)) {
// Single region server case: cannot not place the favored nodes
// on any server;
return null;
@ -454,14 +485,10 @@ public class FavoredNodeAssignmentHelper {
ServerName secondaryRS = getOneRandomServer(primaryRack, serverSkipSet);
// Skip the secondary for the tertiary placement
serverSkipSet.add(secondaryRS);
// Place the tertiary RS
ServerName tertiaryRS =
getOneRandomServer(primaryRack, serverSkipSet);
ServerName tertiaryRS = getOneRandomServer(primaryRack, serverSkipSet);
if (secondaryRS == null || tertiaryRS == null) {
LOG.error("Cannot place the secondary and ternary" +
"region server for region " +
LOG.error("Cannot place the secondary, tertiary favored node for region " +
regionInfo.getRegionNameAsString());
}
// Create the secondary and tertiary pair
@ -472,80 +499,48 @@ public class FavoredNodeAssignmentHelper {
}
}
private ServerName[] multiRackCase(HRegionInfo regionInfo,
ServerName primaryRS,
/**
* Place secondary and tertiary nodes in a multi rack case.
* If there are only two racks, then we try the place the secondary
* and tertiary on different rack than primary. But if the other rack has
* only one region server, then we place primary and tertiary on one rack
* and secondary on another. The aim is two distribute the three favored nodes
* on >= 2 racks.
* TODO: see how we can use generateMissingFavoredNodeMultiRack API here
* @param regionInfo Region for which we are trying to generate FN
* @param primaryRS The primary favored node.
* @param primaryRack The rack of the primary favored node.
* @return Array containing secondary and tertiary favored nodes.
* @throws IOException Signals that an I/O exception has occurred.
*/
private ServerName[] multiRackCase(HRegionInfo regionInfo, ServerName primaryRS,
String primaryRack) throws IOException {
// Random to choose the secondary and tertiary region server
// from another rack to place the secondary and tertiary
List<ServerName>favoredNodes = Lists.newArrayList(primaryRS);
// Create the secondary and tertiary pair
ServerName secondaryRS = generateMissingFavoredNodeMultiRack(favoredNodes);
favoredNodes.add(secondaryRS);
String secondaryRack = getRackOfServer(secondaryRS);
// Random to choose one rack except for the current rack
Set<String> rackSkipSet = new HashSet<String>();
rackSkipSet.add(primaryRack);
ServerName[] favoredNodes = new ServerName[2];
String secondaryRack = getOneRandomRack(rackSkipSet);
List<ServerName> serverList = getServersFromRack(secondaryRack);
if (serverList.size() >= 2) {
// Randomly pick up two servers from this secondary rack
// Place the secondary RS
ServerName secondaryRS = getOneRandomServer(secondaryRack);
// Skip the secondary for the tertiary placement
Set<ServerName> skipServerSet = new HashSet<ServerName>();
skipServerSet.add(secondaryRS);
// Place the tertiary RS
ServerName tertiaryRS = getOneRandomServer(secondaryRack, skipServerSet);
if (secondaryRS == null || tertiaryRS == null) {
LOG.error("Cannot place the secondary and ternary" +
"region server for region " +
regionInfo.getRegionNameAsString());
}
// Create the secondary and tertiary pair
favoredNodes[0] = secondaryRS;
favoredNodes[1] = tertiaryRS;
ServerName tertiaryRS;
if (primaryRack.equals(secondaryRack)) {
tertiaryRS = generateMissingFavoredNode(favoredNodes);
} else {
// Pick the secondary rs from this secondary rack
// and pick the tertiary from another random rack
favoredNodes[0] = getOneRandomServer(secondaryRack);
// Pick the tertiary
if (getTotalNumberOfRacks() == 2) {
// Pick the tertiary from the same rack of the primary RS
Set<ServerName> serverSkipSet = new HashSet<ServerName>();
serverSkipSet.add(primaryRS);
favoredNodes[1] = getOneRandomServer(primaryRack, serverSkipSet);
} else {
// Pick the tertiary from another rack
rackSkipSet.add(secondaryRack);
String tertiaryRandomRack = getOneRandomRack(rackSkipSet);
favoredNodes[1] = getOneRandomServer(tertiaryRandomRack);
// Try to place tertiary in secondary RS rack else place on primary rack.
tertiaryRS = getOneRandomServer(secondaryRack, Sets.newHashSet(secondaryRS));
if (tertiaryRS == null) {
tertiaryRS = getOneRandomServer(primaryRack, Sets.newHashSet(primaryRS));
}
// We couldn't find anything in secondary rack, get any FN
if (tertiaryRS == null) {
tertiaryRS = generateMissingFavoredNode(Lists.newArrayList(primaryRS, secondaryRS));
}
}
return favoredNodes;
return new ServerName[]{ secondaryRS, tertiaryRS };
}
boolean canPlaceFavoredNodes() {
int serverSize = this.regionServerToRackMap.size();
return (serverSize >= FAVORED_NODES_NUM);
}
public void initialize() {
for (ServerName sn : this.servers) {
String rackName = this.rackManager.getRack(sn);
List<ServerName> serverList = this.rackToRegionServerMap.get(rackName);
if (serverList == null) {
serverList = new ArrayList<ServerName>();
// Add the current rack to the unique rack list
this.uniqueRackList.add(rackName);
}
if (!serverList.contains(sn)) {
serverList.add(sn);
this.rackToRegionServerMap.put(rackName, serverList);
this.regionServerToRackMap.put(sn, rackName);
}
}
return (this.servers.size() >= FAVORED_NODES_NUM);
}
private int getTotalNumberOfRacks() {
@ -556,31 +551,60 @@ public class FavoredNodeAssignmentHelper {
return this.rackToRegionServerMap.get(rack);
}
private ServerName getOneRandomServer(String rack,
Set<ServerName> skipServerSet) throws IOException {
if(rack == null) return null;
List<ServerName> serverList = this.rackToRegionServerMap.get(rack);
if (serverList == null) return null;
/**
* Gets a random server from the specified rack and skips anything specified.
// Get a random server except for any servers from the skip set
if (skipServerSet != null && serverList.size() <= skipServerSet.size()) {
throw new IOException("Cannot randomly pick another random server");
* @param rack rack from a server is needed
* @param skipServerSet the server shouldn't belong to this set
*/
protected ServerName getOneRandomServer(String rack, Set<ServerName> skipServerSet)
throws IOException {
// Is the rack valid? Do we recognize it?
if (rack == null || getServersFromRack(rack) == null ||
getServersFromRack(rack).size() == 0) {
return null;
}
ServerName randomServer;
do {
int randomIndex = random.nextInt(serverList.size());
randomServer = serverList.get(randomIndex);
} while (skipServerSet != null && skipServerSet.contains(randomServer));
// Lets use a set so we can eliminate duplicates
Set<StartcodeAgnosticServerName> serversToChooseFrom = Sets.newHashSet();
for (ServerName sn : getServersFromRack(rack)) {
serversToChooseFrom.add(StartcodeAgnosticServerName.valueOf(sn));
}
return randomServer;
if (skipServerSet != null && skipServerSet.size() > 0) {
for (ServerName sn : skipServerSet) {
serversToChooseFrom.remove(StartcodeAgnosticServerName.valueOf(sn));
}
// Do we have any servers left to choose from?
if (serversToChooseFrom.size() == 0) {
return null;
}
}
ServerName randomServer = null;
int randomIndex = random.nextInt(serversToChooseFrom.size());
int j = 0;
for (StartcodeAgnosticServerName sn : serversToChooseFrom) {
if (j == randomIndex) {
randomServer = sn;
break;
}
j++;
}
if (randomServer != null) {
return ServerName.valueOf(randomServer.getHostAndPort(), randomServer.getStartcode());
} else {
return null;
}
}
private ServerName getOneRandomServer(String rack) throws IOException {
return this.getOneRandomServer(rack, null);
}
private String getOneRandomRack(Set<String> skipRackSet) throws IOException {
protected String getOneRandomRack(Set<String> skipRackSet) throws IOException {
if (skipRackSet == null || uniqueRackList.size() <= skipRackSet.size()) {
throw new IOException("Cannot randomly pick another random server");
}
@ -603,4 +627,172 @@ public class FavoredNodeAssignmentHelper {
}
return strBuf.toString();
}
/*
* Generates a missing favored node based on the input favored nodes. This helps to generate
* new FN when there is already 2 FN and we need a third one. For eg, while generating new FN
* for split daughters after inheriting 2 FN from the parent. If the cluster has only one rack
* it generates from the same rack. If the cluster has multiple racks, then it ensures the new
* FN respects the rack constraints similar to HDFS. For eg: if there are 3 FN, they will be
* spread across 2 racks.
*/
public ServerName generateMissingFavoredNode(List<ServerName> favoredNodes) throws IOException {
if (this.uniqueRackList.size() == 1) {
return generateMissingFavoredNodeSingleRack(favoredNodes, null);
} else {
return generateMissingFavoredNodeMultiRack(favoredNodes, null);
}
}
public ServerName generateMissingFavoredNode(List<ServerName> favoredNodes,
List<ServerName> excludeNodes) throws IOException {
if (this.uniqueRackList.size() == 1) {
return generateMissingFavoredNodeSingleRack(favoredNodes, excludeNodes);
} else {
return generateMissingFavoredNodeMultiRack(favoredNodes, excludeNodes);
}
}
/*
* Generate FN for a single rack scenario, don't generate from one of the excluded nodes. Helps
* when we would like to find a replacement node.
*/
private ServerName generateMissingFavoredNodeSingleRack(List<ServerName> favoredNodes,
List<ServerName> excludeNodes) throws IOException {
ServerName newServer = null;
Set<ServerName> excludeFNSet = Sets.newHashSet(favoredNodes);
if (excludeNodes != null && excludeNodes.size() > 0) {
excludeFNSet.addAll(excludeNodes);
}
if (favoredNodes.size() < FAVORED_NODES_NUM) {
newServer = this.getOneRandomServer(this.uniqueRackList.get(0), excludeFNSet);
}
return newServer;
}
private ServerName generateMissingFavoredNodeMultiRack(List<ServerName> favoredNodes)
throws IOException {
return generateMissingFavoredNodeMultiRack(favoredNodes, null);
}
/*
* Generates a missing FN based on the input favoredNodes and also the nodes to be skipped.
*
* Get the current layout of favored nodes arrangement and nodes to be excluded and get a
* random node that goes with HDFS block placement. Eg: If the existing nodes are on one rack,
* generate one from another rack. We exclude as much as possible so the random selection
* has more chance to generate a node within a few iterations, ideally 1.
*/
private ServerName generateMissingFavoredNodeMultiRack(List<ServerName> favoredNodes,
List<ServerName> excludeNodes) throws IOException {
Set<String> racks = Sets.newHashSet();
Map<String, Set<ServerName>> rackToFNMapping = new HashMap<>();
// Lets understand the current rack distribution of the FN
for (ServerName sn : favoredNodes) {
String rack = getRackOfServer(sn);
racks.add(rack);
Set<ServerName> serversInRack = rackToFNMapping.get(rack);
if (serversInRack == null) {
serversInRack = Sets.newHashSet();
rackToFNMapping.put(rack, serversInRack);
}
serversInRack.add(sn);
}
// What racks should be skipped while getting a FN?
Set<String> skipRackSet = Sets.newHashSet();
/*
* If both the FN are from the same rack, then we don't want to generate another FN on the
* same rack. If that rack fails, the region would be unavailable.
*/
if (racks.size() == 1 && favoredNodes.size() > 1) {
skipRackSet.add(racks.iterator().next());
}
/*
* If there are no free nodes on the existing racks, we should skip those racks too. We can
* reduce the number of iterations for FN selection.
*/
for (String rack : racks) {
if (getServersFromRack(rack) != null &&
rackToFNMapping.get(rack).size() == getServersFromRack(rack).size()) {
skipRackSet.add(rack);
}
}
Set<ServerName> favoredNodeSet = Sets.newHashSet(favoredNodes);
if (excludeNodes != null && excludeNodes.size() > 0) {
favoredNodeSet.addAll(excludeNodes);
}
/*
* Lets get a random rack by excluding skipRackSet and generate a random FN from that rack.
*/
int i = 0;
Set<String> randomRacks = Sets.newHashSet();
ServerName newServer = null;
do {
String randomRack = this.getOneRandomRack(skipRackSet);
newServer = this.getOneRandomServer(randomRack, favoredNodeSet);
randomRacks.add(randomRack);
i++;
} while ((i < MAX_ATTEMPTS_FN_GENERATION) && (newServer == null));
if (newServer == null) {
if (LOG.isTraceEnabled()) {
LOG.trace(String.format("Unable to generate additional favored nodes for %s after "
+ "considering racks %s and skip rack %s with a unique rack list of %s and rack "
+ "to RS map of %s and RS to rack map of %s",
StringUtils.join(favoredNodes, ","), randomRacks, skipRackSet, uniqueRackList,
rackToRegionServerMap, regionServerToRackMap));
}
throw new IOException(" Unable to generate additional favored nodes for "
+ StringUtils.join(favoredNodes, ","));
}
return newServer;
}
/*
* Generate favored nodes for a region.
*
* Choose a random server as primary and then choose secondary and tertiary FN so its spread
* across two racks.
*/
List<ServerName> generateFavoredNodes(HRegionInfo hri) throws IOException {
List<ServerName> favoredNodesForRegion = new ArrayList<>(FAVORED_NODES_NUM);
ServerName primary = servers.get(random.nextInt(servers.size()));
favoredNodesForRegion.add(ServerName.valueOf(primary.getHostAndPort(), ServerName.NON_STARTCODE));
Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<>(1);
primaryRSMap.put(hri, primary);
Map<HRegionInfo, ServerName[]> secondaryAndTertiaryRSMap =
placeSecondaryAndTertiaryRS(primaryRSMap);
ServerName[] secondaryAndTertiaryNodes = secondaryAndTertiaryRSMap.get(hri);
if (secondaryAndTertiaryNodes != null && secondaryAndTertiaryNodes.length == 2) {
for (ServerName sn : secondaryAndTertiaryNodes) {
favoredNodesForRegion.add(ServerName.valueOf(sn.getHostAndPort(), ServerName.NON_STARTCODE));
}
return favoredNodesForRegion;
} else {
throw new HBaseIOException("Unable to generate secondary and tertiary favored nodes.");
}
}
/*
* Get the rack of server from local mapping when present, saves lookup by the RackManager.
*/
private String getRackOfServer(ServerName sn) {
if (this.regionServerToRackMap.containsKey(sn.getHostname())) {
return this.regionServerToRackMap.get(sn.getHostname());
} else {
String rack = this.rackManager.getRack(sn);
this.regionServerToRackMap.put(sn.getHostname(), rack);
return rack;
}
}
}

View File

@ -16,57 +16,70 @@
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.balancer;
package org.apache.hadoop.hbase.favored;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.google.common.collect.Maps;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.master.SnapshotOfRegionAssignmentFromMeta;
import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan.Position;
import org.apache.hadoop.hbase.master.*;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.util.Pair;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
/**
* An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that
* assigns favored nodes for each region. There is a Primary RegionServer that hosts
* the region, and then there is Secondary and Tertiary RegionServers. Currently, the
* favored nodes information is used in creating HDFS files - the Primary RegionServer
* passes the primary, secondary, tertiary node addresses as hints to the
* DistributedFileSystem API for creating files on the filesystem. These nodes are
* treated as hints by the HDFS to place the blocks of the file. This alleviates the
* problem to do with reading from remote nodes (since we can make the Secondary
* RegionServer as the new Primary RegionServer) after a region is recovered. This
* should help provide consistent read latencies for the regions even when their
* An implementation of the {@link org.apache.hadoop.hbase.master.LoadBalancer} that
* assigns favored nodes for each region. There is a Primary RegionServer that hosts
* the region, and then there is Secondary and Tertiary RegionServers. Currently, the
* favored nodes information is used in creating HDFS files - the Primary RegionServer
* passes the primary, secondary, tertiary node addresses as hints to the
* DistributedFileSystem API for creating files on the filesystem. These nodes are
* treated as hints by the HDFS to place the blocks of the file. This alleviates the
* problem to do with reading from remote nodes (since we can make the Secondary
* RegionServer as the new Primary RegionServer) after a region is recovered. This
* should help provide consistent read latencies for the regions even when their
* primary region servers die.
*
*/
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
public class FavoredNodeLoadBalancer extends BaseLoadBalancer implements FavoredNodesPromoter {
private static final Log LOG = LogFactory.getLog(FavoredNodeLoadBalancer.class);
private FavoredNodesPlan globalFavoredNodesAssignmentPlan;
private RackManager rackManager;
private Configuration conf;
private FavoredNodesManager fnm;
@Override
public void setConf(Configuration conf) {
this.conf = conf;
}
@Override
public synchronized void initialize() throws HBaseIOException {
super.initialize();
super.setConf(conf);
globalFavoredNodesAssignmentPlan = new FavoredNodesPlan();
this.fnm = services.getFavoredNodesManager();
this.rackManager = new RackManager(conf);
super.setConf(conf);
}
@ -84,7 +97,6 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
LOG.warn("Not running balancer since exception was thrown " + ie);
return plans;
}
globalFavoredNodesAssignmentPlan = snaphotOfRegionAssignment.getExistingAssignmentPlan();
Map<ServerName, ServerName> serverNameToServerNameWithoutCode =
new HashMap<ServerName, ServerName>();
Map<ServerName, ServerName> serverNameWithoutCodeToServerName =
@ -102,11 +114,10 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
currentServer.getPort(), ServerName.NON_STARTCODE);
List<HRegionInfo> list = entry.getValue();
for (HRegionInfo region : list) {
if(region.getTable().getNamespaceAsString()
.equals(NamespaceDescriptor.SYSTEM_NAMESPACE_NAME_STR)) {
if(region.getTable().isSystemTable()) {
continue;
}
List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region);
List<ServerName> favoredNodes = fnm.getFavoredNodes(region);
if (favoredNodes == null || favoredNodes.get(0).equals(currentServerWithoutStartCode)) {
continue; //either favorednodes does not exist or we are already on the primary node
}
@ -201,7 +212,7 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
if (!assignmentHelper.canPlaceFavoredNodes()) {
return primary;
}
List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
List<ServerName> favoredNodes = fnm.getFavoredNodes(regionInfo);
// check if we have a favored nodes mapping for this region and if so, return
// a server from the favored nodes list if the passed 'servers' contains this
// server as well (available servers, that is)
@ -233,7 +244,7 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
new HashMap<ServerName, List<HRegionInfo>>(regions.size() / 2);
List<HRegionInfo> regionsWithNoFavoredNodes = new ArrayList<HRegionInfo>(regions.size()/2);
for (HRegionInfo region : regions) {
List<ServerName> favoredNodes = globalFavoredNodesAssignmentPlan.getFavoredNodes(region);
List<ServerName> favoredNodes = fnm.getFavoredNodes(region);
ServerName primaryHost = null;
ServerName secondaryHost = null;
ServerName tertiaryHost = null;
@ -310,13 +321,13 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
regionsOnServer.add(region);
}
public List<ServerName> getFavoredNodes(HRegionInfo regionInfo) {
return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
public synchronized List<ServerName> getFavoredNodes(HRegionInfo regionInfo) {
return this.fnm.getFavoredNodes(regionInfo);
}
private void roundRobinAssignmentImpl(FavoredNodeAssignmentHelper assignmentHelper,
Map<ServerName, List<HRegionInfo>> assignmentMap,
List<HRegionInfo> regions, List<ServerName> servers) {
List<HRegionInfo> regions, List<ServerName> servers) throws IOException {
Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>();
// figure the primary RSs
assignmentHelper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
@ -325,10 +336,12 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
private void assignSecondaryAndTertiaryNodesForRegion(
FavoredNodeAssignmentHelper assignmentHelper,
List<HRegionInfo> regions, Map<HRegionInfo, ServerName> primaryRSMap) {
List<HRegionInfo> regions, Map<HRegionInfo, ServerName> primaryRSMap) throws IOException {
// figure the secondary and tertiary RSs
Map<HRegionInfo, ServerName[]> secondaryAndTertiaryRSMap =
assignmentHelper.placeSecondaryAndTertiaryRS(primaryRSMap);
Map<HRegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
// now record all the assignments so that we can serve queries later
for (HRegionInfo region : regions) {
// Store the favored nodes without startCode for the ServerName objects
@ -344,8 +357,82 @@ public class FavoredNodeLoadBalancer extends BaseLoadBalancer {
favoredNodesForRegion.add(ServerName.valueOf(secondaryAndTertiaryNodes[1].getHostname(),
secondaryAndTertiaryNodes[1].getPort(), ServerName.NON_STARTCODE));
}
globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(region, favoredNodesForRegion);
regionFNMap.put(region, favoredNodesForRegion);
}
fnm.updateFavoredNodes(regionFNMap);
}
/*
* Generate Favored Nodes for daughters during region split.
*
* If the parent does not have FN, regenerates them for the daughters.
*
* If the parent has FN, inherit two FN from parent for each daughter and generate the remaining.
* The primary FN for both the daughters should be the same as parent. Inherit the secondary
* FN from the parent but keep it different for each daughter. Choose the remaining FN
* randomly. This would give us better distribution over a period of time after enough splits.
*/
@Override
public void generateFavoredNodesForDaughter(List<ServerName> servers, HRegionInfo parent,
HRegionInfo regionA, HRegionInfo regionB) throws IOException {
Map<HRegionInfo, List<ServerName>> result = new HashMap<>();
FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
helper.initialize();
List<ServerName> parentFavoredNodes = getFavoredNodes(parent);
if (parentFavoredNodes == null) {
LOG.debug("Unable to find favored nodes for parent, " + parent
+ " generating new favored nodes for daughter");
result.put(regionA, helper.generateFavoredNodes(regionA));
result.put(regionB, helper.generateFavoredNodes(regionB));
} else {
// Lets get the primary and secondary from parent for regionA
Set<ServerName> regionAFN =
getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, SECONDARY);
result.put(regionA, Lists.newArrayList(regionAFN));
// Lets get the primary and tertiary from parent for regionB
Set<ServerName> regionBFN =
getInheritedFNForDaughter(helper, parentFavoredNodes, PRIMARY, TERTIARY);
result.put(regionB, Lists.newArrayList(regionBFN));
}
fnm.updateFavoredNodes(result);
}
private Set<ServerName> getInheritedFNForDaughter(FavoredNodeAssignmentHelper helper,
List<ServerName> parentFavoredNodes, Position primary, Position secondary)
throws IOException {
Set<ServerName> daughterFN = Sets.newLinkedHashSet();
if (parentFavoredNodes.size() >= primary.ordinal()) {
daughterFN.add(parentFavoredNodes.get(primary.ordinal()));
}
if (parentFavoredNodes.size() >= secondary.ordinal()) {
daughterFN.add(parentFavoredNodes.get(secondary.ordinal()));
}
while (daughterFN.size() < FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) {
ServerName newNode = helper.generateMissingFavoredNode(Lists.newArrayList(daughterFN));
daughterFN.add(newNode);
}
return daughterFN;
}
/*
* Generate favored nodes for a region during merge. Choose the FN from one of the sources to
* keep it simple.
*/
@Override
public void generateFavoredNodesForMergedRegion(HRegionInfo merged, HRegionInfo regionA,
HRegionInfo regionB) throws IOException {
Map<HRegionInfo, List<ServerName>> regionFNMap = Maps.newHashMap();
regionFNMap.put(merged, getFavoredNodes(regionA));
fnm.updateFavoredNodes(regionFNMap);
}
@Override

View File

@ -0,0 +1,185 @@
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.favored;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.SnapshotOfRegionAssignmentFromMeta;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
/**
* FavoredNodesManager is responsible for maintaining favored nodes info in internal cache and
* META table. Its the centralized store for all favored nodes information. All reads and updates
* should be done through this class. There should only be one instance of
* {@link FavoredNodesManager} in Master. {@link FavoredNodesPlan} and favored node information
* from {@link SnapshotOfRegionAssignmentFromMeta} should not be used outside this class (except
* for may be tools that only read or test cases). All other classes including Favored balancers
* and {@link FavoredNodeAssignmentHelper} should use {@link FavoredNodesManager} for any
* read/write/deletes to favored nodes.
*/
@InterfaceAudience.Private
public class FavoredNodesManager {
private static final Log LOG = LogFactory.getLog(FavoredNodesManager.class);
private FavoredNodesPlan globalFavoredNodesAssignmentPlan;
private Map<ServerName, List<HRegionInfo>> primaryRSToRegionMap;
private Map<ServerName, List<HRegionInfo>> secondaryRSToRegionMap;
private Map<ServerName, List<HRegionInfo>> teritiaryRSToRegionMap;
private MasterServices masterServices;
public FavoredNodesManager(MasterServices masterServices) {
this.masterServices = masterServices;
this.globalFavoredNodesAssignmentPlan = new FavoredNodesPlan();
this.primaryRSToRegionMap = new HashMap<>();
this.secondaryRSToRegionMap = new HashMap<>();
this.teritiaryRSToRegionMap = new HashMap<>();
}
public void initialize(SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment)
throws HBaseIOException {
globalFavoredNodesAssignmentPlan = snapshotOfRegionAssignment.getExistingAssignmentPlan();
primaryRSToRegionMap = snapshotOfRegionAssignment.getPrimaryToRegionInfoMap();
secondaryRSToRegionMap = snapshotOfRegionAssignment.getSecondaryToRegionInfoMap();
teritiaryRSToRegionMap = snapshotOfRegionAssignment.getTertiaryToRegionInfoMap();
}
public synchronized List<ServerName> getFavoredNodes(HRegionInfo regionInfo) {
return this.globalFavoredNodesAssignmentPlan.getFavoredNodes(regionInfo);
}
public synchronized void updateFavoredNodes(Map<HRegionInfo, List<ServerName>> regionFNMap)
throws IOException {
Map<HRegionInfo, List<ServerName>> regionToFavoredNodes = new HashMap<>();
for (Map.Entry<HRegionInfo, List<ServerName>> entry : regionFNMap.entrySet()) {
HRegionInfo regionInfo = entry.getKey();
List<ServerName> servers = entry.getValue();
/*
* None of the following error conditions should happen. If it does, there is an issue with
* favored nodes generation or the regions its called on.
*/
if (servers.size() != Sets.newHashSet(servers).size()) {
throw new IOException("Duplicates found: " + servers);
}
if (regionInfo.isSystemTable()) {
throw new IOException("Can't update FN for system region: "
+ regionInfo.getRegionNameAsString() + " with " + servers);
}
if (servers.size() != FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) {
throw new IOException("At least " + FavoredNodeAssignmentHelper.FAVORED_NODES_NUM
+ " favored nodes should be present for region : " + regionInfo.getEncodedName()
+ " current FN servers:" + servers);
}
List<ServerName> serversWithNoStartCodes = Lists.newArrayList();
for (ServerName sn : servers) {
if (sn.getStartcode() == ServerName.NON_STARTCODE) {
serversWithNoStartCodes.add(sn);
} else {
serversWithNoStartCodes.add(ServerName.valueOf(sn.getHostname(), sn.getPort(),
ServerName.NON_STARTCODE));
}
}
regionToFavoredNodes.put(regionInfo, serversWithNoStartCodes);
}
// Lets do a bulk update to meta since that reduces the RPC's
FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo(
regionToFavoredNodes,
masterServices.getConnection());
deleteFavoredNodesForRegions(regionToFavoredNodes.keySet());
for (Map.Entry<HRegionInfo, List<ServerName>> entry : regionToFavoredNodes.entrySet()) {
HRegionInfo regionInfo = entry.getKey();
List<ServerName> serversWithNoStartCodes = entry.getValue();
globalFavoredNodesAssignmentPlan.updateFavoredNodesMap(regionInfo, serversWithNoStartCodes);
addToReplicaLoad(regionInfo, serversWithNoStartCodes);
}
}
private synchronized void addToReplicaLoad(HRegionInfo hri, List<ServerName> servers) {
ServerName serverToUse = ServerName.valueOf(servers.get(PRIMARY.ordinal()).getHostAndPort(),
ServerName.NON_STARTCODE);
List<HRegionInfo> regionList = primaryRSToRegionMap.get(serverToUse);
if (regionList == null) {
regionList = new ArrayList<>();
}
regionList.add(hri);
primaryRSToRegionMap.put(serverToUse, regionList);
serverToUse = ServerName
.valueOf(servers.get(SECONDARY.ordinal()).getHostAndPort(), ServerName.NON_STARTCODE);
regionList = secondaryRSToRegionMap.get(serverToUse);
if (regionList == null) {
regionList = new ArrayList<>();
}
regionList.add(hri);
secondaryRSToRegionMap.put(serverToUse, regionList);
serverToUse = ServerName.valueOf(servers.get(TERTIARY.ordinal()).getHostAndPort(),
ServerName.NON_STARTCODE);
regionList = teritiaryRSToRegionMap.get(serverToUse);
if (regionList == null) {
regionList = new ArrayList<>();
}
regionList.add(hri);
teritiaryRSToRegionMap.put(serverToUse, regionList);
}
private synchronized void deleteFavoredNodesForRegions(Collection<HRegionInfo> regionInfoList) {
for (HRegionInfo hri : regionInfoList) {
List<ServerName> favNodes = getFavoredNodes(hri);
if (favNodes != null) {
if (primaryRSToRegionMap.containsKey(favNodes.get(PRIMARY.ordinal()))) {
primaryRSToRegionMap.get(favNodes.get(PRIMARY.ordinal())).remove(hri);
}
if (secondaryRSToRegionMap.containsKey(favNodes.get(SECONDARY.ordinal()))) {
secondaryRSToRegionMap.get(favNodes.get(SECONDARY.ordinal())).remove(hri);
}
if (teritiaryRSToRegionMap.containsKey(favNodes.get(TERTIARY.ordinal()))) {
teritiaryRSToRegionMap.get(favNodes.get(TERTIARY.ordinal())).remove(hri);
}
globalFavoredNodesAssignmentPlan.removeFavoredNodes(hri);
}
}
}
}

View File

@ -16,7 +16,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.balancer;
package org.apache.hadoop.hbase.favored;
import java.util.List;
import java.util.Map;
@ -61,6 +61,15 @@ public class FavoredNodesPlan {
this.favoredNodesMap.put(region.getRegionNameAsString(), servers);
}
/**
* Remove a favored node assignment
* @param region region
* @return the list of favored region server for this region based on the plan
*/
public List<ServerName> removeFavoredNodes(HRegionInfo region) {
return favoredNodesMap.remove(region.getRegionNameAsString());
}
/**
* @param region
* @return the list of favored region server for this region based on the plan

View File

@ -0,0 +1,35 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.favored;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
@InterfaceAudience.Private
public interface FavoredNodesPromoter {
void generateFavoredNodesForDaughter(List<ServerName> servers,
HRegionInfo parent, HRegionInfo hriA, HRegionInfo hriB) throws IOException;
void generateFavoredNodesForMergedRegion(HRegionInfo merged, HRegionInfo hriA,
HRegionInfo hriB) throws IOException;
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.favored;
import com.google.common.net.HostAndPort;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.util.Addressing;
/**
* This class differs from ServerName in that start code is always ignored. This is because
* start code, ServerName.NON_STARTCODE is used to persist favored nodes and keeping this separate
* from {@link ServerName} is much cleaner. This should only be used by Favored node specific
* classes and should not be used outside favored nodes.
*/
@InterfaceAudience.Private
class StartcodeAgnosticServerName extends ServerName {
public StartcodeAgnosticServerName(final String hostname, final int port, long startcode) {
super(hostname, port, startcode);
}
public static StartcodeAgnosticServerName valueOf(final ServerName serverName) {
return new StartcodeAgnosticServerName(serverName.getHostname(), serverName.getPort(),
serverName.getStartcode());
}
public static StartcodeAgnosticServerName valueOf(final String hostnameAndPort, long startcode) {
return new StartcodeAgnosticServerName(Addressing.parseHostname(hostnameAndPort),
Addressing.parsePort(hostnameAndPort), startcode);
}
public static StartcodeAgnosticServerName valueOf(final HostAndPort hostnameAndPort, long startcode) {
return new StartcodeAgnosticServerName(hostnameAndPort.getHostText(),
hostnameAndPort.getPort(), startcode);
}
@Override
public int compareTo(ServerName other) {
int compare = this.getHostname().compareTo(other.getHostname());
if (compare != 0) return compare;
compare = this.getPort() - other.getPort();
if (compare != 0) return compare;
return 0;
}
@Override
public int hashCode() {
return getHostAndPort().hashCode();
}
}

View File

@ -69,12 +69,11 @@ import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.executor.EventHandler;
import org.apache.hadoop.hbase.executor.EventType;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.favored.FavoredNodesPromoter;
import org.apache.hadoop.hbase.ipc.FailedServerException;
import org.apache.hadoop.hbase.ipc.RpcClient;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer;
import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan.PlanType;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
@ -229,10 +228,6 @@ public class AssignmentManager {
this.regionsToReopen = Collections.synchronizedMap
(new HashMap<String, HRegionInfo> ());
Configuration conf = server.getConfiguration();
// Only read favored nodes if using the favored nodes load balancer.
this.shouldAssignRegionsWithFavoredNodes = conf.getClass(
HConstants.HBASE_MASTER_LOADBALANCER_CLASS, Object.class).equals(
FavoredNodeLoadBalancer.class);
this.tableStateManager = tableStateManager;
@ -242,6 +237,8 @@ public class AssignmentManager {
this.sleepTimeBeforeRetryingMetaAssignment = this.server.getConfiguration().getLong(
"hbase.meta.assignment.retry.sleeptime", 1000l);
this.balancer = balancer;
// Only read favored nodes if using the favored nodes load balancer.
this.shouldAssignRegionsWithFavoredNodes = this.balancer instanceof FavoredNodesPromoter;
int maxThreads = conf.getInt("hbase.assignment.threads.max", 30);
this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
@ -629,23 +626,21 @@ public class AssignmentManager {
}
}
// TODO: processFavoredNodes might throw an exception, for e.g., if the
// meta could not be contacted/updated. We need to see how seriously to treat
// this problem as. Should we fail the current assignment. We should be able
// to recover from this problem eventually (if the meta couldn't be updated
// things should work normally and eventually get fixed up).
void processFavoredNodes(List<HRegionInfo> regions) throws IOException {
if (!shouldAssignRegionsWithFavoredNodes) return;
// The AM gets the favored nodes info for each region and updates the meta
// table with that info
Map<HRegionInfo, List<ServerName>> regionToFavoredNodes =
new HashMap<HRegionInfo, List<ServerName>>();
for (HRegionInfo region : regions) {
regionToFavoredNodes.put(region,
((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region));
void processFavoredNodesForDaughters(HRegionInfo parent,
HRegionInfo regionA, HRegionInfo regionB) throws IOException {
if (shouldAssignRegionsWithFavoredNodes) {
List<ServerName> onlineServers = this.serverManager.getOnlineServersList();
((FavoredNodesPromoter) this.balancer).
generateFavoredNodesForDaughter(onlineServers, parent, regionA, regionB);
}
}
void processFavoredNodesForMerge(HRegionInfo merged, HRegionInfo regionA, HRegionInfo regionB)
throws IOException {
if (shouldAssignRegionsWithFavoredNodes) {
((FavoredNodesPromoter)this.balancer).
generateFavoredNodesForMergedRegion(merged, regionA, regionB);
}
FavoredNodeAssignmentHelper.updateMetaWithFavoredNodesInfo(regionToFavoredNodes,
this.server.getConnection());
}
/**
@ -806,7 +801,7 @@ public class AssignmentManager {
region, State.PENDING_OPEN, destination);
List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
if (this.shouldAssignRegionsWithFavoredNodes) {
favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
favoredNodes = server.getFavoredNodesManager().getFavoredNodes(region);
}
regionOpenInfos.add(new Pair<HRegionInfo, List<ServerName>>(
region, favoredNodes));
@ -1114,7 +1109,7 @@ public class AssignmentManager {
try {
List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
if (this.shouldAssignRegionsWithFavoredNodes) {
favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
favoredNodes = server.getFavoredNodesManager().getFavoredNodes(region);
}
serverManager.sendRegionOpen(plan.getDestination(), region, favoredNodes);
return; // we're done
@ -1299,15 +1294,6 @@ public class AssignmentManager {
LOG.warn("Failed to create new plan.",ex);
return null;
}
if (!region.isMetaTable() && shouldAssignRegionsWithFavoredNodes) {
List<HRegionInfo> regions = new ArrayList<HRegionInfo>(1);
regions.add(region);
try {
processFavoredNodes(regions);
} catch (IOException ie) {
LOG.warn("Ignoring exception in processFavoredNodes " + ie);
}
}
this.regionPlans.put(encodedName, randomPlan);
}
}
@ -1579,7 +1565,6 @@ public class AssignmentManager {
processBogusAssignments(bulkPlan);
processFavoredNodes(regions);
assign(regions.size(), servers.size(), "round-robin=true", bulkPlan);
}
@ -1869,7 +1854,8 @@ public class AssignmentManager {
}
List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
if (shouldAssignRegionsWithFavoredNodes) {
favoredNodes = ((FavoredNodeLoadBalancer)balancer).getFavoredNodes(hri);
favoredNodes =
((MasterServices)server).getFavoredNodesManager().getFavoredNodes(hri);
}
serverManager.sendRegionOpen(serverName, hri, favoredNodes);
return; // we're done
@ -2424,6 +2410,7 @@ public class AssignmentManager {
try {
regionStates.splitRegion(hri, a, b, serverName);
processFavoredNodesForDaughters(hri, a ,b);
} catch (IOException ioe) {
LOG.info("Failed to record split region " + hri.getShortNameToLog());
return "Failed to record the splitting in meta";
@ -2692,6 +2679,26 @@ public class AssignmentManager {
regionOffline(b, State.MERGED);
regionOnline(hri, serverName, 1);
try {
if (this.shouldAssignRegionsWithFavoredNodes) {
processFavoredNodesForMerge(hri, a, b);
/*
* This can be removed once HBASE-16119 (Procedure v2 Merge) is implemented and AM force
* assigns the merged region on the same region server. FavoredNodes for the region would
* be passed along with OpenRegionRequest and hence the following would become redundant.
*/
List<ServerName> favoredNodes = server.getFavoredNodesManager().getFavoredNodes(hri);
if (favoredNodes != null) {
Map<HRegionInfo, List<ServerName>> regionFNMap = new HashMap<>(1);
regionFNMap.put(hri, favoredNodes);
server.getServerManager().sendFavoredNodes(serverName, regionFNMap);
}
}
} catch (IOException e) {
LOG.error("Error while processing favored nodes after merge.", e);
return StringUtils.stringifyException(e);
}
// User could disable the table before master knows the new region.
if (getTableStateManager().isTableState(hri.getTable(),
TableState.State.DISABLED, TableState.State.DISABLING)) {

View File

@ -29,11 +29,11 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan;
/**
* Helper class that is used by {@link RegionPlacementMaintainer} to print
* information for favored nodes

View File

@ -83,6 +83,8 @@ import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
import org.apache.hadoop.hbase.executor.ExecutorType;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.favored.FavoredNodesPromoter;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
import org.apache.hadoop.hbase.ipc.RpcServer;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
@ -91,7 +93,6 @@ import org.apache.hadoop.hbase.master.balancer.BalancerChore;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
import org.apache.hadoop.hbase.master.cleaner.ReplicationMetaCleaner;
@ -365,6 +366,9 @@ public class HMaster extends HRegionServer implements MasterServices {
/** flag used in test cases in order to simulate RS failures during master initialization */
private volatile boolean initializationBeforeMetaAssignment = false;
/* Handle favored nodes information */
private FavoredNodesManager favoredNodesManager;
/** jetty server for master to redirect requests to regionserver infoServer */
private org.mortbay.jetty.Server masterJettyServer;
@ -749,6 +753,9 @@ public class HMaster extends HRegionServer implements MasterServices {
this.initializationBeforeMetaAssignment = true;
if (this.balancer instanceof FavoredNodesPromoter) {
favoredNodesManager = new FavoredNodesManager(this);
}
// Wait for regionserver to finish initialization.
if (BaseLoadBalancer.tablesOnMaster(conf)) {
waitForServerOnline();
@ -771,6 +778,14 @@ public class HMaster extends HRegionServer implements MasterServices {
// assigned when master is shutting down
if (isStopped()) return;
//Initialize after meta as it scans meta
if (favoredNodesManager != null) {
SnapshotOfRegionAssignmentFromMeta snapshotOfRegionAssignment =
new SnapshotOfRegionAssignmentFromMeta(getConnection());
snapshotOfRegionAssignment.initialize();
favoredNodesManager.initialize(snapshotOfRegionAssignment);
}
// migrating existent table state from zk, so splitters
// and recovery process treat states properly.
for (Map.Entry<TableName, TableState.State> entry : ZKDataMigrator
@ -2995,4 +3010,9 @@ public class HMaster extends HRegionServer implements MasterServices {
public LoadBalancer getLoadBalancer() {
return balancer;
}
@Override
public FavoredNodesManager getFavoredNodesManager() {
return favoredNodesManager;
}
}

View File

@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.TableNotDisabledException;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.master.normalizer.RegionNormalizer;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
@ -409,4 +410,9 @@ public interface MasterServices extends Server {
* @return True if this master is stopping.
*/
boolean isStopping();
/**
* @return Favored Nodes Manager
*/
public FavoredNodesManager getFavoredNodesManager();
}

View File

@ -51,8 +51,8 @@ import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan;
import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.RequestConverter;

View File

@ -71,6 +71,7 @@ import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminServic
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.OpenRegionResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.ServerInfo;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.UpdateFavoredNodesRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.RegionStoreSequenceIds;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos.StoreSequenceId;
import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest;
@ -1237,4 +1238,27 @@ public class ServerManager {
removeRegion(hri);
}
}
public void sendFavoredNodes(final ServerName server,
Map<HRegionInfo, List<ServerName>> favoredNodes) throws IOException {
AdminService.BlockingInterface admin = getRsAdmin(server);
if (admin == null) {
LOG.warn("Attempting to send favored nodes update rpc to server " + server.toString()
+ " failed because no RPC connection found to this server");
} else {
List<Pair<HRegionInfo, List<ServerName>>> regionUpdateInfos =
new ArrayList<Pair<HRegionInfo, List<ServerName>>>();
for (Entry<HRegionInfo, List<ServerName>> entry : favoredNodes.entrySet()) {
regionUpdateInfos.add(new Pair<HRegionInfo, List<ServerName>>(entry.getKey(),
entry.getValue()));
}
UpdateFavoredNodesRequest request =
RequestConverter.buildUpdateFavoredNodesRequest(regionUpdateInfos);
try {
admin.updateFavoredNodes(null, request);
} catch (ServiceException se) {
throw ProtobufUtil.getRemoteException(se);
}
}
}
}

View File

@ -18,6 +18,10 @@
*/
package org.apache.hadoop.hbase.master;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@ -31,6 +35,8 @@ import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
@ -41,8 +47,6 @@ import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.MetaTableAccessor.Visitor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan;
/**
* Used internally for reading meta and constructing datastructures that are
@ -66,7 +70,10 @@ public class SnapshotOfRegionAssignmentFromMeta {
private final Map<String, HRegionInfo> regionNameToRegionInfoMap;
/** the regionServer to region map */
private final Map<ServerName, List<HRegionInfo>> regionServerToRegionMap;
private final Map<ServerName, List<HRegionInfo>> currentRSToRegionMap;
private final Map<ServerName, List<HRegionInfo>> secondaryRSToRegionMap;
private final Map<ServerName, List<HRegionInfo>> teritiaryRSToRegionMap;
private final Map<ServerName, List<HRegionInfo>> primaryRSToRegionMap;
/** the existing assignment plan in the hbase:meta region */
private final FavoredNodesPlan existingAssignmentPlan;
private final Set<TableName> disabledTables;
@ -81,7 +88,10 @@ public class SnapshotOfRegionAssignmentFromMeta {
this.connection = connection;
tableToRegionMap = new HashMap<TableName, List<HRegionInfo>>();
regionToRegionServerMap = new HashMap<HRegionInfo, ServerName>();
regionServerToRegionMap = new HashMap<ServerName, List<HRegionInfo>>();
currentRSToRegionMap = new HashMap<ServerName, List<HRegionInfo>>();
primaryRSToRegionMap = new HashMap<ServerName, List<HRegionInfo>>();
secondaryRSToRegionMap = new HashMap<ServerName, List<HRegionInfo>>();
teritiaryRSToRegionMap = new HashMap<ServerName, List<HRegionInfo>>();
regionNameToRegionInfoMap = new TreeMap<String, HRegionInfo>();
existingAssignmentPlan = new FavoredNodesPlan();
this.disabledTables = disabledTables;
@ -122,6 +132,7 @@ public class SnapshotOfRegionAssignmentFromMeta {
addRegion(hri);
}
hri = rl.getRegionLocation(0).getRegionInfo();
// the code below is to handle favored nodes
byte[] favoredNodes = result.getValue(HConstants.CATALOG_FAMILY,
FavoredNodeAssignmentHelper.FAVOREDNODES_QUALIFIER);
@ -132,6 +143,20 @@ public class SnapshotOfRegionAssignmentFromMeta {
// Add the favored nodes into assignment plan
existingAssignmentPlan.updateFavoredNodesMap(hri,
Arrays.asList(favoredServerList));
/*
* Typically there should be FAVORED_NODES_NUM favored nodes for a region in meta. If
* there is less than FAVORED_NODES_NUM, lets use as much as we can but log a warning.
*/
if (favoredServerList.length != FavoredNodeAssignmentHelper.FAVORED_NODES_NUM) {
LOG.warn("Insufficient favored nodes for region " + hri + " fn: " + Arrays
.toString(favoredServerList));
}
for (int i = 0; i < favoredServerList.length; i++) {
if (i == PRIMARY.ordinal()) addPrimaryAssignment(hri, favoredServerList[i]);
if (i == SECONDARY.ordinal()) addSecondaryAssignment(hri, favoredServerList[i]);
if (i == TERTIARY.ordinal()) addTeritiaryAssignment(hri, favoredServerList[i]);
}
return true;
} catch (RuntimeException e) {
LOG.error("Catche remote exception " + e.getMessage() +
@ -169,12 +194,42 @@ public class SnapshotOfRegionAssignmentFromMeta {
if (server == null) return;
// Process the region server to region map
List<HRegionInfo> regionList = regionServerToRegionMap.get(server);
List<HRegionInfo> regionList = currentRSToRegionMap.get(server);
if (regionList == null) {
regionList = new ArrayList<HRegionInfo>();
}
regionList.add(regionInfo);
regionServerToRegionMap.put(server, regionList);
currentRSToRegionMap.put(server, regionList);
}
private void addPrimaryAssignment(HRegionInfo regionInfo, ServerName server) {
// Process the region server to region map
List<HRegionInfo> regionList = primaryRSToRegionMap.get(server);
if (regionList == null) {
regionList = new ArrayList<HRegionInfo>();
}
regionList.add(regionInfo);
primaryRSToRegionMap.put(server, regionList);
}
private void addSecondaryAssignment(HRegionInfo regionInfo, ServerName server) {
// Process the region server to region map
List<HRegionInfo> regionList = secondaryRSToRegionMap.get(server);
if (regionList == null) {
regionList = new ArrayList<HRegionInfo>();
}
regionList.add(regionInfo);
secondaryRSToRegionMap.put(server, regionList);
}
private void addTeritiaryAssignment(HRegionInfo regionInfo, ServerName server) {
// Process the region server to region map
List<HRegionInfo> regionList = teritiaryRSToRegionMap.get(server);
if (regionList == null) {
regionList = new ArrayList<HRegionInfo>();
}
regionList.add(regionInfo);
teritiaryRSToRegionMap.put(server, regionList);
}
/**
@ -206,7 +261,7 @@ public class SnapshotOfRegionAssignmentFromMeta {
* @return regionserver to region map
*/
public Map<ServerName, List<HRegionInfo>> getRegionServerToRegionMap() {
return regionServerToRegionMap;
return currentRSToRegionMap;
}
/**
@ -224,4 +279,16 @@ public class SnapshotOfRegionAssignmentFromMeta {
public Set<TableName> getTableSet() {
return this.tableToRegionMap.keySet();
}
public Map<ServerName, List<HRegionInfo>> getSecondaryToRegionInfoMap() {
return this.secondaryRSToRegionMap;
}
public Map<ServerName, List<HRegionInfo>> getTertiaryToRegionInfoMap() {
return this.teritiaryRSToRegionMap;
}
public Map<ServerName, List<HRegionInfo>> getPrimaryToRegionInfoMap() {
return this.primaryRSToRegionMap;
}
}

View File

@ -0,0 +1,297 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.client;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.PRIMARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.SECONDARY;
import static org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position.TERTIARY;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.favored.FavoredNodeLoadBalancer;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.RegionStates;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({ClientTests.class, MediumTests.class})
public class TestTableFavoredNodes {
private static final Log LOG = LogFactory.getLog(TestTableFavoredNodes.class);
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private final static int WAIT_TIMEOUT = 60000;
private final static int SLAVES = 8;
private FavoredNodesManager fnm;
private RegionStates regionStates;
private Admin admin;
private final byte[][] splitKeys = new byte[][] {Bytes.toBytes(1), Bytes.toBytes(9)};
private final int NUM_REGIONS = splitKeys.length + 1;
@BeforeClass
public static void setupBeforeClass() throws Exception {
Configuration conf = TEST_UTIL.getConfiguration();
// Setting FavoredNodeBalancer will enable favored nodes
conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS,
FavoredNodeLoadBalancer.class, LoadBalancer.class);
conf.set(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, "" + SLAVES);
TEST_UTIL.startMiniCluster(SLAVES);
TEST_UTIL.getMiniHBaseCluster().waitForActiveAndReadyMaster(WAIT_TIMEOUT);
}
@AfterClass
public static void tearDownAfterClass() throws Exception {
TEST_UTIL.cleanupTestDir();
TEST_UTIL.shutdownMiniCluster();
}
@Before
public void setup() throws IOException {
fnm = TEST_UTIL.getMiniHBaseCluster().getMaster().getFavoredNodesManager();
admin = TEST_UTIL.getAdmin();
admin.setBalancerRunning(false, true);
regionStates =
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getRegionStates();
}
/*
* Create a table with FN enabled and check if all its regions have favored nodes set.
*/
@Test
public void testCreateTable() throws Exception {
TableName tableName = TableName.valueOf("createTable");
TEST_UTIL.createTable(tableName, Bytes.toBytes("f"), splitKeys);
TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
// All regions should have favored nodes
checkIfFavoredNodeInformationIsCorrect(tableName);
TEST_UTIL.deleteTable(tableName);
}
/*
* Check if daughters inherit at-least 2 FN from parent after region split.
*/
@Test
public void testSplitTable() throws Exception {
TableName tableName = TableName.valueOf("splitRegions");
TEST_UTIL.createTable(tableName, Bytes.toBytes("f"), splitKeys);
TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
checkIfFavoredNodeInformationIsCorrect(tableName);
byte[] splitPoint = Bytes.toBytes(0);
RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName);
HRegionInfo parent = locator.getRegionLocation(splitPoint).getRegionInfo();
List<ServerName> parentFN = fnm.getFavoredNodes(parent);
assertNotNull("FN should not be null for region: " + parent, parentFN);
admin.split(tableName, splitPoint);
TEST_UTIL.waitUntilNoRegionsInTransition(WAIT_TIMEOUT);
waitUntilTableRegionCountReached(tableName, NUM_REGIONS + 1);
// All regions should have favored nodes
checkIfFavoredNodeInformationIsCorrect(tableName);
// Get the daughters of parent.
HRegionInfo daughter1 = locator.getRegionLocation(parent.getStartKey(), true).getRegionInfo();
List<ServerName> daughter1FN = fnm.getFavoredNodes(daughter1);
HRegionInfo daughter2 = locator.getRegionLocation(splitPoint, true).getRegionInfo();
List<ServerName> daughter2FN = fnm.getFavoredNodes(daughter2);
checkIfDaughterInherits2FN(parentFN, daughter1FN);
checkIfDaughterInherits2FN(parentFN, daughter2FN);
assertEquals("Daughter's PRIMARY FN should be PRIMARY of parent",
parentFN.get(PRIMARY.ordinal()), daughter1FN.get(PRIMARY.ordinal()));
assertEquals("Daughter's SECONDARY FN should be SECONDARY of parent",
parentFN.get(SECONDARY.ordinal()), daughter1FN.get(SECONDARY.ordinal()));
assertEquals("Daughter's PRIMARY FN should be PRIMARY of parent",
parentFN.get(PRIMARY.ordinal()), daughter2FN.get(PRIMARY.ordinal()));
assertEquals("Daughter's SECONDARY FN should be TERTIARY of parent",
parentFN.get(TERTIARY.ordinal()), daughter2FN.get(SECONDARY.ordinal()));
TEST_UTIL.deleteTable(tableName);
}
/*
* Check if merged region inherits FN from one of its regions.
*/
@Test
public void testMergeTable() throws Exception {
TableName tableName = TableName.valueOf("mergeRegions");
TEST_UTIL.createTable(tableName, Bytes.toBytes("f"), splitKeys);
TEST_UTIL.waitUntilAllRegionsAssigned(tableName);
checkIfFavoredNodeInformationIsCorrect(tableName);
RegionLocator locator = TEST_UTIL.getConnection().getRegionLocator(tableName);
HRegionInfo regionA = locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
HRegionInfo regionB = locator.getRegionLocation(splitKeys[0]).getRegionInfo();
List<ServerName> regionAFN = fnm.getFavoredNodes(regionA);
LOG.info("regionA: " + regionA.getEncodedName() + " with FN: " + fnm.getFavoredNodes(regionA));
LOG.info("regionB: " + regionA.getEncodedName() + " with FN: " + fnm.getFavoredNodes(regionB));
admin.mergeRegionsAsync(regionA.getEncodedNameAsBytes(),
regionB.getEncodedNameAsBytes(), false).get(60, TimeUnit.SECONDS);
TEST_UTIL.waitUntilNoRegionsInTransition(WAIT_TIMEOUT);
waitUntilTableRegionCountReached(tableName, NUM_REGIONS - 1);
// All regions should have favored nodes
checkIfFavoredNodeInformationIsCorrect(tableName);
HRegionInfo mergedRegion =
locator.getRegionLocation(HConstants.EMPTY_START_ROW).getRegionInfo();
List<ServerName> mergedFN = fnm.getFavoredNodes(mergedRegion);
assertArrayEquals("Merged region doesn't match regionA's FN",
regionAFN.toArray(), mergedFN.toArray());
TEST_UTIL.deleteTable(tableName);
}
/*
* This checks the following:
*
* 1. Do all regions of the table have favored nodes updated in master?
* 2. Is the number of favored nodes correct for a region? Is the start code -1?
* 3. Is the FN information consistent between Master and the respective RegionServer?
*/
private void checkIfFavoredNodeInformationIsCorrect(TableName tableName) throws Exception {
/*
* Since we need HRegionServer to check for consistency of FN between Master and RS,
* lets construct a map for each serverName lookup. Makes it easy later.
*/
Map<ServerName, HRegionServer> snRSMap = Maps.newHashMap();
for (JVMClusterUtil.RegionServerThread rst :
TEST_UTIL.getMiniHBaseCluster().getLiveRegionServerThreads()) {
snRSMap.put(rst.getRegionServer().getServerName(), rst.getRegionServer());
}
// Also include master, since it can also host user regions.
for (JVMClusterUtil.MasterThread rst :
TEST_UTIL.getMiniHBaseCluster().getLiveMasterThreads()) {
snRSMap.put(rst.getMaster().getServerName(), rst.getMaster());
}
RegionLocator regionLocator = admin.getConnection().getRegionLocator(tableName);
for (HRegionLocation regionLocation : regionLocator.getAllRegionLocations()) {
HRegionInfo regionInfo = regionLocation.getRegionInfo();
List<ServerName> fnList = fnm.getFavoredNodes(regionInfo);
// 1. Does each region have favored node?
assertNotNull("Favored nodes should not be null for region:" + regionInfo, fnList);
// 2. Do we have the right number of favored nodes? Is start code -1?
assertEquals("Incorrect favored nodes for region:" + regionInfo + " fnlist: " + fnList,
FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, fnList.size());
for (ServerName sn : fnList) {
assertEquals("FN should not have startCode, fnlist:" + fnList, -1, sn.getStartcode());
}
// 3. Check if the regionServers have all the FN updated and in sync with Master
HRegionServer regionServer = snRSMap.get(regionLocation.getServerName());
assertNotNull("RS should not be null for regionLocation: " + regionLocation, regionServer);
InetSocketAddress[] rsFavNodes =
regionServer.getFavoredNodesForRegion(regionInfo.getEncodedName());
assertNotNull("RS " + regionLocation.getServerName()
+ " does not have FN for region: " + regionInfo, rsFavNodes);
List<ServerName> fnFromRS = Lists.newArrayList();
for (InetSocketAddress addr : rsFavNodes) {
fnFromRS.add(ServerName.valueOf(addr.getHostName(), addr.getPort(),
ServerName.NON_STARTCODE));
}
fnFromRS.removeAll(fnList);
assertEquals("Inconsistent FN bet RS and Master, RS diff: " + fnFromRS
+ " List on master: " + fnList, 0, fnFromRS.size());
}
}
private void checkIfDaughterInherits2FN(List<ServerName> parentFN, List<ServerName> daughterFN) {
assertNotNull(parentFN);
assertNotNull(daughterFN);
List<ServerName> favoredNodes = Lists.newArrayList(daughterFN);
favoredNodes.removeAll(parentFN);
/*
* With a small cluster its likely some FN might accidentally get shared. Its likely the
* 3rd FN the balancer chooses might still belong to the parent in which case favoredNodes
* size would be 0.
*/
assertTrue("Daughter FN:" + daughterFN + " should have inherited 2 FN from parent FN:"
+ parentFN, favoredNodes.size() <= 1);
}
private void waitUntilTableRegionCountReached(final TableName tableName, final int numRegions)
throws Exception {
TEST_UTIL.waitFor(WAIT_TIMEOUT, new Waiter.Predicate<Exception>() {
@Override
public boolean evaluate() throws Exception {
return regionStates.getRegionsOfTable(tableName).size() == numRegions;
}
});
}
}

View File

@ -16,19 +16,25 @@
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.balancer;
package org.apache.hadoop.hbase.favored;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
@ -39,10 +45,12 @@ import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Triple;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.Ignore;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
@Category({MasterTests.class, SmallTests.class})
public class TestFavoredNodeAssignmentHelper {
@ -51,6 +59,9 @@ public class TestFavoredNodeAssignmentHelper {
List<ServerName>>();
private static RackManager rackManager = Mockito.mock(RackManager.class);
// Some tests have randomness, so we run them multiple times
private static final int MAX_ATTEMPTS = 100;
@BeforeClass
public static void setupBeforeClass() throws Exception {
// Set up some server -> rack mappings
@ -99,7 +110,7 @@ public class TestFavoredNodeAssignmentHelper {
return chosenServers;
}
@Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test
@Test
public void testSmallCluster() {
// Test the case where we cannot assign favored nodes (because the number
// of nodes in the cluster is too less)
@ -108,10 +119,11 @@ public class TestFavoredNodeAssignmentHelper {
List<ServerName> servers = getServersFromRack(rackToServerCount);
FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers,
new Configuration());
helper.initialize();
assertFalse(helper.canPlaceFavoredNodes());
}
@Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test
@Test
public void testPlacePrimaryRSAsRoundRobin() {
// Test the regular case where there are many servers in different racks
// Test once for few regions and once for many regions
@ -119,10 +131,10 @@ public class TestFavoredNodeAssignmentHelper {
// now create lots of regions and try to place them on the limited number of machines
primaryRSPlacement(600, null, 10, 10, 10);
}
@Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test
@Test
public void testRoundRobinAssignmentsWithUnevenSizedRacks() {
//In the case of uneven racks, the regions should be distributed
//In the case of uneven racks, the regions should be distributed
//proportionately to the rack sizes
primaryRSPlacement(6, null, 10, 10, 10);
primaryRSPlacement(600, null, 10, 10, 5);
@ -137,7 +149,7 @@ public class TestFavoredNodeAssignmentHelper {
primaryRSPlacement(459, null, 7, 9, 8);
}
@Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test
@Test
public void testSecondaryAndTertiaryPlacementWithSingleRack() {
// Test the case where there is a single rack and we need to choose
// Primary/Secondary/Tertiary from a single rack.
@ -155,13 +167,15 @@ public class TestFavoredNodeAssignmentHelper {
// primary/secondary/tertiary for any given region
for (HRegionInfo region : regions) {
ServerName[] secondaryAndTertiaryServers = secondaryAndTertiaryMap.get(region);
assertNotNull(secondaryAndTertiaryServers);
assertTrue(primaryRSMap.containsKey(region));
assertTrue(!secondaryAndTertiaryServers[0].equals(primaryRSMap.get(region)));
assertTrue(!secondaryAndTertiaryServers[1].equals(primaryRSMap.get(region)));
assertTrue(!secondaryAndTertiaryServers[0].equals(secondaryAndTertiaryServers[1]));
}
}
@Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test
@Test
public void testSecondaryAndTertiaryPlacementWithSingleServer() {
// Test the case where we have a single node in the cluster. In this case
// the primary can be assigned but the secondary/tertiary would be null
@ -179,7 +193,7 @@ public class TestFavoredNodeAssignmentHelper {
assertTrue(secondaryAndTertiaryMap.get(regions.get(0)) == null);
}
@Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test
@Test
public void testSecondaryAndTertiaryPlacementWithMultipleRacks() {
// Test the case where we have multiple racks and the region servers
// belong to multiple racks
@ -203,12 +217,14 @@ public class TestFavoredNodeAssignmentHelper {
String primaryRSRack = rackManager.getRack(primaryRSMap.get(entry.getKey()));
String secondaryRSRack = rackManager.getRack(allServersForRegion[0]);
String tertiaryRSRack = rackManager.getRack(allServersForRegion[1]);
assertTrue(!primaryRSRack.equals(secondaryRSRack));
assertTrue(secondaryRSRack.equals(tertiaryRSRack));
Set<String> racks = Sets.newHashSet(primaryRSRack);
racks.add(secondaryRSRack);
racks.add(tertiaryRSRack);
assertTrue(racks.size() >= 2);
}
}
@Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test
@Test
public void testSecondaryAndTertiaryPlacementWithLessThanTwoServersInRacks() {
// Test the case where we have two racks but with less than two servers in each
// We will not have enough machines to select secondary/tertiary
@ -229,7 +245,7 @@ public class TestFavoredNodeAssignmentHelper {
}
}
@Ignore("Disabled for now until FavoredNodes gets finished as a feature") @Test
@Test
public void testSecondaryAndTertiaryPlacementWithMoreThanOneServerInPrimaryRack() {
// Test the case where there is only one server in one rack and another rack
// has more servers. We try to choose secondary/tertiary on different
@ -247,18 +263,15 @@ public class TestFavoredNodeAssignmentHelper {
assertTrue(primaryRSMap.size() == 6);
Map<HRegionInfo, ServerName[]> secondaryAndTertiaryMap =
helper.placeSecondaryAndTertiaryRS(primaryRSMap);
assertTrue(secondaryAndTertiaryMap.size() == regions.size());
for (HRegionInfo region : regions) {
ServerName s = primaryRSMap.get(region);
ServerName secondaryRS = secondaryAndTertiaryMap.get(region)[0];
ServerName tertiaryRS = secondaryAndTertiaryMap.get(region)[1];
if (rackManager.getRack(s).equals("rack1")) {
assertTrue(rackManager.getRack(secondaryRS).equals("rack2") &&
rackManager.getRack(tertiaryRS).equals("rack1"));
}
if (rackManager.getRack(s).equals("rack2")) {
assertTrue(rackManager.getRack(secondaryRS).equals("rack1") &&
rackManager.getRack(tertiaryRS).equals("rack1"));
}
Set<String> racks = Sets.newHashSet(rackManager.getRack(s));
racks.add(rackManager.getRack(secondaryRS));
racks.add(rackManager.getRack(tertiaryRS));
assertTrue(racks.size() >= 2);
}
}
@ -361,4 +374,277 @@ public class TestFavoredNodeAssignmentHelper {
+ " " + thirdRackSize + " " + regionsOnRack1 + " " + regionsOnRack2 +
" " + regionsOnRack3;
}
@Test
public void testConstrainedPlacement() throws Exception {
List<ServerName> servers = Lists.newArrayList();
servers.add(ServerName.valueOf("foo" + 1 + ":1234", -1));
servers.add(ServerName.valueOf("foo" + 2 + ":1234", -1));
servers.add(ServerName.valueOf("foo" + 15 + ":1234", -1));
FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
helper.initialize();
assertTrue(helper.canPlaceFavoredNodes());
List<HRegionInfo> regions = new ArrayList<HRegionInfo>(20);
for (int i = 0; i < 20; i++) {
HRegionInfo region = new HRegionInfo(TableName.valueOf("foobar"),
Bytes.toBytes(i), Bytes.toBytes(i + 1));
regions.add(region);
}
Map<ServerName, List<HRegionInfo>> assignmentMap =
new HashMap<ServerName, List<HRegionInfo>>();
Map<HRegionInfo, ServerName> primaryRSMap = new HashMap<HRegionInfo, ServerName>();
helper.placePrimaryRSAsRoundRobin(assignmentMap, primaryRSMap, regions);
assertTrue(primaryRSMap.size() == regions.size());
Map<HRegionInfo, ServerName[]> secondaryAndTertiary =
helper.placeSecondaryAndTertiaryRS(primaryRSMap);
assertEquals(regions.size(), secondaryAndTertiary.size());
}
@Test
public void testGetOneRandomRack() throws IOException {
Map<String,Integer> rackToServerCount = new HashMap<>();
Set<String> rackList = Sets.newHashSet("rack1", "rack2", "rack3");
for (String rack : rackList) {
rackToServerCount.put(rack, 2);
}
List<ServerName> servers = getServersFromRack(rackToServerCount);
FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
helper.initialize();
assertTrue(helper.canPlaceFavoredNodes());
// Check we don't get a bad rack on any number of attempts
for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) {
assertTrue(rackList.contains(helper.getOneRandomRack(Sets.newHashSet())));
}
// Check skipRack multiple times when an invalid rack is specified
Set<String> skipRacks = Sets.newHashSet("rack");
for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) {
assertTrue(rackList.contains(helper.getOneRandomRack(skipRacks)));
}
// Check skipRack multiple times when an valid rack is specified
skipRacks = Sets.newHashSet("rack1");
Set<String> validRacks = Sets.newHashSet("rack2", "rack3");
for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) {
assertTrue(validRacks.contains(helper.getOneRandomRack(skipRacks)));
}
}
@Test
public void testGetRandomServerSingleRack() throws IOException {
Map<String,Integer> rackToServerCount = new HashMap<>();
final String rack = "rack1";
rackToServerCount.put(rack, 4);
List<ServerName> servers = getServersFromRack(rackToServerCount);
FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
helper.initialize();
assertTrue(helper.canPlaceFavoredNodes());
// Check we don't get a bad node on any number of attempts
for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) {
ServerName sn = helper.getOneRandomServer(rack, Sets.newHashSet());
assertTrue("Server:" + sn + " does not belong to list: " + servers, servers.contains(sn));
}
// Check skipServers multiple times when an invalid server is specified
Set<ServerName> skipServers =
Sets.newHashSet(ServerName.valueOf("invalidnode:1234", ServerName.NON_STARTCODE));
for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) {
ServerName sn = helper.getOneRandomServer(rack, skipServers);
assertTrue("Server:" + sn + " does not belong to list: " + servers, servers.contains(sn));
}
// Check skipRack multiple times when an valid servers are specified
ServerName skipSN = ServerName.valueOf("foo1:1234", ServerName.NON_STARTCODE);
skipServers = Sets.newHashSet(skipSN);
for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) {
ServerName sn = helper.getOneRandomServer(rack, skipServers);
assertNotEquals("Skip server should not be selected ",
skipSN.getHostAndPort(), sn.getHostAndPort());
assertTrue("Server:" + sn + " does not belong to list: " + servers, servers.contains(sn));
}
}
@Test
public void testGetRandomServerMultiRack() throws IOException {
Map<String,Integer> rackToServerCount = new HashMap<>();
Set<String> rackList = Sets.newHashSet("rack1", "rack2", "rack3");
for (String rack : rackList) {
rackToServerCount.put(rack, 4);
}
List<ServerName> servers = getServersFromRack(rackToServerCount);
FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
helper.initialize();
assertTrue(helper.canPlaceFavoredNodes());
// Check we don't get a bad node on any number of attempts
for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) {
for (String rack : rackList) {
ServerName sn = helper.getOneRandomServer(rack, Sets.newHashSet());
assertTrue("Server:" + sn + " does not belong to rack servers: " + rackToServers.get(rack),
rackToServers.get(rack).contains(sn));
}
}
// Check skipServers multiple times when an invalid server is specified
Set<ServerName> skipServers =
Sets.newHashSet(ServerName.valueOf("invalidnode:1234", ServerName.NON_STARTCODE));
for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) {
for (String rack : rackList) {
ServerName sn = helper.getOneRandomServer(rack, skipServers);
assertTrue("Server:" + sn + " does not belong to rack servers: " + rackToServers.get(rack),
rackToServers.get(rack).contains(sn));
}
}
// Check skipRack multiple times when an valid servers are specified
ServerName skipSN1 = ServerName.valueOf("foo1:1234", ServerName.NON_STARTCODE);
ServerName skipSN2 = ServerName.valueOf("foo10:1234", ServerName.NON_STARTCODE);
ServerName skipSN3 = ServerName.valueOf("foo20:1234", ServerName.NON_STARTCODE);
skipServers = Sets.newHashSet(skipSN1, skipSN2, skipSN3);
for (int attempts = 0 ; attempts < MAX_ATTEMPTS; attempts++) {
for (String rack : rackList) {
ServerName sn = helper.getOneRandomServer(rack, skipServers);
assertFalse("Skip server should not be selected ", skipServers.contains(sn));
assertTrue("Server:" + sn + " does not belong to rack servers: " + rackToServers.get(rack),
rackToServers.get(rack).contains(sn));
}
}
}
@Test
public void testGetFavoredNodes() throws IOException {
Map<String,Integer> rackToServerCount = new HashMap<>();
Set<String> rackList = Sets.newHashSet("rack1", "rack2", "rack3");
for (String rack : rackList) {
rackToServerCount.put(rack, 4);
}
List<ServerName> servers = getServersFromRack(rackToServerCount);
FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
helper.initialize();
assertTrue(helper.canPlaceFavoredNodes());
HRegionInfo region = new HRegionInfo(TableName.valueOf("foobar"),
HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
for (int maxattempts = 0; maxattempts < MAX_ATTEMPTS; maxattempts++) {
List<ServerName> fn = helper.generateFavoredNodes(region);
checkDuplicateFN(fn);
checkFNRacks(fn);
}
}
@Test
public void testGenMissingFavoredNodeOneRack() throws IOException {
Map<String, Integer> rackToServerCount = new HashMap<>();
final String rack = "rack1";
rackToServerCount.put(rack, 6);
List<ServerName> servers = getServersFromRack(rackToServerCount);
FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
helper.initialize();
assertTrue(helper.canPlaceFavoredNodes());
ServerName snRack1SN1 = ServerName.valueOf("foo1:1234", ServerName.NON_STARTCODE);
ServerName snRack1SN2 = ServerName.valueOf("foo2:1234", ServerName.NON_STARTCODE);
ServerName snRack1SN3 = ServerName.valueOf("foo3:1234", ServerName.NON_STARTCODE);
List<ServerName> fn = Lists.newArrayList(snRack1SN1, snRack1SN2);
for (int attempts = 0; attempts < MAX_ATTEMPTS; attempts++) {
checkDuplicateFN(fn, helper.generateMissingFavoredNode(fn));
}
fn = Lists.newArrayList(snRack1SN1, snRack1SN2);
List<ServerName> skipServers = Lists.newArrayList(snRack1SN3);
for (int attempts = 0; attempts < MAX_ATTEMPTS; attempts++) {
ServerName genSN = helper.generateMissingFavoredNode(fn, skipServers);
checkDuplicateFN(fn, genSN);
assertNotEquals("Generated FN should not match excluded one", snRack1SN3, genSN);
}
}
@Test
public void testGenMissingFavoredNodeMultiRack() throws IOException {
ServerName snRack1SN1 = ServerName.valueOf("foo1:1234", ServerName.NON_STARTCODE);
ServerName snRack1SN2 = ServerName.valueOf("foo2:1234", ServerName.NON_STARTCODE);
ServerName snRack2SN1 = ServerName.valueOf("foo10:1234", ServerName.NON_STARTCODE);
ServerName snRack2SN2 = ServerName.valueOf("foo11:1234", ServerName.NON_STARTCODE);
Map<String,Integer> rackToServerCount = new HashMap<>();
Set<String> rackList = Sets.newHashSet("rack1", "rack2");
for (String rack : rackList) {
rackToServerCount.put(rack, 4);
}
List<ServerName> servers = getServersFromRack(rackToServerCount);
FavoredNodeAssignmentHelper helper = new FavoredNodeAssignmentHelper(servers, rackManager);
helper.initialize();
assertTrue(helper.canPlaceFavoredNodes());
List<ServerName> fn = Lists.newArrayList(snRack1SN1, snRack1SN2);
for (int attempts = 0; attempts < MAX_ATTEMPTS; attempts++) {
ServerName genSN = helper.generateMissingFavoredNode(fn);
checkDuplicateFN(fn, genSN);
checkFNRacks(fn, genSN);
}
fn = Lists.newArrayList(snRack1SN1, snRack2SN1);
for (int attempts = 0; attempts < MAX_ATTEMPTS; attempts++) {
ServerName genSN = helper.generateMissingFavoredNode(fn);
checkDuplicateFN(fn, genSN);
checkFNRacks(fn, genSN);
}
fn = Lists.newArrayList(snRack1SN1, snRack2SN1);
List<ServerName> skipServers = Lists.newArrayList(snRack2SN2);
for (int attempts = 0; attempts < MAX_ATTEMPTS; attempts++) {
ServerName genSN = helper.generateMissingFavoredNode(fn, skipServers);
checkDuplicateFN(fn, genSN);
checkFNRacks(fn, genSN);
assertNotEquals("Generated FN should not match excluded one", snRack2SN2, genSN);
}
}
private void checkDuplicateFN(List<ServerName> fnList, ServerName genFN) {
Set<ServerName> favoredNodes = Sets.newHashSet(fnList);
assertNotNull("Generated FN can't be null", genFN);
favoredNodes.add(genFN);
assertEquals("Did not find expected number of favored nodes",
FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, favoredNodes.size());
}
private void checkDuplicateFN(List<ServerName> fnList) {
Set<ServerName> favoredNodes = Sets.newHashSet(fnList);
assertEquals("Did not find expected number of favored nodes",
FavoredNodeAssignmentHelper.FAVORED_NODES_NUM, favoredNodes.size());
}
private void checkFNRacks(List<ServerName> fnList, ServerName genFN) {
Set<ServerName> favoredNodes = Sets.newHashSet(fnList);
favoredNodes.add(genFN);
Set<String> racks = Sets.newHashSet();
for (ServerName sn : favoredNodes) {
racks.add(rackManager.getRack(sn));
}
assertTrue("FN should be spread atleast across 2 racks", racks.size() >= 2);
}
private void checkFNRacks(List<ServerName> fnList) {
Set<ServerName> favoredNodes = Sets.newHashSet(fnList);
Set<String> racks = Sets.newHashSet();
for (ServerName sn : favoredNodes) {
racks.add(rackManager.getRack(sn));
}
assertTrue("FN should be spread atleast across 2 racks", racks.size() >= 2);
}
}

View File

@ -0,0 +1,50 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.favored;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.testclassification.MiscTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@Category({MiscTests.class, SmallTests.class})
public class TestStartcodeAgnosticServerName {
@Test
public void testStartCodeServerName() {
ServerName sn = ServerName.valueOf("www.example.org", 1234, 5678);
StartcodeAgnosticServerName snStartCode =
new StartcodeAgnosticServerName("www.example.org", 1234, 5678);
assertTrue(ServerName.isSameHostnameAndPort(sn, snStartCode));
assertTrue(snStartCode.equals(sn));
assertTrue(sn.equals(snStartCode));
assertEquals(0, snStartCode.compareTo(sn));
StartcodeAgnosticServerName snStartCodeFNPort =
new StartcodeAgnosticServerName("www.example.org", 1234, ServerName.NON_STARTCODE);
assertTrue(ServerName.isSameHostnameAndPort(snStartCodeFNPort, snStartCode));
assertTrue(snStartCode.equals(snStartCodeFNPort));
assertTrue(snStartCodeFNPort.equals(snStartCode));
assertEquals(0, snStartCode.compareTo(snStartCodeFNPort));
}
}

View File

@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.master.normalizer.RegionNormalizer;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.snapshot.SnapshotManager;
@ -365,6 +366,11 @@ public class MockNoopMasterServices implements MasterServices, Server {
return null;
}
@Override
public FavoredNodesManager getFavoredNodesManager() {
return null;
}
@Override
public SnapshotManager getSnapshotManager() {
return null;

View File

@ -52,10 +52,10 @@ import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.master.balancer.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer;
import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan;
import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan.Position;
import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.favored.FavoredNodeLoadBalancer;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.testclassification.MasterTests;

View File

@ -28,15 +28,16 @@ import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.favored.FavoredNodeAssignmentHelper;
import org.apache.hadoop.hbase.favored.FavoredNodeLoadBalancer;
import org.apache.hadoop.hbase.favored.FavoredNodesPlan.Position;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.balancer.FavoredNodeLoadBalancer;
import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
import org.apache.hadoop.hbase.master.balancer.FavoredNodesPlan.Position;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.AfterClass;
@ -72,6 +73,7 @@ public class TestRegionPlacement2 {
public void testFavoredNodesPresentForRoundRobinAssignment() throws HBaseIOException {
LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(TEST_UTIL.getConfiguration());
balancer.setMasterServices(TEST_UTIL.getMiniHBaseCluster().getMaster());
balancer.initialize();
List<ServerName> servers = new ArrayList<ServerName>();
for (int i = 0; i < SLAVES; i++) {
ServerName server = TEST_UTIL.getMiniHBaseCluster().getRegionServer(i).getServerName();
@ -85,7 +87,7 @@ public class TestRegionPlacement2 {
Set<ServerName> serverBefore = assignmentMap.keySet();
List<ServerName> favoredNodesBefore =
((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
assertTrue(favoredNodesBefore.size() == 3);
assertTrue(favoredNodesBefore.size() == FavoredNodeAssignmentHelper.FAVORED_NODES_NUM);
// the primary RS should be the one that the balancer's assignment returns
assertTrue(ServerName.isSameHostnameAndPort(serverBefore.iterator().next(),
favoredNodesBefore.get(PRIMARY)));
@ -95,7 +97,7 @@ public class TestRegionPlacement2 {
assignmentMap = balancer.roundRobinAssignment(regions, servers);
List<ServerName> favoredNodesAfter =
((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
assertTrue(favoredNodesAfter.size() == 3);
assertTrue(favoredNodesAfter.size() == FavoredNodeAssignmentHelper.FAVORED_NODES_NUM);
// We don't expect the favored nodes assignments to change in multiple calls
// to the roundRobinAssignment method in the balancer (relevant for AssignmentManager.assign
// failures)
@ -122,7 +124,7 @@ public class TestRegionPlacement2 {
assignmentMap = balancer.roundRobinAssignment(regions, servers);
List<ServerName> favoredNodesNow =
((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
assertTrue(favoredNodesNow.size() == 3);
assertTrue(favoredNodesNow.size() == FavoredNodeAssignmentHelper.FAVORED_NODES_NUM);
assertTrue(!favoredNodesNow.contains(favoredNodesAfter.get(PRIMARY)) &&
!favoredNodesNow.contains(favoredNodesAfter.get(SECONDARY)) &&
!favoredNodesNow.contains(favoredNodesAfter.get(TERTIARY)));
@ -132,6 +134,7 @@ public class TestRegionPlacement2 {
public void testFavoredNodesPresentForRandomAssignment() throws HBaseIOException {
LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(TEST_UTIL.getConfiguration());
balancer.setMasterServices(TEST_UTIL.getMiniHBaseCluster().getMaster());
balancer.initialize();
List<ServerName> servers = new ArrayList<ServerName>();
for (int i = 0; i < SLAVES; i++) {
ServerName server = TEST_UTIL.getMiniHBaseCluster().getRegionServer(i).getServerName();
@ -143,7 +146,7 @@ public class TestRegionPlacement2 {
ServerName serverBefore = balancer.randomAssignment(region, servers);
List<ServerName> favoredNodesBefore =
((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
assertTrue(favoredNodesBefore.size() == 3);
assertTrue(favoredNodesBefore.size() == FavoredNodeAssignmentHelper.FAVORED_NODES_NUM);
// the primary RS should be the one that the balancer's assignment returns
assertTrue(ServerName.isSameHostnameAndPort(serverBefore,favoredNodesBefore.get(PRIMARY)));
// now remove the primary from the list of servers
@ -152,7 +155,7 @@ public class TestRegionPlacement2 {
ServerName serverAfter = balancer.randomAssignment(region, servers);
List<ServerName> favoredNodesAfter =
((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
assertTrue(favoredNodesAfter.size() == 3);
assertTrue(favoredNodesAfter.size() == FavoredNodeAssignmentHelper.FAVORED_NODES_NUM);
// We don't expect the favored nodes assignments to change in multiple calls
// to the randomAssignment method in the balancer (relevant for AssignmentManager.assign
// failures)
@ -167,7 +170,7 @@ public class TestRegionPlacement2 {
balancer.randomAssignment(region, servers);
List<ServerName> favoredNodesNow =
((FavoredNodeLoadBalancer)balancer).getFavoredNodes(region);
assertTrue(favoredNodesNow.size() == 3);
assertTrue(favoredNodesNow.size() == FavoredNodeAssignmentHelper.FAVORED_NODES_NUM);
assertTrue(!favoredNodesNow.contains(favoredNodesAfter.get(PRIMARY)) &&
!favoredNodesNow.contains(favoredNodesAfter.get(SECONDARY)) &&
!favoredNodesNow.contains(favoredNodesAfter.get(TERTIARY)));