HBASE-12034 If I kill single RS in branch-1, all regions end up on Master!

This commit is contained in:
Jimmy Xiang 2014-09-22 13:39:18 -07:00
parent a36ffdaff7
commit 683f3b3d50
26 changed files with 302 additions and 566 deletions

View File

@ -59,17 +59,22 @@ if [ "$HOSTLIST" = "" ]; then
fi
fi
for regionserver in `cat "$HOSTLIST"`; do
if ${HBASE_SLAVE_PARALLEL:-true}; then
ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
2>&1 | sed "s/^/$regionserver: /" &
else # run each command serially
ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
2>&1 | sed "s/^/$regionserver: /"
fi
if [ "$HBASE_SLAVE_SLEEP" != "" ]; then
sleep $HBASE_SLAVE_SLEEP
fi
done
regionservers=`cat "$HOSTLIST"`
if [ "$regionservers" = "localhost" ]; then
"$bin"/local-regionservers.sh start 1
else
for regionserver in `cat "$HOSTLIST"`; do
if ${HBASE_SLAVE_PARALLEL:-true}; then
ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
2>&1 | sed "s/^/$regionserver: /" &
else # run each command serially
ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
2>&1 | sed "s/^/$regionserver: /"
fi
if [ "$HBASE_SLAVE_SLEEP" != "" ]; then
sleep $HBASE_SLAVE_SLEEP
fi
done
fi
wait

View File

@ -0,0 +1 @@
localhost

View File

@ -560,15 +560,6 @@ possible configurations would overwhelm and obscure the important.
<value>300000</value>
<description>Period at which the region balancer runs in the Master.</description>
</property>
<property>
<name>hbase.balancer.backupMasterWeight</name>
<value>1</value>
<description>Used to control how many regions the region balancer can assign to
backup Masters, compared to normal region servers. The default value 1 means a
backup Master can host as many regions as a normal region server. The bigger the
weight, the less the regions a backup Master will host. If the weight is less than 1,
the balancer will not assign any region to any backup Master</description>
</property>
<property>
<name>hbase.regions.slop</name>
<value>0.2</value>

View File

@ -24,6 +24,7 @@ java.util.*;
org.apache.hadoop.hbase.ServerName;
org.apache.hadoop.hbase.ClusterStatus;
org.apache.hadoop.hbase.master.HMaster;
org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
</%import>
<%java>
Collection<ServerName> masters = null;
@ -32,7 +33,9 @@ if (master.isActiveMaster()) {
ClusterStatus status = master.getClusterStatus();
masters = status.getBackupMasters();
} else{
ServerName sn = master.getMasterAddressTracker().getMasterAddress();
MasterAddressTracker masterAddressTracker = master.getMasterAddressTracker();
ServerName sn = masterAddressTracker == null ? null
: masterAddressTracker.getMasterAddress();
assert sn != null : "Failed to retreive master's ServerName!";
masters = Collections.singletonList(sn);
}

View File

@ -40,8 +40,8 @@ org.apache.hadoop.util.StringUtils;
com.yammer.metrics.stats.Snapshot;
</%import>
<%java>
BlockCache bc = cacheConfig.getBlockCache();
BlockCache [] bcs = bc.getBlockCaches();
BlockCache bc = cacheConfig == null ? null : cacheConfig.getBlockCache();
BlockCache [] bcs = bc == null ? null : bc.getBlockCaches();
if (bcn.equals("L1")) {
bc = bcs == null || bcs.length == 0? bc: bcs[0];
} else {
@ -51,6 +51,10 @@ com.yammer.metrics.stats.Snapshot;
}
bc = bcs[1];
}
if (bc == null) {
System.out.println("There is no block cache");
return;
}
CachedBlocksByFile cbsbf = BlockCacheUtil.getLoadedCachedBlocksByFile(conf, bc);
</%java>
<%if bcv.equals("file") %><& bc_by_file; cbsbf = cbsbf; &><%else>[ <% BlockCacheUtil.toJSON(bc) %>, <% BlockCacheUtil.toJSON(cbsbf) %> ]</%if>

View File

@ -42,7 +42,6 @@ import javax.servlet.http.HttpServletResponse;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterStatus;
@ -67,6 +66,7 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotDisabledException;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
@ -80,6 +80,7 @@ import org.apache.hadoop.hbase.ipc.RpcServer;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode;
import org.apache.hadoop.hbase.master.balancer.BalancerChore;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
@ -226,6 +227,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
// monitor for distributed procedures
MasterProcedureManagerHost mpmHost;
// A flag to indicate if any table is configured to put on the active master
protected final boolean tablesOnMaster;
private MasterQuotaManager quotaManager;
// handle table states
@ -287,6 +291,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true);
this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this));
String[] tablesOnMaster = BaseLoadBalancer.getTablesOnMaster(conf);
this.tablesOnMaster = tablesOnMaster != null && tablesOnMaster.length > 0;
// Do we publish the status?
boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED,
@ -349,6 +355,18 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
}
}
/**
* If configured to put regions on active master,
* wait till a backup master becomes active.
* Otherwise, loop till the server is stopped or aborted.
*/
protected void waitForMasterActive(){
while (!(tablesOnMaster && isActiveMaster)
&& !isStopped() && !isAborted()) {
sleeper.sleep();
}
}
@VisibleForTesting
public MasterRpcServices getMasterRpcServices() {
return (MasterRpcServices)rpcServices;
@ -377,7 +395,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
protected void configureInfoServer() {
infoServer.addServlet("master-status", "/master-status", MasterStatusServlet.class);
infoServer.setAttribute(MASTER, this);
super.configureInfoServer();
if (tablesOnMaster) {
super.configureInfoServer();
}
}
protected Class<? extends HttpServlet> getDumpServlet() {
@ -563,10 +583,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
this.initializationBeforeMetaAssignment = true;
// Wait for regionserver to finish initialization.
synchronized (online) {
while (!isStopped() && !isOnline()) {
online.wait(100);
}
if (tablesOnMaster) {
waitForServerOnline();
}
//initialize load balancer
@ -1596,6 +1614,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
@Override
public void abort(final String msg, final Throwable t) {
if (isAborted() || isStopped()) {
return;
}
if (cpHost != null) {
// HBASE-4014: dump a list of loaded coprocessors.
LOG.fatal("Master server abort: loaded coprocessors are: " +

View File

@ -32,10 +32,10 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CoordinatedStateManager;
import org.apache.hadoop.hbase.CoordinatedStateManagerFactory;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZNodeClearer;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.LocalHBaseCluster;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZNodeClearer;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.HBaseAdmin;
@ -154,7 +154,6 @@ public class HMasterCommandLine extends ServerCommandLine {
// and regionserver both in the one JVM.
if (LocalHBaseCluster.isLocal(conf)) {
DefaultMetricsSystem.setMiniClusterMode(true);
conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
final MiniZooKeeperCluster zooKeeperCluster = new MiniZooKeeperCluster(conf);
File zkDataPath = new File(conf.get(HConstants.ZOOKEEPER_DATA_DIR));
int zkClientPort = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, 0);
@ -183,7 +182,7 @@ public class HMasterCommandLine extends ServerCommandLine {
// Need to have the zk cluster shutdown when master is shutdown.
// Run a subclass that does the zk cluster shutdown on its way out.
LocalHBaseCluster cluster = new LocalHBaseCluster(conf, conf.getInt("hbase.masters", 1),
conf.getInt("hbase.regionservers", 0), LocalHMaster.class, HRegionServer.class);
conf.getInt("hbase.regionservers", 1), LocalHMaster.class, HRegionServer.class);
((LocalHMaster)cluster.getMaster(0)).setZKCluster(zooKeeperCluster);
cluster.startup();
waitOnMasterThreads(cluster);
@ -208,6 +207,7 @@ public class HMasterCommandLine extends ServerCommandLine {
return 0;
}
@SuppressWarnings("resource")
private int stopMaster() {
Admin adm = null;
try {

View File

@ -170,6 +170,7 @@ import com.google.protobuf.ServiceException;
* Implements the master RPC services.
*/
@InterfaceAudience.Private
@SuppressWarnings("deprecation")
public class MasterRpcServices extends RSRpcServices
implements MasterService.BlockingInterface, RegionServerStatusService.BlockingInterface {
protected static final Log LOG = LogFactory.getLog(MasterRpcServices.class.getName());
@ -1097,7 +1098,6 @@ public class MasterRpcServices extends RSRpcServices
*
*/
@Override
@SuppressWarnings("deprecation")
public OfflineRegionResponse offlineRegion(RpcController controller,
OfflineRegionRequest request) throws ServiceException {
final byte [] regionName = request.getRegion().getValue().toByteArray();
@ -1227,7 +1227,6 @@ public class MasterRpcServices extends RSRpcServices
}
@Override
@SuppressWarnings("deprecation")
public UnassignRegionResponse unassignRegion(RpcController controller,
UnassignRegionRequest req) throws ServiceException {
try {

View File

@ -49,7 +49,6 @@ import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler;
import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
@ -95,6 +94,7 @@ import com.google.protobuf.ServiceException;
* and has completed the handling.
*/
@InterfaceAudience.Private
@SuppressWarnings("deprecation")
public class ServerManager {
public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART =
"hbase.master.wait.on.regionservers.maxtostart";
@ -142,8 +142,6 @@ public class ServerManager {
private final long maxSkew;
private final long warningSkew;
private final boolean checkingBackupMaster;
private BaseLoadBalancer balancer;
/**
* Set of region servers which are dead but not processed immediately. If one
@ -203,18 +201,6 @@ public class ServerManager {
maxSkew = c.getLong("hbase.master.maxclockskew", 30000);
warningSkew = c.getLong("hbase.master.warningclockskew", 10000);
this.connection = connect ? HConnectionManager.getConnection(c) : null;
// Put this in constructor so we don't cast it every time
//
// We need to check if a newly added server is a backup master
// only if we are configured not to assign any region to it.
checkingBackupMaster = (master instanceof HMaster)
&& ((HMaster)master).balancer instanceof BaseLoadBalancer
&& (c.getInt(BaseLoadBalancer.BACKUP_MASTER_WEIGHT_KEY,
BaseLoadBalancer.DEFAULT_BACKUP_MASTER_WEIGHT) < 1);
if (checkingBackupMaster) {
balancer = (BaseLoadBalancer)((HMaster)master).balancer;
}
}
/**
@ -419,18 +405,6 @@ public class ServerManager {
@VisibleForTesting
void recordNewServerWithLock(final ServerName serverName, final ServerLoad sl) {
LOG.info("Registering server=" + serverName);
if (checkingBackupMaster) {
ZooKeeperWatcher zooKeeper = master.getZooKeeper();
String backupZNode = ZKUtil.joinZNode(
zooKeeper.backupMasterAddressesZNode, serverName.toString());
try {
if (ZKUtil.checkExists(zooKeeper, backupZNode) != -1) {
balancer.excludeServer(serverName);
}
} catch (KeeperException e) {
master.abort("Failed to check if a new server a backup master", e);
}
}
this.onlineServers.put(serverName, sl);
this.rsAdmins.remove(serverName);
}
@ -468,19 +442,10 @@ public class ServerManager {
(double)totalLoad / (double)numServers;
}
/**
* Get the count of active regionservers that are not backup
* masters. This count may not be accurate depending on timing.
* @return the count of active regionservers
*/
/** @return the count of active regionservers */
private int countOfRegionServers() {
// Presumes onlineServers is a concurrent map
int count = this.onlineServers.size();
if (balancer != null) {
count -= balancer.getExcludedServers().size();
if (count < 0) count = 0;
}
return count;
return this.onlineServers.size();
}
/**
@ -535,7 +500,7 @@ public class ServerManager {
try {
List<String> servers = ZKUtil.listChildrenNoWatch(zkw, zkw.rsZNode);
if (servers == null || (servers.size() == 1
if (servers == null || servers.size() == 0 || (servers.size() == 1
&& servers.contains(sn.toString()))) {
LOG.info("ZK shows there is only the master self online, exiting now");
// Master could have lost some ZK events, no need to wait more.
@ -854,7 +819,6 @@ public class ServerManager {
* @throws IOException
* @throws RetriesExhaustedException wrapping a ConnectException if failed
*/
@SuppressWarnings("deprecation")
private AdminService.BlockingInterface getRsAdmin(final ServerName sn)
throws IOException {
AdminService.BlockingInterface admin = this.rsAdmins.get(sn);
@ -890,8 +854,16 @@ public class ServerManager {
getLong(WAIT_ON_REGIONSERVERS_INTERVAL, 1500);
final long timeout = this.master.getConfiguration().
getLong(WAIT_ON_REGIONSERVERS_TIMEOUT, 4500);
int defaultMinToStart = 1;
if (((HMaster)services).tablesOnMaster) {
// If we assign regions to master, we'd like to start
// at least another region server so that we don't
// assign all regions to master if other region servers
// don't come up in time.
defaultMinToStart = 2;
}
int minToStart = this.master.getConfiguration().
getInt(WAIT_ON_REGIONSERVERS_MINTOSTART, 2);
getInt(WAIT_ON_REGIONSERVERS_MINTOSTART, defaultMinToStart);
if (minToStart < 1) {
LOG.warn(String.format(
"The value of '%s' (%d) can not be less than 1, ignoring.",
@ -915,10 +887,8 @@ public class ServerManager {
long lastCountChange = startTime;
int count = countOfRegionServers();
int oldCount = 0;
ServerName masterSn = master.getServerName();
boolean selfCheckedIn = isServerOnline(masterSn);
while (!this.master.isStopped() && (!selfCheckedIn || (count < maxToStart
&& (lastCountChange+interval > now || timeout > slept || count < minToStart)))) {
while (!this.master.isStopped() && count < maxToStart
&& (lastCountChange+interval > now || timeout > slept || count < minToStart)) {
// Log some info at every interval time or if there is a change
if (oldCount != count || lastLogTime+interval < now){
lastLogTime = now;
@ -926,8 +896,7 @@ public class ServerManager {
"Waiting for region servers count to settle; currently"+
" checked in " + count + ", slept for " + slept + " ms," +
" expecting minimum of " + minToStart + ", maximum of "+ maxToStart+
", timeout of "+timeout+" ms, interval of "+interval+" ms," +
" selfCheckedIn " + selfCheckedIn;
", timeout of "+timeout+" ms, interval of "+interval+" ms.";
LOG.info(msg);
status.setStatus(msg);
}
@ -938,8 +907,6 @@ public class ServerManager {
now = System.currentTimeMillis();
slept = now - startTime;
selfCheckedIn = isServerOnline(masterSn);
oldCount = count;
count = countOfRegionServers();
if (count != oldCount) {
@ -950,8 +917,7 @@ public class ServerManager {
LOG.info("Finished waiting for region servers count to settle;" +
" checked in " + count + ", slept for " + slept + " ms," +
" expecting minimum of " + minToStart + ", maximum of "+ maxToStart+","+
" master is "+ (this.master.isStopped() ? "stopped.": "running," +
" selfCheckedIn " + selfCheckedIn)
" master is "+ (this.master.isStopped() ? "stopped.": "running")
);
}

View File

@ -44,14 +44,15 @@ import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.RegionLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.master.AssignmentManager;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.security.access.AccessControlLists;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type;
import org.apache.hadoop.hbase.security.access.AccessControlLists;
import org.apache.hadoop.util.StringUtils;
import com.google.common.base.Joiner;
import com.google.common.collect.ArrayListMultimap;
@ -100,7 +101,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
ArrayList<String> tables;
HRegionInfo[] regions;
Deque<RegionLoad>[] regionLoads;
boolean[] backupMasterFlags;
int activeMasterIndex = -1;
int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality
@ -153,10 +153,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
Map<ServerName, List<HRegionInfo>> clusterState,
Map<String, Deque<RegionLoad>> loads,
RegionLocationFinder regionFinder,
Collection<ServerName> backupMasters,
Set<String> tablesOnMaster,
RackManager rackManager) {
this(masterServerName, null, clusterState, loads, regionFinder, backupMasters,
this(masterServerName, null, clusterState, loads, regionFinder,
tablesOnMaster, rackManager);
}
@ -167,7 +166,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
Map<ServerName, List<HRegionInfo>> clusterState,
Map<String, Deque<RegionLoad>> loads,
RegionLocationFinder regionFinder,
Collection<ServerName> backupMasters,
Set<String> tablesOnMaster,
RackManager rackManager) {
@ -235,7 +233,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
regionLoads = new Deque[numRegions];
regionLocations = new int[numRegions][];
serverIndicesSortedByRegionCount = new Integer[numServers];
backupMasterFlags = new boolean[numServers];
serverIndexToHostIndex = new int[numServers];
serverIndexToRackIndex = new int[numServers];
@ -256,8 +253,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
if (servers[serverIndex] == null ||
servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) {
servers[serverIndex] = entry.getKey();
backupMasterFlags[serverIndex] = backupMasters != null
&& backupMasters.contains(servers[serverIndex]);
}
if (regionsPerServer[serverIndex] != null) {
@ -272,11 +267,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
if (servers[serverIndex].equals(masterServerName)) {
activeMasterIndex = serverIndex;
for (HRegionInfo hri: entry.getValue()) {
if (!shouldBeOnMaster(hri)) {
numUserRegionsOnMaster++;
}
}
}
}
@ -718,15 +708,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
}
}
}
if (oldServer >= 0 && isActiveMaster(oldServer)) {
if (!shouldBeOnMaster(regions[region])) {
numUserRegionsOnMaster--;
}
} else if (isActiveMaster(newServer)) {
if (!shouldBeOnMaster(regions[region])) {
numUserRegionsOnMaster++;
}
}
}
int[] removeRegion(int[] regions, int regionIndex) {
@ -784,10 +765,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
return regionsPerServer[server].length;
}
boolean isBackupMaster(int server) {
return backupMasterFlags[server];
}
boolean isActiveMaster(int server) {
return activeMasterIndex == server;
}
@ -848,31 +825,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
private static final Random RANDOM = new Random(System.currentTimeMillis());
private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class);
// The weight means that each region on the backup master is
// equal to that many regions on a normal regionserver, in calculating
// the region load by the load balancer. So that the backup master
// can host less (or equal if weight = 1) regions than normal regionservers.
//
// The weight can be used to control the number of regions on backup
// masters, which shouldn't host as many regions as normal regionservers.
// So that we don't need to move around too many regions when a
// backup master becomes the active one.
public static final String BACKUP_MASTER_WEIGHT_KEY =
"hbase.balancer.backupMasterWeight";
public static final int DEFAULT_BACKUP_MASTER_WEIGHT = 1;
// Regions of these tables are put on the master by default.
private static final String[] DEFAULT_TABLES_ON_MASTER =
new String[] {AccessControlLists.ACL_TABLE_NAME.getNameAsString(),
TableName.NAMESPACE_TABLE_NAME.getNameAsString(),
TableName.META_TABLE_NAME.getNameAsString()};
protected int backupMasterWeight;
// a flag to indicate if assigning regions to backup masters
protected boolean usingBackupMasters = true;
protected final Set<ServerName> excludedServers =
Collections.synchronizedSet(new HashSet<ServerName>());
public static final String TABLES_ON_MASTER =
"hbase.balancer.tablesOnMaster";
protected final Set<String> tablesOnMaster = new HashSet<String>();
protected final MetricsBalancer metricsBalancer = new MetricsBalancer();
@ -880,6 +840,24 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
protected ServerName masterServerName;
protected MasterServices services;
/**
* By default, regions of some small system tables such as meta,
* namespace, and acl are assigned to the active master. If you don't
* want to assign any region to the active master, you need to
* configure "hbase.balancer.tablesOnMaster" to "none".
*/
public static String[] getTablesOnMaster(Configuration conf) {
String valueString = conf.get(TABLES_ON_MASTER);
if (valueString == null) {
return DEFAULT_TABLES_ON_MASTER;
}
valueString = valueString.trim();
if (valueString.equalsIgnoreCase("none")) {
return null;
}
return StringUtils.getStrings(valueString);
}
@Override
public void setConf(Configuration conf) {
setSlop(conf);
@ -887,17 +865,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
else if (slop > 1) slop = 1;
this.config = conf;
backupMasterWeight = conf.getInt(
BACKUP_MASTER_WEIGHT_KEY, DEFAULT_BACKUP_MASTER_WEIGHT);
if (backupMasterWeight < 1) {
usingBackupMasters = false;
LOG.info("Backup master won't host any region since "
+ BACKUP_MASTER_WEIGHT_KEY + " is " + backupMasterWeight
+ "(<1)");
}
String[] tables = conf.getStrings(
"hbase.balancer.tablesOnMaster", DEFAULT_TABLES_ON_MASTER);
if (tables != null) {
String[] tables = getTablesOnMaster(conf);
if (tables != null && tables.length > 0) {
Collections.addAll(tablesOnMaster, tables);
}
this.rackManager = new RackManager(getConf());
@ -908,23 +877,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2);
}
/**
* If there is any server excluded, filter it out from the cluster map so
* we won't assign any region to it, assuming none's already assigned there.
*/
protected void filterExcludedServers(Map<ServerName, List<HRegionInfo>> clusterMap) {
if (excludedServers.isEmpty()) { // No server to filter out
return;
}
Iterator<Map.Entry<ServerName, List<HRegionInfo>>> it = clusterMap.entrySet().iterator();
while (it.hasNext()) {
Map.Entry<ServerName, List<HRegionInfo>> en = it.next();
if (excludedServers.contains(en.getKey()) && en.getValue().isEmpty()) {
it.remove();
}
}
}
/**
* Check if a region belongs to some small system table.
* If so, it may be expected to be put on the master regionserver.
@ -982,14 +934,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
return plans;
}
public void excludeServer(ServerName serverName) {
if (!usingBackupMasters) excludedServers.add(serverName);
}
public Set<ServerName> getExcludedServers() {
return excludedServers;
}
@Override
public Configuration getConf() {
return this.config;
@ -998,20 +942,12 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
@Override
public void setClusterStatus(ClusterStatus st) {
this.clusterStatus = st;
if (st == null || usingBackupMasters) return;
// Not assign any region to backup masters.
// Put them on the excluded server list.
// Assume there won't be too much backup masters
// re/starting, so this won't leak much memory.
excludedServers.addAll(st.getBackupMasters());
regionFinder.setClusterStatus(st);
}
@Override
public void setMasterServices(MasterServices masterServices) {
masterServerName = masterServices.getServerName();
excludedServers.remove(masterServerName);
this.services = masterServices;
this.regionFinder.setServices(masterServices);
}
@ -1020,13 +956,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
this.rackManager = rackManager;
}
protected Collection<ServerName> getBackupMasters() {
return clusterStatus == null ? null : clusterStatus.getBackupMasters();
}
protected boolean needsBalance(Cluster c) {
ClusterLoadState cs = new ClusterLoadState(
masterServerName, getBackupMasters(), backupMasterWeight, c.clusterState);
masterServerName, c.clusterState);
if (cs.getNumServers() < MIN_SERVER_BALANCE) {
if (LOG.isDebugEnabled()) {
LOG.debug("Not running balancer because only " + cs.getNumServers()
@ -1045,9 +977,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
if (LOG.isTraceEnabled()) {
// If nothing to balance, then don't say anything unless trace-level logging.
LOG.trace("Skipping load balancing because balanced cluster; " +
"servers=" + cs.getNumServers() + "(backupMasters=" + cs.getNumBackupMasters() +
") regions=" + cs.getNumRegions() + " average=" + average + " " +
"mostloaded=" + serversByLoad.lastKey().getLoad() +
"servers=" + cs.getNumServers() +
" regions=" + cs.getNumRegions() + " average=" + average +
" mostloaded=" + serversByLoad.lastKey().getLoad() +
" leastloaded=" + serversByLoad.firstKey().getLoad());
}
return false;
@ -1091,10 +1023,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
return null;
}
List<ServerName> backupMasters = normalizeServers(servers);
int numServers = servers == null ? 0 : servers.size();
int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
if (numServers == 0 && numBackupMasters == 0) {
if (numServers == 0) {
LOG.warn("Wanted to do round robin assignment but no servers to assign to");
return null;
}
@ -1105,40 +1035,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
// and balanced. This should also run fast with fewer number of iterations.
Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here
ServerName server = numServers > 0 ? servers.get(0) : backupMasters.get(0);
if (numServers == 1) { // Only one server, nothing fancy we can do here
ServerName server = servers.get(0);
assignments.put(server, new ArrayList<HRegionInfo>(regions));
return assignments;
}
List<HRegionInfo> masterRegions = null;
if (numServers > 0 && servers.contains(masterServerName)) {
if (servers.contains(masterServerName)) {
masterRegions = new ArrayList<HRegionInfo>();
if (numServers == 1) {
// The only server in servers is the master,
// Assign all regions to backup masters
numServers = 0;
}
}
Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster);
Cluster cluster = createCluster(servers, regions, tablesOnMaster);
List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>();
int total = regions.size();
// Get the number of regions to be assigned
// to backup masters based on the weight
int numRegions = total * numBackupMasters
/ (numServers * backupMasterWeight + numBackupMasters);
if (numRegions > 0) {
// backupMasters can't be null, according to the formula, numBackupMasters != 0
roundRobinAssignment(cluster, regions, unassignedRegions, 0,
numRegions, backupMasters, masterRegions, assignments);
}
int remainder = total - numRegions;
if (remainder > 0) {
// servers can't be null, or contains the master only since numServers != 0
roundRobinAssignment(cluster, regions, unassignedRegions, numRegions, remainder,
servers, masterRegions, assignments);
}
roundRobinAssignment(cluster, regions, unassignedRegions,
servers, masterRegions, assignments);
if (masterRegions != null && !masterRegions.isEmpty()) {
assignments.put(masterServerName, masterRegions);
for (HRegionInfo r : masterRegions) {
@ -1175,16 +1087,12 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
// just sprinkle the rest of the regions on random regionservers. The balanceCluster will
// make it optimal later. we can end up with this if numReplicas > numServers.
for (HRegionInfo region : lastFewRegions) {
ServerName server = null;
if (numServers == 0) {
// select from backup masters
int i = RANDOM.nextInt(backupMasters.size());
server = backupMasters.get(i);
} else {
do {
int i = RANDOM.nextInt(numServers);
server = servers.get(i);
} while (numServers > 1 && server.equals(masterServerName));
int i = RANDOM.nextInt(numServers);
ServerName server = servers.get(i);
if (server.equals(masterServerName)) {
// Try to avoid master for a user region
i = (i == 0 ? 1 : i - 1);
server = servers.get(i);
}
List<HRegionInfo> serverRegions = assignments.get(server);
if (serverRegions == null) {
@ -1198,7 +1106,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
}
protected Cluster createCluster(List<ServerName> servers,
Collection<HRegionInfo> regions, List<ServerName> backupMasters, Set<String> tablesOnMaster) {
Collection<HRegionInfo> regions, Set<String> tablesOnMaster) {
// Get the snapshot of the current assignments for the regions in question, and then create
// a cluster out of it. Note that we might have replicas already assigned to some servers
// earlier. So we want to get the snapshot to see those assignments, but this will only contain
@ -1210,7 +1118,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
clusterState.put(server, EMPTY_REGION_LIST);
}
}
return new Cluster(masterServerName, regions, clusterState, null, this.regionFinder, backupMasters,
return new Cluster(masterServerName, regions, clusterState, null, this.regionFinder,
tablesOnMaster, rackManager);
}
@ -1253,15 +1161,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
@Override
public ServerName randomAssignment(HRegionInfo regionInfo, List<ServerName> servers) {
metricsBalancer.incrMiscInvocations();
if (servers == null || servers.isEmpty()) {
LOG.warn("Wanted to do random assignment but no servers to assign to");
int numServers = servers == null ? 0 : servers.size();
if (numServers == 0) {
LOG.warn("Wanted to do retain assignment but no servers to assign to");
return null;
}
List<ServerName> backupMasters = normalizeServers(servers);
List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster);
if (numServers == 1) { // Only one server, nothing fancy we can do here
return servers.get(0);
}
if (shouldBeOnMaster(regionInfo)
&& servers.contains(masterServerName)) {
return masterServerName;
}
return randomAssignment(cluster, regionInfo, servers, backupMasters);
List<HRegionInfo> regions = Lists.newArrayList(regionInfo);
Cluster cluster = createCluster(servers, regions, tablesOnMaster);
return randomAssignment(cluster, regionInfo, servers);
}
/**
@ -1290,16 +1205,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
return null;
}
List<ServerName> backupMasters = normalizeServers(servers);
int numServers = servers == null ? 0 : servers.size();
int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
if (numServers == 0 && numBackupMasters == 0) {
if (numServers == 0) {
LOG.warn("Wanted to do retain assignment but no servers to assign to");
return null;
}
Map<ServerName, List<HRegionInfo>> assignments = new TreeMap<ServerName, List<HRegionInfo>>();
if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here
ServerName server = numServers > 0 ? servers.get(0) : backupMasters.get(0);
if (numServers == 1) { // Only one server, nothing fancy we can do here
ServerName server = servers.get(0);
assignments.put(server, new ArrayList<HRegionInfo>(regions.keySet()));
return assignments;
}
@ -1317,11 +1230,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
serversByHostname.put(server.getHostname(), server);
}
}
if (numBackupMasters > 0) {
for (ServerName server : backupMasters) {
assignments.put(server, new ArrayList<HRegionInfo>());
}
}
// Collection of the hostnames that used to have regions
// assigned, but for which we no longer have any RS running
@ -1334,7 +1242,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
int numRandomAssignments = 0;
int numRetainedAssigments = 0;
Cluster cluster = createCluster(servers, regions.keySet(), backupMasters, tablesOnMaster);
Cluster cluster = createCluster(servers, regions.keySet(), tablesOnMaster);
for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
HRegionInfo region = entry.getKey();
@ -1353,7 +1261,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
} else if (localServers.isEmpty()) {
// No servers on the new cluster match up with this hostname,
// assign randomly.
ServerName randomServer = randomAssignment(cluster, region, servers, backupMasters);
ServerName randomServer = randomAssignment(cluster, region, servers);
assignments.get(randomServer).add(region);
numRandomAssignments++;
if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname());
@ -1377,7 +1285,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
}
}
if (target == null) {
target = randomAssignment(cluster, region, localServers, backupMasters);
target = randomAssignment(cluster, region, localServers);
}
assignments.get(target).add(region);
}
@ -1423,78 +1331,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
}
/**
* Prepare the list of target regionservers so that it doesn't
* contain any excluded server, or backup master. Those backup masters
* used to be in the original list are returned.
*/
private List<ServerName> normalizeServers(List<ServerName> servers) {
if (servers == null) {
return null;
}
if (!excludedServers.isEmpty()) {
servers.removeAll(excludedServers);
}
Collection<ServerName> allBackupMasters = getBackupMasters();
List<ServerName> backupMasters = null;
if (allBackupMasters != null && !allBackupMasters.isEmpty()) {
for (ServerName server: allBackupMasters) {
if (!servers.contains(server)) {
// Ignore backup masters not included
continue;
}
servers.remove(server);
if (backupMasters == null) {
backupMasters = new ArrayList<ServerName>();
}
backupMasters.add(server);
}
}
return backupMasters;
}
/**
* Used to assign a single region to a random server. The input should
* have been already normalized: 1) servers doesn't include any exclude sever,
* 2) servers doesn't include any backup master, 3) backupMasters contains
* only backup masters that are intended to host this region, i.e, it
* may not have all the backup masters.
* Used to assign a single region to a random server.
*/
private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo,
List<ServerName> servers, List<ServerName> backupMasters) {
int numServers = servers == null ? 0 : servers.size();
int numBackupMasters = backupMasters == null ? 0 : backupMasters.size();
if (numServers == 0 && numBackupMasters == 0) {
LOG.warn("Wanted to do random assignment but no servers to assign to");
return null;
}
if (servers != null && shouldBeOnMaster(regionInfo)
&& servers.contains(masterServerName)) {
return masterServerName;
}
List<ServerName> servers) {
int numServers = servers.size(); // servers is not null, numServers > 1
ServerName sn = null;
final int maxIterations = servers.size() * 4;
final int maxIterations = numServers * 4;
int iterations = 0;
do {
// Generate a random number weighted more towards
// regular regionservers instead of backup masters.
// This formula is chosen for simplicity.
int i = RANDOM.nextInt(
numBackupMasters + numServers * backupMasterWeight);
if (i < numBackupMasters) {
sn = backupMasters.get(i);
continue;
}
i = (i - numBackupMasters)/backupMasterWeight;
int i = RANDOM.nextInt(numServers);
sn = servers.get(i);
if (sn.equals(masterServerName)) {
// Try to avoid master for a user region
if (numServers > 1) {
i = (i == 0 ? 1 : i - 1);
sn = servers.get(i);
} else if (numBackupMasters > 0) {
sn = backupMasters.get(0);
}
i = (i == 0 ? 1 : i - 1);
sn = servers.get(i);
}
} while (cluster.wouldLowerAvailability(regionInfo, sn)
&& iterations++ < maxIterations);
@ -1503,12 +1355,11 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
}
/**
* Round robin a chunk of a list of regions to a list of servers
* Round robin a list of regions to a list of servers
*/
private void roundRobinAssignment(Cluster cluster, List<HRegionInfo> regions,
List<HRegionInfo> unassignedRegions, int offset,
int numRegions, List<ServerName> servers, List<HRegionInfo> masterRegions,
Map<ServerName, List<HRegionInfo>> assignments) {
List<HRegionInfo> unassignedRegions, List<ServerName> servers,
List<HRegionInfo> masterRegions, Map<ServerName, List<HRegionInfo>> assignments) {
boolean masterIncluded = servers.contains(masterServerName);
int numServers = servers.size();
@ -1516,6 +1367,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
if (masterIncluded) {
skipServers--;
}
int numRegions = regions.size();
int max = (int) Math.ceil((float) numRegions / skipServers);
int serverIdx = 0;
if (numServers > 1) {
@ -1532,7 +1384,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
}
List<HRegionInfo> serverRegions = new ArrayList<HRegionInfo>(max);
for (int i = regionIdx; i < numRegions; i += skipServers) {
HRegionInfo region = regions.get(offset + i % numRegions);
HRegionInfo region = regions.get(i % numRegions);
if (masterRegions == null || !shouldBeOnMaster(region)) {
if (cluster.wouldLowerAvailability(region, server)) {
unassignedRegions.add(region);

View File

@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hbase.master.balancer;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
@ -35,12 +34,9 @@ public class ClusterLoadState {
private boolean emptyRegionServerPresent = false;
private int numRegions = 0;
private int numServers = 0;
private int numBackupMasters = 0;
private int backupMasterWeight;
public ClusterLoadState(ServerName master, Collection<ServerName> backupMasters,
int backupMasterWeight, Map<ServerName, List<HRegionInfo>> clusterState) {
this.backupMasterWeight = backupMasterWeight;
public ClusterLoadState(ServerName master,
Map<ServerName, List<HRegionInfo>> clusterState) {
this.numRegions = 0;
this.numServers = clusterState.size();
this.clusterState = clusterState;
@ -56,10 +52,6 @@ public class ClusterLoadState {
int sz = regions.size();
if (sz == 0) emptyRegionServerPresent = true;
numRegions += sz;
if (backupMasters != null && backupMasters.contains(server.getKey())) {
sz *= backupMasterWeight;
numBackupMasters++;
}
serversByLoad.put(new ServerAndLoad(server.getKey(), sz), regions);
}
}
@ -84,12 +76,8 @@ public class ClusterLoadState {
return numServers;
}
int getNumBackupMasters() {
return numBackupMasters;
}
float getLoadAverage() {
return numRegions / (numServers - numBackupMasters * (1 - 1.0f/backupMasterWeight));
return (float) numRegions / numServers;
}
int getMaxLoad() {

View File

@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.master.balancer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@ -187,17 +186,14 @@ public class SimpleLoadBalancer extends BaseLoadBalancer {
if (regionsToReturn != null) {
return regionsToReturn;
}
filterExcludedServers(clusterMap);
boolean emptyRegionServerPresent = false;
long startTime = System.currentTimeMillis();
Collection<ServerName> backupMasters = getBackupMasters();
ClusterLoadState cs = new ClusterLoadState(masterServerName,
backupMasters, backupMasterWeight, clusterMap);
ClusterLoadState cs = new ClusterLoadState(masterServerName, clusterMap);
// construct a Cluster object with clusterMap and rest of the
// argument as defaults
Cluster c = new Cluster(masterServerName, clusterMap, null, this.regionFinder,
getBackupMasters(), tablesOnMaster, this.rackManager);
tablesOnMaster, this.rackManager);
if (!this.needsBalance(c)) return null;
int numServers = cs.getNumServers();
@ -210,9 +206,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer {
// Using to check balance result.
StringBuilder strBalanceParam = new StringBuilder();
strBalanceParam.append("Balance parameter: numRegions=").append(numRegions)
.append(", numServers=").append(numServers).append(", numBackupMasters=")
.append(cs.getNumBackupMasters()).append(", backupMasterWeight=")
.append(backupMasterWeight).append(", max=").append(max)
.append(", numServers=").append(numServers).append(", max=").append(max)
.append(", min=").append(min);
LOG.debug(strBalanceParam.toString());
@ -238,11 +232,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer {
}
serversOverloaded++;
List<HRegionInfo> regions = server.getValue();
int w = 1; // Normal region server has weight 1
if (backupMasters != null && backupMasters.contains(sal.getServerName())) {
w = backupMasterWeight; // Backup master has heavier weight
}
int numToOffload = Math.min((load - max) / w, regions.size());
int numToOffload = Math.min(load - max, regions.size());
// account for the out-of-band regions which were assigned to this server
// after some other region server crashed
Collections.sort(regions, riComparator);
@ -282,12 +272,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer {
if (load >= min && load > 0) {
continue; // look for other servers which haven't reached min
}
int w = 1; // Normal region server has weight 1
if (backupMasters != null
&& backupMasters.contains(server.getKey().getServerName())) {
w = backupMasterWeight; // Backup master has heavier weight
}
int regionsToPut = (min - load) / w;
int regionsToPut = min - load;
if (regionsToPut == 0)
{
regionsToPut = 1;

View File

@ -157,7 +157,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
costFunctions = new CostFunction[]{
new RegionCountSkewCostFunction(conf, backupMasterWeight),
new RegionCountSkewCostFunction(conf),
new MoveCostFunction(conf),
localityCost,
new TableSkewCostFunction(conf),
@ -211,12 +211,11 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
if (plans != null) {
return plans;
}
filterExcludedServers(clusterState);
//The clusterState that is given to this method contains the state
//of all the regions in the table(s) (that's true today)
// Keep track of servers to iterate through them.
Cluster cluster = new Cluster(masterServerName,
clusterState, loads, regionFinder, getBackupMasters(), tablesOnMaster, rackManager);
clusterState, loads, regionFinder, tablesOnMaster, rackManager);
if (!needsBalance(cluster)) {
return null;
}
@ -437,7 +436,10 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
}
protected int pickOtherRandomServer(Cluster cluster, int serverIndex) {
if (cluster.numServers <= 2) {
if (cluster.numServers < 2) {
return -1;
}
if (cluster.activeMasterIndex != -1 && cluster.numServers == 2) {
return -1;
}
while (true) {
@ -527,10 +529,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
private int pickLeastLoadedServer(final Cluster cluster, int thisServer) {
Integer[] servers = cluster.serverIndicesSortedByRegionCount;
if (servers.length <= 2) {
return thisServer -1;
}
int index = 0;
while (servers[index] == null || servers[index] == thisServer
|| cluster.isActiveMaster(index)) {
@ -583,6 +581,10 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
// Pick the server with the highest locality
int otherServer = pickHighestLocalityServer(cluster, thisServer, thisRegion);
if (otherServer == -1) {
return Cluster.NullAction;
}
// pick an region on the other server to potentially swap
int otherRegion = this.pickRandomRegion(cluster, otherServer, 0.5f);
@ -799,7 +801,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
double total = getSum(stats);
double count = stats.length;
if (stats.length > 1 && cluster.masterServerName != null) {
if (stats.length > 1 && cluster.activeMasterIndex != -1) {
count--; // Exclude the active master
}
double mean = total/count;
@ -900,14 +902,12 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
"hbase.master.balancer.stochastic.regionCountCost";
private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500;
private double backupMasterWeight;
private double[] stats = null;
RegionCountSkewCostFunction(Configuration conf, double backupMasterWeight) {
RegionCountSkewCostFunction(Configuration conf) {
super(conf);
// Load multiplier should be the greatest as it is the most general way to balance data.
this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST));
this.backupMasterWeight = backupMasterWeight;
}
@Override
@ -918,11 +918,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer {
for (int i =0; i < cluster.numServers; i++) {
stats[i] = cluster.regionsPerServer[i].length;
// Use some weight on regions assigned to active/backup masters,
// so that they won't carry as many regions as normal regionservers.
if (cluster.isBackupMaster(i)) {
stats[i] *= backupMasterWeight;
}
}
return costFromArray(stats);
}

View File

@ -52,7 +52,6 @@ import javax.servlet.http.HttpServlet;
import org.apache.commons.lang.math.RandomUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -72,6 +71,7 @@ import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.YouAreDeadException;
import org.apache.hadoop.hbase.ZNodeClearer;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.ConnectionUtils;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
@ -325,7 +325,7 @@ public class HRegionServer extends HasThread implements
LogRoller metaHLogRoller;
// flag set after we're done setting up server threads
protected AtomicBoolean online;
final AtomicBoolean online = new AtomicBoolean(false);
// zookeeper connection and watcher
protected ZooKeeperWatcher zooKeeper;
@ -347,7 +347,7 @@ public class HRegionServer extends HasThread implements
private final RegionServerAccounting regionServerAccounting;
// Cache configuration and block cache reference
final CacheConfig cacheConfig;
protected CacheConfig cacheConfig;
/** The health check chore. */
private HealthCheckChore healthCheckChore;
@ -441,7 +441,6 @@ public class HRegionServer extends HasThread implements
this.fsOk = true;
this.conf = conf;
checkCodecs(this.conf);
this.online = new AtomicBoolean(false);
this.userProvider = UserProvider.instantiate(conf);
FSUtils.setupShortCircuitRead(this.conf);
@ -478,7 +477,6 @@ public class HRegionServer extends HasThread implements
login(userProvider, hostName);
regionServerAccounting = new RegionServerAccounting();
cacheConfig = new CacheConfig(conf);
uncaughtExceptionHandler = new UncaughtExceptionHandler() {
@Override
public void uncaughtException(Thread t, Throwable e) {
@ -530,6 +528,9 @@ public class HRegionServer extends HasThread implements
"hbase.regionserver.kerberos.principal", host);
}
protected void waitForMasterActive(){
}
protected String getProcessName() {
return REGIONSERVER;
}
@ -597,8 +598,26 @@ public class HRegionServer extends HasThread implements
*/
private void preRegistrationInitialization(){
try {
synchronized (this) {
if (shortCircuitConnection == null) {
shortCircuitConnection = createShortCircuitConnection();
metaTableLocator = new MetaTableLocator();
}
}
// Health checker thread.
if (isHealthCheckerConfigured()) {
int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
}
this.pauseMonitor = new JvmPauseMonitor(conf);
pauseMonitor.start();
initializeZooKeeper();
initializeThreads();
if (!isStopped() && !isAborted()) {
initializeThreads();
}
} catch (Throwable t) {
// Call stop if error or process will stick around for ever since server
// puts up non-daemon threads.
@ -619,8 +638,6 @@ public class HRegionServer extends HasThread implements
// Create the master address tracker, register with zk, and start it. Then
// block until a master is available. No point in starting up if no master
// running.
this.masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
this.masterAddressTracker.start();
blockAndCheckIfStopped(this.masterAddressTracker);
// Wait on cluster being up. Master will set this flag up in zookeeper
@ -640,11 +657,13 @@ public class HRegionServer extends HasThread implements
this.abort("Failed to retrieve Cluster ID",e);
}
synchronized (this) {
if (shortCircuitConnection == null) {
shortCircuitConnection = createShortCircuitConnection();
metaTableLocator = new MetaTableLocator();
}
// In case colocated master, wait here till it's active.
// So backup masters won't start as regionservers.
// This is to avoid showing backup masters as regionservers
// in master web UI, or assigning any region to them.
waitForMasterActive();
if (isStopped() || isAborted()) {
return; // No need for further initialization
}
// watch for snapshots and other procedures
@ -693,13 +712,6 @@ public class HRegionServer extends HasThread implements
// in a while. It will take care of not checking too frequently on store-by-store basis.
this.compactionChecker = new CompactionChecker(this, this.threadWakeFrequency, this);
this.periodicFlusher = new PeriodicMemstoreFlusher(this.threadWakeFrequency, this);
// Health checker thread.
int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ,
HConstants.DEFAULT_THREAD_WAKE_FREQUENCY);
if (isHealthCheckerConfigured()) {
healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration());
}
this.leases = new Leases(this.threadWakeFrequency);
// Create the thread to clean the moved regions list
@ -716,8 +728,6 @@ public class HRegionServer extends HasThread implements
// Setup RPC client for master communication
rpcClient = new RpcClient(conf, clusterId, new InetSocketAddress(
rpcServices.isa.getAddress(), 0));
this.pauseMonitor = new JvmPauseMonitor(conf);
pauseMonitor.start();
int storefileRefreshPeriod = conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD
, StorefileRefresherChore.DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD);
@ -836,7 +846,7 @@ public class HRegionServer extends HasThread implements
}
}
// Send cache a shutdown.
if (cacheConfig.isBlockCacheEnabled()) {
if (cacheConfig != null && cacheConfig.isBlockCacheEnabled()) {
cacheConfig.getBlockCache().shutdown();
}
@ -941,6 +951,7 @@ public class HRegionServer extends HasThread implements
try {
deleteMyEphemeralNode();
} catch (KeeperException.NoNodeException nn) {
} catch (KeeperException e) {
LOG.warn("Failed deleting my ephemeral node", e);
}
@ -1188,6 +1199,7 @@ public class HRegionServer extends HasThread implements
// Save it in a file, this will allow to see if we crash
ZNodeClearer.writeMyEphemeralNodeOnDisk(getMyEphemeralNodePath());
this.cacheConfig = new CacheConfig(conf);
this.hlog = setupWALAndReplication();
// Init in here rather than in constructor after thread name has been set
this.metricsRegionServer = new MetricsRegionServer(new MetricsRegionServerWrapperImpl(this));
@ -1198,6 +1210,8 @@ public class HRegionServer extends HasThread implements
", RpcServer on " + rpcServices.isa +
", sessionid=0x" +
Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId()));
// Wake up anyone waiting for this server to online
synchronized (online) {
online.set(true);
online.notifyAll();
@ -1582,10 +1596,6 @@ public class HRegionServer extends HasThread implements
}
}
// Start Server. This service is like leases in that it internally runs
// a thread.
rpcServices.rpcServer.start();
// Create the log splitting worker and start it
// set a smaller retries to fast fail otherwise splitlogworker could be blocked for
// quite a while inside HConnection layer. The worker won't be available for other
@ -1713,8 +1723,15 @@ public class HRegionServer extends HasThread implements
}
public void waitForServerOnline(){
while (!isOnline() && !isStopped()){
sleeper.sleep();
while (!isStopped() && !isOnline()) {
synchronized (online) {
try {
online.wait(msgInterval);
} catch (InterruptedException ie) {
Thread.currentThread().interrupt();
break;
}
}
}
}
@ -1976,12 +1993,11 @@ public class HRegionServer extends HasThread implements
}
ServerName sn = null;
long previousLogTime = 0;
RegionServerStatusService.BlockingInterface master = null;
boolean refresh = false; // for the first time, use cached data
RegionServerStatusService.BlockingInterface intf = null;
boolean interrupted = false;
try {
while (keepLooping() && master == null) {
while (keepLooping()) {
sn = this.masterAddressTracker.getMasterAddress(refresh);
if (sn == null) {
if (!keepLooping()) {

View File

@ -25,12 +25,10 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.HasThread;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Delayed;
import java.util.concurrent.DelayQueue;
import java.util.concurrent.TimeUnit;
import java.io.IOException;

View File

@ -18,14 +18,25 @@
*/
package org.apache.hadoop.hbase.regionserver;
import com.google.protobuf.ByteString;
import com.google.protobuf.Message;
import com.google.protobuf.RpcController;
import com.google.protobuf.ServiceException;
import com.google.protobuf.TextFormat;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScannable;
@ -43,6 +54,7 @@ import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownScannerException;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Append;
import org.apache.hadoop.hbase.client.ConnectionUtils;
import org.apache.hadoop.hbase.client.Delete;
@ -133,7 +145,6 @@ import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.RequestHeader;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.quotas.OperationQuota;
import org.apache.hadoop.hbase.quotas.RegionServerQuotaManager;
import org.apache.hadoop.hbase.quotas.ThrottlingException;
import org.apache.hadoop.hbase.regionserver.HRegion.Operation;
import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException;
import org.apache.hadoop.hbase.regionserver.handler.OpenMetaHandler;
@ -153,27 +164,17 @@ import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
import org.apache.hadoop.net.DNS;
import org.apache.zookeeper.KeeperException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import com.google.protobuf.ByteString;
import com.google.protobuf.Message;
import com.google.protobuf.RpcController;
import com.google.protobuf.ServiceException;
import com.google.protobuf.TextFormat;
/**
* Implements the regionserver RPC services.
*/
@InterfaceAudience.Private
@SuppressWarnings("deprecation")
public class RSRpcServices implements HBaseRPCErrorHandler,
AdminService.BlockingInterface, ClientService.BlockingInterface, PriorityFunction {
protected static final Log LOG = LogFactory.getLog(RSRpcServices.class);
@ -1137,7 +1138,6 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
* @throws ServiceException
*/
@Override
@SuppressWarnings("deprecation")
public GetServerInfoResponse getServerInfo(final RpcController controller,
final GetServerInfoRequest request) throws ServiceException {
try {
@ -1272,7 +1272,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
try {
while (System.currentTimeMillis() <= endTime
&& !regionServer.isStopped() && !regionServer.isOnline()) {
regionServer.online.wait(100);
regionServer.online.wait(regionServer.msgInterval);
}
checkOpen();
} catch (InterruptedException t) {
@ -1837,7 +1837,6 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
Result r = null;
Boolean processed = null;
MutationType type = mutation.getMutateType();
long mutationSize = 0;
quota = getQuotaManager().checkQuota(region, OperationQuota.OperationType.MUTATE);

View File

@ -23,15 +23,13 @@ import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.master.ServerManager;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo;
@ -54,12 +52,12 @@ public class RegionServerTracker extends ZooKeeperListener {
private NavigableMap<ServerName, RegionServerInfo> regionServers =
new TreeMap<ServerName, RegionServerInfo>();
private ServerManager serverManager;
private Abortable abortable;
private Server server;
public RegionServerTracker(ZooKeeperWatcher watcher,
Abortable abortable, ServerManager serverManager) {
Server server, ServerManager serverManager) {
super(watcher);
this.abortable = abortable;
this.server = server;
this.serverManager = serverManager;
}
@ -133,15 +131,16 @@ public class RegionServerTracker extends ZooKeeperListener {
@Override
public void nodeChildrenChanged(String path) {
if (path.equals(watcher.rsZNode)) {
if (path.equals(watcher.rsZNode)
&& !server.isAborted() && !server.isStopped()) {
try {
List<String> servers =
ZKUtil.listChildrenAndWatchThem(watcher, watcher.rsZNode);
add(servers);
} catch (IOException e) {
abortable.abort("Unexpected zk exception getting RS nodes", e);
server.abort("Unexpected zk exception getting RS nodes", e);
} catch (KeeperException e) {
abortable.abort("Unexpected zk exception getting RS nodes", e);
server.abort("Unexpected zk exception getting RS nodes", e);
}
}
}

View File

@ -49,12 +49,12 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Jdk14Logger;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Waiter.Predicate;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
@ -77,6 +77,7 @@ import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.ChecksumUtil;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.ipc.RpcServerInterface;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.RegionStates;
@ -2954,6 +2955,8 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
}
} catch (RegionServerStoppedException e) {
// That's fine.
} catch (ServerNotRunningYetException e) {
// That's fine.
}
}
return online;

View File

@ -26,6 +26,7 @@ import static org.junit.Assert.fail;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ThreadPoolExecutor;
@ -36,6 +37,8 @@ import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.exceptions.OperationConflictException;
import org.apache.hadoop.hbase.ipc.RpcClient;
@ -151,10 +154,15 @@ public class TestMultiParallel {
Table table = new HTable(UTIL.getConfiguration(), TEST_TABLE);
List<Row> puts = constructPutRequests(); // creates a Put for every region
table.batch(puts);
HashSet<ServerName> regionservers = new HashSet<ServerName>();
for (byte[] k : KEYS) {
HRegionLocation location = ((HTable)table).getRegionLocation(k);
regionservers.add(location.getServerName());
}
Field poolField = table.getClass().getDeclaredField("pool");
poolField.setAccessible(true);
ThreadPoolExecutor tExecutor = (ThreadPoolExecutor) poolField.get(table);
assertEquals(slaves, tExecutor.getLargestPoolSize());
assertEquals(regionservers.size(), tExecutor.getLargestPoolSize());
table.close();
}

View File

@ -82,6 +82,7 @@ import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager;
import org.apache.hadoop.hbase.coordination.ZKSplitLogManagerCoordination;
import org.apache.hadoop.hbase.exceptions.OperationConflictException;
import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException;
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
import org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
@ -1461,7 +1462,13 @@ public class TestDistributedLogSplitting {
for (MasterThread mt : cluster.getLiveMasterThreads()) {
HRegionServer hrs = mt.getMaster();
List<HRegionInfo> hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
List<HRegionInfo> hris;
try {
hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices());
} catch (ServerNotRunningYetException e) {
// It's ok: this master may be a backup. Ignored.
continue;
}
for (HRegionInfo hri : hris) {
if (hri.getTable().isSystemTable()) {
continue;

View File

@ -151,7 +151,7 @@ public class TestMasterFailover {
assertEquals(2, masterThreads.size());
int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize();
LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers");
assertEquals(5, rsCount);
assertEquals(4, rsCount);
// Check that ClusterStatus reports the correct active and backup masters
assertNotNull(active);

View File

@ -215,7 +215,7 @@ public class BalancerTestBase {
protected BaseLoadBalancer.Cluster mockCluster(int[] mockCluster) {
return new BaseLoadBalancer.Cluster(null,
mockClusterServers(mockCluster, -1), null, null, null, null, null);
mockClusterServers(mockCluster, -1), null, null, null, null);
}
protected TreeMap<ServerName, List<HRegionInfo>> mockClusterServers(int[] mockCluster, int numTables) {

View File

@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.master.balancer;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@ -37,18 +36,16 @@ import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.MoveRegionAction;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
@ -270,7 +267,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
// cluster is created (constructor code) would make sure the indices of
// the servers are in the order in which it is inserted in the clusterState
// map (linkedhashmap is important). A similar thing applies to the region lists
Cluster cluster = new Cluster(master, clusterState, null, null, null, null, rackManager);
Cluster cluster = new Cluster(master, clusterState, null, null, null, rackManager);
// check whether a move of region1 from servers[0] to servers[1] would lower
// the availability of region1
assertTrue(cluster.wouldLowerAvailability(hri1, servers[1]));
@ -287,7 +284,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
// now lets have servers[1] host replica_of_region2
list1.add(RegionReplicaUtil.getRegionInfoForReplica(hri3, 1));
// create a new clusterState with the above change
cluster = new Cluster(master, clusterState, null, null, null, null, rackManager);
cluster = new Cluster(master, clusterState, null, null, null, rackManager);
// now check whether a move of a replica from servers[0] to servers[1] would lower
// the availability of region2
assertTrue(cluster.wouldLowerAvailability(hri3, servers[1]));
@ -299,14 +296,14 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
clusterState.put(servers[6], list2); //servers[6], rack2 hosts region2
clusterState.put(servers[10], new ArrayList<HRegionInfo>()); //servers[10], rack3 hosts no region
// create a cluster with the above clusterState
cluster = new Cluster(master, clusterState, null, null, null, null, rackManager);
cluster = new Cluster(master, clusterState, null, null, null, rackManager);
// check whether a move of region1 from servers[0],rack1 to servers[6],rack2 would
// lower the availability
assertTrue(cluster.wouldLowerAvailability(hri1, servers[0]));
// now create a cluster without the rack manager
cluster = new Cluster(master, clusterState, null, null, null, null, null);
cluster = new Cluster(master, clusterState, null, null, null, null);
// now repeat check whether a move of region1 from servers[0] to servers[6] would
// lower the availability
assertTrue(!cluster.wouldLowerAvailability(hri1, servers[6]));
@ -339,7 +336,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
// cluster is created (constructor code) would make sure the indices of
// the servers are in the order in which it is inserted in the clusterState
// map (linkedhashmap is important).
Cluster cluster = new Cluster(master, clusterState, null, null, null, null, rackManager);
Cluster cluster = new Cluster(master, clusterState, null, null, null, rackManager);
// check whether moving region1 from servers[1] to servers[2] would lower availability
assertTrue(!cluster.wouldLowerAvailability(hri1, servers[2]));
@ -359,7 +356,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
clusterState.put(servers[6], list2); //servers[6], rack2 hosts region2
clusterState.put(servers[12], list3); //servers[12], rack3 hosts replica_of_region2
// create a cluster with the above clusterState
cluster = new Cluster(master, clusterState, null, null, null, null, rackManager);
cluster = new Cluster(master, clusterState, null, null, null, rackManager);
// check whether a move of replica_of_region2 from servers[12],rack3 to servers[0],rack1 would
// lower the availability
assertTrue(!cluster.wouldLowerAvailability(hri4, servers[0]));
@ -445,7 +442,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
assignRegions(regions, oldServers, clusterState);
// should not throw exception:
BaseLoadBalancer.Cluster cluster = new Cluster(null, clusterState, null, null, null, null, null);
BaseLoadBalancer.Cluster cluster = new Cluster(null, clusterState, null, null, null, null);
assertEquals(101 + 9, cluster.numRegions);
assertEquals(10, cluster.numServers); // only 10 servers because they share the same host + port
}
@ -487,7 +484,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
when(locationFinder.getTopBlockLocations(regions.get(43))).thenReturn(
Lists.newArrayList(ServerName.valueOf("foo", 0, 0))); // this server does not exists in clusterStatus
BaseLoadBalancer.Cluster cluster = new Cluster(null, clusterState, null, locationFinder, null, null, null);
BaseLoadBalancer.Cluster cluster = new Cluster(null, clusterState, null, locationFinder, null, null);
int r0 = ArrayUtils.indexOf(cluster.regions, regions.get(0)); // this is ok, it is just a test
int r1 = ArrayUtils.indexOf(cluster.regions, regions.get(1));
@ -523,49 +520,4 @@ public class TestBaseLoadBalancer extends BalancerTestBase {
assertEquals(1, cluster.regionLocations[r43].length);
assertEquals(-1, cluster.regionLocations[r43][0]);
}
@Test
public void testBackupMastersExcluded() throws HBaseIOException {
ClusterStatus st = Mockito.mock(ClusterStatus.class);
ArrayList<ServerName> backupMasters = new ArrayList<ServerName>();
ServerName backupMaster = ServerName.valueOf("fake-backupmaster", 0, 1L);
backupMasters.add(backupMaster);
BaseLoadBalancer balancer = (BaseLoadBalancer)loadBalancer;
balancer.usingBackupMasters = false;
Mockito.when(st.getBackupMasters()).thenReturn(backupMasters);
loadBalancer.setClusterStatus(st);
assertEquals(1, balancer.excludedServers.size());
assertTrue(balancer.excludedServers.contains(backupMaster));
// Round robin assignment
List<HRegionInfo> regions = randomRegions(1);
HRegionInfo region = regions.get(0);
assertNull(loadBalancer.randomAssignment(region, backupMasters));
assertNull(loadBalancer.roundRobinAssignment(regions, backupMasters));
HashMap<HRegionInfo, ServerName> assignments = new HashMap<HRegionInfo, ServerName>();
assignments.put(region, backupMaster);
assertNull(loadBalancer.retainAssignment(assignments, backupMasters));
ArrayList<ServerName> servers = new ArrayList<ServerName>(backupMasters);
ServerName sn = ServerName.valueOf("fake-rs", 0, 1L);
servers.add(sn);
assertEquals(sn, loadBalancer.randomAssignment(region, servers));
Map<ServerName, List<HRegionInfo>> plans =
loadBalancer.roundRobinAssignment(regions, servers);
assertEquals(1, plans.size());
assertTrue(plans.get(sn).contains(region));
// Retain assignment
plans = loadBalancer.retainAssignment(assignments, servers);
assertEquals(1, plans.size());
assertTrue(plans.get(sn).contains(region));
// Filter backup masters for balance cluster
Map<ServerName, List<HRegionInfo>> clusterMap =
new HashMap<ServerName, List<HRegionInfo>>();
clusterMap.put(backupMaster, new ArrayList<HRegionInfo>());
clusterMap.put(sn, new ArrayList<HRegionInfo>());
balancer.filterExcludedServers(clusterMap);
assertTrue(clusterMap.containsKey(sn));
assertEquals(1, clusterMap.size());
}
}

View File

@ -17,14 +17,8 @@
*/
package org.apache.hadoop.hbase.master.balancer;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -32,7 +26,6 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.testclassification.MasterTests;
@ -40,7 +33,6 @@ import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
/**
* Test the load balancer that is created by default.
@ -134,43 +126,4 @@ public class TestDefaultLoadBalancer extends BalancerTestBase {
}
}
}
@Test
public void testBalancerClusterWithBackupMaster() throws Exception {
SimpleLoadBalancer balancer = Mockito.spy(new SimpleLoadBalancer());
balancer.setConf(HBaseConfiguration.create());
List<ServerName> backupMasters = new ArrayList<ServerName>();
ServerName backupMaster = ServerName.parseServerName("backup:1:1");
ServerName rs = ServerName.parseServerName("rs:1:1");
backupMasters.add(backupMaster);
Mockito.doReturn(backupMasters).when(balancer).getBackupMasters();
Map<ServerName, List<HRegionInfo>> servers = new TreeMap<ServerName, List<HRegionInfo>>();
List<HRegionInfo> regions = new ArrayList<HRegionInfo>();
TableName table = TableName.valueOf("test");
regions.add(new HRegionInfo(table));
servers.put(backupMaster, regions);
regions = new ArrayList<HRegionInfo>();
balancer.backupMasterWeight = 4;
for (int i=0; i<4; i++) {
regions.add(new HRegionInfo(table));
}
servers.put(rs, regions);
List<RegionPlan> plans = balancer.balanceCluster(servers);
assertNull(plans);
// Reset the cluster map
regions = new ArrayList<HRegionInfo>();
for (int i=0; i<2; i++) {
regions.add(new HRegionInfo(table));
}
servers.put(backupMaster, regions);
regions = new ArrayList<HRegionInfo>();
for (int i=0; i<3; i++) {
regions.add(new HRegionInfo(table));
}
servers.put(rs, regions);
plans = balancer.balanceCluster(servers);
assertNotNull(plans);
assertEquals(1, plans.size());
}
}

View File

@ -194,7 +194,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
public void testSkewCost() {
Configuration conf = HBaseConfiguration.create();
StochasticLoadBalancer.CostFunction
costFunction = new StochasticLoadBalancer.RegionCountSkewCostFunction(conf, 1);
costFunction = new StochasticLoadBalancer.RegionCountSkewCostFunction(conf);
for (int[] mockCluster : clusterStateMocks) {
costFunction.init(mockCluster(mockCluster));
double cost = costFunction.cost();
@ -319,7 +319,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
BaseLoadBalancer.Cluster cluster;
cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null, null);
cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null);
costFunction.init(cluster);
double costWithoutReplicas = costFunction.cost();
assertEquals(0, costWithoutReplicas, 0);
@ -329,7 +329,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
clusterState.firstEntry().getValue().get(0),1);
clusterState.lastEntry().getValue().add(replica1);
cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null, null);
cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null);
costFunction.init(cluster);
double costWith1ReplicaDifferentServer = costFunction.cost();
@ -339,7 +339,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
HRegionInfo replica2 = RegionReplicaUtil.getRegionInfoForReplica(replica1, 2);
clusterState.lastEntry().getValue().add(replica2);
cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null, null);
cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null);
costFunction.init(cluster);
double costWith1ReplicaSameServer = costFunction.cost();
@ -362,7 +362,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
entry.getValue().add(replica2);
it.next().getValue().add(replica3); //2nd server
cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null, null);
cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null);
costFunction.init(cluster);
double costWith3ReplicasSameServer = costFunction.cost();
@ -376,7 +376,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
clusterState.lastEntry().getValue().add(replica2);
clusterState.lastEntry().getValue().add(replica3);
cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null, null);
cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null);
costFunction.init(cluster);
double costWith2ReplicasOnTwoServers = costFunction.cost();
@ -396,7 +396,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
// until the step above s1 holds two replicas of a region
regions = randomRegions(1);
map.put(s2, regions);
assertTrue(loadBalancer.needsBalance(new Cluster(master, map, null, null, null, null, null)));
assertTrue(loadBalancer.needsBalance(new Cluster(master, map, null, null, null, null)));
// check for the case where there are two hosts on the same rack and there are two racks
// and both the replicas are on the same rack
map.clear();
@ -407,7 +407,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
map.put(s2, regionsOnS2);
// add another server so that the cluster has some host on another rack
map.put(ServerName.valueOf("host2", 1000, 11111), randomRegions(1));
assertTrue(loadBalancer.needsBalance(new Cluster(master, map, null, null, null, null,
assertTrue(loadBalancer.needsBalance(new Cluster(master, map, null, null, null,
new ForTestRackManagerOne())));
}

View File

@ -348,22 +348,13 @@ $
<title>Configure HBase.</title>
<para>
Edit the <filename>hbase-site.xml</filename> configuration. First, add the following
properties. Property <code>hbase.cluster.distributed</code> is set to <code>true</code>
(Its default is <code>false</code>), which directs HBase to run in distributed mode,
with one JVM instance per daemon. Since HBase version 1.0.0, a HMaster is also a
RegionServer. So in pseudo-distributed mode, just one HMaster (also a RegionServer)
instance is started by default. Because there is just one RegionServer (the HMaster),
property <code>hbase.master.wait.on.regionservers.mintostart</code> should be set to
<code>1</code> (Its default is changed to <code>2</code> since version 1.0.0).
property. which directs HBase to run in distributed mode, with one JVM instance per
daemon.
</para>
<programlisting language="xml"><![CDATA[
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.master.wait.on.regionservers.mintostart</name>
<value>1</value>
</property>
]]></programlisting>
<para>Next, change the <code>hbase.rootdir</code> from the local filesystem to the address