diff --git a/bin/regionservers.sh b/bin/regionservers.sh
index a86322de9b0..e2af59828e1 100755
--- a/bin/regionservers.sh
+++ b/bin/regionservers.sh
@@ -59,17 +59,22 @@ if [ "$HOSTLIST" = "" ]; then
   fi
 fi
 
-for regionserver in `cat "$HOSTLIST"`; do
-  if ${HBASE_SLAVE_PARALLEL:-true}; then
-    ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
-      2>&1 | sed "s/^/$regionserver: /" &
-  else # run each command serially
-    ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
-      2>&1 | sed "s/^/$regionserver: /"
-  fi
-  if [ "$HBASE_SLAVE_SLEEP" != "" ]; then
-    sleep $HBASE_SLAVE_SLEEP
-  fi
-done
+regionservers=`cat "$HOSTLIST"`
+if [ "$regionservers" = "localhost" ]; then
+  "$bin"/local-regionservers.sh start 1
+else
+  for regionserver in `cat "$HOSTLIST"`; do
+    if ${HBASE_SLAVE_PARALLEL:-true}; then
+      ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
+        2>&1 | sed "s/^/$regionserver: /" &
+    else # run each command serially
+      ssh $HBASE_SSH_OPTS $regionserver $"${@// /\\ }" \
+        2>&1 | sed "s/^/$regionserver: /"
+    fi
+    if [ "$HBASE_SLAVE_SLEEP" != "" ]; then
+      sleep $HBASE_SLAVE_SLEEP
+    fi
+  done
+fi
 
 wait
diff --git a/conf/regionservers b/conf/regionservers
index e69de29bb2d..2fbb50c4a8d 100644
--- a/conf/regionservers
+++ b/conf/regionservers
@@ -0,0 +1 @@
+localhost
diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml
index 1362693572e..174efacf0f3 100644
--- a/hbase-common/src/main/resources/hbase-default.xml
+++ b/hbase-common/src/main/resources/hbase-default.xml
@@ -560,15 +560,6 @@ possible configurations would overwhelm and obscure the important.
     <value>300000</value>
     <description>Period at which the region balancer runs in the Master.</description>
   </property>
-  <property>
-    <name>hbase.balancer.backupMasterWeight</name>
-    <value>1</value>
-    <description>Used to control how many regions the region balancer can assign to
-    backup Masters, compared to normal region servers. The default value 1 means a
-    backup Master can host as many regions as a normal region server. The bigger the
-    weight, the less the regions a backup Master will host. If the weight is less than 1,
-    the balancer will not assign any region to any backup Master</description>
-  </property>
   <property>
     <name>hbase.regions.slop</name>
     <value>0.2</value>
diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon
index 4d491448253..0dc6245d4f3 100644
--- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon
+++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/BackupMasterStatusTmpl.jamon
@@ -24,6 +24,7 @@ java.util.*;
 org.apache.hadoop.hbase.ServerName;
 org.apache.hadoop.hbase.ClusterStatus;
 org.apache.hadoop.hbase.master.HMaster;
+org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
 </%import>
 <%java>
 Collection<ServerName> masters = null;
@@ -32,7 +33,9 @@ if (master.isActiveMaster()) {
   ClusterStatus status = master.getClusterStatus();
   masters = status.getBackupMasters();
 } else{
-  ServerName sn = master.getMasterAddressTracker().getMasterAddress();
+  MasterAddressTracker masterAddressTracker = master.getMasterAddressTracker();
+  ServerName sn = masterAddressTracker == null ?
null + : masterAddressTracker.getMasterAddress(); assert sn != null : "Failed to retreive master's ServerName!"; masters = Collections.singletonList(sn); } diff --git a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon index c5002b59b6b..523d1b9beba 100644 --- a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon +++ b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheViewTmpl.jamon @@ -40,8 +40,8 @@ org.apache.hadoop.util.StringUtils; com.yammer.metrics.stats.Snapshot; <%java> - BlockCache bc = cacheConfig.getBlockCache(); - BlockCache [] bcs = bc.getBlockCaches(); + BlockCache bc = cacheConfig == null ? null : cacheConfig.getBlockCache(); + BlockCache [] bcs = bc == null ? null : bc.getBlockCaches(); if (bcn.equals("L1")) { bc = bcs == null || bcs.length == 0? bc: bcs[0]; } else { @@ -51,6 +51,10 @@ com.yammer.metrics.stats.Snapshot; } bc = bcs[1]; } + if (bc == null) { + System.out.println("There is no block cache"); + return; + } CachedBlocksByFile cbsbf = BlockCacheUtil.getLoadedCachedBlocksByFile(conf, bc); <%if bcv.equals("file") %><& bc_by_file; cbsbf = cbsbf; &><%else>[ <% BlockCacheUtil.toJSON(bc) %>, <% BlockCacheUtil.toJSON(cbsbf) %> ] diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index c09f23ea639..dd837d35864 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -42,7 +42,6 @@ import javax.servlet.http.HttpServletResponse; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.ClusterStatus; @@ -67,6 +66,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableNotDisabledException; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.UnknownRegionException; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.MetaScanner; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor; import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase; @@ -80,6 +80,7 @@ import org.apache.hadoop.hbase.ipc.RpcServer; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.master.MasterRpcServices.BalanceSwitchMode; import org.apache.hadoop.hbase.master.balancer.BalancerChore; +import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore; import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory; import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; @@ -226,6 +227,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { // monitor for distributed procedures MasterProcedureManagerHost mpmHost; + // A flag to indicate if any table is configured to put on the active master + protected final boolean tablesOnMaster; + private MasterQuotaManager quotaManager; // handle table states @@ -287,6 +291,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server { 
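// A minimal sketch of how the tablesOnMaster flag declared in the hunk above is
// derived in this constructor and what it gates later in the patch. getTablesOnMaster
// is the helper added to BaseLoadBalancer; the surrounding wiring here is illustrative.
String[] tables = BaseLoadBalancer.getTablesOnMaster(conf);
boolean tablesOnMaster = tables != null && tables.length > 0;
// true  -> the active master also hosts those tables' regions, so it keeps the
//          regionserver info servlets and ServerManager expects one more regionserver;
// false -> the regionserver-facing setup on the master is skipped.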
this.masterCheckCompression = conf.getBoolean("hbase.master.check.compression", true); this.metricsMaster = new MetricsMaster( new MetricsMasterWrapperImpl(this)); + String[] tablesOnMaster = BaseLoadBalancer.getTablesOnMaster(conf); + this.tablesOnMaster = tablesOnMaster != null && tablesOnMaster.length > 0; // Do we publish the status? boolean shouldPublish = conf.getBoolean(HConstants.STATUS_PUBLISHED, @@ -349,6 +355,18 @@ public class HMaster extends HRegionServer implements MasterServices, Server { } } + /** + * If configured to put regions on active master, + * wait till a backup master becomes active. + * Otherwise, loop till the server is stopped or aborted. + */ + protected void waitForMasterActive(){ + while (!(tablesOnMaster && isActiveMaster) + && !isStopped() && !isAborted()) { + sleeper.sleep(); + } + } + @VisibleForTesting public MasterRpcServices getMasterRpcServices() { return (MasterRpcServices)rpcServices; @@ -377,7 +395,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { protected void configureInfoServer() { infoServer.addServlet("master-status", "/master-status", MasterStatusServlet.class); infoServer.setAttribute(MASTER, this); - super.configureInfoServer(); + if (tablesOnMaster) { + super.configureInfoServer(); + } } protected Class getDumpServlet() { @@ -563,10 +583,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server { this.initializationBeforeMetaAssignment = true; // Wait for regionserver to finish initialization. - synchronized (online) { - while (!isStopped() && !isOnline()) { - online.wait(100); - } + if (tablesOnMaster) { + waitForServerOnline(); } //initialize load balancer @@ -1596,6 +1614,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server { @Override public void abort(final String msg, final Throwable t) { + if (isAborted() || isStopped()) { + return; + } if (cpHost != null) { // HBASE-4014: dump a list of loaded coprocessors. LOG.fatal("Master server abort: loaded coprocessors are: " + diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java index e54b65c8c43..d01e61819f2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java @@ -32,10 +32,10 @@ import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CoordinatedStateManager; import org.apache.hadoop.hbase.CoordinatedStateManagerFactory; -import org.apache.hadoop.hbase.MasterNotRunningException; -import org.apache.hadoop.hbase.ZNodeClearer; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.LocalHBaseCluster; +import org.apache.hadoop.hbase.MasterNotRunningException; +import org.apache.hadoop.hbase.ZNodeClearer; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.HBaseAdmin; @@ -154,7 +154,6 @@ public class HMasterCommandLine extends ServerCommandLine { // and regionserver both in the one JVM. 
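// A composite sketch of the startup hook introduced by this patch: HRegionServer gains
// a no-op waitForMasterActive() (see the regionserver hunks further down) and HMaster,
// in the hunk above, overrides it so a backup master parks here instead of registering
// itself as a regionserver. The class names below are placeholders for illustration.
class SketchRegionServer {
  protected void waitForMasterActive() { }              // base class: never waits
}
class SketchMaster extends SketchRegionServer {
  volatile boolean tablesOnMaster, activeMaster, stopped, aborted;
  @Override
  protected void waitForMasterActive() {
    // Same condition as HMaster: proceed only once this master is active and is
    // configured to carry regions; otherwise loop until stopped or aborted.
    while (!(tablesOnMaster && activeMaster) && !stopped && !aborted) {
      Thread.yield();                                    // stands in for sleeper.sleep()
    }
  }
}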
if (LocalHBaseCluster.isLocal(conf)) { DefaultMetricsSystem.setMiniClusterMode(true); - conf.setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1); final MiniZooKeeperCluster zooKeeperCluster = new MiniZooKeeperCluster(conf); File zkDataPath = new File(conf.get(HConstants.ZOOKEEPER_DATA_DIR)); int zkClientPort = conf.getInt(HConstants.ZOOKEEPER_CLIENT_PORT, 0); @@ -183,7 +182,7 @@ public class HMasterCommandLine extends ServerCommandLine { // Need to have the zk cluster shutdown when master is shutdown. // Run a subclass that does the zk cluster shutdown on its way out. LocalHBaseCluster cluster = new LocalHBaseCluster(conf, conf.getInt("hbase.masters", 1), - conf.getInt("hbase.regionservers", 0), LocalHMaster.class, HRegionServer.class); + conf.getInt("hbase.regionservers", 1), LocalHMaster.class, HRegionServer.class); ((LocalHMaster)cluster.getMaster(0)).setZKCluster(zooKeeperCluster); cluster.startup(); waitOnMasterThreads(cluster); @@ -208,6 +207,7 @@ public class HMasterCommandLine extends ServerCommandLine { return 0; } + @SuppressWarnings("resource") private int stopMaster() { Admin adm = null; try { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java index 96d14093861..9a369188eb2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java @@ -170,6 +170,7 @@ import com.google.protobuf.ServiceException; * Implements the master RPC services. */ @InterfaceAudience.Private +@SuppressWarnings("deprecation") public class MasterRpcServices extends RSRpcServices implements MasterService.BlockingInterface, RegionServerStatusService.BlockingInterface { protected static final Log LOG = LogFactory.getLog(MasterRpcServices.class.getName()); @@ -1097,7 +1098,6 @@ public class MasterRpcServices extends RSRpcServices * */ @Override - @SuppressWarnings("deprecation") public OfflineRegionResponse offlineRegion(RpcController controller, OfflineRegionRequest request) throws ServiceException { final byte [] regionName = request.getRegion().getValue().toByteArray(); @@ -1227,7 +1227,6 @@ public class MasterRpcServices extends RSRpcServices } @Override - @SuppressWarnings("deprecation") public UnassignRegionResponse unassignRegion(RpcController controller, UnassignRegionRequest req) throws ServiceException { try { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 6b259eb1eb9..dd18ca84be8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -49,7 +49,6 @@ import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; import org.apache.hadoop.hbase.client.RetriesExhaustedException; -import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler; import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; import org.apache.hadoop.hbase.monitoring.MonitoredTask; @@ -95,6 +94,7 @@ import com.google.protobuf.ServiceException; * and has completed the handling. 
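// A small sketch of what the HMasterCommandLine change above means for local mode:
// the single-JVM cluster now starts one regionserver next to the master (the default
// was previously 0, with the master expected to carry all regions itself). The names
// mirror that hunk and the values are its new defaults; this is illustrative only.
LocalHBaseCluster cluster = new LocalHBaseCluster(conf,
    conf.getInt("hbase.masters", 1),
    conf.getInt("hbase.regionservers", 1),   // default raised from 0 to 1
    LocalHMaster.class, HRegionServer.class);
((LocalHMaster) cluster.getMaster(0)).setZKCluster(zooKeeperCluster);
cluster.startup();
waitOnMasterThreads(cluster);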
*/ @InterfaceAudience.Private +@SuppressWarnings("deprecation") public class ServerManager { public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART = "hbase.master.wait.on.regionservers.maxtostart"; @@ -142,8 +142,6 @@ public class ServerManager { private final long maxSkew; private final long warningSkew; - private final boolean checkingBackupMaster; - private BaseLoadBalancer balancer; /** * Set of region servers which are dead but not processed immediately. If one @@ -203,18 +201,6 @@ public class ServerManager { maxSkew = c.getLong("hbase.master.maxclockskew", 30000); warningSkew = c.getLong("hbase.master.warningclockskew", 10000); this.connection = connect ? HConnectionManager.getConnection(c) : null; - - // Put this in constructor so we don't cast it every time - // - // We need to check if a newly added server is a backup master - // only if we are configured not to assign any region to it. - checkingBackupMaster = (master instanceof HMaster) - && ((HMaster)master).balancer instanceof BaseLoadBalancer - && (c.getInt(BaseLoadBalancer.BACKUP_MASTER_WEIGHT_KEY, - BaseLoadBalancer.DEFAULT_BACKUP_MASTER_WEIGHT) < 1); - if (checkingBackupMaster) { - balancer = (BaseLoadBalancer)((HMaster)master).balancer; - } } /** @@ -419,18 +405,6 @@ public class ServerManager { @VisibleForTesting void recordNewServerWithLock(final ServerName serverName, final ServerLoad sl) { LOG.info("Registering server=" + serverName); - if (checkingBackupMaster) { - ZooKeeperWatcher zooKeeper = master.getZooKeeper(); - String backupZNode = ZKUtil.joinZNode( - zooKeeper.backupMasterAddressesZNode, serverName.toString()); - try { - if (ZKUtil.checkExists(zooKeeper, backupZNode) != -1) { - balancer.excludeServer(serverName); - } - } catch (KeeperException e) { - master.abort("Failed to check if a new server a backup master", e); - } - } this.onlineServers.put(serverName, sl); this.rsAdmins.remove(serverName); } @@ -468,19 +442,10 @@ public class ServerManager { (double)totalLoad / (double)numServers; } - /** - * Get the count of active regionservers that are not backup - * masters. This count may not be accurate depending on timing. - * @return the count of active regionservers - */ + /** @return the count of active regionservers */ private int countOfRegionServers() { // Presumes onlineServers is a concurrent map - int count = this.onlineServers.size(); - if (balancer != null) { - count -= balancer.getExcludedServers().size(); - if (count < 0) count = 0; - } - return count; + return this.onlineServers.size(); } /** @@ -535,7 +500,7 @@ public class ServerManager { try { List servers = ZKUtil.listChildrenNoWatch(zkw, zkw.rsZNode); - if (servers == null || (servers.size() == 1 + if (servers == null || servers.size() == 0 || (servers.size() == 1 && servers.contains(sn.toString()))) { LOG.info("ZK shows there is only the master self online, exiting now"); // Master could have lost some ZK events, no need to wait more. @@ -854,7 +819,6 @@ public class ServerManager { * @throws IOException * @throws RetriesExhaustedException wrapping a ConnectException if failed */ - @SuppressWarnings("deprecation") private AdminService.BlockingInterface getRsAdmin(final ServerName sn) throws IOException { AdminService.BlockingInterface admin = this.rsAdmins.get(sn); @@ -890,8 +854,16 @@ public class ServerManager { getLong(WAIT_ON_REGIONSERVERS_INTERVAL, 1500); final long timeout = this.master.getConfiguration(). 
getLong(WAIT_ON_REGIONSERVERS_TIMEOUT, 4500); + int defaultMinToStart = 1; + if (((HMaster)services).tablesOnMaster) { + // If we assign regions to master, we'd like to start + // at least another region server so that we don't + // assign all regions to master if other region servers + // don't come up in time. + defaultMinToStart = 2; + } int minToStart = this.master.getConfiguration(). - getInt(WAIT_ON_REGIONSERVERS_MINTOSTART, 2); + getInt(WAIT_ON_REGIONSERVERS_MINTOSTART, defaultMinToStart); if (minToStart < 1) { LOG.warn(String.format( "The value of '%s' (%d) can not be less than 1, ignoring.", @@ -915,10 +887,8 @@ public class ServerManager { long lastCountChange = startTime; int count = countOfRegionServers(); int oldCount = 0; - ServerName masterSn = master.getServerName(); - boolean selfCheckedIn = isServerOnline(masterSn); - while (!this.master.isStopped() && (!selfCheckedIn || (count < maxToStart - && (lastCountChange+interval > now || timeout > slept || count < minToStart)))) { + while (!this.master.isStopped() && count < maxToStart + && (lastCountChange+interval > now || timeout > slept || count < minToStart)) { // Log some info at every interval time or if there is a change if (oldCount != count || lastLogTime+interval < now){ lastLogTime = now; @@ -926,8 +896,7 @@ public class ServerManager { "Waiting for region servers count to settle; currently"+ " checked in " + count + ", slept for " + slept + " ms," + " expecting minimum of " + minToStart + ", maximum of "+ maxToStart+ - ", timeout of "+timeout+" ms, interval of "+interval+" ms," + - " selfCheckedIn " + selfCheckedIn; + ", timeout of "+timeout+" ms, interval of "+interval+" ms."; LOG.info(msg); status.setStatus(msg); } @@ -938,8 +907,6 @@ public class ServerManager { now = System.currentTimeMillis(); slept = now - startTime; - selfCheckedIn = isServerOnline(masterSn); - oldCount = count; count = countOfRegionServers(); if (count != oldCount) { @@ -950,8 +917,7 @@ public class ServerManager { LOG.info("Finished waiting for region servers count to settle;" + " checked in " + count + ", slept for " + slept + " ms," + " expecting minimum of " + minToStart + ", maximum of "+ maxToStart+","+ - " master is "+ (this.master.isStopped() ? "stopped.": "running," + - " selfCheckedIn " + selfCheckedIn) + " master is "+ (this.master.isStopped() ? 
"stopped.": "running") ); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index d8172782ac2..495f2fcf9b0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -44,14 +44,15 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.RegionLoad; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.AssignmentManager; import org.apache.hadoop.hbase.master.LoadBalancer; import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.master.RegionPlan; -import org.apache.hadoop.hbase.security.access.AccessControlLists; -import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.master.RackManager; +import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.Action.Type; +import org.apache.hadoop.hbase.security.access.AccessControlLists; +import org.apache.hadoop.util.StringUtils; import com.google.common.base.Joiner; import com.google.common.collect.ArrayListMultimap; @@ -100,7 +101,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { ArrayList tables; HRegionInfo[] regions; Deque[] regionLoads; - boolean[] backupMasterFlags; int activeMasterIndex = -1; int[][] regionLocations; //regionIndex -> list of serverIndex sorted by locality @@ -153,10 +153,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer { Map> clusterState, Map> loads, RegionLocationFinder regionFinder, - Collection backupMasters, Set tablesOnMaster, RackManager rackManager) { - this(masterServerName, null, clusterState, loads, regionFinder, backupMasters, + this(masterServerName, null, clusterState, loads, regionFinder, tablesOnMaster, rackManager); } @@ -167,7 +166,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { Map> clusterState, Map> loads, RegionLocationFinder regionFinder, - Collection backupMasters, Set tablesOnMaster, RackManager rackManager) { @@ -235,7 +233,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { regionLoads = new Deque[numRegions]; regionLocations = new int[numRegions][]; serverIndicesSortedByRegionCount = new Integer[numServers]; - backupMasterFlags = new boolean[numServers]; serverIndexToHostIndex = new int[numServers]; serverIndexToRackIndex = new int[numServers]; @@ -256,8 +253,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { if (servers[serverIndex] == null || servers[serverIndex].getStartcode() < entry.getKey().getStartcode()) { servers[serverIndex] = entry.getKey(); - backupMasterFlags[serverIndex] = backupMasters != null - && backupMasters.contains(servers[serverIndex]); } if (regionsPerServer[serverIndex] != null) { @@ -272,11 +267,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { if (servers[serverIndex].equals(masterServerName)) { activeMasterIndex = serverIndex; - for (HRegionInfo hri: entry.getValue()) { - if (!shouldBeOnMaster(hri)) { - numUserRegionsOnMaster++; - } - } } } @@ -718,15 +708,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } } } - if (oldServer >= 0 && isActiveMaster(oldServer)) { - if 
(!shouldBeOnMaster(regions[region])) { - numUserRegionsOnMaster--; - } - } else if (isActiveMaster(newServer)) { - if (!shouldBeOnMaster(regions[region])) { - numUserRegionsOnMaster++; - } - } } int[] removeRegion(int[] regions, int regionIndex) { @@ -784,10 +765,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return regionsPerServer[server].length; } - boolean isBackupMaster(int server) { - return backupMasterFlags[server]; - } - boolean isActiveMaster(int server) { return activeMasterIndex == server; } @@ -848,31 +825,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer { private static final Random RANDOM = new Random(System.currentTimeMillis()); private static final Log LOG = LogFactory.getLog(BaseLoadBalancer.class); - // The weight means that each region on the backup master is - // equal to that many regions on a normal regionserver, in calculating - // the region load by the load balancer. So that the backup master - // can host less (or equal if weight = 1) regions than normal regionservers. - // - // The weight can be used to control the number of regions on backup - // masters, which shouldn't host as many regions as normal regionservers. - // So that we don't need to move around too many regions when a - // backup master becomes the active one. - public static final String BACKUP_MASTER_WEIGHT_KEY = - "hbase.balancer.backupMasterWeight"; - public static final int DEFAULT_BACKUP_MASTER_WEIGHT = 1; - // Regions of these tables are put on the master by default. private static final String[] DEFAULT_TABLES_ON_MASTER = new String[] {AccessControlLists.ACL_TABLE_NAME.getNameAsString(), TableName.NAMESPACE_TABLE_NAME.getNameAsString(), TableName.META_TABLE_NAME.getNameAsString()}; - protected int backupMasterWeight; - - // a flag to indicate if assigning regions to backup masters - protected boolean usingBackupMasters = true; - protected final Set excludedServers = - Collections.synchronizedSet(new HashSet()); + public static final String TABLES_ON_MASTER = + "hbase.balancer.tablesOnMaster"; protected final Set tablesOnMaster = new HashSet(); protected final MetricsBalancer metricsBalancer = new MetricsBalancer(); @@ -880,6 +840,24 @@ public abstract class BaseLoadBalancer implements LoadBalancer { protected ServerName masterServerName; protected MasterServices services; + /** + * By default, regions of some small system tables such as meta, + * namespace, and acl are assigned to the active master. If you don't + * want to assign any region to the active master, you need to + * configure "hbase.balancer.tablesOnMaster" to "none". 
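// A brief usage sketch for the helper documented above. Property values are
// illustrative; the defaults are the acl, namespace and meta system tables named
// in this patch.
Configuration conf = HBaseConfiguration.create();
String[] defaults = BaseLoadBalancer.getTablesOnMaster(conf);
// property unset -> the default system tables stay on the active master
conf.set("hbase.balancer.tablesOnMaster", "none");
String[] none = BaseLoadBalancer.getTablesOnMaster(conf);    // null: keep nothing on the master
conf.set("hbase.balancer.tablesOnMaster", "hbase:namespace,hbase:meta");
String[] picked = BaseLoadBalancer.getTablesOnMaster(conf);  // comma-separated table names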
+ */ + public static String[] getTablesOnMaster(Configuration conf) { + String valueString = conf.get(TABLES_ON_MASTER); + if (valueString == null) { + return DEFAULT_TABLES_ON_MASTER; + } + valueString = valueString.trim(); + if (valueString.equalsIgnoreCase("none")) { + return null; + } + return StringUtils.getStrings(valueString); + } + @Override public void setConf(Configuration conf) { setSlop(conf); @@ -887,17 +865,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer { else if (slop > 1) slop = 1; this.config = conf; - backupMasterWeight = conf.getInt( - BACKUP_MASTER_WEIGHT_KEY, DEFAULT_BACKUP_MASTER_WEIGHT); - if (backupMasterWeight < 1) { - usingBackupMasters = false; - LOG.info("Backup master won't host any region since " - + BACKUP_MASTER_WEIGHT_KEY + " is " + backupMasterWeight - + "(<1)"); - } - String[] tables = conf.getStrings( - "hbase.balancer.tablesOnMaster", DEFAULT_TABLES_ON_MASTER); - if (tables != null) { + String[] tables = getTablesOnMaster(conf); + if (tables != null && tables.length > 0) { Collections.addAll(tablesOnMaster, tables); } this.rackManager = new RackManager(getConf()); @@ -908,23 +877,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { this.slop = conf.getFloat("hbase.regions.slop", (float) 0.2); } - /** - * If there is any server excluded, filter it out from the cluster map so - * we won't assign any region to it, assuming none's already assigned there. - */ - protected void filterExcludedServers(Map> clusterMap) { - if (excludedServers.isEmpty()) { // No server to filter out - return; - } - Iterator>> it = clusterMap.entrySet().iterator(); - while (it.hasNext()) { - Map.Entry> en = it.next(); - if (excludedServers.contains(en.getKey()) && en.getValue().isEmpty()) { - it.remove(); - } - } - } - /** * Check if a region belongs to some small system table. * If so, it may be expected to be put on the master regionserver. @@ -982,14 +934,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return plans; } - public void excludeServer(ServerName serverName) { - if (!usingBackupMasters) excludedServers.add(serverName); - } - - public Set getExcludedServers() { - return excludedServers; - } - @Override public Configuration getConf() { return this.config; @@ -998,20 +942,12 @@ public abstract class BaseLoadBalancer implements LoadBalancer { @Override public void setClusterStatus(ClusterStatus st) { this.clusterStatus = st; - if (st == null || usingBackupMasters) return; - - // Not assign any region to backup masters. - // Put them on the excluded server list. - // Assume there won't be too much backup masters - // re/starting, so this won't leak much memory. - excludedServers.addAll(st.getBackupMasters()); regionFinder.setClusterStatus(st); } @Override public void setMasterServices(MasterServices masterServices) { masterServerName = masterServices.getServerName(); - excludedServers.remove(masterServerName); this.services = masterServices; this.regionFinder.setServices(masterServices); } @@ -1020,13 +956,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer { this.rackManager = rackManager; } - protected Collection getBackupMasters() { - return clusterStatus == null ? 
null : clusterStatus.getBackupMasters(); - } - protected boolean needsBalance(Cluster c) { ClusterLoadState cs = new ClusterLoadState( - masterServerName, getBackupMasters(), backupMasterWeight, c.clusterState); + masterServerName, c.clusterState); if (cs.getNumServers() < MIN_SERVER_BALANCE) { if (LOG.isDebugEnabled()) { LOG.debug("Not running balancer because only " + cs.getNumServers() @@ -1045,9 +977,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer { if (LOG.isTraceEnabled()) { // If nothing to balance, then don't say anything unless trace-level logging. LOG.trace("Skipping load balancing because balanced cluster; " + - "servers=" + cs.getNumServers() + "(backupMasters=" + cs.getNumBackupMasters() + - ") regions=" + cs.getNumRegions() + " average=" + average + " " + - "mostloaded=" + serversByLoad.lastKey().getLoad() + + "servers=" + cs.getNumServers() + + " regions=" + cs.getNumRegions() + " average=" + average + + " mostloaded=" + serversByLoad.lastKey().getLoad() + " leastloaded=" + serversByLoad.firstKey().getLoad()); } return false; @@ -1091,10 +1023,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return null; } - List backupMasters = normalizeServers(servers); int numServers = servers == null ? 0 : servers.size(); - int numBackupMasters = backupMasters == null ? 0 : backupMasters.size(); - if (numServers == 0 && numBackupMasters == 0) { + if (numServers == 0) { LOG.warn("Wanted to do round robin assignment but no servers to assign to"); return null; } @@ -1105,40 +1035,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer { // and balanced. This should also run fast with fewer number of iterations. Map> assignments = new TreeMap>(); - if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here - ServerName server = numServers > 0 ? 
servers.get(0) : backupMasters.get(0); + if (numServers == 1) { // Only one server, nothing fancy we can do here + ServerName server = servers.get(0); assignments.put(server, new ArrayList(regions)); return assignments; } List masterRegions = null; - if (numServers > 0 && servers.contains(masterServerName)) { + if (servers.contains(masterServerName)) { masterRegions = new ArrayList(); - if (numServers == 1) { - // The only server in servers is the master, - // Assign all regions to backup masters - numServers = 0; - } } - Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster); + Cluster cluster = createCluster(servers, regions, tablesOnMaster); List unassignedRegions = new ArrayList(); - int total = regions.size(); - // Get the number of regions to be assigned - // to backup masters based on the weight - int numRegions = total * numBackupMasters - / (numServers * backupMasterWeight + numBackupMasters); - if (numRegions > 0) { - // backupMasters can't be null, according to the formula, numBackupMasters != 0 - roundRobinAssignment(cluster, regions, unassignedRegions, 0, - numRegions, backupMasters, masterRegions, assignments); - } - int remainder = total - numRegions; - if (remainder > 0) { - // servers can't be null, or contains the master only since numServers != 0 - roundRobinAssignment(cluster, regions, unassignedRegions, numRegions, remainder, - servers, masterRegions, assignments); - } + roundRobinAssignment(cluster, regions, unassignedRegions, + servers, masterRegions, assignments); + if (masterRegions != null && !masterRegions.isEmpty()) { assignments.put(masterServerName, masterRegions); for (HRegionInfo r : masterRegions) { @@ -1175,16 +1087,12 @@ public abstract class BaseLoadBalancer implements LoadBalancer { // just sprinkle the rest of the regions on random regionservers. The balanceCluster will // make it optimal later. we can end up with this if numReplicas > numServers. for (HRegionInfo region : lastFewRegions) { - ServerName server = null; - if (numServers == 0) { - // select from backup masters - int i = RANDOM.nextInt(backupMasters.size()); - server = backupMasters.get(i); - } else { - do { - int i = RANDOM.nextInt(numServers); - server = servers.get(i); - } while (numServers > 1 && server.equals(masterServerName)); + int i = RANDOM.nextInt(numServers); + ServerName server = servers.get(i); + if (server.equals(masterServerName)) { + // Try to avoid master for a user region + i = (i == 0 ? 1 : i - 1); + server = servers.get(i); } List serverRegions = assignments.get(server); if (serverRegions == null) { @@ -1198,7 +1106,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } protected Cluster createCluster(List servers, - Collection regions, List backupMasters, Set tablesOnMaster) { + Collection regions, Set tablesOnMaster) { // Get the snapshot of the current assignments for the regions in question, and then create // a cluster out of it. Note that we might have replicas already assigned to some servers // earlier. 
So we want to get the snapshot to see those assignments, but this will only contain @@ -1210,7 +1118,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { clusterState.put(server, EMPTY_REGION_LIST); } } - return new Cluster(masterServerName, regions, clusterState, null, this.regionFinder, backupMasters, + return new Cluster(masterServerName, regions, clusterState, null, this.regionFinder, tablesOnMaster, rackManager); } @@ -1253,15 +1161,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer { @Override public ServerName randomAssignment(HRegionInfo regionInfo, List servers) { metricsBalancer.incrMiscInvocations(); - if (servers == null || servers.isEmpty()) { - LOG.warn("Wanted to do random assignment but no servers to assign to"); + int numServers = servers == null ? 0 : servers.size(); + if (numServers == 0) { + LOG.warn("Wanted to do retain assignment but no servers to assign to"); return null; } - List backupMasters = normalizeServers(servers); - List regions = Lists.newArrayList(regionInfo); - Cluster cluster = createCluster(servers, regions, backupMasters, tablesOnMaster); + if (numServers == 1) { // Only one server, nothing fancy we can do here + return servers.get(0); + } + if (shouldBeOnMaster(regionInfo) + && servers.contains(masterServerName)) { + return masterServerName; + } - return randomAssignment(cluster, regionInfo, servers, backupMasters); + List regions = Lists.newArrayList(regionInfo); + Cluster cluster = createCluster(servers, regions, tablesOnMaster); + return randomAssignment(cluster, regionInfo, servers); } /** @@ -1290,16 +1205,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return null; } - List backupMasters = normalizeServers(servers); int numServers = servers == null ? 0 : servers.size(); - int numBackupMasters = backupMasters == null ? 0 : backupMasters.size(); - if (numServers == 0 && numBackupMasters == 0) { + if (numServers == 0) { LOG.warn("Wanted to do retain assignment but no servers to assign to"); return null; } Map> assignments = new TreeMap>(); - if (numServers + numBackupMasters == 1) { // Only one server, nothing fancy we can do here - ServerName server = numServers > 0 ? servers.get(0) : backupMasters.get(0); + if (numServers == 1) { // Only one server, nothing fancy we can do here + ServerName server = servers.get(0); assignments.put(server, new ArrayList(regions.keySet())); return assignments; } @@ -1317,11 +1230,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { serversByHostname.put(server.getHostname(), server); } } - if (numBackupMasters > 0) { - for (ServerName server : backupMasters) { - assignments.put(server, new ArrayList()); - } - } // Collection of the hostnames that used to have regions // assigned, but for which we no longer have any RS running @@ -1334,7 +1242,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { int numRandomAssignments = 0; int numRetainedAssigments = 0; - Cluster cluster = createCluster(servers, regions.keySet(), backupMasters, tablesOnMaster); + Cluster cluster = createCluster(servers, regions.keySet(), tablesOnMaster); for (Map.Entry entry : regions.entrySet()) { HRegionInfo region = entry.getKey(); @@ -1353,7 +1261,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } else if (localServers.isEmpty()) { // No servers on the new cluster match up with this hostname, // assign randomly. 
- ServerName randomServer = randomAssignment(cluster, region, servers, backupMasters); + ServerName randomServer = randomAssignment(cluster, region, servers); assignments.get(randomServer).add(region); numRandomAssignments++; if (oldServerName != null) oldHostsNoLongerPresent.add(oldServerName.getHostname()); @@ -1377,7 +1285,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } } if (target == null) { - target = randomAssignment(cluster, region, localServers, backupMasters); + target = randomAssignment(cluster, region, localServers); } assignments.get(target).add(region); } @@ -1423,78 +1331,22 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } /** - * Prepare the list of target regionservers so that it doesn't - * contain any excluded server, or backup master. Those backup masters - * used to be in the original list are returned. - */ - private List normalizeServers(List servers) { - if (servers == null) { - return null; - } - if (!excludedServers.isEmpty()) { - servers.removeAll(excludedServers); - } - Collection allBackupMasters = getBackupMasters(); - List backupMasters = null; - if (allBackupMasters != null && !allBackupMasters.isEmpty()) { - for (ServerName server: allBackupMasters) { - if (!servers.contains(server)) { - // Ignore backup masters not included - continue; - } - servers.remove(server); - if (backupMasters == null) { - backupMasters = new ArrayList(); - } - backupMasters.add(server); - } - } - return backupMasters; - } - - /** - * Used to assign a single region to a random server. The input should - * have been already normalized: 1) servers doesn't include any exclude sever, - * 2) servers doesn't include any backup master, 3) backupMasters contains - * only backup masters that are intended to host this region, i.e, it - * may not have all the backup masters. + * Used to assign a single region to a random server. */ private ServerName randomAssignment(Cluster cluster, HRegionInfo regionInfo, - List servers, List backupMasters) { - int numServers = servers == null ? 0 : servers.size(); - int numBackupMasters = backupMasters == null ? 0 : backupMasters.size(); - if (numServers == 0 && numBackupMasters == 0) { - LOG.warn("Wanted to do random assignment but no servers to assign to"); - return null; - } - if (servers != null && shouldBeOnMaster(regionInfo) - && servers.contains(masterServerName)) { - return masterServerName; - } + List servers) { + int numServers = servers.size(); // servers is not null, numServers > 1 ServerName sn = null; - final int maxIterations = servers.size() * 4; + final int maxIterations = numServers * 4; int iterations = 0; do { - // Generate a random number weighted more towards - // regular regionservers instead of backup masters. - // This formula is chosen for simplicity. - int i = RANDOM.nextInt( - numBackupMasters + numServers * backupMasterWeight); - if (i < numBackupMasters) { - sn = backupMasters.get(i); - continue; - } - i = (i - numBackupMasters)/backupMasterWeight; + int i = RANDOM.nextInt(numServers); sn = servers.get(i); if (sn.equals(masterServerName)) { // Try to avoid master for a user region - if (numServers > 1) { - i = (i == 0 ? 1 : i - 1); - sn = servers.get(i); - } else if (numBackupMasters > 0) { - sn = backupMasters.get(0); - } + i = (i == 0 ? 
1 : i - 1); + sn = servers.get(i); } } while (cluster.wouldLowerAvailability(regionInfo, sn) && iterations++ < maxIterations); @@ -1503,12 +1355,11 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } /** - * Round robin a chunk of a list of regions to a list of servers + * Round robin a list of regions to a list of servers */ private void roundRobinAssignment(Cluster cluster, List regions, - List unassignedRegions, int offset, - int numRegions, List servers, List masterRegions, - Map> assignments) { + List unassignedRegions, List servers, + List masterRegions, Map> assignments) { boolean masterIncluded = servers.contains(masterServerName); int numServers = servers.size(); @@ -1516,6 +1367,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { if (masterIncluded) { skipServers--; } + int numRegions = regions.size(); int max = (int) Math.ceil((float) numRegions / skipServers); int serverIdx = 0; if (numServers > 1) { @@ -1532,7 +1384,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } List serverRegions = new ArrayList(max); for (int i = regionIdx; i < numRegions; i += skipServers) { - HRegionInfo region = regions.get(offset + i % numRegions); + HRegionInfo region = regions.get(i % numRegions); if (masterRegions == null || !shouldBeOnMaster(region)) { if (cluster.wouldLowerAvailability(region, server)) { unassignedRegions.add(region); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java index d1e6beba9b2..e7fbc4a2783 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterLoadState.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hbase.master.balancer; -import java.util.Collection; import java.util.List; import java.util.Map; import java.util.NavigableMap; @@ -35,12 +34,9 @@ public class ClusterLoadState { private boolean emptyRegionServerPresent = false; private int numRegions = 0; private int numServers = 0; - private int numBackupMasters = 0; - private int backupMasterWeight; - public ClusterLoadState(ServerName master, Collection backupMasters, - int backupMasterWeight, Map> clusterState) { - this.backupMasterWeight = backupMasterWeight; + public ClusterLoadState(ServerName master, + Map> clusterState) { this.numRegions = 0; this.numServers = clusterState.size(); this.clusterState = clusterState; @@ -56,10 +52,6 @@ public class ClusterLoadState { int sz = regions.size(); if (sz == 0) emptyRegionServerPresent = true; numRegions += sz; - if (backupMasters != null && backupMasters.contains(server.getKey())) { - sz *= backupMasterWeight; - numBackupMasters++; - } serversByLoad.put(new ServerAndLoad(server.getKey(), sz), regions); } } @@ -84,12 +76,8 @@ public class ClusterLoadState { return numServers; } - int getNumBackupMasters() { - return numBackupMasters; - } - float getLoadAverage() { - return numRegions / (numServers - numBackupMasters * (1 - 1.0f/backupMasterWeight)); + return (float) numRegions / numServers; } int getMaxLoad() { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java index 6225f6c8517..fb269acbe05 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java +++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/SimpleLoadBalancer.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.master.balancer; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -187,17 +186,14 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { if (regionsToReturn != null) { return regionsToReturn; } - filterExcludedServers(clusterMap); boolean emptyRegionServerPresent = false; long startTime = System.currentTimeMillis(); - Collection backupMasters = getBackupMasters(); - ClusterLoadState cs = new ClusterLoadState(masterServerName, - backupMasters, backupMasterWeight, clusterMap); + ClusterLoadState cs = new ClusterLoadState(masterServerName, clusterMap); // construct a Cluster object with clusterMap and rest of the // argument as defaults Cluster c = new Cluster(masterServerName, clusterMap, null, this.regionFinder, - getBackupMasters(), tablesOnMaster, this.rackManager); + tablesOnMaster, this.rackManager); if (!this.needsBalance(c)) return null; int numServers = cs.getNumServers(); @@ -210,9 +206,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { // Using to check balance result. StringBuilder strBalanceParam = new StringBuilder(); strBalanceParam.append("Balance parameter: numRegions=").append(numRegions) - .append(", numServers=").append(numServers).append(", numBackupMasters=") - .append(cs.getNumBackupMasters()).append(", backupMasterWeight=") - .append(backupMasterWeight).append(", max=").append(max) + .append(", numServers=").append(numServers).append(", max=").append(max) .append(", min=").append(min); LOG.debug(strBalanceParam.toString()); @@ -238,11 +232,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { } serversOverloaded++; List regions = server.getValue(); - int w = 1; // Normal region server has weight 1 - if (backupMasters != null && backupMasters.contains(sal.getServerName())) { - w = backupMasterWeight; // Backup master has heavier weight - } - int numToOffload = Math.min((load - max) / w, regions.size()); + int numToOffload = Math.min(load - max, regions.size()); // account for the out-of-band regions which were assigned to this server // after some other region server crashed Collections.sort(regions, riComparator); @@ -282,12 +272,7 @@ public class SimpleLoadBalancer extends BaseLoadBalancer { if (load >= min && load > 0) { continue; // look for other servers which haven't reached min } - int w = 1; // Normal region server has weight 1 - if (backupMasters != null - && backupMasters.contains(server.getKey().getServerName())) { - w = backupMasterWeight; // Backup master has heavier weight - } - int regionsToPut = (min - load) / w; + int regionsToPut = min - load; if (regionsToPut == 0) { regionsToPut = 1; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index a9bc7ff81d7..6f564e0f14a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -157,7 +157,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf); costFunctions = new CostFunction[]{ - new RegionCountSkewCostFunction(conf, 
backupMasterWeight), + new RegionCountSkewCostFunction(conf), new MoveCostFunction(conf), localityCost, new TableSkewCostFunction(conf), @@ -211,12 +211,11 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { if (plans != null) { return plans; } - filterExcludedServers(clusterState); //The clusterState that is given to this method contains the state //of all the regions in the table(s) (that's true today) // Keep track of servers to iterate through them. Cluster cluster = new Cluster(masterServerName, - clusterState, loads, regionFinder, getBackupMasters(), tablesOnMaster, rackManager); + clusterState, loads, regionFinder, tablesOnMaster, rackManager); if (!needsBalance(cluster)) { return null; } @@ -437,7 +436,10 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { } protected int pickOtherRandomServer(Cluster cluster, int serverIndex) { - if (cluster.numServers <= 2) { + if (cluster.numServers < 2) { + return -1; + } + if (cluster.activeMasterIndex != -1 && cluster.numServers == 2) { return -1; } while (true) { @@ -527,10 +529,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { private int pickLeastLoadedServer(final Cluster cluster, int thisServer) { Integer[] servers = cluster.serverIndicesSortedByRegionCount; - if (servers.length <= 2) { - return thisServer -1; - } - int index = 0; while (servers[index] == null || servers[index] == thisServer || cluster.isActiveMaster(index)) { @@ -583,6 +581,10 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { // Pick the server with the highest locality int otherServer = pickHighestLocalityServer(cluster, thisServer, thisRegion); + if (otherServer == -1) { + return Cluster.NullAction; + } + // pick an region on the other server to potentially swap int otherRegion = this.pickRandomRegion(cluster, otherServer, 0.5f); @@ -799,7 +801,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { double total = getSum(stats); double count = stats.length; - if (stats.length > 1 && cluster.masterServerName != null) { + if (stats.length > 1 && cluster.activeMasterIndex != -1) { count--; // Exclude the active master } double mean = total/count; @@ -900,14 +902,12 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { "hbase.master.balancer.stochastic.regionCountCost"; private static final float DEFAULT_REGION_COUNT_SKEW_COST = 500; - private double backupMasterWeight; private double[] stats = null; - RegionCountSkewCostFunction(Configuration conf, double backupMasterWeight) { + RegionCountSkewCostFunction(Configuration conf) { super(conf); // Load multiplier should be the greatest as it is the most general way to balance data. this.setMultiplier(conf.getFloat(REGION_COUNT_SKEW_COST_KEY, DEFAULT_REGION_COUNT_SKEW_COST)); - this.backupMasterWeight = backupMasterWeight; } @Override @@ -918,11 +918,6 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { for (int i =0; i < cluster.numServers; i++) { stats[i] = cluster.regionsPerServer[i].length; - // Use some weight on regions assigned to active/backup masters, - // so that they won't carry as many regions as normal regionservers. 
- if (cluster.isBackupMaster(i)) { - stats[i] *= backupMasterWeight; - } } return costFromArray(stats); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 1bc478fa443..1004d405291 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -52,7 +52,6 @@ import javax.servlet.http.HttpServlet; import org.apache.commons.lang.math.RandomUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -72,6 +71,7 @@ import org.apache.hadoop.hbase.TableDescriptors; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.YouAreDeadException; import org.apache.hadoop.hbase.ZNodeClearer; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.ConnectionUtils; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionManager; @@ -325,7 +325,7 @@ public class HRegionServer extends HasThread implements LogRoller metaHLogRoller; // flag set after we're done setting up server threads - protected AtomicBoolean online; + final AtomicBoolean online = new AtomicBoolean(false); // zookeeper connection and watcher protected ZooKeeperWatcher zooKeeper; @@ -347,7 +347,7 @@ public class HRegionServer extends HasThread implements private final RegionServerAccounting regionServerAccounting; // Cache configuration and block cache reference - final CacheConfig cacheConfig; + protected CacheConfig cacheConfig; /** The health check chore. */ private HealthCheckChore healthCheckChore; @@ -441,7 +441,6 @@ public class HRegionServer extends HasThread implements this.fsOk = true; this.conf = conf; checkCodecs(this.conf); - this.online = new AtomicBoolean(false); this.userProvider = UserProvider.instantiate(conf); FSUtils.setupShortCircuitRead(this.conf); @@ -478,7 +477,6 @@ public class HRegionServer extends HasThread implements login(userProvider, hostName); regionServerAccounting = new RegionServerAccounting(); - cacheConfig = new CacheConfig(conf); uncaughtExceptionHandler = new UncaughtExceptionHandler() { @Override public void uncaughtException(Thread t, Throwable e) { @@ -530,6 +528,9 @@ public class HRegionServer extends HasThread implements "hbase.regionserver.kerberos.principal", host); } + protected void waitForMasterActive(){ + } + protected String getProcessName() { return REGIONSERVER; } @@ -597,8 +598,26 @@ public class HRegionServer extends HasThread implements */ private void preRegistrationInitialization(){ try { + synchronized (this) { + if (shortCircuitConnection == null) { + shortCircuitConnection = createShortCircuitConnection(); + metaTableLocator = new MetaTableLocator(); + } + } + + // Health checker thread. 
+ if (isHealthCheckerConfigured()) { + int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ, + HConstants.DEFAULT_THREAD_WAKE_FREQUENCY); + healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration()); + } + this.pauseMonitor = new JvmPauseMonitor(conf); + pauseMonitor.start(); + initializeZooKeeper(); - initializeThreads(); + if (!isStopped() && !isAborted()) { + initializeThreads(); + } } catch (Throwable t) { // Call stop if error or process will stick around for ever since server // puts up non-daemon threads. @@ -619,8 +638,6 @@ public class HRegionServer extends HasThread implements // Create the master address tracker, register with zk, and start it. Then // block until a master is available. No point in starting up if no master // running. - this.masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this); - this.masterAddressTracker.start(); blockAndCheckIfStopped(this.masterAddressTracker); // Wait on cluster being up. Master will set this flag up in zookeeper @@ -640,11 +657,13 @@ public class HRegionServer extends HasThread implements this.abort("Failed to retrieve Cluster ID",e); } - synchronized (this) { - if (shortCircuitConnection == null) { - shortCircuitConnection = createShortCircuitConnection(); - metaTableLocator = new MetaTableLocator(); - } + // In case colocated master, wait here till it's active. + // So backup masters won't start as regionservers. + // This is to avoid showing backup masters as regionservers + // in master web UI, or assigning any region to them. + waitForMasterActive(); + if (isStopped() || isAborted()) { + return; // No need for further initialization } // watch for snapshots and other procedures @@ -693,13 +712,6 @@ public class HRegionServer extends HasThread implements // in a while. It will take care of not checking too frequently on store-by-store basis. this.compactionChecker = new CompactionChecker(this, this.threadWakeFrequency, this); this.periodicFlusher = new PeriodicMemstoreFlusher(this.threadWakeFrequency, this); - // Health checker thread. - int sleepTime = this.conf.getInt(HConstants.HEALTH_CHORE_WAKE_FREQ, - HConstants.DEFAULT_THREAD_WAKE_FREQUENCY); - if (isHealthCheckerConfigured()) { - healthCheckChore = new HealthCheckChore(sleepTime, this, getConfiguration()); - } - this.leases = new Leases(this.threadWakeFrequency); // Create the thread to clean the moved regions list @@ -716,8 +728,6 @@ public class HRegionServer extends HasThread implements // Setup RPC client for master communication rpcClient = new RpcClient(conf, clusterId, new InetSocketAddress( rpcServices.isa.getAddress(), 0)); - this.pauseMonitor = new JvmPauseMonitor(conf); - pauseMonitor.start(); int storefileRefreshPeriod = conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD , StorefileRefresherChore.DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD); @@ -836,7 +846,7 @@ public class HRegionServer extends HasThread implements } } // Send cache a shutdown. 
- if (cacheConfig.isBlockCacheEnabled()) { + if (cacheConfig != null && cacheConfig.isBlockCacheEnabled()) { cacheConfig.getBlockCache().shutdown(); } @@ -941,6 +951,7 @@ public class HRegionServer extends HasThread implements try { deleteMyEphemeralNode(); + } catch (KeeperException.NoNodeException nn) { } catch (KeeperException e) { LOG.warn("Failed deleting my ephemeral node", e); } @@ -1188,6 +1199,7 @@ public class HRegionServer extends HasThread implements // Save it in a file, this will allow to see if we crash ZNodeClearer.writeMyEphemeralNodeOnDisk(getMyEphemeralNodePath()); + this.cacheConfig = new CacheConfig(conf); this.hlog = setupWALAndReplication(); // Init in here rather than in constructor after thread name has been set this.metricsRegionServer = new MetricsRegionServer(new MetricsRegionServerWrapperImpl(this)); @@ -1198,6 +1210,8 @@ public class HRegionServer extends HasThread implements ", RpcServer on " + rpcServices.isa + ", sessionid=0x" + Long.toHexString(this.zooKeeper.getRecoverableZooKeeper().getSessionId())); + + // Wake up anyone waiting for this server to online synchronized (online) { online.set(true); online.notifyAll(); @@ -1582,10 +1596,6 @@ public class HRegionServer extends HasThread implements } } - // Start Server. This service is like leases in that it internally runs - // a thread. - rpcServices.rpcServer.start(); - // Create the log splitting worker and start it // set a smaller retries to fast fail otherwise splitlogworker could be blocked for // quite a while inside HConnection layer. The worker won't be available for other @@ -1713,8 +1723,15 @@ public class HRegionServer extends HasThread implements } public void waitForServerOnline(){ - while (!isOnline() && !isStopped()){ - sleeper.sleep(); + while (!isStopped() && !isOnline()) { + synchronized (online) { + try { + online.wait(msgInterval); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + break; + } + } } } @@ -1976,12 +1993,11 @@ public class HRegionServer extends HasThread implements } ServerName sn = null; long previousLogTime = 0; - RegionServerStatusService.BlockingInterface master = null; boolean refresh = false; // for the first time, use cached data RegionServerStatusService.BlockingInterface intf = null; boolean interrupted = false; try { - while (keepLooping() && master == null) { + while (keepLooping()) { sn = this.masterAddressTracker.getMasterAddress(refresh); if (sn == null) { if (!keepLooping()) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java index b0a69f873c9..83b9fb12dea 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Leases.java @@ -25,12 +25,10 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.HasThread; import java.util.ConcurrentModificationException; -import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.Delayed; -import java.util.concurrent.DelayQueue; import java.util.concurrent.TimeUnit; import java.io.IOException; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index 34044484f78..9bbd6918e13 100644 --- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -18,14 +18,25 @@ */ package org.apache.hadoop.hbase.regionserver; -import com.google.protobuf.ByteString; -import com.google.protobuf.Message; -import com.google.protobuf.RpcController; -import com.google.protobuf.ServiceException; -import com.google.protobuf.TextFormat; +import java.io.IOException; +import java.io.InterruptedIOException; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NavigableMap; +import java.util.Set; +import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellScannable; @@ -43,6 +54,7 @@ import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.UnknownScannerException; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.Append; import org.apache.hadoop.hbase.client.ConnectionUtils; import org.apache.hadoop.hbase.client.Delete; @@ -133,7 +145,6 @@ import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.RequestHeader; import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor; import org.apache.hadoop.hbase.quotas.OperationQuota; import org.apache.hadoop.hbase.quotas.RegionServerQuotaManager; -import org.apache.hadoop.hbase.quotas.ThrottlingException; import org.apache.hadoop.hbase.regionserver.HRegion.Operation; import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException; import org.apache.hadoop.hbase.regionserver.handler.OpenMetaHandler; @@ -153,27 +164,17 @@ import org.apache.hadoop.hbase.zookeeper.ZKSplitLog; import org.apache.hadoop.net.DNS; import org.apache.zookeeper.KeeperException; -import java.io.IOException; -import java.io.InterruptedIOException; -import java.lang.annotation.Retention; -import java.lang.annotation.RetentionPolicy; -import java.net.InetSocketAddress; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.NavigableMap; -import java.util.Set; -import java.util.TreeSet; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; +import com.google.protobuf.ByteString; +import com.google.protobuf.Message; +import com.google.protobuf.RpcController; +import com.google.protobuf.ServiceException; +import com.google.protobuf.TextFormat; /** * Implements the regionserver RPC services. 
*/ @InterfaceAudience.Private +@SuppressWarnings("deprecation") public class RSRpcServices implements HBaseRPCErrorHandler, AdminService.BlockingInterface, ClientService.BlockingInterface, PriorityFunction { protected static final Log LOG = LogFactory.getLog(RSRpcServices.class); @@ -1137,7 +1138,6 @@ public class RSRpcServices implements HBaseRPCErrorHandler, * @throws ServiceException */ @Override - @SuppressWarnings("deprecation") public GetServerInfoResponse getServerInfo(final RpcController controller, final GetServerInfoRequest request) throws ServiceException { try { @@ -1272,7 +1272,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, try { while (System.currentTimeMillis() <= endTime && !regionServer.isStopped() && !regionServer.isOnline()) { - regionServer.online.wait(100); + regionServer.online.wait(regionServer.msgInterval); } checkOpen(); } catch (InterruptedException t) { @@ -1837,7 +1837,6 @@ public class RSRpcServices implements HBaseRPCErrorHandler, Result r = null; Boolean processed = null; MutationType type = mutation.getMutateType(); - long mutationSize = 0; quota = getQuotaManager().checkQuota(region, OperationQuota.OperationType.MUTATE); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java index f038ed334cb..0b8846c28ed 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java @@ -23,15 +23,13 @@ import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.List; import java.util.NavigableMap; -import java.util.NavigableSet; import java.util.TreeMap; -import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.Abortable; +import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.master.ServerManager; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionServerInfo; @@ -54,12 +52,12 @@ public class RegionServerTracker extends ZooKeeperListener { private NavigableMap regionServers = new TreeMap(); private ServerManager serverManager; - private Abortable abortable; + private Server server; public RegionServerTracker(ZooKeeperWatcher watcher, - Abortable abortable, ServerManager serverManager) { + Server server, ServerManager serverManager) { super(watcher); - this.abortable = abortable; + this.server = server; this.serverManager = serverManager; } @@ -133,15 +131,16 @@ public class RegionServerTracker extends ZooKeeperListener { @Override public void nodeChildrenChanged(String path) { - if (path.equals(watcher.rsZNode)) { + if (path.equals(watcher.rsZNode) + && !server.isAborted() && !server.isStopped()) { try { List servers = ZKUtil.listChildrenAndWatchThem(watcher, watcher.rsZNode); add(servers); } catch (IOException e) { - abortable.abort("Unexpected zk exception getting RS nodes", e); + server.abort("Unexpected zk exception getting RS nodes", e); } catch (KeeperException e) { - abortable.abort("Unexpected zk exception getting RS nodes", e); + server.abort("Unexpected zk exception getting RS nodes", e); } } } diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index 6ef5926cf8b..70ef0ba11aa 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -49,12 +49,12 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Jdk14Logger; import org.apache.commons.logging.impl.Log4JLogger; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Waiter.Predicate; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; @@ -77,6 +77,7 @@ import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.ChecksumUtil; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.ipc.RpcServerInterface; +import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.mapreduce.MapreduceTestingShim; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.RegionStates; @@ -2954,6 +2955,8 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility { } } catch (RegionServerStoppedException e) { // That's fine. + } catch (ServerNotRunningYetException e) { + // That's fine. 
} } return online; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMultiParallel.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMultiParallel.java index 5a7b83abc3e..1558eaf1c4b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMultiParallel.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMultiParallel.java @@ -26,6 +26,7 @@ import static org.junit.Assert.fail; import java.io.IOException; import java.lang.reflect.Field; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ThreadPoolExecutor; @@ -36,6 +37,8 @@ import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.exceptions.OperationConflictException; import org.apache.hadoop.hbase.ipc.RpcClient; @@ -151,10 +154,15 @@ public class TestMultiParallel { Table table = new HTable(UTIL.getConfiguration(), TEST_TABLE); List puts = constructPutRequests(); // creates a Put for every region table.batch(puts); + HashSet regionservers = new HashSet(); + for (byte[] k : KEYS) { + HRegionLocation location = ((HTable)table).getRegionLocation(k); + regionservers.add(location.getServerName()); + } Field poolField = table.getClass().getDeclaredField("pool"); poolField.setAccessible(true); ThreadPoolExecutor tExecutor = (ThreadPoolExecutor) poolField.get(table); - assertEquals(slaves, tExecutor.getLargestPoolSize()); + assertEquals(regionservers.size(), tExecutor.getLargestPoolSize()); table.close(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java index 56b96b384fd..3c845fd0674 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java @@ -82,6 +82,7 @@ import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager; import org.apache.hadoop.hbase.coordination.ZKSplitLogManagerCoordination; import org.apache.hadoop.hbase.exceptions.OperationConflictException; import org.apache.hadoop.hbase.exceptions.RegionInRecoveryException; +import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.master.SplitLogManager.TaskBatch; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState; @@ -1461,7 +1462,13 @@ public class TestDistributedLogSplitting { for (MasterThread mt : cluster.getLiveMasterThreads()) { HRegionServer hrs = mt.getMaster(); - List hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()); + List hris; + try { + hris = ProtobufUtil.getOnlineRegions(hrs.getRSRpcServices()); + } catch (ServerNotRunningYetException e) { + // It's ok: this master may be a backup. Ignored. 
+ continue; + } for (HRegionInfo hri : hris) { if (hri.getTable().isSystemTable()) { continue; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java index 7537e35a344..0af95b9001e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java @@ -151,7 +151,7 @@ public class TestMasterFailover { assertEquals(2, masterThreads.size()); int rsCount = masterThreads.get(activeIndex).getMaster().getClusterStatus().getServersSize(); LOG.info("Active master " + active.getServerName() + " managing " + rsCount + " regions servers"); - assertEquals(5, rsCount); + assertEquals(4, rsCount); // Check that ClusterStatus reports the correct active and backup masters assertNotNull(active); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java index 7216abd95d7..55c91f49b3b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/BalancerTestBase.java @@ -215,7 +215,7 @@ public class BalancerTestBase { protected BaseLoadBalancer.Cluster mockCluster(int[] mockCluster) { return new BaseLoadBalancer.Cluster(null, - mockClusterServers(mockCluster, -1), null, null, null, null, null); + mockClusterServers(mockCluster, -1), null, null, null, null); } protected TreeMap> mockClusterServers(int[] mockCluster, int numTables) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java index 6a9f97d0818..a0746555f5c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestBaseLoadBalancer.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hbase.master.balancer; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -37,18 +36,16 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HBaseConfiguration; -import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.master.LoadBalancer; -import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.master.RegionPlan; -import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionReplicaUtil; +import org.apache.hadoop.hbase.master.LoadBalancer; +import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.RackManager; +import org.apache.hadoop.hbase.master.RegionPlan; +import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster; import 
org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer.Cluster.MoveRegionAction; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MediumTests; @@ -270,7 +267,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase { // cluster is created (constructor code) would make sure the indices of // the servers are in the order in which it is inserted in the clusterState // map (linkedhashmap is important). A similar thing applies to the region lists - Cluster cluster = new Cluster(master, clusterState, null, null, null, null, rackManager); + Cluster cluster = new Cluster(master, clusterState, null, null, null, rackManager); // check whether a move of region1 from servers[0] to servers[1] would lower // the availability of region1 assertTrue(cluster.wouldLowerAvailability(hri1, servers[1])); @@ -287,7 +284,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase { // now lets have servers[1] host replica_of_region2 list1.add(RegionReplicaUtil.getRegionInfoForReplica(hri3, 1)); // create a new clusterState with the above change - cluster = new Cluster(master, clusterState, null, null, null, null, rackManager); + cluster = new Cluster(master, clusterState, null, null, null, rackManager); // now check whether a move of a replica from servers[0] to servers[1] would lower // the availability of region2 assertTrue(cluster.wouldLowerAvailability(hri3, servers[1])); @@ -299,14 +296,14 @@ public class TestBaseLoadBalancer extends BalancerTestBase { clusterState.put(servers[6], list2); //servers[6], rack2 hosts region2 clusterState.put(servers[10], new ArrayList()); //servers[10], rack3 hosts no region // create a cluster with the above clusterState - cluster = new Cluster(master, clusterState, null, null, null, null, rackManager); + cluster = new Cluster(master, clusterState, null, null, null, rackManager); // check whether a move of region1 from servers[0],rack1 to servers[6],rack2 would // lower the availability assertTrue(cluster.wouldLowerAvailability(hri1, servers[0])); // now create a cluster without the rack manager - cluster = new Cluster(master, clusterState, null, null, null, null, null); + cluster = new Cluster(master, clusterState, null, null, null, null); // now repeat check whether a move of region1 from servers[0] to servers[6] would // lower the availability assertTrue(!cluster.wouldLowerAvailability(hri1, servers[6])); @@ -339,7 +336,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase { // cluster is created (constructor code) would make sure the indices of // the servers are in the order in which it is inserted in the clusterState // map (linkedhashmap is important). 
- Cluster cluster = new Cluster(master, clusterState, null, null, null, null, rackManager); + Cluster cluster = new Cluster(master, clusterState, null, null, null, rackManager); // check whether moving region1 from servers[1] to servers[2] would lower availability assertTrue(!cluster.wouldLowerAvailability(hri1, servers[2])); @@ -359,7 +356,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase { clusterState.put(servers[6], list2); //servers[6], rack2 hosts region2 clusterState.put(servers[12], list3); //servers[12], rack3 hosts replica_of_region2 // create a cluster with the above clusterState - cluster = new Cluster(master, clusterState, null, null, null, null, rackManager); + cluster = new Cluster(master, clusterState, null, null, null, rackManager); // check whether a move of replica_of_region2 from servers[12],rack3 to servers[0],rack1 would // lower the availability assertTrue(!cluster.wouldLowerAvailability(hri4, servers[0])); @@ -445,7 +442,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase { assignRegions(regions, oldServers, clusterState); // should not throw exception: - BaseLoadBalancer.Cluster cluster = new Cluster(null, clusterState, null, null, null, null, null); + BaseLoadBalancer.Cluster cluster = new Cluster(null, clusterState, null, null, null, null); assertEquals(101 + 9, cluster.numRegions); assertEquals(10, cluster.numServers); // only 10 servers because they share the same host + port } @@ -487,7 +484,7 @@ public class TestBaseLoadBalancer extends BalancerTestBase { when(locationFinder.getTopBlockLocations(regions.get(43))).thenReturn( Lists.newArrayList(ServerName.valueOf("foo", 0, 0))); // this server does not exists in clusterStatus - BaseLoadBalancer.Cluster cluster = new Cluster(null, clusterState, null, locationFinder, null, null, null); + BaseLoadBalancer.Cluster cluster = new Cluster(null, clusterState, null, locationFinder, null, null); int r0 = ArrayUtils.indexOf(cluster.regions, regions.get(0)); // this is ok, it is just a test int r1 = ArrayUtils.indexOf(cluster.regions, regions.get(1)); @@ -523,49 +520,4 @@ public class TestBaseLoadBalancer extends BalancerTestBase { assertEquals(1, cluster.regionLocations[r43].length); assertEquals(-1, cluster.regionLocations[r43][0]); } - - @Test - public void testBackupMastersExcluded() throws HBaseIOException { - ClusterStatus st = Mockito.mock(ClusterStatus.class); - ArrayList backupMasters = new ArrayList(); - ServerName backupMaster = ServerName.valueOf("fake-backupmaster", 0, 1L); - backupMasters.add(backupMaster); - BaseLoadBalancer balancer = (BaseLoadBalancer)loadBalancer; - balancer.usingBackupMasters = false; - Mockito.when(st.getBackupMasters()).thenReturn(backupMasters); - loadBalancer.setClusterStatus(st); - assertEquals(1, balancer.excludedServers.size()); - assertTrue(balancer.excludedServers.contains(backupMaster)); - - // Round robin assignment - List regions = randomRegions(1); - HRegionInfo region = regions.get(0); - assertNull(loadBalancer.randomAssignment(region, backupMasters)); - assertNull(loadBalancer.roundRobinAssignment(regions, backupMasters)); - HashMap assignments = new HashMap(); - assignments.put(region, backupMaster); - assertNull(loadBalancer.retainAssignment(assignments, backupMasters)); - ArrayList servers = new ArrayList(backupMasters); - ServerName sn = ServerName.valueOf("fake-rs", 0, 1L); - servers.add(sn); - assertEquals(sn, loadBalancer.randomAssignment(region, servers)); - Map> plans = - loadBalancer.roundRobinAssignment(regions, servers); - 
assertEquals(1, plans.size()); - assertTrue(plans.get(sn).contains(region)); - - // Retain assignment - plans = loadBalancer.retainAssignment(assignments, servers); - assertEquals(1, plans.size()); - assertTrue(plans.get(sn).contains(region)); - - // Filter backup masters for balance cluster - Map> clusterMap = - new HashMap>(); - clusterMap.put(backupMaster, new ArrayList()); - clusterMap.put(sn, new ArrayList()); - balancer.filterExcludedServers(clusterMap); - assertTrue(clusterMap.containsKey(sn)); - assertEquals(1, clusterMap.size()); - } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestDefaultLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestDefaultLoadBalancer.java index 15ff805ad03..d905c26b173 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestDefaultLoadBalancer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestDefaultLoadBalancer.java @@ -17,14 +17,8 @@ */ package org.apache.hadoop.hbase.master.balancer; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; - -import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -32,7 +26,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.LoadBalancer; import org.apache.hadoop.hbase.master.RegionPlan; import org.apache.hadoop.hbase.testclassification.MasterTests; @@ -40,7 +33,6 @@ import org.apache.hadoop.hbase.testclassification.MediumTests; import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; -import org.mockito.Mockito; /** * Test the load balancer that is created by default. 
@@ -134,43 +126,4 @@ public class TestDefaultLoadBalancer extends BalancerTestBase { } } } - - @Test - public void testBalancerClusterWithBackupMaster() throws Exception { - SimpleLoadBalancer balancer = Mockito.spy(new SimpleLoadBalancer()); - balancer.setConf(HBaseConfiguration.create()); - List backupMasters = new ArrayList(); - ServerName backupMaster = ServerName.parseServerName("backup:1:1"); - ServerName rs = ServerName.parseServerName("rs:1:1"); - backupMasters.add(backupMaster); - Mockito.doReturn(backupMasters).when(balancer).getBackupMasters(); - Map> servers = new TreeMap>(); - List regions = new ArrayList(); - TableName table = TableName.valueOf("test"); - regions.add(new HRegionInfo(table)); - servers.put(backupMaster, regions); - regions = new ArrayList(); - balancer.backupMasterWeight = 4; - for (int i=0; i<4; i++) { - regions.add(new HRegionInfo(table)); - } - servers.put(rs, regions); - List plans = balancer.balanceCluster(servers); - assertNull(plans); - - // Reset the cluster map - regions = new ArrayList(); - for (int i=0; i<2; i++) { - regions.add(new HRegionInfo(table)); - } - servers.put(backupMaster, regions); - regions = new ArrayList(); - for (int i=0; i<3; i++) { - regions.add(new HRegionInfo(table)); - } - servers.put(rs, regions); - plans = balancer.balanceCluster(servers); - assertNotNull(plans); - assertEquals(1, plans.size()); - } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java index e7757f58d22..4ff5afe0fc3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancer.java @@ -194,7 +194,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { public void testSkewCost() { Configuration conf = HBaseConfiguration.create(); StochasticLoadBalancer.CostFunction - costFunction = new StochasticLoadBalancer.RegionCountSkewCostFunction(conf, 1); + costFunction = new StochasticLoadBalancer.RegionCountSkewCostFunction(conf); for (int[] mockCluster : clusterStateMocks) { costFunction.init(mockCluster(mockCluster)); double cost = costFunction.cost(); @@ -319,7 +319,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { BaseLoadBalancer.Cluster cluster; - cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null, null); + cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null); costFunction.init(cluster); double costWithoutReplicas = costFunction.cost(); assertEquals(0, costWithoutReplicas, 0); @@ -329,7 +329,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { clusterState.firstEntry().getValue().get(0),1); clusterState.lastEntry().getValue().add(replica1); - cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null, null); + cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null); costFunction.init(cluster); double costWith1ReplicaDifferentServer = costFunction.cost(); @@ -339,7 +339,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase { HRegionInfo replica2 = RegionReplicaUtil.getRegionInfoForReplica(replica1, 2); clusterState.lastEntry().getValue().add(replica2); - cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null, null); + cluster = new 
        BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null);
     costFunction.init(cluster);
     double costWith1ReplicaSameServer = costFunction.cost();
@@ -362,7 +362,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
     entry.getValue().add(replica2);
     it.next().getValue().add(replica3); //2nd server
-    cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null, null);
+    cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null);
     costFunction.init(cluster);
     double costWith3ReplicasSameServer = costFunction.cost();
@@ -376,7 +376,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
     clusterState.lastEntry().getValue().add(replica2);
     clusterState.lastEntry().getValue().add(replica3);
-    cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null, null);
+    cluster = new BaseLoadBalancer.Cluster(master, clusterState, null, null, null, null);
     costFunction.init(cluster);
     double costWith2ReplicasOnTwoServers = costFunction.cost();
@@ -396,7 +396,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
     // until the step above s1 holds two replicas of a region
     regions = randomRegions(1);
     map.put(s2, regions);
-    assertTrue(loadBalancer.needsBalance(new Cluster(master, map, null, null, null, null, null)));
+    assertTrue(loadBalancer.needsBalance(new Cluster(master, map, null, null, null, null)));
     // check for the case where there are two hosts on the same rack and there are two racks
     // and both the replicas are on the same rack
     map.clear();
@@ -407,7 +407,7 @@ public class TestStochasticLoadBalancer extends BalancerTestBase {
     map.put(s2, regionsOnS2);
     // add another server so that the cluster has some host on another rack
     map.put(ServerName.valueOf("host2", 1000, 11111), randomRegions(1));
-    assertTrue(loadBalancer.needsBalance(new Cluster(master, map, null, null, null, null,
+    assertTrue(loadBalancer.needsBalance(new Cluster(master, map, null, null, null,
         new ForTestRackManagerOne())));
   }

diff --git a/src/main/docbkx/getting_started.xml b/src/main/docbkx/getting_started.xml
index 2e0ee4e1174..79478bab838 100644
--- a/src/main/docbkx/getting_started.xml
+++ b/src/main/docbkx/getting_started.xml
@@ -348,22 +348,13 @@ $
           Configure HBase.
           Edit the hbase-site.xml configuration. First, add the following
-            properties. Property hbase.cluster.distributed is set to true
-            (Its default is false), which directs HBase to run in distributed mode,
-            with one JVM instance per daemon. Since HBase version 1.0.0, a HMaster is also a
-            RegionServer. So in pseudo-distributed mode, just one HMaster (also a RegionServer)
-            instance is started by default. Because there is just one RegionServer (the HMaster),
-            property hbase.master.wait.on.regionservers.mintostart should be set to
-            1 (Its default is changed to 2 since version 1.0.0).
+            property, which directs HBase to run in distributed mode, with one JVM instance per
+            daemon.
             hbase.cluster.distributed
             true
-
-
-            hbase.master.wait.on.regionservers.mintostart
-            1
 ]]>
           Next, change the hbase.rootdir from the local filesystem to the address
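
For reference, a minimal hbase-site.xml for the pseudo-distributed setup described in the documentation change above might look like the sketch below. Only hbase.cluster.distributed is required by this step; the hbase.rootdir value shown (hdfs://localhost:8020/hbase) is an assumption and must match the address of the local HDFS NameNode.

<configuration>
  <!-- Run each HBase daemon in its own JVM instead of everything in one local JVM. -->
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <!-- Assumed NameNode address; adjust host and port to the local HDFS instance. -->
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://localhost:8020/hbase</value>
  </property>
</configuration>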