HBASE-25775 Use a special balancer to deal with maintenance mode (#3161)

Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
This commit is contained in:
Duo Zhang 2021-04-16 09:50:24 +08:00
parent 32207cd478
commit a177fb4c62
4 changed files with 192 additions and 48 deletions

View File

@ -107,6 +107,7 @@ import org.apache.hadoop.hbase.master.balancer.BalancerChore;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
import org.apache.hadoop.hbase.master.balancer.MaintenanceLoadBalancer;
import org.apache.hadoop.hbase.master.cleaner.DirScanPool;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
@ -670,9 +671,13 @@ public class HMaster extends HRegionServer implements MasterServices {
* Initialize all ZK based system trackers. But do not include {@link RegionServerTracker}, it
* should have already been initialized along with {@link ServerManager}.
*/
@InterfaceAudience.Private
protected void initializeZKBasedSystemTrackers()
throws IOException, InterruptedException, KeeperException, ReplicationException {
private void initializeZKBasedSystemTrackers()
throws IOException, KeeperException, ReplicationException {
if (maintenanceMode) {
// in maintenance mode, always use MaintenanceLoadBalancer.
conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MaintenanceLoadBalancer.class,
LoadBalancer.class);
}
this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
this.loadBalancerTracker.start();

View File

@ -35,6 +35,7 @@ import java.util.NavigableMap;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.commons.lang3.NotImplementedException;
@ -1027,7 +1028,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
protected float overallSlop;
protected Configuration config = HBaseConfiguration.create();
protected RackManager rackManager;
private static final Random RANDOM = new Random(System.currentTimeMillis());
private static final Logger LOG = LoggerFactory.getLogger(BaseLoadBalancer.class);
protected MetricsBalancer metricsBalancer = null;
protected ClusterMetrics clusterStatus = null;
@ -1041,17 +1041,21 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
@Deprecated
protected boolean onlySystemTablesOnMaster;
protected boolean maintenanceMode;
@Override
public void setConf(Configuration conf) {
this.config = conf;
setSlop(conf);
if (slop < 0) slop = 0;
else if (slop > 1) slop = 1;
if (slop < 0) {
slop = 0;
} else if (slop > 1) {
slop = 1;
}
if (overallSlop < 0) overallSlop = 0;
else if (overallSlop > 1) overallSlop = 1;
if (overallSlop < 0) {
overallSlop = 0;
} else if (overallSlop > 1) {
overallSlop = 1;
}
this.onlySystemTablesOnMaster = LoadBalancer.isSystemTablesOnlyOnMaster(this.config);
@ -1061,8 +1065,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
}
this.isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable);
// Print out base configs. Don't print overallSlop since it is for simple balancer exclusively.
LOG.info("slop={}, systemTablesOnMaster={}",
this.slop, this.onlySystemTablesOnMaster);
LOG.info("slop={}, systemTablesOnMaster={}", this.slop, this.onlySystemTablesOnMaster);
}
protected void setSlop(Configuration conf) {
@ -1079,8 +1082,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
*/
@Deprecated
public boolean shouldBeOnMaster(RegionInfo region) {
return (this.maintenanceMode || this.onlySystemTablesOnMaster)
&& region.getTable().isSystemTable();
return this.onlySystemTablesOnMaster && region.getTable().isSystemTable();
}
/**
@ -1147,7 +1149,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
protected Map<ServerName, List<RegionInfo>> assignMasterSystemRegions(
Collection<RegionInfo> regions, List<ServerName> servers) {
Map<ServerName, List<RegionInfo>> assignments = new TreeMap<>();
if (this.maintenanceMode || this.onlySystemTablesOnMaster) {
if (this.onlySystemTablesOnMaster) {
if (masterServerName != null && servers.contains(masterServerName)) {
assignments.put(masterServerName, new ArrayList<>());
for (RegionInfo region : regions) {
@ -1181,9 +1183,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
if (useRegionFinder) {
this.regionFinder.setServices(masterServices);
}
if (this.services.isInMaintenanceMode()) {
this.maintenanceMode = true;
}
}
@Override
@ -1298,7 +1297,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
/**
* only need assign system table
*/
if (this.maintenanceMode || regions.isEmpty()) {
if (regions.isEmpty()) {
return assignments;
}
@ -1438,7 +1437,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
regions = regions.entrySet().stream().filter(e -> !masterRegions.contains(e.getKey()))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
}
if (this.maintenanceMode || regions.isEmpty()) {
if (regions.isEmpty()) {
return assignments;
}
@ -1585,8 +1584,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
final int maxIterations = numServers * 4;
int iterations = 0;
List<ServerName> usedSNs = new ArrayList<>(servers.size());
Random rand = ThreadLocalRandom.current();
do {
int i = RANDOM.nextInt(numServers);
int i = rand.nextInt(numServers);
sn = servers.get(i);
if (!usedSNs.contains(sn)) {
usedSNs.add(sn);
@ -1616,13 +1616,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
*/
private void roundRobinAssignment(Cluster cluster, List<RegionInfo> regions,
List<ServerName> servers, Map<ServerName, List<RegionInfo>> assignments) {
Random rand = ThreadLocalRandom.current();
List<RegionInfo> unassignedRegions = new ArrayList<>();
int numServers = servers.size();
int numRegions = regions.size();
int max = (int) Math.ceil((float) numRegions / numServers);
int serverIdx = 0;
if (numServers > 1) {
serverIdx = RANDOM.nextInt(numServers);
serverIdx = rand.nextInt(numServers);
}
int regionIdx = 0;
for (int j = 0; j < numServers; j++) {
@ -1644,17 +1645,17 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
List<RegionInfo> lastFewRegions = new ArrayList<>();
// assign the remaining by going through the list and try to assign to servers one-by-one
serverIdx = RANDOM.nextInt(numServers);
OUTER : for (RegionInfo region : unassignedRegions) {
serverIdx = rand.nextInt(numServers);
for (RegionInfo region : unassignedRegions) {
boolean assigned = false;
INNER : for (int j = 0; j < numServers; j++) { // try all servers one by one
for (int j = 0; j < numServers; j++) { // try all servers one by one
ServerName server = servers.get((j + serverIdx) % numServers);
if (cluster.wouldLowerAvailability(region, server)) {
continue INNER;
continue;
} else {
assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
cluster.doAssignRegion(region, server);
serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
serverIdx = (j + serverIdx + 1) % numServers; // remain from next server
assigned = true;
break;
}
@ -1666,7 +1667,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
// just sprinkle the rest of the regions on random regionservers. The balanceCluster will
// make it optimal later. we can end up with this if numReplicas > numServers.
for (RegionInfo region : lastFewRegions) {
int i = RANDOM.nextInt(numServers);
int i = rand.nextInt(numServers);
ServerName server = servers.get(i);
assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
cluster.doAssignRegion(region, server);

View File

@ -0,0 +1,131 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.balancer;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.yetus.audience.InterfaceAudience;
/**
* a balancer which is only used in maintenance mode.
*/
@InterfaceAudience.Private
public class MaintenanceLoadBalancer extends Configured implements LoadBalancer {
private volatile boolean stopped = false;
@Override
public void stop(String why) {
stopped = true;
}
@Override
public boolean isStopped() {
return stopped;
}
@Override
public void setClusterMetrics(ClusterMetrics st) {
}
@Override
public void setMasterServices(MasterServices masterServices) {
}
@Override
public List<RegionPlan> balanceCluster(
Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) throws IOException {
// do not need to balance in maintenance mode
return Collections.emptyList();
}
@Override
public List<RegionPlan> balanceTable(TableName tableName,
Map<ServerName, List<RegionInfo>> loadOfOneTable) {
return Collections.emptyList();
}
private Map<ServerName, List<RegionInfo>> assign(Collection<RegionInfo> regions,
List<ServerName> servers) {
// should only have 1 region server in maintenance mode
assert servers.size() == 1;
List<RegionInfo> systemRegions =
regions.stream().filter(r -> r.getTable().isSystemTable()).collect(Collectors.toList());
if (!systemRegions.isEmpty()) {
return Collections.singletonMap(servers.get(0), systemRegions);
} else {
return Collections.emptyMap();
}
}
@Override
public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
List<ServerName> servers) {
return assign(regions, servers);
}
@Override
public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions,
List<ServerName> servers) {
return assign(regions.keySet(), servers);
}
@Override
public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers) {
// should only have 1 region server in maintenance mode
assert servers.size() == 1;
return regionInfo.getTable().isSystemTable() ? servers.get(0) : null;
}
@Override
public void initialize() {
}
@Override
public void regionOnline(RegionInfo regionInfo, ServerName sn) {
}
@Override
public void regionOffline(RegionInfo regionInfo) {
}
@Override
public void onConfigurationChange(Configuration conf) {
}
@Override
public void postMasterStartupInitialize() {
}
@Override
public void updateBalancerStatus(boolean status) {
}
}

View File

@ -18,9 +18,11 @@
package org.apache.hadoop.hbase.master;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.util.Arrays;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.hadoop.conf.Configuration;
@ -29,7 +31,10 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.StartMiniClusterOption;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.AsyncConnection;
import org.apache.hadoop.hbase.client.AsyncTable;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
@ -48,12 +53,12 @@ import org.junit.rules.TestName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@Category({MasterTests.class, LargeTests.class})
@Category({ MasterTests.class, LargeTests.class })
public class TestMasterRepairMode {
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestMasterRepairMode.class);
HBaseClassTestRule.forClass(TestMasterRepairMode.class);
@Rule
public TestName name = new TestName();
@ -84,16 +89,14 @@ public class TestMasterRepairMode {
public void testNewCluster() throws Exception {
enableMaintenanceMode();
TEST_UTIL.startMiniCluster(StartMiniClusterOption.builder()
.numRegionServers(0)
.numDataNodes(3)
.build());
TEST_UTIL.startMiniCluster(
StartMiniClusterOption.builder().numRegionServers(0).numDataNodes(3).build());
Connection conn = TEST_UTIL.getConnection();
assertTrue(conn.getAdmin().isMasterInMaintenanceMode());
try (Table table = conn.getTable(TableName.META_TABLE_NAME);
ResultScanner scanner = table.getScanner(new Scan())) {
ResultScanner scanner = table.getScanner(new Scan())) {
assertNotNull("Could not read meta.", scanner.next());
}
}
@ -113,25 +116,29 @@ public class TestMasterRepairMode {
LOG.info("Starting master-only");
enableMaintenanceMode();
TEST_UTIL.startMiniHBaseCluster(StartMiniClusterOption.builder()
.numRegionServers(0).createRootDir(false).build());
TEST_UTIL.startMiniHBaseCluster(
StartMiniClusterOption.builder().numRegionServers(0).createRootDir(false).build());
Connection conn = TEST_UTIL.getConnection();
assertTrue(conn.getAdmin().isMasterInMaintenanceMode());
try (Table table = conn.getTable(TableName.META_TABLE_NAME);
ResultScanner scanner = table.getScanner(HConstants.TABLE_FAMILY);
Stream<Result> results = StreamSupport.stream(scanner.spliterator(), false)) {
ResultScanner scanner = table.getScanner(HConstants.TABLE_FAMILY);
Stream<Result> results = StreamSupport.stream(scanner.spliterator(), false)) {
assertTrue("Did not find user table records while reading hbase:meta",
results.anyMatch(r -> Arrays.equals(r.getRow(), testRepairMode.getName())));
results.anyMatch(r -> Arrays.equals(r.getRow(), testRepairMode.getName())));
}
try (Table table = conn.getTable(testRepairMode);
ResultScanner scanner = table.getScanner(new Scan())) {
scanner.next();
fail("Should not be able to access user-space tables in repair mode.");
} catch (Exception e) {
// Expected
try (AsyncConnection asyncConn =
ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get()) {
// use async table so we can set the timeout and retry value to let the operation fail fast
AsyncTable<?> table = asyncConn.getTableBuilder(testRepairMode)
.setScanTimeout(5, TimeUnit.SECONDS).setMaxRetries(2).build();
assertThrows("Should not be able to access user-space tables in repair mode.",
Exception.class, () -> {
try (ResultScanner scanner = table.getScanner(new Scan())) {
scanner.next();
}
});
}
}
}