HBASE-25775 Use a special balancer to deal with maintenance mode (#3161)

Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
This commit is contained in:
Duo Zhang 2021-04-16 09:50:24 +08:00 committed by GitHub
parent 533c84d330
commit bf78246b4f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 189 additions and 47 deletions

View File

@ -103,6 +103,7 @@ import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.master.balancer.BalancerChore; import org.apache.hadoop.hbase.master.balancer.BalancerChore;
import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore; import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory; import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
import org.apache.hadoop.hbase.master.balancer.MaintenanceLoadBalancer;
import org.apache.hadoop.hbase.master.cleaner.DirScanPool; import org.apache.hadoop.hbase.master.cleaner.DirScanPool;
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner; import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
import org.apache.hadoop.hbase.master.cleaner.LogCleaner; import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
@ -677,7 +678,13 @@ public class HMaster extends HRegionServer implements MasterServices {
* should have already been initialized along with {@link ServerManager}. * should have already been initialized along with {@link ServerManager}.
*/ */
private void initializeZKBasedSystemTrackers() private void initializeZKBasedSystemTrackers()
throws IOException, KeeperException, ReplicationException { throws IOException, KeeperException, ReplicationException {
if (maintenanceMode) {
// in maintenance mode, always use MaintenanceLoadBalancer.
conf.unset(LoadBalancer.HBASE_RSGROUP_LOADBALANCER_CLASS);
conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MaintenanceLoadBalancer.class,
LoadBalancer.class);
}
this.balancer = new RSGroupBasedLoadBalancer(); this.balancer = new RSGroupBasedLoadBalancer();
this.balancer.setConf(conf); this.balancer.setConf(conf);
this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this); this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);

View File

@ -35,6 +35,7 @@ import java.util.NavigableMap;
import java.util.Random; import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Predicate; import java.util.function.Predicate;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.lang3.NotImplementedException;
@ -1101,7 +1102,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
protected float overallSlop; protected float overallSlop;
protected Configuration config = HBaseConfiguration.create(); protected Configuration config = HBaseConfiguration.create();
protected RackManager rackManager; protected RackManager rackManager;
private static final Random RANDOM = new Random(System.currentTimeMillis());
private static final Logger LOG = LoggerFactory.getLogger(BaseLoadBalancer.class); private static final Logger LOG = LoggerFactory.getLogger(BaseLoadBalancer.class);
protected MetricsBalancer metricsBalancer = null; protected MetricsBalancer metricsBalancer = null;
protected ClusterMetrics clusterStatus = null; protected ClusterMetrics clusterStatus = null;
@ -1115,17 +1115,21 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
@Deprecated @Deprecated
protected boolean onlySystemTablesOnMaster; protected boolean onlySystemTablesOnMaster;
protected boolean maintenanceMode;
@Override @Override
public void setConf(Configuration conf) { public void setConf(Configuration conf) {
this.config = conf; this.config = conf;
setSlop(conf); setSlop(conf);
if (slop < 0) slop = 0; if (slop < 0) {
else if (slop > 1) slop = 1; slop = 0;
} else if (slop > 1) {
slop = 1;
}
if (overallSlop < 0) overallSlop = 0; if (overallSlop < 0) {
else if (overallSlop > 1) overallSlop = 1; overallSlop = 0;
} else if (overallSlop > 1) {
overallSlop = 1;
}
this.onlySystemTablesOnMaster = LoadBalancer.isSystemTablesOnlyOnMaster(this.config); this.onlySystemTablesOnMaster = LoadBalancer.isSystemTablesOnlyOnMaster(this.config);
@ -1135,8 +1139,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
} }
this.isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable); this.isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable);
// Print out base configs. Don't print overallSlop since it for simple balancer exclusively. // Print out base configs. Don't print overallSlop since it for simple balancer exclusively.
LOG.info("slop={}, systemTablesOnMaster={}", LOG.info("slop={}, systemTablesOnMaster={}", this.slop, this.onlySystemTablesOnMaster);
this.slop, this.onlySystemTablesOnMaster);
} }
protected void setSlop(Configuration conf) { protected void setSlop(Configuration conf) {
@ -1153,8 +1156,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
*/ */
@Deprecated @Deprecated
public boolean shouldBeOnMaster(RegionInfo region) { public boolean shouldBeOnMaster(RegionInfo region) {
return (this.maintenanceMode || this.onlySystemTablesOnMaster) return this.onlySystemTablesOnMaster && region.getTable().isSystemTable();
&& region.getTable().isSystemTable();
} }
/** /**
@ -1221,7 +1223,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
protected Map<ServerName, List<RegionInfo>> assignMasterSystemRegions( protected Map<ServerName, List<RegionInfo>> assignMasterSystemRegions(
Collection<RegionInfo> regions, List<ServerName> servers) { Collection<RegionInfo> regions, List<ServerName> servers) {
Map<ServerName, List<RegionInfo>> assignments = new TreeMap<>(); Map<ServerName, List<RegionInfo>> assignments = new TreeMap<>();
if (this.maintenanceMode || this.onlySystemTablesOnMaster) { if (this.onlySystemTablesOnMaster) {
if (masterServerName != null && servers.contains(masterServerName)) { if (masterServerName != null && servers.contains(masterServerName)) {
assignments.put(masterServerName, new ArrayList<>()); assignments.put(masterServerName, new ArrayList<>());
for (RegionInfo region : regions) { for (RegionInfo region : regions) {
@ -1255,9 +1257,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
if (useRegionFinder) { if (useRegionFinder) {
this.regionFinder.setClusterInfoProvider(new MasterClusterInfoProvider(services)); this.regionFinder.setClusterInfoProvider(new MasterClusterInfoProvider(services));
} }
if (this.services.isInMaintenanceMode()) {
this.maintenanceMode = true;
}
} }
@Override @Override
@ -1372,7 +1371,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
/** /**
* only need assign system table * only need assign system table
*/ */
if (this.maintenanceMode || regions.isEmpty()) { if (regions.isEmpty()) {
return assignments; return assignments;
} }
@ -1512,7 +1511,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
regions = regions.entrySet().stream().filter(e -> !masterRegions.contains(e.getKey())) regions = regions.entrySet().stream().filter(e -> !masterRegions.contains(e.getKey()))
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
} }
if (this.maintenanceMode || regions.isEmpty()) { if (regions.isEmpty()) {
return assignments; return assignments;
} }
@ -1659,8 +1658,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
final int maxIterations = numServers * 4; final int maxIterations = numServers * 4;
int iterations = 0; int iterations = 0;
List<ServerName> usedSNs = new ArrayList<>(servers.size()); List<ServerName> usedSNs = new ArrayList<>(servers.size());
Random rand = ThreadLocalRandom.current();
do { do {
int i = RANDOM.nextInt(numServers); int i = rand.nextInt(numServers);
sn = servers.get(i); sn = servers.get(i);
if (!usedSNs.contains(sn)) { if (!usedSNs.contains(sn)) {
usedSNs.add(sn); usedSNs.add(sn);
@ -1690,13 +1690,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
*/ */
private void roundRobinAssignment(Cluster cluster, List<RegionInfo> regions, private void roundRobinAssignment(Cluster cluster, List<RegionInfo> regions,
List<ServerName> servers, Map<ServerName, List<RegionInfo>> assignments) { List<ServerName> servers, Map<ServerName, List<RegionInfo>> assignments) {
Random rand = ThreadLocalRandom.current();
List<RegionInfo> unassignedRegions = new ArrayList<>(); List<RegionInfo> unassignedRegions = new ArrayList<>();
int numServers = servers.size(); int numServers = servers.size();
int numRegions = regions.size(); int numRegions = regions.size();
int max = (int) Math.ceil((float) numRegions / numServers); int max = (int) Math.ceil((float) numRegions / numServers);
int serverIdx = 0; int serverIdx = 0;
if (numServers > 1) { if (numServers > 1) {
serverIdx = RANDOM.nextInt(numServers); serverIdx = rand.nextInt(numServers);
} }
int regionIdx = 0; int regionIdx = 0;
for (int j = 0; j < numServers; j++) { for (int j = 0; j < numServers; j++) {
@ -1718,17 +1719,17 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
List<RegionInfo> lastFewRegions = new ArrayList<>(); List<RegionInfo> lastFewRegions = new ArrayList<>();
// assign the remaining by going through the list and try to assign to servers one-by-one // assign the remaining by going through the list and try to assign to servers one-by-one
serverIdx = RANDOM.nextInt(numServers); serverIdx = rand.nextInt(numServers);
OUTER : for (RegionInfo region : unassignedRegions) { for (RegionInfo region : unassignedRegions) {
boolean assigned = false; boolean assigned = false;
INNER : for (int j = 0; j < numServers; j++) { // try all servers one by one for (int j = 0; j < numServers; j++) { // try all servers one by one
ServerName server = servers.get((j + serverIdx) % numServers); ServerName server = servers.get((j + serverIdx) % numServers);
if (cluster.wouldLowerAvailability(region, server)) { if (cluster.wouldLowerAvailability(region, server)) {
continue INNER; continue;
} else { } else {
assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region); assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
cluster.doAssignRegion(region, server); cluster.doAssignRegion(region, server);
serverIdx = (j + serverIdx + 1) % numServers; //remain from next server serverIdx = (j + serverIdx + 1) % numServers; // remain from next server
assigned = true; assigned = true;
break; break;
} }
@ -1740,7 +1741,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
// just sprinkle the rest of the regions on random regionservers. The balanceCluster will // just sprinkle the rest of the regions on random regionservers. The balanceCluster will
// make it optimal later. we can end up with this if numReplicas > numServers. // make it optimal later. we can end up with this if numReplicas > numServers.
for (RegionInfo region : lastFewRegions) { for (RegionInfo region : lastFewRegions) {
int i = RANDOM.nextInt(numServers); int i = rand.nextInt(numServers);
ServerName server = servers.get(i); ServerName server = servers.get(i);
assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region); assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
cluster.doAssignRegion(region, server); cluster.doAssignRegion(region, server);

View File

@ -0,0 +1,132 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.balancer;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.master.LoadBalancer;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.yetus.audience.InterfaceAudience;
/**
 * A {@link LoadBalancer} which is only used in maintenance mode.
 * <p>
 * It never produces any balance plan, and its assignment methods only place regions that belong to
 * system tables; regions of any other table are left out of the returned assignments. System
 * regions are always placed on the first server of the supplied list, which is expected to contain
 * exactly one server.
 * <p>
 * All lifecycle and notification callbacks of {@link LoadBalancer} are implemented as no-ops.
 */
@InterfaceAudience.Private
public class MaintenanceLoadBalancer extends Configured implements LoadBalancer {

  /** Set once {@link #stop(String)} has been called; volatile so other threads see the update. */
  private volatile boolean stopped = false;

  /**
   * Marks this balancer as stopped.
   * @param why the reason for stopping, currently unused
   */
  @Override
  public void stop(String why) {
    stopped = true;
  }

  /**
   * @return {@code true} if {@link #stop(String)} has been called
   */
  @Override
  public boolean isStopped() {
    return stopped;
  }

  /** No-op, cluster metrics are not used by this balancer. */
  @Override
  public void setClusterMetrics(ClusterMetrics st) {
  }

  /** No-op, master services are not used by this balancer. */
  @Override
  public void setMasterServices(MasterServices masterServices) {
  }

  /**
   * @return always an empty list, as we do not need to balance in maintenance mode
   */
  @Override
  public List<RegionPlan> balanceCluster(
    Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) throws IOException {
    // do not need to balance in maintenance mode
    return Collections.emptyList();
  }

  /**
   * @return always an empty list, as we do not need to balance in maintenance mode
   */
  @Override
  public List<RegionPlan> balanceTable(TableName tableName,
    Map<ServerName, List<RegionInfo>> loadOfOneTable) {
    return Collections.emptyList();
  }

  /**
   * Shared implementation of the bulk assignment methods.
   * @param regions the regions which need to be assigned
   * @param servers the candidate servers, expected to contain exactly one server
   * @return an immutable map from the first server to all the system table regions in
   *         {@code regions}, or an empty map if there is no system table region or no candidate
   *         server
   */
  private Map<ServerName, List<RegionInfo>> assign(Collection<RegionInfo> regions,
    List<ServerName> servers) {
    // Assertions are usually disabled at runtime, so guard explicitly instead of letting
    // servers.get(0) fail with an IndexOutOfBoundsException when there is no candidate server.
    if (servers == null || servers.isEmpty()) {
      return Collections.emptyMap();
    }
    // should only have 1 region server in maintenance mode
    assert servers.size() == 1;
    List<RegionInfo> systemRegions =
      regions.stream().filter(r -> r.getTable().isSystemTable()).collect(Collectors.toList());
    if (systemRegions.isEmpty()) {
      return Collections.emptyMap();
    }
    return Collections.singletonMap(servers.get(0), systemRegions);
  }

  /**
   * Assigns the system table regions in {@code regions} to the only server, ignoring all other
   * regions.
   * @return the assignments, empty if there is nothing to assign or no server to assign to
   */
  @Override
  public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
    List<ServerName> servers) throws IOException {
    return assign(regions, servers);
  }

  /**
   * Assigns the system table regions in {@code regions} to the only server. The previous
   * locations stored as values of {@code regions} are not consulted.
   * @return the assignments, empty if there is nothing to assign or no server to assign to
   */
  @Override
  public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions,
    List<ServerName> servers) throws IOException {
    return assign(regions.keySet(), servers);
  }

  /**
   * @return the first server of {@code servers} if {@code regionInfo} belongs to a system table,
   *         otherwise {@code null}; also {@code null} if there is no candidate server
   */
  @Override
  public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers)
    throws IOException {
    // Same reasoning as in assign(): do not rely on the assertion to protect servers.get(0).
    if (servers == null || servers.isEmpty()) {
      return null;
    }
    // should only have 1 region server in maintenance mode
    assert servers.size() == 1;
    return regionInfo.getTable().isSystemTable() ? servers.get(0) : null;
  }

  /** No-op, there is nothing to initialize. */
  @Override
  public void initialize() {
  }

  /** No-op, region locations are not tracked by this balancer. */
  @Override
  public void regionOnline(RegionInfo regionInfo, ServerName sn) {
  }

  /** No-op, region locations are not tracked by this balancer. */
  @Override
  public void regionOffline(RegionInfo regionInfo) {
  }

  /** No-op, configuration changes are ignored by this balancer. */
  @Override
  public void onConfigurationChange(Configuration conf) {
  }

  /** No-op. */
  @Override
  public void postMasterStartupInitialize() {
  }

  /** No-op, the balancer status is ignored as this balancer never balances. */
  @Override
  public void updateBalancerStatus(boolean status) {
  }
}

View File

@ -18,9 +18,11 @@
package org.apache.hadoop.hbase.master; package org.apache.hadoop.hbase.master;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThrows;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.util.Arrays; import java.util.Arrays;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream; import java.util.stream.Stream;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@ -29,6 +31,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.StartMiniClusterOption; import org.apache.hadoop.hbase.StartMiniClusterOption;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.AsyncTable;
import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Result;
@ -48,12 +51,12 @@ import org.junit.rules.TestName;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@Category({MasterTests.class, LargeTests.class}) @Category({ MasterTests.class, LargeTests.class })
public class TestMasterRepairMode { public class TestMasterRepairMode {
@ClassRule @ClassRule
public static final HBaseClassTestRule CLASS_RULE = public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestMasterRepairMode.class); HBaseClassTestRule.forClass(TestMasterRepairMode.class);
@Rule @Rule
public TestName name = new TestName(); public TestName name = new TestName();
@ -84,16 +87,14 @@ public class TestMasterRepairMode {
public void testNewCluster() throws Exception { public void testNewCluster() throws Exception {
enableMaintenanceMode(); enableMaintenanceMode();
TEST_UTIL.startMiniCluster(StartMiniClusterOption.builder() TEST_UTIL.startMiniCluster(
.numRegionServers(0) StartMiniClusterOption.builder().numRegionServers(0).numDataNodes(3).build());
.numDataNodes(3)
.build());
Connection conn = TEST_UTIL.getConnection(); Connection conn = TEST_UTIL.getConnection();
assertTrue(conn.getAdmin().isMasterInMaintenanceMode()); assertTrue(conn.getAdmin().isMasterInMaintenanceMode());
try (Table table = conn.getTable(TableName.META_TABLE_NAME); try (Table table = conn.getTable(TableName.META_TABLE_NAME);
ResultScanner scanner = table.getScanner(new Scan())) { ResultScanner scanner = table.getScanner(new Scan())) {
assertNotNull("Could not read meta.", scanner.next()); assertNotNull("Could not read meta.", scanner.next());
} }
} }
@ -113,25 +114,26 @@ public class TestMasterRepairMode {
LOG.info("Starting master-only"); LOG.info("Starting master-only");
enableMaintenanceMode(); enableMaintenanceMode();
TEST_UTIL.startMiniHBaseCluster(StartMiniClusterOption.builder() TEST_UTIL.startMiniHBaseCluster(
.numRegionServers(0).createRootDir(false).build()); StartMiniClusterOption.builder().numRegionServers(0).createRootDir(false).build());
Connection conn = TEST_UTIL.getConnection(); Connection conn = TEST_UTIL.getConnection();
assertTrue(conn.getAdmin().isMasterInMaintenanceMode()); assertTrue(conn.getAdmin().isMasterInMaintenanceMode());
try (Table table = conn.getTable(TableName.META_TABLE_NAME); try (Table table = conn.getTable(TableName.META_TABLE_NAME);
ResultScanner scanner = table.getScanner(HConstants.TABLE_FAMILY); ResultScanner scanner = table.getScanner(HConstants.TABLE_FAMILY);
Stream<Result> results = StreamSupport.stream(scanner.spliterator(), false)) { Stream<Result> results = StreamSupport.stream(scanner.spliterator(), false)) {
assertTrue("Did not find user table records while reading hbase:meta", assertTrue("Did not find user table records while reading hbase:meta",
results.anyMatch(r -> Arrays.equals(r.getRow(), testRepairMode.getName()))); results.anyMatch(r -> Arrays.equals(r.getRow(), testRepairMode.getName())));
}
try (Table table = conn.getTable(testRepairMode);
ResultScanner scanner = table.getScanner(new Scan())) {
scanner.next();
fail("Should not be able to access user-space tables in repair mode.");
} catch (Exception e) {
// Expected
} }
// use async table so we can set the timeout and retry value to let the operation fail fast
AsyncTable<?> table = conn.toAsyncConnection().getTableBuilder(testRepairMode)
.setScanTimeout(5, TimeUnit.SECONDS).setMaxRetries(2).build();
assertThrows("Should not be able to access user-space tables in repair mode.", Exception.class,
() -> {
try (ResultScanner scanner = table.getScanner(new Scan())) {
scanner.next();
}
});
} }
} }