HBASE-25775 Use a special balancer to deal with maintenance mode (#3161)
Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org>
This commit is contained in:
parent
32207cd478
commit
a177fb4c62
|
@ -107,6 +107,7 @@ import org.apache.hadoop.hbase.master.balancer.BalancerChore;
|
|||
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
|
||||
import org.apache.hadoop.hbase.master.balancer.ClusterStatusChore;
|
||||
import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory;
|
||||
import org.apache.hadoop.hbase.master.balancer.MaintenanceLoadBalancer;
|
||||
import org.apache.hadoop.hbase.master.cleaner.DirScanPool;
|
||||
import org.apache.hadoop.hbase.master.cleaner.HFileCleaner;
|
||||
import org.apache.hadoop.hbase.master.cleaner.LogCleaner;
|
||||
|
@ -670,9 +671,13 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
* Initialize all ZK based system trackers. But do not include {@link RegionServerTracker}, it
|
||||
* should have already been initialized along with {@link ServerManager}.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
protected void initializeZKBasedSystemTrackers()
|
||||
throws IOException, InterruptedException, KeeperException, ReplicationException {
|
||||
private void initializeZKBasedSystemTrackers()
|
||||
throws IOException, KeeperException, ReplicationException {
|
||||
if (maintenanceMode) {
|
||||
// in maintenance mode, always use MaintenanceLoadBalancer.
|
||||
conf.setClass(HConstants.HBASE_MASTER_LOADBALANCER_CLASS, MaintenanceLoadBalancer.class,
|
||||
LoadBalancer.class);
|
||||
}
|
||||
this.balancer = LoadBalancerFactory.getLoadBalancer(conf);
|
||||
this.loadBalancerTracker = new LoadBalancerTracker(zooKeeper, this);
|
||||
this.loadBalancerTracker.start();
|
||||
|
|
|
@ -35,6 +35,7 @@ import java.util.NavigableMap;
|
|||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.commons.lang3.NotImplementedException;
|
||||
|
@ -1027,7 +1028,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
protected float overallSlop;
|
||||
protected Configuration config = HBaseConfiguration.create();
|
||||
protected RackManager rackManager;
|
||||
private static final Random RANDOM = new Random(System.currentTimeMillis());
|
||||
private static final Logger LOG = LoggerFactory.getLogger(BaseLoadBalancer.class);
|
||||
protected MetricsBalancer metricsBalancer = null;
|
||||
protected ClusterMetrics clusterStatus = null;
|
||||
|
@ -1041,17 +1041,21 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
@Deprecated
|
||||
protected boolean onlySystemTablesOnMaster;
|
||||
|
||||
protected boolean maintenanceMode;
|
||||
|
||||
@Override
|
||||
public void setConf(Configuration conf) {
|
||||
this.config = conf;
|
||||
setSlop(conf);
|
||||
if (slop < 0) slop = 0;
|
||||
else if (slop > 1) slop = 1;
|
||||
if (slop < 0) {
|
||||
slop = 0;
|
||||
} else if (slop > 1) {
|
||||
slop = 1;
|
||||
}
|
||||
|
||||
if (overallSlop < 0) overallSlop = 0;
|
||||
else if (overallSlop > 1) overallSlop = 1;
|
||||
if (overallSlop < 0) {
|
||||
overallSlop = 0;
|
||||
} else if (overallSlop > 1) {
|
||||
overallSlop = 1;
|
||||
}
|
||||
|
||||
this.onlySystemTablesOnMaster = LoadBalancer.isSystemTablesOnlyOnMaster(this.config);
|
||||
|
||||
|
@ -1061,8 +1065,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
}
|
||||
this.isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable);
|
||||
// Print out base configs. Don't print overallSlop since it for simple balancer exclusively.
|
||||
LOG.info("slop={}, systemTablesOnMaster={}",
|
||||
this.slop, this.onlySystemTablesOnMaster);
|
||||
LOG.info("slop={}, systemTablesOnMaster={}", this.slop, this.onlySystemTablesOnMaster);
|
||||
}
|
||||
|
||||
protected void setSlop(Configuration conf) {
|
||||
|
@ -1079,8 +1082,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
*/
|
||||
@Deprecated
|
||||
public boolean shouldBeOnMaster(RegionInfo region) {
|
||||
return (this.maintenanceMode || this.onlySystemTablesOnMaster)
|
||||
&& region.getTable().isSystemTable();
|
||||
return this.onlySystemTablesOnMaster && region.getTable().isSystemTable();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1147,7 +1149,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
protected Map<ServerName, List<RegionInfo>> assignMasterSystemRegions(
|
||||
Collection<RegionInfo> regions, List<ServerName> servers) {
|
||||
Map<ServerName, List<RegionInfo>> assignments = new TreeMap<>();
|
||||
if (this.maintenanceMode || this.onlySystemTablesOnMaster) {
|
||||
if (this.onlySystemTablesOnMaster) {
|
||||
if (masterServerName != null && servers.contains(masterServerName)) {
|
||||
assignments.put(masterServerName, new ArrayList<>());
|
||||
for (RegionInfo region : regions) {
|
||||
|
@ -1181,9 +1183,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
if (useRegionFinder) {
|
||||
this.regionFinder.setServices(masterServices);
|
||||
}
|
||||
if (this.services.isInMaintenanceMode()) {
|
||||
this.maintenanceMode = true;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1298,7 +1297,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
/**
|
||||
* only need assign system table
|
||||
*/
|
||||
if (this.maintenanceMode || regions.isEmpty()) {
|
||||
if (regions.isEmpty()) {
|
||||
return assignments;
|
||||
}
|
||||
|
||||
|
@ -1438,7 +1437,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
regions = regions.entrySet().stream().filter(e -> !masterRegions.contains(e.getKey()))
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
|
||||
}
|
||||
if (this.maintenanceMode || regions.isEmpty()) {
|
||||
if (regions.isEmpty()) {
|
||||
return assignments;
|
||||
}
|
||||
|
||||
|
@ -1585,8 +1584,9 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
final int maxIterations = numServers * 4;
|
||||
int iterations = 0;
|
||||
List<ServerName> usedSNs = new ArrayList<>(servers.size());
|
||||
Random rand = ThreadLocalRandom.current();
|
||||
do {
|
||||
int i = RANDOM.nextInt(numServers);
|
||||
int i = rand.nextInt(numServers);
|
||||
sn = servers.get(i);
|
||||
if (!usedSNs.contains(sn)) {
|
||||
usedSNs.add(sn);
|
||||
|
@ -1616,13 +1616,14 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
*/
|
||||
private void roundRobinAssignment(Cluster cluster, List<RegionInfo> regions,
|
||||
List<ServerName> servers, Map<ServerName, List<RegionInfo>> assignments) {
|
||||
Random rand = ThreadLocalRandom.current();
|
||||
List<RegionInfo> unassignedRegions = new ArrayList<>();
|
||||
int numServers = servers.size();
|
||||
int numRegions = regions.size();
|
||||
int max = (int) Math.ceil((float) numRegions / numServers);
|
||||
int serverIdx = 0;
|
||||
if (numServers > 1) {
|
||||
serverIdx = RANDOM.nextInt(numServers);
|
||||
serverIdx = rand.nextInt(numServers);
|
||||
}
|
||||
int regionIdx = 0;
|
||||
for (int j = 0; j < numServers; j++) {
|
||||
|
@ -1644,17 +1645,17 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
|
||||
List<RegionInfo> lastFewRegions = new ArrayList<>();
|
||||
// assign the remaining by going through the list and try to assign to servers one-by-one
|
||||
serverIdx = RANDOM.nextInt(numServers);
|
||||
OUTER : for (RegionInfo region : unassignedRegions) {
|
||||
serverIdx = rand.nextInt(numServers);
|
||||
for (RegionInfo region : unassignedRegions) {
|
||||
boolean assigned = false;
|
||||
INNER : for (int j = 0; j < numServers; j++) { // try all servers one by one
|
||||
for (int j = 0; j < numServers; j++) { // try all servers one by one
|
||||
ServerName server = servers.get((j + serverIdx) % numServers);
|
||||
if (cluster.wouldLowerAvailability(region, server)) {
|
||||
continue INNER;
|
||||
continue;
|
||||
} else {
|
||||
assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
|
||||
cluster.doAssignRegion(region, server);
|
||||
serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
|
||||
serverIdx = (j + serverIdx + 1) % numServers; // remain from next server
|
||||
assigned = true;
|
||||
break;
|
||||
}
|
||||
|
@ -1666,7 +1667,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
// just sprinkle the rest of the regions on random regionservers. The balanceCluster will
|
||||
// make it optimal later. we can end up with this if numReplicas > numServers.
|
||||
for (RegionInfo region : lastFewRegions) {
|
||||
int i = RANDOM.nextInt(numServers);
|
||||
int i = rand.nextInt(numServers);
|
||||
ServerName server = servers.get(i);
|
||||
assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
|
||||
cluster.doAssignRegion(region, server);
|
||||
|
|
|
@ -0,0 +1,131 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.master.balancer;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.conf.Configured;
|
||||
import org.apache.hadoop.hbase.ClusterMetrics;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.master.LoadBalancer;
|
||||
import org.apache.hadoop.hbase.master.MasterServices;
|
||||
import org.apache.hadoop.hbase.master.RegionPlan;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
|
||||
/**
|
||||
* a balancer which is only used in maintenance mode.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
public class MaintenanceLoadBalancer extends Configured implements LoadBalancer {
|
||||
|
||||
private volatile boolean stopped = false;
|
||||
|
||||
@Override
|
||||
public void stop(String why) {
|
||||
stopped = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isStopped() {
|
||||
return stopped;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setClusterMetrics(ClusterMetrics st) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setMasterServices(MasterServices masterServices) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<RegionPlan> balanceCluster(
|
||||
Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable) throws IOException {
|
||||
// do not need to balance in maintenance mode
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<RegionPlan> balanceTable(TableName tableName,
|
||||
Map<ServerName, List<RegionInfo>> loadOfOneTable) {
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
private Map<ServerName, List<RegionInfo>> assign(Collection<RegionInfo> regions,
|
||||
List<ServerName> servers) {
|
||||
// should only have 1 region server in maintenance mode
|
||||
assert servers.size() == 1;
|
||||
List<RegionInfo> systemRegions =
|
||||
regions.stream().filter(r -> r.getTable().isSystemTable()).collect(Collectors.toList());
|
||||
if (!systemRegions.isEmpty()) {
|
||||
return Collections.singletonMap(servers.get(0), systemRegions);
|
||||
} else {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<ServerName, List<RegionInfo>> roundRobinAssignment(List<RegionInfo> regions,
|
||||
List<ServerName> servers) {
|
||||
return assign(regions, servers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<ServerName, List<RegionInfo>> retainAssignment(Map<RegionInfo, ServerName> regions,
|
||||
List<ServerName> servers) {
|
||||
return assign(regions.keySet(), servers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ServerName randomAssignment(RegionInfo regionInfo, List<ServerName> servers) {
|
||||
// should only have 1 region server in maintenance mode
|
||||
assert servers.size() == 1;
|
||||
return regionInfo.getTable().isSystemTable() ? servers.get(0) : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void regionOnline(RegionInfo regionInfo, ServerName sn) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void regionOffline(RegionInfo regionInfo) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onConfigurationChange(Configuration conf) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void postMasterStartupInitialize() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void updateBalancerStatus(boolean status) {
|
||||
}
|
||||
}
|
|
@ -18,9 +18,11 @@
|
|||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertThrows;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
|
@ -29,7 +31,10 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
|
|||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.StartMiniClusterOption;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.AsyncConnection;
|
||||
import org.apache.hadoop.hbase.client.AsyncTable;
|
||||
import org.apache.hadoop.hbase.client.Connection;
|
||||
import org.apache.hadoop.hbase.client.ConnectionFactory;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.ResultScanner;
|
||||
|
@ -48,12 +53,12 @@ import org.junit.rules.TestName;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@Category({MasterTests.class, LargeTests.class})
|
||||
@Category({ MasterTests.class, LargeTests.class })
|
||||
public class TestMasterRepairMode {
|
||||
|
||||
@ClassRule
|
||||
public static final HBaseClassTestRule CLASS_RULE =
|
||||
HBaseClassTestRule.forClass(TestMasterRepairMode.class);
|
||||
HBaseClassTestRule.forClass(TestMasterRepairMode.class);
|
||||
|
||||
@Rule
|
||||
public TestName name = new TestName();
|
||||
|
@ -84,16 +89,14 @@ public class TestMasterRepairMode {
|
|||
public void testNewCluster() throws Exception {
|
||||
enableMaintenanceMode();
|
||||
|
||||
TEST_UTIL.startMiniCluster(StartMiniClusterOption.builder()
|
||||
.numRegionServers(0)
|
||||
.numDataNodes(3)
|
||||
.build());
|
||||
TEST_UTIL.startMiniCluster(
|
||||
StartMiniClusterOption.builder().numRegionServers(0).numDataNodes(3).build());
|
||||
|
||||
Connection conn = TEST_UTIL.getConnection();
|
||||
assertTrue(conn.getAdmin().isMasterInMaintenanceMode());
|
||||
|
||||
try (Table table = conn.getTable(TableName.META_TABLE_NAME);
|
||||
ResultScanner scanner = table.getScanner(new Scan())) {
|
||||
ResultScanner scanner = table.getScanner(new Scan())) {
|
||||
assertNotNull("Could not read meta.", scanner.next());
|
||||
}
|
||||
}
|
||||
|
@ -113,25 +116,29 @@ public class TestMasterRepairMode {
|
|||
LOG.info("Starting master-only");
|
||||
|
||||
enableMaintenanceMode();
|
||||
TEST_UTIL.startMiniHBaseCluster(StartMiniClusterOption.builder()
|
||||
.numRegionServers(0).createRootDir(false).build());
|
||||
TEST_UTIL.startMiniHBaseCluster(
|
||||
StartMiniClusterOption.builder().numRegionServers(0).createRootDir(false).build());
|
||||
|
||||
Connection conn = TEST_UTIL.getConnection();
|
||||
assertTrue(conn.getAdmin().isMasterInMaintenanceMode());
|
||||
|
||||
try (Table table = conn.getTable(TableName.META_TABLE_NAME);
|
||||
ResultScanner scanner = table.getScanner(HConstants.TABLE_FAMILY);
|
||||
Stream<Result> results = StreamSupport.stream(scanner.spliterator(), false)) {
|
||||
ResultScanner scanner = table.getScanner(HConstants.TABLE_FAMILY);
|
||||
Stream<Result> results = StreamSupport.stream(scanner.spliterator(), false)) {
|
||||
assertTrue("Did not find user table records while reading hbase:meta",
|
||||
results.anyMatch(r -> Arrays.equals(r.getRow(), testRepairMode.getName())));
|
||||
results.anyMatch(r -> Arrays.equals(r.getRow(), testRepairMode.getName())));
|
||||
}
|
||||
|
||||
try (Table table = conn.getTable(testRepairMode);
|
||||
ResultScanner scanner = table.getScanner(new Scan())) {
|
||||
scanner.next();
|
||||
fail("Should not be able to access user-space tables in repair mode.");
|
||||
} catch (Exception e) {
|
||||
// Expected
|
||||
try (AsyncConnection asyncConn =
|
||||
ConnectionFactory.createAsyncConnection(TEST_UTIL.getConfiguration()).get()) {
|
||||
// use async table so we can set the timeout and retry value to let the operation fail fast
|
||||
AsyncTable<?> table = asyncConn.getTableBuilder(testRepairMode)
|
||||
.setScanTimeout(5, TimeUnit.SECONDS).setMaxRetries(2).build();
|
||||
assertThrows("Should not be able to access user-space tables in repair mode.",
|
||||
Exception.class, () -> {
|
||||
try (ResultScanner scanner = table.getScanner(new Scan())) {
|
||||
scanner.next();
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue