HBASE-23659 BaseLoadBalancer#wouldLowerAvailability should consider region replicas (#1001)
Signed-off-by: stack <stack@apache.org> Signed-off-by: Duo Zhang <zhangduo@apache.org>
This commit is contained in:
parent
1217d57492
commit
75ca8606df
|
@ -746,10 +746,20 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
int server = serversToIndex.get(serverName.getHostAndPort());
|
||||
int region = regionsToIndex.get(regionInfo);
|
||||
|
||||
// Replicas of the same region should preferably be assigned to different servers
|
||||
for (int i : regionsPerServer[server]) {
|
||||
RegionInfo otherRegionInfo = regions[i];
|
||||
if (RegionReplicaUtil.isReplicasForSameRegion(regionInfo, otherRegionInfo)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
int primary = regionIndexToPrimaryIndex[region];
|
||||
if (primary == -1) {
|
||||
return false;
|
||||
}
|
||||
// there is a subset relation for server < host < rack
|
||||
// check server first
|
||||
|
||||
if (contains(primariesOfRegionsPerServer[server], primary)) {
|
||||
// check for whether there are other servers that we can place this region
|
||||
for (int i = 0; i < primariesOfRegionsPerServer.length; i++) {
|
||||
|
@ -761,7 +771,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
}
|
||||
|
||||
// check host
|
||||
if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host
|
||||
if (multiServersPerHost) {
|
||||
// these arrays would only be allocated if we have more than one server per host
|
||||
int host = serverIndexToHostIndex[server];
|
||||
if (contains(primariesOfRegionsPerHost[host], primary)) {
|
||||
// check for whether there are other hosts that we can place this region
|
||||
|
@ -787,6 +798,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
return false; // there is not a better rack to place this
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1267,58 +1279,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
}
|
||||
|
||||
Cluster cluster = createCluster(servers, regions);
|
||||
List<RegionInfo> unassignedRegions = new ArrayList<>();
|
||||
|
||||
roundRobinAssignment(cluster, regions, unassignedRegions,
|
||||
servers, assignments);
|
||||
|
||||
List<RegionInfo> lastFewRegions = new ArrayList<>();
|
||||
// assign the remaining by going through the list and try to assign to servers one-by-one
|
||||
int serverIdx = RANDOM.nextInt(numServers);
|
||||
OUTER : for (RegionInfo region : unassignedRegions) {
|
||||
boolean assigned = false;
|
||||
INNER : for (int j = 0; j < numServers; j++) { // try all servers one by one
|
||||
ServerName serverName = servers.get((j + serverIdx) % numServers);
|
||||
if (!cluster.wouldLowerAvailability(region, serverName)) {
|
||||
List<RegionInfo> serverRegions =
|
||||
assignments.computeIfAbsent(serverName, k -> new ArrayList<>());
|
||||
if (!RegionReplicaUtil.isDefaultReplica(region.getReplicaId())) {
|
||||
// if the region is not a default replica
|
||||
// check if the assignments map has the other replica region on this server
|
||||
for (RegionInfo hri : serverRegions) {
|
||||
if (RegionReplicaUtil.isReplicasForSameRegion(region, hri)) {
|
||||
if (LOG.isTraceEnabled()) {
|
||||
LOG.trace("Skipping the server, " + serverName
|
||||
+ " , got the same server for the region " + region);
|
||||
}
|
||||
// do not allow this case. The unassignedRegions we got because the
|
||||
// replica region in this list was not assigned because of lower availability issue.
|
||||
// So when we assign here we should ensure that as far as possible the server being
|
||||
// selected does not have the server where the replica region was not assigned.
|
||||
continue INNER; // continue the inner loop, ie go to the next server
|
||||
}
|
||||
}
|
||||
}
|
||||
serverRegions.add(region);
|
||||
cluster.doAssignRegion(region, serverName);
|
||||
serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
|
||||
assigned = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!assigned) {
|
||||
lastFewRegions.add(region);
|
||||
}
|
||||
}
|
||||
// just sprinkle the rest of the regions on random regionservers. The balanceCluster will
|
||||
// make it optimal later. we can end up with this if numReplicas > numServers.
|
||||
for (RegionInfo region : lastFewRegions) {
|
||||
int i = RANDOM.nextInt(numServers);
|
||||
ServerName server = servers.get(i);
|
||||
List<RegionInfo> serverRegions = assignments.computeIfAbsent(server, k -> new ArrayList<>());
|
||||
serverRegions.add(region);
|
||||
cluster.doAssignRegion(region, server);
|
||||
}
|
||||
roundRobinAssignment(cluster, regions, servers, assignments);
|
||||
return assignments;
|
||||
}
|
||||
|
||||
|
@ -1611,9 +1572,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
* Round robin a list of regions to a list of servers
|
||||
*/
|
||||
private void roundRobinAssignment(Cluster cluster, List<RegionInfo> regions,
|
||||
List<RegionInfo> unassignedRegions, List<ServerName> servers,
|
||||
Map<ServerName, List<RegionInfo>> assignments) {
|
||||
|
||||
List<ServerName> servers, Map<ServerName, List<RegionInfo>> assignments) {
|
||||
List<RegionInfo> unassignedRegions = new ArrayList<>();
|
||||
int numServers = servers.size();
|
||||
int numRegions = regions.size();
|
||||
int max = (int) Math.ceil((float) numRegions / numServers);
|
||||
|
@ -1622,7 +1582,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
serverIdx = RANDOM.nextInt(numServers);
|
||||
}
|
||||
int regionIdx = 0;
|
||||
|
||||
for (int j = 0; j < numServers; j++) {
|
||||
ServerName server = servers.get((j + serverIdx) % numServers);
|
||||
List<RegionInfo> serverRegions = new ArrayList<>(max);
|
||||
|
@ -1638,6 +1597,37 @@ public abstract class BaseLoadBalancer implements LoadBalancer {
|
|||
assignments.put(server, serverRegions);
|
||||
regionIdx++;
|
||||
}
|
||||
|
||||
|
||||
List<RegionInfo> lastFewRegions = new ArrayList<>();
|
||||
// assign the remaining by going through the list and try to assign to servers one-by-one
|
||||
serverIdx = RANDOM.nextInt(numServers);
|
||||
OUTER : for (RegionInfo region : unassignedRegions) {
|
||||
boolean assigned = false;
|
||||
INNER : for (int j = 0; j < numServers; j++) { // try all servers one by one
|
||||
ServerName server = servers.get((j + serverIdx) % numServers);
|
||||
if (cluster.wouldLowerAvailability(region, server)) {
|
||||
continue INNER;
|
||||
} else {
|
||||
assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
|
||||
cluster.doAssignRegion(region, server);
|
||||
serverIdx = (j + serverIdx + 1) % numServers; //remain from next server
|
||||
assigned = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!assigned) {
|
||||
lastFewRegions.add(region);
|
||||
}
|
||||
}
|
||||
// just sprinkle the rest of the regions on random regionservers. The balanceCluster will
|
||||
// make it optimal later. we can end up with this if numReplicas > numServers.
|
||||
for (RegionInfo region : lastFewRegions) {
|
||||
int i = RANDOM.nextInt(numServers);
|
||||
ServerName server = servers.get(i);
|
||||
assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region);
|
||||
cluster.doAssignRegion(region, server);
|
||||
}
|
||||
}
|
||||
|
||||
protected Map<ServerName, List<RegionInfo>> getRegionAssignmentsByServer(
|
||||
|
|
|
@ -19,8 +19,11 @@ package org.apache.hadoop.hbase.client;
|
|||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
|
@ -31,8 +34,10 @@ import org.apache.hadoop.hbase.ServerName;
|
|||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.Waiter;
|
||||
import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
|
||||
import org.apache.hadoop.hbase.regionserver.Region;
|
||||
import org.apache.hadoop.hbase.util.JVMClusterUtil;
|
||||
|
||||
final class RegionReplicaTestHelper {
|
||||
public final class RegionReplicaTestHelper {
|
||||
|
||||
// Private, empty constructor: code outside this class cannot instantiate the helper.
private RegionReplicaTestHelper() {
|
||||
}
|
||||
|
@ -156,4 +161,32 @@ final class RegionReplicaTestHelper {
|
|||
assertEquals(newServerName2,
|
||||
locator.getRegionLocations(tableName, 2, false).getRegionLocation(2).getServerName());
|
||||
}
|
||||
|
||||
public static void assertReplicaDistributed(HBaseTestingUtility util, Table t)
|
||||
throws IOException {
|
||||
if (t.getDescriptor().getRegionReplication() <= 1) {
|
||||
return;
|
||||
}
|
||||
List<RegionInfo> regionInfos = new ArrayList<>();
|
||||
for (JVMClusterUtil.RegionServerThread rs : util.getMiniHBaseCluster()
|
||||
.getRegionServerThreads()) {
|
||||
regionInfos.clear();
|
||||
for (Region r : rs.getRegionServer().getRegions(t.getName())) {
|
||||
if (contains(regionInfos, r.getRegionInfo())) {
|
||||
fail("Replica regions should be assigned to different region servers");
|
||||
} else {
|
||||
regionInfos.add(r.getRegionInfo());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean contains(List<RegionInfo> regionInfos, RegionInfo regionInfo) {
|
||||
for (RegionInfo info : regionInfos) {
|
||||
if (RegionReplicaUtil.isReplicasForSameRegion(info, regionInfo)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,9 +18,6 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.master.assignment;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
@ -29,10 +26,9 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
|
||||
import org.apache.hadoop.hbase.client.RegionReplicaTestHelper;
|
||||
import org.apache.hadoop.hbase.client.Table;
|
||||
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
|
@ -107,11 +103,10 @@ public class TestRegionReplicaSplit {
|
|||
List<RegionInfo> regions = new ArrayList<RegionInfo>();
|
||||
for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
|
||||
for (Region r : rs.getRegionServer().getRegions(table.getName())) {
|
||||
System.out.println("the region before split is is " + r.getRegionInfo()
|
||||
+ rs.getRegionServer().getServerName());
|
||||
regions.add(r.getRegionInfo());
|
||||
}
|
||||
}
|
||||
// There are 6 regions before split, 9 regions after split.
|
||||
HTU.getAdmin().split(table.getName(), Bytes.toBytes(1));
|
||||
int count = 0;
|
||||
while (true) {
|
||||
|
@ -125,33 +120,7 @@ public class TestRegionReplicaSplit {
|
|||
}
|
||||
count = 0;
|
||||
}
|
||||
List<ServerName> newRegionLocations = new ArrayList<ServerName>();
|
||||
for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) {
|
||||
RegionInfo prevInfo = null;
|
||||
for (Region r : rs.getRegionServer().getRegions(table.getName())) {
|
||||
if (!regions.contains(r.getRegionInfo())
|
||||
&& !RegionReplicaUtil.isDefaultReplica(r.getRegionInfo())) {
|
||||
LOG.info("The region is " + r.getRegionInfo() + " the location is "
|
||||
+ rs.getRegionServer().getServerName());
|
||||
if (!RegionReplicaUtil.isDefaultReplica(r.getRegionInfo())
|
||||
&& newRegionLocations.contains(rs.getRegionServer().getServerName())
|
||||
&& prevInfo != null
|
||||
&& Bytes.equals(prevInfo.getStartKey(), r.getRegionInfo().getStartKey())
|
||||
&& Bytes.equals(prevInfo.getEndKey(), r.getRegionInfo().getEndKey())) {
|
||||
fail("Splitted regions should not be assigned to same region server");
|
||||
} else {
|
||||
prevInfo = r.getRegionInfo();
|
||||
if (!RegionReplicaUtil.isDefaultReplica(r.getRegionInfo())
|
||||
&& !newRegionLocations.contains(rs.getRegionServer().getServerName())) {
|
||||
newRegionLocations.add(rs.getRegionServer().getServerName());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// since we assign the daughter regions in round robin fashion, both the daughter region
|
||||
// replicas will be assigned to two unique servers.
|
||||
assertEquals("The new regions should be assigned to 3 unique servers ", 3,
|
||||
newRegionLocations.size());
|
||||
|
||||
RegionReplicaTestHelper.assertReplicaDistributed(HTU, table);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,11 +19,8 @@ package org.apache.hadoop.hbase.master.procedure;
|
|||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
|
@ -32,14 +29,12 @@ import org.apache.hadoop.hbase.MiniHBaseCluster;
|
|||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
|
||||
import org.apache.hadoop.hbase.client.RegionReplicaTestHelper;
|
||||
import org.apache.hadoop.hbase.client.Table;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
|
||||
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
|
||||
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
|
||||
import org.apache.hadoop.hbase.regionserver.Region;
|
||||
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -131,7 +126,7 @@ public class TestSCPBase {
|
|||
long procId = getSCPProcId(procExec);
|
||||
ProcedureTestingUtility.waitProcedure(procExec, procId);
|
||||
}
|
||||
assertReplicaDistributed(t);
|
||||
RegionReplicaTestHelper.assertReplicaDistributed(util, t);
|
||||
assertEquals(count, util.countRows(t));
|
||||
assertEquals(checksum, util.checksumRows(t));
|
||||
}
|
||||
|
@ -142,36 +137,6 @@ public class TestSCPBase {
|
|||
return procExec.getActiveProcIds().stream().mapToLong(Long::longValue).min().getAsLong();
|
||||
}
|
||||
|
||||
private void assertReplicaDistributed(Table t) throws IOException {
|
||||
if (t.getDescriptor().getRegionReplication() <= 1) {
|
||||
return;
|
||||
}
|
||||
// Assert all data came back.
|
||||
List<RegionInfo> regionInfos = new ArrayList<>();
|
||||
for (RegionServerThread rs : this.util.getMiniHBaseCluster().getRegionServerThreads()) {
|
||||
regionInfos.clear();
|
||||
for (Region r : rs.getRegionServer().getRegions(t.getName())) {
|
||||
LOG.info("The region is " + r.getRegionInfo() + " the location is " +
|
||||
rs.getRegionServer().getServerName());
|
||||
if (contains(regionInfos, r.getRegionInfo())) {
|
||||
LOG.error("Am exiting");
|
||||
fail("Replica regions should be assigned to different region servers");
|
||||
} else {
|
||||
regionInfos.add(r.getRegionInfo());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean contains(List<RegionInfo> regionInfos, RegionInfo regionInfo) {
|
||||
for (RegionInfo info : regionInfos) {
|
||||
if (RegionReplicaUtil.isReplicasForSameRegion(info, regionInfo)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
protected Table createTable(final TableName tableName) throws IOException {
|
||||
final Table t = this.util.createTable(tableName, HBaseTestingUtility.COLUMNS,
|
||||
HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE, getRegionReplication());
|
||||
|
|
Loading…
Reference in New Issue