diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 5f1e0f91827..f62d307da35 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -746,10 +746,20 @@ public abstract class BaseLoadBalancer implements LoadBalancer { int server = serversToIndex.get(serverName.getHostAndPort()); int region = regionsToIndex.get(regionInfo); + // Region replicas for same region should better assign to different servers + for (int i : regionsPerServer[server]) { + RegionInfo otherRegionInfo = regions[i]; + if (RegionReplicaUtil.isReplicasForSameRegion(regionInfo, otherRegionInfo)) { + return true; + } + } + int primary = regionIndexToPrimaryIndex[region]; + if (primary == -1) { + return false; + } // there is a subset relation for server < host < rack // check server first - if (contains(primariesOfRegionsPerServer[server], primary)) { // check for whether there are other servers that we can place this region for (int i = 0; i < primariesOfRegionsPerServer.length; i++) { @@ -761,7 +771,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } // check host - if (multiServersPerHost) { // these arrays would only be allocated if we have more than one server per host + if (multiServersPerHost) { + // these arrays would only be allocated if we have more than one server per host int host = serverIndexToHostIndex[server]; if (contains(primariesOfRegionsPerHost[host], primary)) { // check for whether there are other hosts that we can place this region @@ -787,6 +798,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return false; // there is not a better rack to place this } } + return false; } @@ -1267,58 +1279,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } Cluster cluster = createCluster(servers, regions); - List unassignedRegions = new ArrayList<>(); - - roundRobinAssignment(cluster, regions, unassignedRegions, - servers, assignments); - - List lastFewRegions = new ArrayList<>(); - // assign the remaining by going through the list and try to assign to servers one-by-one - int serverIdx = RANDOM.nextInt(numServers); - OUTER : for (RegionInfo region : unassignedRegions) { - boolean assigned = false; - INNER : for (int j = 0; j < numServers; j++) { // try all servers one by one - ServerName serverName = servers.get((j + serverIdx) % numServers); - if (!cluster.wouldLowerAvailability(region, serverName)) { - List serverRegions = - assignments.computeIfAbsent(serverName, k -> new ArrayList<>()); - if (!RegionReplicaUtil.isDefaultReplica(region.getReplicaId())) { - // if the region is not a default replica - // check if the assignments map has the other replica region on this server - for (RegionInfo hri : serverRegions) { - if (RegionReplicaUtil.isReplicasForSameRegion(region, hri)) { - if (LOG.isTraceEnabled()) { - LOG.trace("Skipping the server, " + serverName - + " , got the same server for the region " + region); - } - // do not allow this case. The unassignedRegions we got because the - // replica region in this list was not assigned because of lower availablity issue. - // So when we assign here we should ensure that as far as possible the server being - // selected does not have the server where the replica region was not assigned. - continue INNER; // continue the inner loop, ie go to the next server - } - } - } - serverRegions.add(region); - cluster.doAssignRegion(region, serverName); - serverIdx = (j + serverIdx + 1) % numServers; //remain from next server - assigned = true; - break; - } - } - if (!assigned) { - lastFewRegions.add(region); - } - } - // just sprinkle the rest of the regions on random regionservers. The balanceCluster will - // make it optimal later. we can end up with this if numReplicas > numServers. - for (RegionInfo region : lastFewRegions) { - int i = RANDOM.nextInt(numServers); - ServerName server = servers.get(i); - List serverRegions = assignments.computeIfAbsent(server, k -> new ArrayList<>()); - serverRegions.add(region); - cluster.doAssignRegion(region, server); - } + roundRobinAssignment(cluster, regions, servers, assignments); return assignments; } @@ -1611,9 +1572,8 @@ public abstract class BaseLoadBalancer implements LoadBalancer { * Round robin a list of regions to a list of servers */ private void roundRobinAssignment(Cluster cluster, List regions, - List unassignedRegions, List servers, - Map> assignments) { - + List servers, Map> assignments) { + List unassignedRegions = new ArrayList<>(); int numServers = servers.size(); int numRegions = regions.size(); int max = (int) Math.ceil((float) numRegions / numServers); @@ -1622,7 +1582,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { serverIdx = RANDOM.nextInt(numServers); } int regionIdx = 0; - for (int j = 0; j < numServers; j++) { ServerName server = servers.get((j + serverIdx) % numServers); List serverRegions = new ArrayList<>(max); @@ -1638,6 +1597,37 @@ public abstract class BaseLoadBalancer implements LoadBalancer { assignments.put(server, serverRegions); regionIdx++; } + + + List lastFewRegions = new ArrayList<>(); + // assign the remaining by going through the list and try to assign to servers one-by-one + serverIdx = RANDOM.nextInt(numServers); + OUTER : for (RegionInfo region : unassignedRegions) { + boolean assigned = false; + INNER : for (int j = 0; j < numServers; j++) { // try all servers one by one + ServerName server = servers.get((j + serverIdx) % numServers); + if (cluster.wouldLowerAvailability(region, server)) { + continue INNER; + } else { + assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region); + cluster.doAssignRegion(region, server); + serverIdx = (j + serverIdx + 1) % numServers; //remain from next server + assigned = true; + break; + } + } + if (!assigned) { + lastFewRegions.add(region); + } + } + // just sprinkle the rest of the regions on random regionservers. The balanceCluster will + // make it optimal later. we can end up with this if numReplicas > numServers. + for (RegionInfo region : lastFewRegions) { + int i = RANDOM.nextInt(numServers); + ServerName server = servers.get(i); + assignments.computeIfAbsent(server, k -> new ArrayList<>()).add(region); + cluster.doAssignRegion(region, server); + } } protected Map> getRegionAssignmentsByServer( diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/RegionReplicaTestHelper.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/RegionReplicaTestHelper.java index 57ff7b7e9c5..ba3e74c278d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/RegionReplicaTestHelper.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/RegionReplicaTestHelper.java @@ -19,8 +19,11 @@ package org.apache.hadoop.hbase.client; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -31,8 +34,10 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.Waiter.ExplainingPredicate; +import org.apache.hadoop.hbase.regionserver.Region; +import org.apache.hadoop.hbase.util.JVMClusterUtil; -final class RegionReplicaTestHelper { +public final class RegionReplicaTestHelper { private RegionReplicaTestHelper() { } @@ -156,4 +161,32 @@ final class RegionReplicaTestHelper { assertEquals(newServerName2, locator.getRegionLocations(tableName, 2, false).getRegionLocation(2).getServerName()); } + + public static void assertReplicaDistributed(HBaseTestingUtility util, Table t) + throws IOException { + if (t.getDescriptor().getRegionReplication() <= 1) { + return; + } + List regionInfos = new ArrayList<>(); + for (JVMClusterUtil.RegionServerThread rs : util.getMiniHBaseCluster() + .getRegionServerThreads()) { + regionInfos.clear(); + for (Region r : rs.getRegionServer().getRegions(t.getName())) { + if (contains(regionInfos, r.getRegionInfo())) { + fail("Replica regions should be assigned to different region servers"); + } else { + regionInfos.add(r.getRegionInfo()); + } + } + } + } + + private static boolean contains(List regionInfos, RegionInfo regionInfo) { + for (RegionInfo info : regionInfos) { + if (RegionReplicaUtil.isReplicasForSameRegion(info, regionInfo)) { + return true; + } + } + return false; + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java index 7cb01f7cd74..6522b7a5f17 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java @@ -18,9 +18,6 @@ */ package org.apache.hadoop.hbase.master.assignment; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; - import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -29,10 +26,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; -import org.apache.hadoop.hbase.client.RegionReplicaUtil; +import org.apache.hadoop.hbase.client.RegionReplicaTestHelper; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.regionserver.HRegionServer; @@ -108,11 +104,10 @@ public class TestRegionReplicaSplit { List regions = new ArrayList(); for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) { for (Region r : rs.getRegionServer().getRegions(table.getName())) { - System.out.println("the region before split is is " + r.getRegionInfo() - + rs.getRegionServer().getServerName()); regions.add(r.getRegionInfo()); } } + // There are 6 regions before split, 9 regions after split. HTU.getAdmin().split(table.getName(), Bytes.toBytes(1)); int count = 0; while (true) { @@ -126,33 +121,7 @@ public class TestRegionReplicaSplit { } count = 0; } - List newRegionLocations = new ArrayList(); - for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) { - RegionInfo prevInfo = null; - for (Region r : rs.getRegionServer().getRegions(table.getName())) { - if (!regions.contains(r.getRegionInfo()) - && !RegionReplicaUtil.isDefaultReplica(r.getRegionInfo())) { - LOG.info("The region is " + r.getRegionInfo() + " the location is " - + rs.getRegionServer().getServerName()); - if (!RegionReplicaUtil.isDefaultReplica(r.getRegionInfo()) - && newRegionLocations.contains(rs.getRegionServer().getServerName()) - && prevInfo != null - && Bytes.equals(prevInfo.getStartKey(), r.getRegionInfo().getStartKey()) - && Bytes.equals(prevInfo.getEndKey(), r.getRegionInfo().getEndKey())) { - fail("Splitted regions should not be assigned to same region server"); - } else { - prevInfo = r.getRegionInfo(); - if (!RegionReplicaUtil.isDefaultReplica(r.getRegionInfo()) - && !newRegionLocations.contains(rs.getRegionServer().getServerName())) { - newRegionLocations.add(rs.getRegionServer().getServerName()); - } - } - } - } - } - // since we assign the daughter regions in round robin fashion, both the daugther region - // replicas will be assigned to two unique servers. - assertEquals("The new regions should be assigned to 3 unique servers ", 3, - newRegionLocations.size()); + + RegionReplicaTestHelper.assertReplicaDistributed(HTU, table); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPBase.java index 0f1b4265038..b1b8d3c48d3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestSCPBase.java @@ -19,11 +19,8 @@ package org.apache.hadoop.hbase.master.procedure; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -32,14 +29,12 @@ import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; -import org.apache.hadoop.hbase.client.RegionReplicaUtil; +import org.apache.hadoop.hbase.client.RegionReplicaTestHelper; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; -import org.apache.hadoop.hbase.regionserver.Region; -import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; import org.junit.After; import org.junit.Before; import org.slf4j.Logger; @@ -131,7 +126,7 @@ public class TestSCPBase { long procId = getSCPProcId(procExec); ProcedureTestingUtility.waitProcedure(procExec, procId); } - assertReplicaDistributed(t); + RegionReplicaTestHelper.assertReplicaDistributed(util, t); assertEquals(count, HBaseTestingUtility.countRows(t)); assertEquals(checksum, util.checksumRows(t)); } @@ -142,36 +137,6 @@ public class TestSCPBase { return procExec.getActiveProcIds().stream().mapToLong(Long::longValue).min().getAsLong(); } - private void assertReplicaDistributed(Table t) throws IOException { - if (t.getDescriptor().getRegionReplication() <= 1) { - return; - } - // Assert all data came back. - List regionInfos = new ArrayList<>(); - for (RegionServerThread rs : this.util.getMiniHBaseCluster().getRegionServerThreads()) { - regionInfos.clear(); - for (Region r : rs.getRegionServer().getRegions(t.getName())) { - LOG.info("The region is " + r.getRegionInfo() + " the location is " + - rs.getRegionServer().getServerName()); - if (contains(regionInfos, r.getRegionInfo())) { - LOG.error("Am exiting"); - fail("Replica regions should be assigned to different region servers"); - } else { - regionInfos.add(r.getRegionInfo()); - } - } - } - } - - private boolean contains(List regionInfos, RegionInfo regionInfo) { - for (RegionInfo info : regionInfos) { - if (RegionReplicaUtil.isReplicasForSameRegion(info, regionInfo)) { - return true; - } - } - return false; - } - protected Table createTable(final TableName tableName) throws IOException { final Table t = this.util.createTable(tableName, HBaseTestingUtility.COLUMNS, HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE, getRegionReplication());