From 2051b0982ddbf575f88d7ca20b9960b43bb3da30 Mon Sep 17 00:00:00 2001 From: Vasudevan Date: Thu, 6 Sep 2018 16:43:50 +0530 Subject: [PATCH] HBASE-20741 Split of a region with replicas creates all daughter regions and its replica in same server (Ram) --- .../master/assignment/AssignmentManager.java | 21 ++- .../MergeTableRegionsProcedure.java | 45 +++-- .../assignment/SplitTableRegionProcedure.java | 44 +++-- .../master/balancer/BaseLoadBalancer.java | 21 ++- .../assignment/TestRegionReplicaSplit.java | 157 ++++++++++++++++++ 5 files changed, 259 insertions(+), 29 deletions(-) create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java index 70a96802f3a..f0a723dae16 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java @@ -566,6 +566,17 @@ public class AssignmentManager implements ServerListener { return waitForAssignment(regionInfo, Long.MAX_VALUE); } + /** + * Create round-robin assigns. Use on table creation to distribute out regions across cluster. + * @return AssignProcedures made out of the passed in hris and a call to the balancer + * to populate the assigns with targets chosen using round-robin (default balancer + * scheme). If at assign-time, the target chosen is no longer up, thats fine, the + * AssignProcedure will ask the balancer for a new target, and so on. + */ + public AssignProcedure[] createRoundRobinAssignProcedures(List hris) { + return createRoundRobinAssignProcedures(hris, null); + } + @VisibleForTesting // TODO: Remove this? public boolean waitForAssignment(final RegionInfo regionInfo, final long timeout) @@ -609,15 +620,21 @@ public class AssignmentManager implements ServerListener { * balancer scheme). If at assign-time, the target chosen is no longer up, thats fine, * the AssignProcedure will ask the balancer for a new target, and so on. */ - public AssignProcedure[] createRoundRobinAssignProcedures(final List hris) { + public AssignProcedure[] createRoundRobinAssignProcedures(final List hris, + List serversToExclude) { if (hris.isEmpty()) { return null; } + if (serversToExclude != null + && this.master.getServerManager().getOnlineServersList().size() == 1) { + LOG.debug("Only one region server found and hence going ahead with the assignment"); + serversToExclude = null; + } try { // Ask the balancer to assign our regions. Pass the regions en masse. The balancer can do // a better job if it has all the assignments in the one lump. Map> assignments = getBalancer().roundRobinAssignment(hris, - this.master.getServerManager().createDestinationServersList(null)); + this.master.getServerManager().createDestinationServersList(serversToExclude)); // Return mid-method! return createAssignProcedures(assignments, hris.size()); } catch (HBaseIOException hioe) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index 20ae444256b..9296222c0dc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -682,16 +683,38 @@ public class MergeTableRegionsProcedure final int regionReplication = getRegionReplication(env); final ServerName serverName = getServerName(env); - final AssignProcedure[] procs = - new AssignProcedure[regionsToMerge.length * regionReplication]; + AssignProcedure[] procs = + createAssignProcedures(regionReplication, env, Arrays.asList(regionsToMerge), serverName); + env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs); + } + + private AssignProcedure[] createAssignProcedures(final int regionReplication, + final MasterProcedureEnv env, final List hris, final ServerName serverName) { + final AssignProcedure[] procs = new AssignProcedure[hris.size() * regionReplication]; int procsIdx = 0; - for (int i = 0; i < regionsToMerge.length; ++i) { - for (int j = 0; j < regionReplication; ++j) { - final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(regionsToMerge[i], j); - procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, serverName); + for (int i = 0; i < hris.size(); ++i) { + // create procs for the primary region with the target server. + final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), 0); + procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, serverName); + } + if (regionReplication > 1) { + List regionReplicas = + new ArrayList(hris.size() * (regionReplication - 1)); + for (int i = 0; i < hris.size(); ++i) { + // We don't include primary replica here + for (int j = 1; j < regionReplication; ++j) { + regionReplicas.add(RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j)); + } + } + // for the replica regions exclude the primary region's server and call LB's roundRobin + // assignment + AssignProcedure[] replicaAssignProcs = env.getAssignmentManager() + .createRoundRobinAssignProcedures(regionReplicas, Collections.singletonList(serverName)); + for (AssignProcedure proc : replicaAssignProcs) { + procs[procsIdx++] = proc; } } - env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs); + return procs; } private UnassignProcedure[] createUnassignProcedures(final MasterProcedureEnv env, @@ -712,12 +735,8 @@ public class MergeTableRegionsProcedure private AssignProcedure[] createAssignProcedures(final MasterProcedureEnv env, final int regionReplication) { final ServerName targetServer = getServerName(env); - final AssignProcedure[] procs = new AssignProcedure[regionReplication]; - for (int i = 0; i < procs.length; ++i) { - final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(mergedRegion, i); - procs[i] = env.getAssignmentManager().createAssignProcedure(hri, targetServer); - } - return procs; + return createAssignProcedures(regionReplication, env, Collections.singletonList(mergedRegion), + targetServer); } private int getRegionReplication(final MasterProcedureEnv env) throws IOException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index 29dbabb6311..2baa056bee2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -23,6 +23,7 @@ import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -551,11 +552,8 @@ public class SplitTableRegionProcedure final int regionReplication = getRegionReplication(env); final ServerName serverName = getParentRegionServerName(env); - final AssignProcedure[] procs = new AssignProcedure[regionReplication]; - for (int i = 0; i < regionReplication; ++i) { - final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(getParentRegion(), i); - procs[i] = env.getAssignmentManager().createAssignProcedure(hri, serverName); - } + final AssignProcedure[] procs = createAssignProcedures(regionReplication, env, + Collections.singletonList(getParentRegion()), serverName); env.getMasterServices().getMasterProcedureExecutor().submitProcedures(procs); } @@ -836,15 +834,37 @@ public class SplitTableRegionProcedure private AssignProcedure[] createAssignProcedures(final MasterProcedureEnv env, final int regionReplication) { final ServerName targetServer = getParentRegionServerName(env); - final AssignProcedure[] procs = new AssignProcedure[regionReplication * 2]; + List daughterRegions = new ArrayList(2); + daughterRegions.add(daughter_1_RI); + daughterRegions.add(daughter_2_RI); + return createAssignProcedures(regionReplication, env, daughterRegions, targetServer); + } + + private AssignProcedure[] createAssignProcedures(final int regionReplication, + final MasterProcedureEnv env, final List hris, final ServerName serverName) { + final AssignProcedure[] procs = new AssignProcedure[hris.size() * regionReplication]; int procsIdx = 0; - for (int i = 0; i < regionReplication; ++i) { - final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(daughter_1_RI, i); - procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, targetServer); + for (int i = 0; i < hris.size(); ++i) { + // create procs for the primary region with the target server. + final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), 0); + procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, serverName); } - for (int i = 0; i < regionReplication; ++i) { - final RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(daughter_2_RI, i); - procs[procsIdx++] = env.getAssignmentManager().createAssignProcedure(hri, targetServer); + if (regionReplication > 1) { + List regionReplicas = + new ArrayList(hris.size() * (regionReplication - 1)); + for (int i = 0; i < hris.size(); ++i) { + // We don't include primary replica here + for (int j = 1; j < regionReplication; ++j) { + regionReplicas.add(RegionReplicaUtil.getRegionInfoForReplica(hris.get(i), j)); + } + } + // for the replica regions exclude the primary region's server and call LB's roundRobin + // assignment + AssignProcedure[] replicaAssignProcs = env.getAssignmentManager() + .createRoundRobinAssignProcedures(regionReplicas, Collections.singletonList(serverName)); + for (AssignProcedure proc : replicaAssignProcs) { + procs[procsIdx++] = proc; + } } return procs; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 51c2758c6e8..0f6e3489deb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -1271,13 +1271,30 @@ public abstract class BaseLoadBalancer implements LoadBalancer { List lastFewRegions = new ArrayList<>(); // assign the remaining by going through the list and try to assign to servers one-by-one int serverIdx = RANDOM.nextInt(numServers); - for (RegionInfo region : unassignedRegions) { + OUTER : for (RegionInfo region : unassignedRegions) { boolean assigned = false; - for (int j = 0; j < numServers; j++) { // try all servers one by one + INNER : for (int j = 0; j < numServers; j++) { // try all servers one by one ServerName serverName = servers.get((j + serverIdx) % numServers); if (!cluster.wouldLowerAvailability(region, serverName)) { List serverRegions = assignments.computeIfAbsent(serverName, k -> new ArrayList<>()); + if (!RegionReplicaUtil.isDefaultReplica(region.getReplicaId())) { + // if the region is not a default replica + // check if the assignments map has the other replica region on this server + for (RegionInfo hri : serverRegions) { + if (RegionReplicaUtil.isReplicasForSameRegion(region, hri)) { + if (LOG.isTraceEnabled()) { + LOG.trace("Skipping the server, " + serverName + + " , got the same server for the region " + region); + } + // do not allow this case. The unassignedRegions we got because the + // replica region in this list was not assigned because of lower availablity issue. + // So when we assign here we should ensure that as far as possible the server being + // selected does not have the server where the replica region was not assigned. + continue INNER; // continue the inner loop, ie go to the next server + } + } + } serverRegions.add(region); cluster.doAssignRegion(region, serverName); serverIdx = (j + serverIdx + 1) % numServers; //remain from next server diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java new file mode 100644 index 00000000000..d216d0a7e85 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestRegionReplicaSplit.java @@ -0,0 +1,157 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.assignment; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.regionserver.Region; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; +import org.apache.hadoop.hbase.util.RegionSplitter; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Category({ MasterTests.class, LargeTests.class }) +public class TestRegionReplicaSplit { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRegionReplicaSplit.class); + private static final Logger LOG = LoggerFactory.getLogger(TestRegionReplicaSplit.class); + + private static final int NB_SERVERS = 4; + private static Table table; + + private static final HBaseTestingUtility HTU = new HBaseTestingUtility(); + private static final byte[] f = HConstants.CATALOG_FAMILY; + + @BeforeClass + public static void beforeClass() throws Exception { + HTU.getConfiguration().setInt("hbase.master.wait.on.regionservers.mintostart", 3); + HTU.startMiniCluster(NB_SERVERS); + final TableName tableName = TableName.valueOf(TestRegionReplicaSplit.class.getSimpleName()); + + // Create table then get the single region for our new table. + createTable(tableName); + } + + @Rule + public TestName name = new TestName(); + + private static void createTable(final TableName tableName) throws IOException { + TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(tableName); + builder.setRegionReplication(3); + // create a table with 3 replication + table = HTU.createTable(builder.build(), new byte[][] { f }, getSplits(2), + new Configuration(HTU.getConfiguration())); + } + + private static byte[][] getSplits(int numRegions) { + RegionSplitter.UniformSplit split = new RegionSplitter.UniformSplit(); + split.setFirstRow(Bytes.toBytes(0L)); + split.setLastRow(Bytes.toBytes(Long.MAX_VALUE)); + return split.split(numRegions); + } + + @AfterClass + public static void afterClass() throws Exception { + HRegionServer.TEST_SKIP_REPORTING_TRANSITION = false; + table.close(); + HTU.shutdownMiniCluster(); + } + + @Test + public void testRegionReplicaSplitRegionAssignment() throws Exception { + HTU.loadNumericRows(table, f, 0, 3); + // split the table + List regions = new ArrayList(); + for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) { + for (Region r : rs.getRegionServer().getRegions(table.getName())) { + System.out.println("the region before split is is " + r.getRegionInfo() + + rs.getRegionServer().getServerName()); + regions.add(r.getRegionInfo()); + } + } + HTU.getAdmin().split(table.getName(), Bytes.toBytes(1)); + int count = 0; + while (true) { + for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) { + for (Region r : rs.getRegionServer().getRegions(table.getName())) { + count++; + } + } + if (count >= 9) { + break; + } + count = 0; + } + List newRegionLocations = new ArrayList(); + for (RegionServerThread rs : HTU.getMiniHBaseCluster().getRegionServerThreads()) { + RegionInfo prevInfo = null; + for (Region r : rs.getRegionServer().getRegions(table.getName())) { + if (!regions.contains(r.getRegionInfo()) + && !RegionReplicaUtil.isDefaultReplica(r.getRegionInfo())) { + LOG.info("The region is " + r.getRegionInfo() + " the location is " + + rs.getRegionServer().getServerName()); + if (!RegionReplicaUtil.isDefaultReplica(r.getRegionInfo()) + && newRegionLocations.contains(rs.getRegionServer().getServerName()) + && prevInfo != null + && Bytes.equals(prevInfo.getStartKey(), r.getRegionInfo().getStartKey()) + && Bytes.equals(prevInfo.getEndKey(), r.getRegionInfo().getEndKey())) { + fail("Splitted regions should not be assigned to same region server"); + } else { + prevInfo = r.getRegionInfo(); + if (!RegionReplicaUtil.isDefaultReplica(r.getRegionInfo()) + && !newRegionLocations.contains(rs.getRegionServer().getServerName())) { + newRegionLocations.add(rs.getRegionServer().getServerName()); + } + } + } + } + } + // since we assign the daughter regions in round robin fashion, both the daugther region + // replicas will be assigned to two unique servers. + assertEquals("The new regions should be assigned to 3 unique servers ", 3, + newRegionLocations.size()); + } +}