From a4738e5184b836d779c0bf41418980e744e9253b Mon Sep 17 00:00:00 2001 From: haxiaolin Date: Thu, 27 Jun 2019 10:52:01 +0800 Subject: [PATCH] HBASE-22403 Balance in RSGroup should consider throttling and a failure affects the whole Signed-off-by: Guanghao Zhang --- .../hbase/rsgroup/RSGroupAdminServer.java | 10 +-- .../apache/hadoop/hbase/master/HMaster.java | 80 ++++++++++--------- .../hadoop/hbase/master/MasterServices.java | 8 ++ .../hbase/master/MockNoopMasterServices.java | 5 ++ 4 files changed, 58 insertions(+), 45 deletions(-) diff --git a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java index 2dae85356df..8cc7ab72148 100644 --- a/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java +++ b/hbase-rsgroup/src/main/java/org/apache/hadoop/hbase/rsgroup/RSGroupAdminServer.java @@ -409,7 +409,6 @@ public class RSGroupAdminServer implements RSGroupAdmin { @Override public boolean balanceRSGroup(String groupName) throws IOException { ServerManager serverManager = master.getServerManager(); - AssignmentManager assignmentManager = master.getAssignmentManager(); LoadBalancer balancer = master.getLoadBalancer(); synchronized (balancer) { @@ -447,16 +446,11 @@ public class RSGroupAdminServer implements RSGroupAdmin { plans.addAll(partialPlans); } } - long startTime = System.currentTimeMillis(); boolean balancerRan = !plans.isEmpty(); if (balancerRan) { LOG.info("RSGroup balance {} starting with plan count: {}", groupName, plans.size()); - for (RegionPlan plan: plans) { - LOG.info("balance {}", plan); - assignmentManager.moveAsync(plan); - } - LOG.info("RSGroup balance {} completed after {} seconds", groupName, - (System.currentTimeMillis() - startTime)); + master.executeRegionPlansWithThrottling(plans); + LOG.info("RSGroup balance " + groupName + " completed"); } return balancerRan; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 4adf648f918..5e0362345f1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1672,7 +1672,6 @@ public class HMaster extends HRegionServer implements MasterServices { return false; } - int maxRegionsInTransition = getMaxRegionsInTransition(); synchronized (this.balancer) { // If balance not true, don't run balancer. if (!this.loadBalancerTracker.isBalancerOn()) return false; @@ -1731,45 +1730,11 @@ public class HMaster extends HRegionServer implements MasterServices { } } - long balanceStartTime = System.currentTimeMillis(); - long cutoffTime = balanceStartTime + this.maxBlancingTime; - int rpCount = 0; // number of RegionPlans balanced so far - if (plans != null && !plans.isEmpty()) { - int balanceInterval = this.maxBlancingTime / plans.size(); - LOG.info("Balancer plans size is " + plans.size() + ", the balance interval is " - + balanceInterval + " ms, and the max number regions in transition is " - + maxRegionsInTransition); - - for (RegionPlan plan: plans) { - LOG.info("balance " + plan); - //TODO: bulk assign - try { - this.assignmentManager.moveAsync(plan); - } catch (HBaseIOException hioe) { - //should ignore failed plans here, avoiding the whole balance plans be aborted - //later calls of balance() can fetch up the failed and skipped plans - LOG.warn("Failed balance plan: {}, just skip it", plan, hioe); - } - //rpCount records balance plans processed, does not care if a plan succeeds - rpCount++; - - balanceThrottling(balanceStartTime + rpCount * balanceInterval, maxRegionsInTransition, - cutoffTime); - - // if performing next balance exceeds cutoff time, exit the loop - if (rpCount < plans.size() && System.currentTimeMillis() > cutoffTime) { - // TODO: After balance, there should not be a cutoff time (keeping it as - // a security net for now) - LOG.debug("No more balancing till next balance run; maxBalanceTime=" - + this.maxBlancingTime); - break; - } - } - } + List sucRPs = executeRegionPlansWithThrottling(plans); if (this.cpHost != null) { try { - this.cpHost.postBalance(rpCount < plans.size() ? plans.subList(0, rpCount) : plans); + this.cpHost.postBalance(sucRPs); } catch (IOException ioe) { // balancing already succeeded so don't change the result LOG.error("Error invoking master coprocessor postBalance()", ioe); @@ -1781,6 +1746,47 @@ public class HMaster extends HRegionServer implements MasterServices { return true; } + public List executeRegionPlansWithThrottling(List plans) { + List sucRPs = new ArrayList<>(); + int maxRegionsInTransition = getMaxRegionsInTransition(); + long balanceStartTime = System.currentTimeMillis(); + long cutoffTime = balanceStartTime + this.maxBlancingTime; + int rpCount = 0; // number of RegionPlans balanced so far + if (plans != null && !plans.isEmpty()) { + int balanceInterval = this.maxBlancingTime / plans.size(); + LOG.info("Balancer plans size is " + plans.size() + ", the balance interval is " + + balanceInterval + " ms, and the max number regions in transition is " + + maxRegionsInTransition); + + for (RegionPlan plan: plans) { + LOG.info("balance " + plan); + //TODO: bulk assign + try { + this.assignmentManager.moveAsync(plan); + } catch (HBaseIOException hioe) { + //should ignore failed plans here, avoiding the whole balance plans be aborted + //later calls of balance() can fetch up the failed and skipped plans + LOG.warn("Failed balance plan: {}, just skip it", plan, hioe); + } + //rpCount records balance plans processed, does not care if a plan succeeds + rpCount++; + + balanceThrottling(balanceStartTime + rpCount * balanceInterval, maxRegionsInTransition, + cutoffTime); + + // if performing next balance exceeds cutoff time, exit the loop + if (rpCount < plans.size() && System.currentTimeMillis() > cutoffTime) { + // TODO: After balance, there should not be a cutoff time (keeping it as + // a security net for now) + LOG.debug("No more balancing till next balance run; maxBalanceTime=" + + this.maxBlancingTime); + break; + } + } + } + return sucRPs; + } + @Override @VisibleForTesting public RegionNormalizer getRegionNormalizer() { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index d5aa4fecd33..41cec5cfb23 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -529,4 +529,12 @@ public interface MasterServices extends Server { * @return the {@link ZKPermissionWatcher} */ ZKPermissionWatcher getZKPermissionWatcher(); + + /** + * Execute region plans with throttling + * @param plans to execute + * @return succeeded plans + */ + List executeRegionPlansWithThrottling(List plans); + } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java index 7cfec57deca..cbfdd3f7449 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/MockNoopMasterServices.java @@ -481,6 +481,11 @@ public class MockNoopMasterServices implements MasterServices { return null; } + @Override + public List executeRegionPlansWithThrottling(List plans) { + return null; + } + @Override public AsyncClusterConnection getAsyncClusterConnection() { return null;