From 9e52e9eb7d14acddbbf7987f93f1cf0a641e6c65 Mon Sep 17 00:00:00 2001 From: Allan Yang Date: Mon, 5 Nov 2018 20:12:00 +0800 Subject: [PATCH] HBASE-21395 Abort split/merge procedure if there is a table procedure of the same table going on --- .../MergeTableRegionsProcedure.java | 20 +++++++++++++++++ .../assignment/SplitTableRegionProcedure.java | 22 +++++++++++++++++++ .../hbase/master/procedure/TableQueue.java | 4 ++-- 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index a9ecffd2c17..7b39fdab740 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -24,6 +24,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -52,6 +53,7 @@ import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan; import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; +import org.apache.hadoop.hbase.master.procedure.TableQueue; import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; import org.apache.hadoop.hbase.quotas.QuotaExceededException; @@ -528,6 +530,24 @@ public class MergeTableRegionsProcedure new IOException("Merge of " + regionsStr + " failed because merge switch is off")); return false; } + // See HBASE-21395, for 2.0.x and 2.1.x only. 
+ // A safe fence here, if there is a table procedure going on, abort the merge. + // There are some cases that may lead to table procedure roll back (more serious + // than roll back the merge procedure here), or the merged regions were brought online + // by the table procedure because of the race between merge procedure and table procedure + List tableProcedures = env + .getMasterServices().getProcedures().stream() + .filter(p -> p instanceof AbstractStateMachineTableProcedure) + .map(p -> (AbstractStateMachineTableProcedure) p) + .filter(p -> p.getProcId() != this.getProcId() && p.getTableName() + .equals(regionsToMerge[0].getTable()) && !p.isFinished() + && TableQueue.requireTableExclusiveLock(p)) + .collect(Collectors.toList()); + if (tableProcedures != null && tableProcedures.size() > 0) { + throw new MergeRegionException(tableProcedures.get(0).toString() + + " is going on against the same table, abort the merge of " + this + .toString()); + } // Ask the remote regionserver if regions are mergeable. If we get an IOE, report it // along with the failure, so we can see why regions are not mergeable at this time. 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java index 28828d3801d..e1e2805ef60 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java @@ -33,6 +33,8 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -55,8 +57,10 @@ import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode; import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan; import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure; +import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil; +import org.apache.hadoop.hbase.master.procedure.TableQueue; import org.apache.hadoop.hbase.procedure2.ProcedureMetrics; import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; import org.apache.hadoop.hbase.quotas.QuotaExceededException; @@ -504,6 +508,24 @@ public class SplitTableRegionProcedure return false; } + // See HBASE-21395, for 2.0.x and 2.1.x only. + // A safe fence here, if there is a table procedure going on, abort the split. 
+ // There are some cases that may lead to table procedure roll back (more serious + // than roll back the split procedure here), or the split parent was brought online + // by the table procedure because of the race between split procedure and table procedure + List tableProcedures = env + .getMasterServices().getProcedures().stream() + .filter(p -> p instanceof AbstractStateMachineTableProcedure) + .map(p -> (AbstractStateMachineTableProcedure) p) + .filter(p -> p.getTableName().equals(getParentRegion().getTable()) && + !p.isFinished() && TableQueue.requireTableExclusiveLock(p)) + .collect(Collectors.toList()); + if (tableProcedures != null && tableProcedures.size() > 0) { + throw new DoNotRetryIOException(tableProcedures.get(0).toString() + + " is going on against the same table, abort the split of " + this + .toString()); + } + // set node state as SPLITTING node.setState(State.SPLITTING); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableQueue.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableQueue.java index 81c883b0580..637f9e5a664 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableQueue.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableQueue.java @@ -23,7 +23,7 @@ import org.apache.hadoop.hbase.procedure2.Procedure; import org.apache.yetus.audience.InterfaceAudience; @InterfaceAudience.Private -class TableQueue extends Queue { +public class TableQueue extends Queue { private final LockStatus namespaceLockStatus; public TableQueue(TableName tableName, int priority, LockStatus tableLock, @@ -45,7 +45,7 @@ class TableQueue extends Queue { /** * @param proc must not be null */ - private static boolean requireTableExclusiveLock(TableProcedureInterface proc) { + public static boolean requireTableExclusiveLock(TableProcedureInterface proc) { switch (proc.getTableOperationType()) { case CREATE: case DELETE: