HBASE-21395 Abort split/merge procedure if there is a table procedure of the same table going on

This commit is contained in:
Allan Yang 2018-11-05 20:12:00 +08:00
parent 8df5878932
commit 9e52e9eb7d
3 changed files with 44 additions and 2 deletions

View File

@ -24,6 +24,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
@ -52,6 +53,7 @@ import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.master.procedure.TableQueue;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.quotas.QuotaExceededException;
@ -528,6 +530,24 @@ public class MergeTableRegionsProcedure
new IOException("Merge of " + regionsStr + " failed because merge switch is off"));
return false;
}
// See HBASE-21395, for 2.0.x and 2.1.x only.
// A safety fence: if a table procedure is in progress on the same table, abort the merge.
// There are some cases where the race between the merge procedure and a table procedure
// may lead to the table procedure rolling back (more serious than rolling back the merge
// procedure here), or to the merged regions being brought online by the table procedure.
List<AbstractStateMachineTableProcedure> tableProcedures = env
.getMasterServices().getProcedures().stream()
.filter(p -> p instanceof AbstractStateMachineTableProcedure)
.map(p -> (AbstractStateMachineTableProcedure) p)
.filter(p -> p.getProcId() != this.getProcId() && p.getTableName()
.equals(regionsToMerge[0].getTable()) && !p.isFinished()
&& TableQueue.requireTableExclusiveLock(p))
.collect(Collectors.toList());
if (tableProcedures != null && tableProcedures.size() > 0) {
throw new MergeRegionException(tableProcedures.get(0).toString()
+ " is going on against the same table, abort the merge of " + this
.toString());
}
// Ask the remote regionserver if regions are mergeable. If we get an IOE, report it
// along with the failure, so we can see why regions are not mergeable at this time.

View File

@ -33,6 +33,8 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -55,8 +57,10 @@ import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.assignment.RegionStates.RegionStateNode;
import org.apache.hadoop.hbase.master.normalizer.NormalizationPlan;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineRegionProcedure;
import org.apache.hadoop.hbase.master.procedure.AbstractStateMachineTableProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.master.procedure.TableQueue;
import org.apache.hadoop.hbase.procedure2.ProcedureMetrics;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.quotas.QuotaExceededException;
@ -504,6 +508,24 @@ public class SplitTableRegionProcedure
return false;
}
// See HBASE-21395, for 2.0.x and 2.1.x only.
// A safety fence: if a table procedure is in progress on the same table, abort the split.
// There are some cases where the race between the split procedure and a table procedure
// may lead to the table procedure rolling back (more serious than rolling back the split
// procedure here), or to the split parent being brought online by the table procedure.
List<AbstractStateMachineTableProcedure> tableProcedures = env
.getMasterServices().getProcedures().stream()
.filter(p -> p instanceof AbstractStateMachineTableProcedure)
.map(p -> (AbstractStateMachineTableProcedure) p)
.filter(p -> p.getTableName().equals(getParentRegion().getTable()) &&
!p.isFinished() && TableQueue.requireTableExclusiveLock(p))
.collect(Collectors.toList());
if (tableProcedures != null && tableProcedures.size() > 0) {
throw new DoNotRetryIOException(tableProcedures.get(0).toString()
+ " is going on against the same table, abort the split of " + this
.toString());
}
// set node state as SPLITTING
node.setState(State.SPLITTING);

View File

@ -23,7 +23,7 @@ import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private
class TableQueue extends Queue<TableName> {
public class TableQueue extends Queue<TableName> {
private final LockStatus namespaceLockStatus;
public TableQueue(TableName tableName, int priority, LockStatus tableLock,
@ -45,7 +45,7 @@ class TableQueue extends Queue<TableName> {
/**
* @param proc must not be null
*/
private static boolean requireTableExclusiveLock(TableProcedureInterface proc) {
public static boolean requireTableExclusiveLock(TableProcedureInterface proc) {
switch (proc.getTableOperationType()) {
case CREATE:
case DELETE: