From f49fc24ecff7805a1ee33ca468708b328458a1ce Mon Sep 17 00:00:00 2001 From: huaxiangsun Date: Wed, 6 May 2020 10:01:25 -0700 Subject: [PATCH] HBASE-24316 GCMulitpleMergedRegionsProcedure is not idempotent (#1660) (#1663) It addresses couple issues: 1. Make sure deleteMergeQualifiers() does not delete the row if there is no columns with "merge" keyword. 2. GCMulitpleMergedRegionsProcedure now acquire an exclusive lock on the child region. Signed-off-by: stack --- .../hadoop/hbase/MetaTableAccessor.java | 10 +++++ .../GCMultipleMergedRegionsProcedure.java | 20 ++++++++++ .../hadoop/hbase/master/TestMetaFixer.java | 37 +++++++++++++++++-- 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java index ac79e8aaffd..e345483351e 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/MetaTableAccessor.java @@ -1904,6 +1904,16 @@ public class MetaTableAccessor { qualifiers.add(qualifier); delete.addColumns(getCatalogFamily(), qualifier, HConstants.LATEST_TIMESTAMP); } + + // There will be race condition that a GCMultipleMergedRegionsProcedure is scheduled while + // the previous GCMultipleMergedRegionsProcedure is still going on, in this case, the second + // GCMultipleMergedRegionsProcedure could delete the merged region by accident! + if (qualifiers.isEmpty()) { + LOG.info("No merged qualifiers for region " + mergeRegion.getRegionNameAsString() + + " in meta table, they are cleaned up already, Skip."); + return; + } + deleteFromMetaTable(connection, delete); LOG.info("Deleted merge references in " + mergeRegion.getRegionNameAsString() + ", deleted qualifiers " + qualifiers.stream().map(Bytes::toStringBinary). diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCMultipleMergedRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCMultipleMergedRegionsProcedure.java index 285891eb2b2..8042d6172f3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCMultipleMergedRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/GCMultipleMergedRegionsProcedure.java @@ -63,6 +63,26 @@ public class GCMultipleMergedRegionsProcedure extends super(); } + @Override protected boolean holdLock(MasterProcedureEnv env) { + return true; + } + + @Override + protected LockState acquireLock(final MasterProcedureEnv env) { + // It now takes an exclusive lock on the merged child region to make sure + // that no two parallel running of two GCMultipleMergedRegionsProcedures on the + // region. + if (env.getProcedureScheduler().waitRegion(this, mergedChild)) { + return LockState.LOCK_EVENT_WAIT; + } + return LockState.LOCK_ACQUIRED; + } + + @Override + protected void releaseLock(final MasterProcedureEnv env) { + env.getProcedureScheduler().wakeRegion(this, mergedChild); + } + @Override public TableOperationType getTableOperationType() { return TableOperationType.MERGED_REGIONS_GC; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java index ee671109f96..8d080e8d974 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java @@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.Collections; import java.util.List; +import java.util.Map; import java.util.function.BooleanSupplier; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; @@ -31,8 +32,12 @@ import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure; +import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.util.Threads; @@ -168,18 +173,44 @@ public class TestMetaFixer { assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size()); MetaFixer fixer = new MetaFixer(services); fixer.fixOverlaps(report); + + CatalogJanitor cj = services.getCatalogJanitor(); await(10, () -> { try { - services.getCatalogJanitor().scan(); - final CatalogJanitor.Report postReport = services.getCatalogJanitor().getLastReport(); - return postReport.isEmpty(); + if (cj.scan() > 0) { + // It submits GC once, then it will immediately kick off another GC to test if + // GCMultipleMergedRegionsProcedure is idempotent. If it is not, it will create + // a hole. + Map mergedRegions = cj.getLastReport().mergedRegions; + for (Map.Entry e : mergedRegions.entrySet()) { + List parents = MetaTableAccessor.getMergeRegions(e.getValue().rawCells()); + if (parents != null) { + ProcedureExecutor pe = services.getMasterProcedureExecutor(); + pe.submitProcedure(new GCMultipleMergedRegionsProcedure(pe.getEnvironment(), + e.getKey(), parents)); + } + } + return true; + } + return false; } catch (Exception e) { throw new RuntimeException(e); } }); + // Wait until all GCs settled down + await(10, () -> { + return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); + }); + + // No orphan regions on FS hbckChore.chore(); assertEquals(0, hbckChore.getOrphanRegionsOnFS().size()); + + // No holes reported. + cj.scan(); + final CatalogJanitor.Report postReport = cj.getLastReport(); + assertTrue(postReport.isEmpty()); } /**