From 950237566beed787981fedd1f6eeb2dc33a213b0 Mon Sep 17 00:00:00 2001 From: huaxiangsun Date: Fri, 22 May 2020 16:40:38 -0700 Subject: [PATCH] HBASE-24370 Avoid aggressive MergeRegion and GCMultipleMergedRegionsProcedure (#1719) (#1762) Signed-off-by: Jan Hentschel --- .../hadoop/hbase/master/CatalogJanitor.java | 6 +- .../hadoop/hbase/client/TestAdmin1.java | 38 ++++++--- .../client/TestAsyncRegionAdminApi2.java | 28 ++++++- .../hadoop/hbase/master/TestMetaFixer.java | 78 ++++++++++++++++++- 4 files changed, 134 insertions(+), 16 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java index 0b42561c64e..c959e926c29 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java @@ -424,7 +424,11 @@ public class CatalogJanitor extends ScheduledChore { // It doesn't have merge qualifier, no need to clean return true; } - return cleanMergeRegion(region, parents); + + // If a parent region is a merged child region and GC has not kicked in/finish its work yet, + // return false in this case to avoid kicking in a merge, trying later. + cleanMergeRegion(region, parents); + return false; } /** diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin1.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin1.java index 3b3ffc855f0..0637e69b80a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin1.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAdmin1.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.client; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -36,6 +37,8 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.exceptions.MergeRegionException; +import org.apache.hadoop.hbase.master.CatalogJanitor; +import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HStore; @@ -523,25 +526,42 @@ public class TestAdmin1 extends TestAdminBase { List tableRegions; RegionInfo regionA; RegionInfo regionB; + RegionInfo regionC; + RegionInfo mergedChildRegion = null; // merge with full name tableRegions = ADMIN.getRegions(tableName); assertEquals(3, tableRegions.size()); regionA = tableRegions.get(0); regionB = tableRegions.get(1); + regionC = tableRegions.get(2); // TODO convert this to version that is synchronous (See HBASE-16668) - ADMIN.mergeRegionsAsync(regionA.getRegionName(), regionB.getRegionName(), false).get(60, - TimeUnit.SECONDS); + ADMIN.mergeRegionsAsync(regionA.getRegionName(), regionB.getRegionName(), + false).get(60, TimeUnit.SECONDS); - assertEquals(2, ADMIN.getRegions(tableName).size()); - - // merge with encoded name tableRegions = ADMIN.getRegions(tableName); - regionA = tableRegions.get(0); - regionB = tableRegions.get(1); + + assertEquals(2, tableRegions.size()); + for (RegionInfo ri : tableRegions) { + if (regionC.compareTo(ri) != 0) { + mergedChildRegion = ri; + break; + } + } + + assertNotNull(mergedChildRegion); + // Need to wait GC for merged child region is done. + HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); + CatalogJanitor cj = services.getCatalogJanitor(); + cj.cleanMergeQualifier(mergedChildRegion); + // Wait until all procedures settled down + while (!services.getMasterProcedureExecutor().getActiveProcIds().isEmpty()) { + Thread.sleep(200); + } + // TODO convert this to version that is synchronous (See HBASE-16668) - ADMIN - .mergeRegionsAsync(regionA.getEncodedNameAsBytes(), regionB.getEncodedNameAsBytes(), false) + ADMIN.mergeRegionsAsync(regionC.getEncodedNameAsBytes(), + mergedChildRegion.getEncodedNameAsBytes(), false) .get(60, TimeUnit.SECONDS); assertEquals(1, ADMIN.getRegions(tableName).size()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi2.java index 7ea6b94cf82..ef73b20850b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi2.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestAsyncRegionAdminApi2.java @@ -21,6 +21,7 @@ import static org.apache.hadoop.hbase.TableName.META_TABLE_NAME; import static org.hamcrest.CoreMatchers.instanceOf; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -35,6 +36,8 @@ import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.master.CatalogJanitor; +import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.testclassification.ClientTests; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.util.Bytes; @@ -161,20 +164,39 @@ public class TestAsyncRegionAdminApi2 extends TestAsyncAdminBase { .getTableHRegionLocations(metaTable, tableName).get(); RegionInfo regionA; RegionInfo regionB; + RegionInfo regionC; + RegionInfo mergedChildRegion = null; // merge with full name assertEquals(3, regionLocations.size()); regionA = regionLocations.get(0).getRegion(); regionB = regionLocations.get(1).getRegion(); + regionC = regionLocations.get(2).getRegion(); admin.mergeRegions(regionA.getRegionName(), regionB.getRegionName(), false).get(); regionLocations = AsyncMetaTableAccessor .getTableHRegionLocations(metaTable, tableName).get(); + assertEquals(2, regionLocations.size()); + for (HRegionLocation rl : regionLocations) { + if (regionC.compareTo(rl.getRegion()) != 0) { + mergedChildRegion = rl.getRegion(); + break; + } + } + + assertNotNull(mergedChildRegion); + // Need to wait GC for merged child region is done. + HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); + CatalogJanitor cj = services.getCatalogJanitor(); + cj.cleanMergeQualifier(mergedChildRegion); + // Wait until all procedures settled down + while (!services.getMasterProcedureExecutor().getActiveProcIds().isEmpty()) { + Thread.sleep(200); + } // merge with encoded name - regionA = regionLocations.get(0).getRegion(); - regionB = regionLocations.get(1).getRegion(); - admin.mergeRegions(regionA.getRegionName(), regionB.getRegionName(), false).get(); + admin.mergeRegions(regionC.getRegionName(), mergedChildRegion.getRegionName(), + false).get(); regionLocations = AsyncMetaTableAccessor .getTableHRegionLocations(metaTable, tableName).get(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java index bf8c28943cf..a36ef882c69 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java @@ -26,11 +26,15 @@ import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.function.BooleanSupplier; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellBuilderFactory; +import org.apache.hadoop.hbase.CellBuilderType; import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.client.Result; @@ -43,6 +47,7 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Threads; import org.junit.AfterClass; @@ -141,7 +146,7 @@ public class TestMetaFixer { assertEquals(0, ris.size()); } - private static void makeOverlap(MasterServices services, RegionInfo a, RegionInfo b) + private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b) throws IOException { RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()). setStartKey(a.getStartKey()). @@ -152,6 +157,7 @@ public class TestMetaFixer { System.currentTimeMillis()))); // TODO: Add checks at assign time to PREVENT being able to assign over existing assign. services.getAssignmentManager().assign(overlapRegion); + return overlapRegion; } private void testOverlapCommon(final TableName tn) throws Exception { @@ -167,7 +173,6 @@ public class TestMetaFixer { makeOverlap(services, ris.get(1), ris.get(3)); makeOverlap(services, ris.get(2), ris.get(3)); makeOverlap(services, ris.get(2), ris.get(4)); - Threads.sleep(10000); } @Test @@ -308,6 +313,74 @@ public class TestMetaFixer { } } + /** + * This test covers the case that one of merged parent regions is a merged child region that + * has not been GCed but there is no reference files anymore. In this case, it will kick off + * a GC procedure, but no merge will happen. + */ + @Test + public void testMergeWithMergedChildRegion() throws Exception { + TableName tn = TableName.valueOf(this.name.getMethodName()); + Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); + List ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); + assertTrue(ris.size() > 5); + HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); + CatalogJanitor cj = services.getCatalogJanitor(); + cj.scan(); + CatalogJanitor.Report report = cj.getLastReport(); + assertTrue(report.isEmpty()); + RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2)); + + cj.scan(); + report = cj.getLastReport(); + assertEquals(2, report.getOverlaps().size()); + + // Mark it as a merged child region. + RegionInfo fakedParentRegion = RegionInfoBuilder.newBuilder(tn). + setStartKey(overlapRegion.getStartKey()). + build(); + + Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection()); + Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(overlapRegion, + HConstants.LATEST_TIMESTAMP); + String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0); + putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow( + putOfMerged.getRow()). + setFamily(HConstants.CATALOG_FAMILY). + setQualifier(Bytes.toBytes(qualifier)). + setTimestamp(putOfMerged.getTimestamp()). + setType(Cell.Type.Put). + setValue(RegionInfo.toByteArray(fakedParentRegion)). + build()); + + meta.put(putOfMerged); + + MetaFixer fixer = new MetaFixer(services); + fixer.fixOverlaps(report); + + // Wait until all procedures settled down + await(200, () -> { + return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); + }); + + // No merge is done, overlap is still there. + cj.scan(); + report = cj.getLastReport(); + assertEquals(2, report.getOverlaps().size()); + + fixer.fixOverlaps(report); + + // Wait until all procedures settled down + await(200, () -> { + return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty(); + }); + + // Merge is done and no more overlaps + cj.scan(); + report = cj.getLastReport(); + assertEquals(0, report.getOverlaps().size()); + } + /** * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so * we can fix this condition. HBASE-24247 @@ -336,7 +409,6 @@ public class TestMetaFixer { while (!services.getMasterProcedureExecutor().isFinished(pid)) { Threads.sleep(100); } - Threads.sleep(10000); services.getCatalogJanitor().scan(); report = services.getCatalogJanitor().getLastReport(); assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());