From 30f07e81876ad746cd73aed538cb5638aad058a4 Mon Sep 17 00:00:00 2001 From: huaxiangsun Date: Fri, 8 May 2020 10:16:42 -0700 Subject: [PATCH] HBASE-24256 When fixOverlap hits the max region limit, it is possible to include the same region in multiple merge request (#1584) Signed-off-by: stack --- .../apache/hadoop/hbase/master/MetaFixer.java | 31 ++++- .../hadoop/hbase/master/TestMetaFixer.java | 111 ++++++++++++++++-- 2 files changed, 130 insertions(+), 12 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java index 40ea395ae55..a928e6843f3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Optional; import java.util.Set; @@ -242,6 +243,7 @@ class MetaFixer { } List> merges = new ArrayList<>(); SortedSet currentMergeSet = new TreeSet<>(); + HashSet regionsInMergeSet = new HashSet<>(); RegionInfo regionInfoWithlargestEndKey = null; for (Pair pair: overlaps) { if (regionInfoWithlargestEndKey != null) { @@ -251,12 +253,33 @@ class MetaFixer { if (currentMergeSet.size() >= maxMergeCount) { LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount); } - merges.add(currentMergeSet); - currentMergeSet = new TreeSet<>(); + + // In the case of the merge set contains only 1 region or empty, it does not need to + // submit this merge request as no merge is going to happen. currentMergeSet can be + // reused in this case. + if (currentMergeSet.size() <= 1) { + for (RegionInfo ri : currentMergeSet) { + regionsInMergeSet.remove(ri); + } + currentMergeSet.clear(); + } else { + merges.add(currentMergeSet); + currentMergeSet = new TreeSet<>(); + } } } - currentMergeSet.add(pair.getFirst()); - currentMergeSet.add(pair.getSecond()); + + // Do not add the same region into multiple merge set, this will fail + // the second merge request. + if (!regionsInMergeSet.contains(pair.getFirst())) { + currentMergeSet.add(pair.getFirst()); + regionsInMergeSet.add(pair.getFirst()); + } + if (!regionsInMergeSet.contains(pair.getSecond())) { + currentMergeSet.add(pair.getSecond()); + regionsInMergeSet.add(pair.getSecond()); + } + regionInfoWithlargestEndKey = getRegionInfoWithLargestEndKey( getRegionInfoWithLargestEndKey(pair.getFirst(), pair.getSecond()), regionInfoWithlargestEndKey); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java index 8d080e8d974..bf8c28943cf 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMetaFixer.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.function.BooleanSupplier; @@ -34,12 +35,15 @@ import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.master.assignment.AssignmentManager; import org.apache.hadoop.hbase.master.assignment.GCRegionProcedure; import org.apache.hadoop.hbase.master.assignment.GCMultipleMergedRegionsProcedure; +import org.apache.hadoop.hbase.master.assignment.RegionStates; import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MasterTests; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Threads; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -150,15 +154,12 @@ public class TestMetaFixer { services.getAssignmentManager().assign(overlapRegion); } - @Test - public void testOverlap() throws Exception { - TableName tn = TableName.valueOf(this.name.getMethodName()); + private void testOverlapCommon(final TableName tn) throws Exception { Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY); TEST_UTIL.loadTable(t, HConstants.CATALOG_FAMILY); List ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn); assertTrue(ris.size() > 5); HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); - HbckChore hbckChore = services.getHbckChore(); services.getCatalogJanitor().scan(); CatalogJanitor.Report report = services.getCatalogJanitor().getLastReport(); assertTrue(report.isEmpty()); @@ -167,14 +168,24 @@ public class TestMetaFixer { makeOverlap(services, ris.get(2), ris.get(3)); makeOverlap(services, ris.get(2), ris.get(4)); Threads.sleep(10000); - services.getCatalogJanitor().scan(); - report = services.getCatalogJanitor().getLastReport(); + } + + @Test + public void testOverlap() throws Exception { + TableName tn = TableName.valueOf(this.name.getMethodName()); + testOverlapCommon(tn); + HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); + HbckChore hbckChore = services.getHbckChore(); + + CatalogJanitor cj = services.getCatalogJanitor(); + cj.scan(); + CatalogJanitor.Report report = cj.getLastReport(); assertEquals(6, report.getOverlaps().size()); - assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size()); + assertEquals(1, + MetaFixer.calculateMerges(10, report.getOverlaps()).size()); MetaFixer fixer = new MetaFixer(services); fixer.fixOverlaps(report); - CatalogJanitor cj = services.getCatalogJanitor(); await(10, () -> { try { if (cj.scan() > 0) { @@ -213,6 +224,90 @@ public class TestMetaFixer { assertTrue(postReport.isEmpty()); } + @Test + public void testOverlapWithSmallMergeCount() throws Exception { + TableName tn = TableName.valueOf(this.name.getMethodName()); + try { + testOverlapCommon(tn); + HMaster services = TEST_UTIL.getHBaseCluster().getMaster(); + CatalogJanitor cj = services.getCatalogJanitor(); + cj.scan(); + CatalogJanitor.Report report = cj.getLastReport(); + assertEquals(6, report.getOverlaps().size()); + assertEquals(2, + MetaFixer.calculateMerges(5, report.getOverlaps()).size()); + + // The max merge count is set to 5 so overlap regions are divided into + // two merge requests. + TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().setInt( + "hbase.master.metafixer.max.merge.count", 5); + + // Get overlap regions + HashSet overlapRegions = new HashSet<>(); + for (Pair pair : report.getOverlaps()) { + overlapRegions.add(pair.getFirst().getRegionNameAsString()); + overlapRegions.add(pair.getSecond().getRegionNameAsString()); + } + + MetaFixer fixer = new MetaFixer(services); + fixer.fixOverlaps(report); + AssignmentManager am = services.getAssignmentManager(); + + await(200, () -> { + try { + cj.scan(); + final CatalogJanitor.Report postReport = cj.getLastReport(); + RegionStates regionStates = am.getRegionStates(); + + // Make sure that two merged regions are opened and GCs are done. + if (postReport.getOverlaps().size() == 1) { + Pair pair = postReport.getOverlaps().get(0); + if ((!overlapRegions.contains(pair.getFirst().getRegionNameAsString()) && + regionStates.getRegionState(pair.getFirst()).isOpened()) && + (!overlapRegions.contains(pair.getSecond().getRegionNameAsString()) && + regionStates.getRegionState(pair.getSecond()).isOpened())) { + // Make sure GC is done. + List firstParents = MetaTableAccessor.getMergeRegions( + services.getConnection(), pair.getFirst().getRegionName()); + List secondParents = MetaTableAccessor.getMergeRegions( + services.getConnection(), pair.getSecond().getRegionName()); + + return (firstParents == null || firstParents.isEmpty()) && + (secondParents == null || secondParents.isEmpty()); + } + } + return false; + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + + // Second run of fixOverlap should fix all. + report = cj.getLastReport(); + fixer.fixOverlaps(report); + + await(20, () -> { + try { + // Make sure it GC only once. + return (cj.scan() > 0); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + + // No holes reported. + cj.scan(); + final CatalogJanitor.Report postReport = cj.getLastReport(); + assertTrue(postReport.isEmpty()); + + } finally { + TEST_UTIL.getHBaseCluster().getMaster().getConfiguration().unset( + "hbase.master.metafixer.max.merge.count"); + + TEST_UTIL.deleteTable(tn); + } + } + /** * Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so * we can fix this condition. HBASE-24247