HBASE-24370 Avoid aggressive MergeRegion and GCMultipleMergedRegionsProcedure (#1719) (#1762)

Signed-off-by: Jan Hentschel <jan.hentschel@ultratendency.com>
This commit is contained in:
huaxiangsun 2020-05-22 16:40:38 -07:00 committed by GitHub
parent ae4f1b4830
commit 950237566b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 134 additions and 16 deletions

View File

@ -424,7 +424,11 @@ public class CatalogJanitor extends ScheduledChore {
// It doesn't have merge qualifier, no need to clean
return true;
}
return cleanMergeRegion(region, parents);
// If a parent region is a merged child region and GC has not kicked in/finish its work yet,
// return false in this case to avoid kicking in a merge, trying later.
cleanMergeRegion(region, parents);
return false;
}
/**

View File

@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.client;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@ -36,6 +37,8 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
import org.apache.hadoop.hbase.master.CatalogJanitor;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HStore;
@ -523,25 +526,42 @@ public class TestAdmin1 extends TestAdminBase {
List<RegionInfo> tableRegions;
RegionInfo regionA;
RegionInfo regionB;
RegionInfo regionC;
RegionInfo mergedChildRegion = null;
// merge with full name
tableRegions = ADMIN.getRegions(tableName);
assertEquals(3, tableRegions.size());
regionA = tableRegions.get(0);
regionB = tableRegions.get(1);
regionC = tableRegions.get(2);
// TODO convert this to version that is synchronous (See HBASE-16668)
ADMIN.mergeRegionsAsync(regionA.getRegionName(), regionB.getRegionName(), false).get(60,
TimeUnit.SECONDS);
ADMIN.mergeRegionsAsync(regionA.getRegionName(), regionB.getRegionName(),
false).get(60, TimeUnit.SECONDS);
assertEquals(2, ADMIN.getRegions(tableName).size());
// merge with encoded name
tableRegions = ADMIN.getRegions(tableName);
regionA = tableRegions.get(0);
regionB = tableRegions.get(1);
assertEquals(2, tableRegions.size());
for (RegionInfo ri : tableRegions) {
if (regionC.compareTo(ri) != 0) {
mergedChildRegion = ri;
break;
}
}
assertNotNull(mergedChildRegion);
// Need to wait GC for merged child region is done.
HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
CatalogJanitor cj = services.getCatalogJanitor();
cj.cleanMergeQualifier(mergedChildRegion);
// Wait until all procedures settled down
while (!services.getMasterProcedureExecutor().getActiveProcIds().isEmpty()) {
Thread.sleep(200);
}
// TODO convert this to version that is synchronous (See HBASE-16668)
ADMIN
.mergeRegionsAsync(regionA.getEncodedNameAsBytes(), regionB.getEncodedNameAsBytes(), false)
ADMIN.mergeRegionsAsync(regionC.getEncodedNameAsBytes(),
mergedChildRegion.getEncodedNameAsBytes(), false)
.get(60, TimeUnit.SECONDS);
assertEquals(1, ADMIN.getRegions(tableName).size());

View File

@ -21,6 +21,7 @@ import static org.apache.hadoop.hbase.TableName.META_TABLE_NAME;
import static org.hamcrest.CoreMatchers.instanceOf;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@ -35,6 +36,8 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.CatalogJanitor;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.testclassification.ClientTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
@ -161,20 +164,39 @@ public class TestAsyncRegionAdminApi2 extends TestAsyncAdminBase {
.getTableHRegionLocations(metaTable, tableName).get();
RegionInfo regionA;
RegionInfo regionB;
RegionInfo regionC;
RegionInfo mergedChildRegion = null;
// merge with full name
assertEquals(3, regionLocations.size());
regionA = regionLocations.get(0).getRegion();
regionB = regionLocations.get(1).getRegion();
regionC = regionLocations.get(2).getRegion();
admin.mergeRegions(regionA.getRegionName(), regionB.getRegionName(), false).get();
regionLocations = AsyncMetaTableAccessor
.getTableHRegionLocations(metaTable, tableName).get();
assertEquals(2, regionLocations.size());
for (HRegionLocation rl : regionLocations) {
if (regionC.compareTo(rl.getRegion()) != 0) {
mergedChildRegion = rl.getRegion();
break;
}
}
assertNotNull(mergedChildRegion);
// Need to wait GC for merged child region is done.
HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
CatalogJanitor cj = services.getCatalogJanitor();
cj.cleanMergeQualifier(mergedChildRegion);
// Wait until all procedures settled down
while (!services.getMasterProcedureExecutor().getActiveProcIds().isEmpty()) {
Thread.sleep(200);
}
// merge with encoded name
regionA = regionLocations.get(0).getRegion();
regionB = regionLocations.get(1).getRegion();
admin.mergeRegions(regionA.getRegionName(), regionB.getRegionName(), false).get();
admin.mergeRegions(regionC.getRegionName(), mergedChildRegion.getRegionName(),
false).get();
regionLocations = AsyncMetaTableAccessor
.getTableHRegionLocations(metaTable, tableName).get();

View File

@ -26,11 +26,15 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.function.BooleanSupplier;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellBuilderFactory;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Result;
@ -43,6 +47,7 @@ import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.junit.AfterClass;
@ -141,7 +146,7 @@ public class TestMetaFixer {
assertEquals(0, ris.size());
}
private static void makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
private static RegionInfo makeOverlap(MasterServices services, RegionInfo a, RegionInfo b)
throws IOException {
RegionInfo overlapRegion = RegionInfoBuilder.newBuilder(a.getTable()).
setStartKey(a.getStartKey()).
@ -152,6 +157,7 @@ public class TestMetaFixer {
System.currentTimeMillis())));
// TODO: Add checks at assign time to PREVENT being able to assign over existing assign.
services.getAssignmentManager().assign(overlapRegion);
return overlapRegion;
}
private void testOverlapCommon(final TableName tn) throws Exception {
@ -167,7 +173,6 @@ public class TestMetaFixer {
makeOverlap(services, ris.get(1), ris.get(3));
makeOverlap(services, ris.get(2), ris.get(3));
makeOverlap(services, ris.get(2), ris.get(4));
Threads.sleep(10000);
}
@Test
@ -308,6 +313,74 @@ public class TestMetaFixer {
}
}
/**
* This test covers the case that one of merged parent regions is a merged child region that
* has not been GCed but there is no reference files anymore. In this case, it will kick off
* a GC procedure, but no merge will happen.
*/
@Test
public void testMergeWithMergedChildRegion() throws Exception {
TableName tn = TableName.valueOf(this.name.getMethodName());
Table t = TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
assertTrue(ris.size() > 5);
HMaster services = TEST_UTIL.getHBaseCluster().getMaster();
CatalogJanitor cj = services.getCatalogJanitor();
cj.scan();
CatalogJanitor.Report report = cj.getLastReport();
assertTrue(report.isEmpty());
RegionInfo overlapRegion = makeOverlap(services, ris.get(1), ris.get(2));
cj.scan();
report = cj.getLastReport();
assertEquals(2, report.getOverlaps().size());
// Mark it as a merged child region.
RegionInfo fakedParentRegion = RegionInfoBuilder.newBuilder(tn).
setStartKey(overlapRegion.getStartKey()).
build();
Table meta = MetaTableAccessor.getMetaHTable(TEST_UTIL.getConnection());
Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(overlapRegion,
HConstants.LATEST_TIMESTAMP);
String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", 0);
putOfMerged.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(
putOfMerged.getRow()).
setFamily(HConstants.CATALOG_FAMILY).
setQualifier(Bytes.toBytes(qualifier)).
setTimestamp(putOfMerged.getTimestamp()).
setType(Cell.Type.Put).
setValue(RegionInfo.toByteArray(fakedParentRegion)).
build());
meta.put(putOfMerged);
MetaFixer fixer = new MetaFixer(services);
fixer.fixOverlaps(report);
// Wait until all procedures settled down
await(200, () -> {
return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
});
// No merge is done, overlap is still there.
cj.scan();
report = cj.getLastReport();
assertEquals(2, report.getOverlaps().size());
fixer.fixOverlaps(report);
// Wait until all procedures settled down
await(200, () -> {
return services.getMasterProcedureExecutor().getActiveProcIds().isEmpty();
});
// Merge is done and no more overlaps
cj.scan();
report = cj.getLastReport();
assertEquals(0, report.getOverlaps().size());
}
/**
* Make it so a big overlap spans many Regions, some of which are non-contiguous. Make it so
* we can fix this condition. HBASE-24247
@ -336,7 +409,6 @@ public class TestMetaFixer {
while (!services.getMasterProcedureExecutor().isFinished(pid)) {
Threads.sleep(100);
}
Threads.sleep(10000);
services.getCatalogJanitor().scan();
report = services.getCatalogJanitor().getLastReport();
assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());