HBASE-23383 [hbck2] `fixHoles` should queue assignment procedures for any regions it's fixing (#917)
The current process for an operator, after fixing holes in meta, is to manually disable and enable the whole table. Let's try to avoid bringing the whole table offline if we can. Have the master attempt to queue up assignment procedures for any new regions it creates. Signed-off-by: stack <stack@apache.org>
This commit is contained in:
parent
1d2c3efc69
commit
499ff32f00
|
@ -21,26 +21,23 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.MetaTableAccessor;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
||||
import org.apache.hadoop.hbase.exceptions.MergeRegionException;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.Pair;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
|
||||
|
||||
|
||||
|
@ -56,6 +53,7 @@ class MetaFixer {
|
|||
private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class);
|
||||
private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count";
|
||||
private static final int MAX_MERGE_COUNT_DEFAULT = 10;
|
||||
|
||||
private final MasterServices masterServices;
|
||||
/**
|
||||
* Maximum for how many regions to merge at a time.
|
||||
|
@ -86,74 +84,133 @@ class MetaFixer {
|
|||
* If hole, it papers it over by adding a region in the filesystem and to hbase:meta.
|
||||
* Queues assignment procedures for the regions it adds to hbase:meta.
|
||||
*/
|
||||
void fixHoles(CatalogJanitor.Report report) throws IOException {
|
||||
List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles();
|
||||
void fixHoles(CatalogJanitor.Report report) {
|
||||
final List<Pair<RegionInfo, RegionInfo>> holes = report.getHoles();
|
||||
if (holes.isEmpty()) {
|
||||
LOG.debug("No holes.");
|
||||
LOG.info("CatalogJanitor Report contains no holes to fix. Skipping.");
|
||||
return;
|
||||
}
|
||||
for (Pair<RegionInfo, RegionInfo> p: holes) {
|
||||
RegionInfo ri = getHoleCover(p);
|
||||
if (ri == null) {
|
||||
continue;
|
||||
}
|
||||
Configuration configuration = this.masterServices.getConfiguration();
|
||||
HRegion.createRegionDir(configuration, ri, FSUtils.getRootDir(configuration));
|
||||
// If an error here, then we'll have a region in the filesystem but not
|
||||
// in hbase:meta (if the below fails). Should be able to rerun the fix.
|
||||
// Add to hbase:meta and then update in-memory state so it knows of new
|
||||
// Region; addRegionToMeta adds region and adds a state column set to CLOSED.
|
||||
MetaTableAccessor.addRegionToMeta(this.masterServices.getConnection(), ri);
|
||||
this.masterServices.getAssignmentManager().getRegionStates().
|
||||
updateRegionState(ri, RegionState.State.CLOSED);
|
||||
LOG.info("Fixed hole by adding {} in CLOSED state; region NOT assigned (assign to ONLINE).",
|
||||
ri);
|
||||
}
|
||||
|
||||
LOG.info("Identified {} region holes to fix. Detailed fixup progress logged at DEBUG.",
|
||||
holes.size());
|
||||
|
||||
final List<RegionInfo> newRegionInfos = createRegionInfosForHoles(holes);
|
||||
final List<RegionInfo> newMetaEntries = createMetaEntries(masterServices, newRegionInfos);
|
||||
final TransitRegionStateProcedure[] assignProcedures = masterServices
|
||||
.getAssignmentManager()
|
||||
.createRoundRobinAssignProcedures(newMetaEntries);
|
||||
|
||||
masterServices.getMasterProcedureExecutor().submitProcedures(assignProcedures);
|
||||
LOG.info(
|
||||
"Scheduled {}/{} new regions for assignment.", assignProcedures.length, holes.size());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Calculated RegionInfo that covers the hole <code>hole</code>
|
||||
* Create a new {@link RegionInfo} corresponding to each provided "hole" pair.
|
||||
*/
|
||||
private RegionInfo getHoleCover(Pair<RegionInfo, RegionInfo> hole) {
|
||||
RegionInfo holeCover = null;
|
||||
RegionInfo left = hole.getFirst();
|
||||
RegionInfo right = hole.getSecond();
|
||||
private static List<RegionInfo> createRegionInfosForHoles(
|
||||
final List<Pair<RegionInfo, RegionInfo>> holes) {
|
||||
final List<RegionInfo> newRegionInfos = holes.stream()
|
||||
.map(MetaFixer::getHoleCover)
|
||||
.filter(Optional::isPresent)
|
||||
.map(Optional::get)
|
||||
.collect(Collectors.toList());
|
||||
LOG.debug("Constructed {}/{} RegionInfo descriptors corresponding to identified holes.",
|
||||
newRegionInfos.size(), holes.size());
|
||||
return newRegionInfos;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A new {@link RegionInfo} that covers the region range described in {@code hole}, or
|
||||
* {@link Optional#empty()} when no covering region can be calculated.
|
||||
*/
|
||||
private static Optional<RegionInfo> getHoleCover(Pair<RegionInfo, RegionInfo> hole) {
|
||||
final RegionInfo left = hole.getFirst();
|
||||
final RegionInfo right = hole.getSecond();
|
||||
|
||||
if (left.getTable().equals(right.getTable())) {
|
||||
// Simple case.
|
||||
if (Bytes.compareTo(left.getEndKey(), right.getStartKey()) >= 0) {
|
||||
LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey; " +
|
||||
"left=<{}>, right=<{}>", left, right);
|
||||
return holeCover;
|
||||
}
|
||||
holeCover = buildRegionInfo(left.getTable(), left.getEndKey(), right.getStartKey());
|
||||
} else {
|
||||
boolean leftUndefined = left.equals(RegionInfo.UNDEFINED);
|
||||
boolean rightUnefined = right.equals(RegionInfo.UNDEFINED);
|
||||
boolean last = left.isLast();
|
||||
boolean first = right.isFirst();
|
||||
if (leftUndefined && rightUnefined) {
|
||||
LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " +
|
||||
"UNDEFINED; left=<{}>, right=<{}>", left, right);
|
||||
return holeCover;
|
||||
}
|
||||
if (leftUndefined || last) {
|
||||
holeCover = buildRegionInfo(right.getTable(), HConstants.EMPTY_START_ROW,
|
||||
right.getStartKey());
|
||||
} else if (rightUnefined || first) {
|
||||
holeCover = buildRegionInfo(left.getTable(), left.getEndKey(), HConstants.EMPTY_END_ROW);
|
||||
} else {
|
||||
LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>",
|
||||
left, right);
|
||||
return holeCover;
|
||||
LOG.warn("Skipping hole fix; left-side endKey is not less than right-side startKey;"
|
||||
+ " left=<{}>, right=<{}>", left, right);
|
||||
return Optional.empty();
|
||||
}
|
||||
return Optional.of(buildRegionInfo(left.getTable(), left.getEndKey(), right.getStartKey()));
|
||||
}
|
||||
return holeCover;
|
||||
|
||||
final boolean leftUndefined = left.equals(RegionInfo.UNDEFINED);
|
||||
final boolean rightUndefined = right.equals(RegionInfo.UNDEFINED);
|
||||
final boolean last = left.isLast();
|
||||
final boolean first = right.isFirst();
|
||||
if (leftUndefined && rightUndefined) {
|
||||
LOG.warn("Skipping hole fix; both the hole left-side and right-side RegionInfos are " +
|
||||
"UNDEFINED; left=<{}>, right=<{}>", left, right);
|
||||
return Optional.empty();
|
||||
}
|
||||
if (leftUndefined || last) {
|
||||
return Optional.of(
|
||||
buildRegionInfo(right.getTable(), HConstants.EMPTY_START_ROW, right.getStartKey()));
|
||||
}
|
||||
if (rightUndefined || first) {
|
||||
return Optional.of(
|
||||
buildRegionInfo(left.getTable(), left.getEndKey(), HConstants.EMPTY_END_ROW));
|
||||
}
|
||||
LOG.warn("Skipping hole fix; don't know what to do with left=<{}>, right=<{}>", left, right);
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
private RegionInfo buildRegionInfo(TableName tn, byte [] start, byte [] end) {
|
||||
private static RegionInfo buildRegionInfo(TableName tn, byte [] start, byte [] end) {
|
||||
return RegionInfoBuilder.newBuilder(tn).setStartKey(start).setEndKey(end).build();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create entries in the {@code hbase:meta} for each provided {@link RegionInfo}. Best effort.
|
||||
* @param masterServices used to connect to {@code hbase:meta}
|
||||
* @param newRegionInfos the new {@link RegionInfo} entries to add to the filesystem
|
||||
* @return a list of {@link RegionInfo} entries for which {@code hbase:meta} entries were
|
||||
* successfully created
|
||||
*/
|
||||
private static List<RegionInfo> createMetaEntries(final MasterServices masterServices,
|
||||
final List<RegionInfo> newRegionInfos) {
|
||||
|
||||
final List<Either<RegionInfo, IOException>> addMetaEntriesResults = newRegionInfos.stream()
|
||||
.map(regionInfo -> {
|
||||
try {
|
||||
MetaTableAccessor.addRegionToMeta(masterServices.getConnection(), regionInfo);
|
||||
masterServices.getAssignmentManager()
|
||||
.getRegionStates()
|
||||
.updateRegionState(regionInfo, RegionState.State.CLOSED);
|
||||
return Either.<RegionInfo, IOException>ofLeft(regionInfo);
|
||||
} catch (IOException e) {
|
||||
return Either.<RegionInfo, IOException>ofRight(e);
|
||||
}
|
||||
})
|
||||
.collect(Collectors.toList());
|
||||
final List<RegionInfo> createMetaEntriesSuccesses = addMetaEntriesResults.stream()
|
||||
.filter(Either::hasLeft)
|
||||
.map(Either::getLeft)
|
||||
.collect(Collectors.toList());
|
||||
final List<IOException> createMetaEntriesFailures = addMetaEntriesResults.stream()
|
||||
.filter(Either::hasRight)
|
||||
.map(Either::getRight)
|
||||
.collect(Collectors.toList());
|
||||
LOG.debug("Added {}/{} entries to hbase:meta",
|
||||
createMetaEntriesSuccesses.size(), newRegionInfos.size());
|
||||
|
||||
if (!createMetaEntriesFailures.isEmpty()) {
|
||||
LOG.warn("Failed to create entries in hbase:meta for {}/{} RegionInfo descriptors. First"
|
||||
+ " failure message included; full list of failures with accompanying stack traces is"
|
||||
+ " available at log level DEBUG. message={}", createMetaEntriesFailures.size(),
|
||||
addMetaEntriesResults.size(), createMetaEntriesFailures.get(0).getMessage());
|
||||
if (LOG.isDebugEnabled()) {
|
||||
createMetaEntriesFailures.forEach(
|
||||
ioe -> LOG.debug("Attempt to fix region hole in hbase:meta failed.", ioe));
|
||||
}
|
||||
}
|
||||
|
||||
return createMetaEntriesSuccesses;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fix overlaps noted in CJ consistency report.
|
||||
*/
|
||||
|
@ -244,4 +301,47 @@ class MetaFixer {
|
|||
}
|
||||
return ri.isOverlap(pair.getFirst()) || ri.isOverlap(pair.getSecond());
|
||||
}
|
||||
|
||||
/**
|
||||
* A union over {@link L} and {@link R}.
|
||||
*/
|
||||
private static class Either<L, R> {
|
||||
private final L left;
|
||||
private final R right;
|
||||
|
||||
public static <L, R> Either<L, R> ofLeft(L left) {
|
||||
return new Either<>(left, null);
|
||||
}
|
||||
|
||||
public static <L, R> Either<L, R> ofRight(R right) {
|
||||
return new Either<>(null, right);
|
||||
}
|
||||
|
||||
Either(L left, R right) {
|
||||
this.left = left;
|
||||
this.right = right;
|
||||
}
|
||||
|
||||
public boolean hasLeft() {
|
||||
return left != null;
|
||||
}
|
||||
|
||||
public L getLeft() {
|
||||
if (!hasLeft()) {
|
||||
throw new IllegalStateException("Either contains no left.");
|
||||
}
|
||||
return left;
|
||||
}
|
||||
|
||||
public boolean hasRight() {
|
||||
return right != null;
|
||||
}
|
||||
|
||||
public R getRight() {
|
||||
if (!hasRight()) {
|
||||
throw new IllegalStateException("Either contains no right.");
|
||||
}
|
||||
return right;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,13 +17,13 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import static org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils.isNotEmpty;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import java.util.function.BooleanSupplier;
|
||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
|
@ -34,7 +34,6 @@ import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
|||
import org.apache.hadoop.hbase.testclassification.LargeTests;
|
||||
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||
import org.apache.hadoop.hbase.util.Threads;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.ClassRule;
|
||||
|
@ -70,7 +69,7 @@ public class TestMetaFixer {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testPlugsHoles() throws IOException {
|
||||
public void testPlugsHoles() throws Exception {
|
||||
TableName tn = TableName.valueOf(this.name.getMethodName());
|
||||
TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
|
||||
List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
|
||||
|
@ -96,9 +95,11 @@ public class TestMetaFixer {
|
|||
assertTrue(report.toString(), report.isEmpty());
|
||||
assertEquals(initialSize,
|
||||
services.getAssignmentManager().getRegionStates().getRegionStates().size());
|
||||
// Disable and reenable so the added regions get reassigned.
|
||||
TEST_UTIL.getAdmin().disableTable(tn);
|
||||
TEST_UTIL.getAdmin().enableTable(tn);
|
||||
|
||||
// wait for RITs to settle -- those are the fixed regions being assigned -- or until the
|
||||
// watchdog TestRule terminates the test.
|
||||
await(50, () -> isNotEmpty(services.getAssignmentManager().getRegionsInTransition()));
|
||||
|
||||
ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
|
||||
assertEquals(originalCount, ris.size());
|
||||
}
|
||||
|
@ -143,7 +144,7 @@ public class TestMetaFixer {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testOverlap() throws IOException {
|
||||
public void testOverlap() throws Exception {
|
||||
TableName tn = TableName.valueOf(this.name.getMethodName());
|
||||
TEST_UTIL.createMultiRegionTable(tn, HConstants.CATALOG_FAMILY);
|
||||
List<RegionInfo> ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), tn);
|
||||
|
@ -163,14 +164,32 @@ public class TestMetaFixer {
|
|||
assertEquals(1, MetaFixer.calculateMerges(10, report.getOverlaps()).size());
|
||||
MetaFixer fixer = new MetaFixer(services);
|
||||
fixer.fixOverlaps(report);
|
||||
while (true) {
|
||||
services.getCatalogJanitor().scan();
|
||||
report = services.getCatalogJanitor().getLastReport();
|
||||
if (report.isEmpty()) {
|
||||
break;
|
||||
await(10, () -> {
|
||||
try {
|
||||
services.getCatalogJanitor().scan();
|
||||
final CatalogJanitor.Report postReport = services.getCatalogJanitor().getLastReport();
|
||||
return postReport.isEmpty();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
Threads.sleep(10);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Await the successful return of {@code condition}, sleeping {@code sleepMillis} between
|
||||
* invocations.
|
||||
*/
|
||||
private static void await(final long sleepMillis, final BooleanSupplier condition)
|
||||
throws InterruptedException {
|
||||
try {
|
||||
while (!condition.getAsBoolean()) {
|
||||
Thread.sleep(sleepMillis);
|
||||
}
|
||||
} catch (RuntimeException e) {
|
||||
if (e.getCause() instanceof AssertionError) {
|
||||
throw (AssertionError) e.getCause();
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
assertTrue(report.toString(), report.isEmpty());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue