HBASE-24246 Miscellaneous hbck2 fixMeta bulk merge fixes: better logging around merges/overlap-fixing, 'HBCK Report' overlap listing, and configuration (#1572)

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
 Fix weird brackets around each region name when logging.

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java
  Log when we hit the max merge limit. Also raise the default limit from 10 to 64.

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
 Make log messages clearer for the operator.

hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
 Show only the encoded region name in the tables and the full RegionName on
 mouseover, so long names don't mess up the display of holes and overlaps.

Address feedback from Mingliang Liu (liuml07).

Signed-off-by: Peter Somogyi <psomogyi@apache.org>
Signed-off-by: Mingliang Liu <liuml07@apache.org>
This commit is contained in:
Michael Stack 2020-05-04 08:21:52 -07:00 committed by GitHub
parent c6be5b0916
commit 1dd4b13e17
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 26 additions and 19 deletions

View File

@ -304,12 +304,11 @@ public interface RegionInfo extends Comparable<RegionInfo> {
} }
/** /**
* @return Return a String of short, printable names for <code>hris</code> * @return Return a String of short, printable names for <code>hris</code> (usually encoded name)
* (usually encoded name) for us logging. * for us logging.
*/ */
static String getShortNameToLog(final List<RegionInfo> ris) { static String getShortNameToLog(final List<RegionInfo> ris) {
return ris.stream().map(ri -> ri.getShortNameToLog()). return ris.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()).toString();
collect(Collectors.toList()).toString();
} }
/** /**

View File

@ -1942,8 +1942,8 @@ public class HMaster extends HRegionServer implements MasterServices {
" failed because merge switch is off"); " failed because merge switch is off");
} }
final String mergeRegionsStr = Arrays.stream(regionsToMerge). final String mergeRegionsStr = Arrays.stream(regionsToMerge).map(r -> r.getEncodedName()).
map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", ")); collect(Collectors.joining(", "));
return MasterProcedureUtil.submitProcedure(new NonceProcedureRunnable(this, ng, nonce) { return MasterProcedureUtil.submitProcedure(new NonceProcedureRunnable(this, ng, nonce) {
@Override @Override
protected void run() throws IOException { protected void run() throws IOException {

View File

@ -52,7 +52,7 @@ import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesti
class MetaFixer { class MetaFixer {
private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class); private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class);
private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count"; private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count";
private static final int MAX_MERGE_COUNT_DEFAULT = 10; private static final int MAX_MERGE_COUNT_DEFAULT = 64;
private final MasterServices masterServices; private final MasterServices masterServices;
/** /**
@ -247,6 +247,10 @@ class MetaFixer {
if (regionInfoWithlargestEndKey != null) { if (regionInfoWithlargestEndKey != null) {
if (!isOverlap(regionInfoWithlargestEndKey, pair) || if (!isOverlap(regionInfoWithlargestEndKey, pair) ||
currentMergeSet.size() >= maxMergeCount) { currentMergeSet.size() >= maxMergeCount) {
// Log when we cut-off-merge because we hit the configured maximum merge limit.
if (currentMergeSet.size() >= maxMergeCount) {
LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount);
}
merges.add(currentMergeSet); merges.add(currentMergeSet);
currentMergeSet = new TreeSet<>(); currentMergeSet = new TreeSet<>();
} }

View File

@ -60,7 +60,6 @@ import org.apache.hadoop.hbase.wal.WALSplitUtil;
import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
@ -134,8 +133,8 @@ public class MergeTableRegionsProcedure
throw new MergeRegionException(msg); throw new MergeRegionException(msg);
} }
if (!force && !ri.isAdjacent(previous) && !ri.isOverlap(previous)) { if (!force && !ri.isAdjacent(previous) && !ri.isOverlap(previous)) {
String msg = "Unable to merge non-adjacent or non-overlapping regions " + String msg = "Unable to merge non-adjacent or non-overlapping regions '" +
previous.getShortNameToLog() + ", " + ri.getShortNameToLog() + " when force=false"; previous.getShortNameToLog() + "', '" + ri.getShortNameToLog() + "' when force=false";
LOG.warn(msg); LOG.warn(msg);
throw new MergeRegionException(msg); throw new MergeRegionException(msg);
} }
@ -478,16 +477,20 @@ public class MergeTableRegionsProcedure
for (RegionInfo ri: this.regionsToMerge) { for (RegionInfo ri: this.regionsToMerge) {
if (!catalogJanitor.cleanMergeQualifier(ri)) { if (!catalogJanitor.cleanMergeQualifier(ri)) {
String msg = "Skip merging " + RegionInfo.getShortNameToLog(regionsToMerge) + String msg = "Skip merging " + RegionInfo.getShortNameToLog(regionsToMerge) +
", because parent " + RegionInfo.getShortNameToLog(ri) + " has a merge qualifier"; ", because a parent, " + RegionInfo.getShortNameToLog(ri) + ", has a merge qualifier " +
"(if a 'merge column' in parent, it was recently merged but still has outstanding " +
"references to its parents that must be cleared before it can participate in merge -- " +
"major compact it to hurry clearing of its references)";
LOG.warn(msg); LOG.warn(msg);
throw new MergeRegionException(msg); throw new MergeRegionException(msg);
} }
RegionState state = regionStates.getRegionState(ri.getEncodedName()); RegionState state = regionStates.getRegionState(ri.getEncodedName());
if (state == null) { if (state == null) {
throw new UnknownRegionException("No state for " + RegionInfo.getShortNameToLog(ri)); throw new UnknownRegionException(RegionInfo.getShortNameToLog(ri) +
" UNKNOWN (Has it been garbage collected?)");
} }
if (!state.isOpened()) { if (!state.isOpened()) {
throw new MergeRegionException("Unable to merge regions that are not online: " + ri); throw new MergeRegionException("Unable to merge regions that are NOT online: " + ri);
} }
// Ask the remote regionserver if regions are mergeable. If we get an IOE, report it // Ask the remote regionserver if regions are mergeable. If we get an IOE, report it
// along with the failure, so we can see why regions are not mergeable at this time. // along with the failure, so we can see why regions are not mergeable at this time.

View File

@ -166,7 +166,8 @@
<p> <p>
<span> <span>
The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster. The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster.
First make sure <em>hbase:meta</em> is in a healthy state; run <em>hbck2 fixMeta</em> to be sure. Once this is done, per Region below, run a bulk First make sure <em>hbase:meta</em> is in a healthy state, that there are not holes, overlaps or inconsistencies (else bulk load may complain);
run <em>hbck2 fixMeta</em>. Once this is done, per Region below, run a bulk
load -- <em>$ hbase completebulkload REGION_DIR_PATH TABLE_NAME</em> -- and then delete the desiccated directory content (HFiles are removed upon load -- <em>$ hbase completebulkload REGION_DIR_PATH TABLE_NAME</em> -- and then delete the desiccated directory content (HFiles are removed upon
successful load; all that is left are empty directories and occasionally a seqid marking file). successful load; all that is left are empty directories and occasionally a seqid marking file).
</span> </span>
@ -223,8 +224,8 @@
</tr> </tr>
<% for (Pair<RegionInfo, RegionInfo> p : report.getHoles()) { %> <% for (Pair<RegionInfo, RegionInfo> p : report.getHoles()) { %>
<tr> <tr>
<td><%= p.getFirst() %></td> <td><span title="<%= p.getFirst() %>"><%= p.getFirst().getEncodedName() %></span></td>
<td><%= p.getSecond() %></td> <td><span title="<%= p.getSecond() %>"><%= p.getSecond().getEncodedName() %></span></td>
</tr> </tr>
<% } %> <% } %>
@ -244,8 +245,8 @@
</tr> </tr>
<% for (Pair<RegionInfo, RegionInfo> p : report.getOverlaps()) { %> <% for (Pair<RegionInfo, RegionInfo> p : report.getOverlaps()) { %>
<tr> <tr>
<td><%= p.getFirst() %></td> <td><span title="<%= p.getFirst() %>"><%= p.getFirst().getEncodedName() %></span></td>
<td><%= p.getSecond() %></td> <td><span title="<%= p.getSecond() %>"><%= p.getSecond().getEncodedName() %></span></td>
</tr> </tr>
<% } %> <% } %>
@ -265,7 +266,7 @@
</tr> </tr>
<% for (Pair<RegionInfo, ServerName> p: report.getUnknownServers()) { %> <% for (Pair<RegionInfo, ServerName> p: report.getUnknownServers()) { %>
<tr> <tr>
<td><%= p.getFirst() %></td> <td><span title="<%= p.getFirst() %>"><%= p.getFirst().getEncodedName() %></span></td>
<td><%= p.getSecond() %></td> <td><%= p.getSecond() %></td>
</tr> </tr>
<% } %> <% } %>