HBASE-24246 Miscellaneous hbck2 fixMeta bulk merge fixes: better logging around merges/overlap-fixing, 'HBCK Report' overlap listing, and configuration (#1572)
hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
  Fix weird brackets around each region name when logging.

hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java
  Log when we hit the max merge limit. Also up limit to 64.

hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
  Make logs make more sense to operator.

hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
  Make RegionName show when you mouseover so long names don't mess up display of holes and overlaps.

Address Mingliang Liu (liuml07) feedback

Signed-off-by: Peter Somogyi <psomogyi@apache.org>
Signed-off-by: Mingliang Liu <liuml07@apache.org>
This commit is contained in:
parent
c6be5b0916
commit
1dd4b13e17
|
@ -304,12 +304,11 @@ public interface RegionInfo extends Comparable<RegionInfo> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return Return a String of short, printable names for <code>hris</code>
|
* @return Return a String of short, printable names for <code>hris</code> (usually encoded name)
|
||||||
* (usually encoded name) for us logging.
|
* for us logging.
|
||||||
*/
|
*/
|
||||||
static String getShortNameToLog(final List<RegionInfo> ris) {
|
static String getShortNameToLog(final List<RegionInfo> ris) {
|
||||||
return ris.stream().map(ri -> ri.getShortNameToLog()).
|
return ris.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()).toString();
|
||||||
collect(Collectors.toList()).toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1942,8 +1942,8 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
" failed because merge switch is off");
|
" failed because merge switch is off");
|
||||||
}
|
}
|
||||||
|
|
||||||
final String mergeRegionsStr = Arrays.stream(regionsToMerge).
|
final String mergeRegionsStr = Arrays.stream(regionsToMerge).map(r -> r.getEncodedName()).
|
||||||
map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", "));
|
collect(Collectors.joining(", "));
|
||||||
return MasterProcedureUtil.submitProcedure(new NonceProcedureRunnable(this, ng, nonce) {
|
return MasterProcedureUtil.submitProcedure(new NonceProcedureRunnable(this, ng, nonce) {
|
||||||
@Override
|
@Override
|
||||||
protected void run() throws IOException {
|
protected void run() throws IOException {
|
||||||
|
|
|
@ -52,7 +52,7 @@ import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesti
|
||||||
class MetaFixer {
|
class MetaFixer {
|
||||||
private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class);
|
private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class);
|
||||||
private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count";
|
private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count";
|
||||||
private static final int MAX_MERGE_COUNT_DEFAULT = 10;
|
private static final int MAX_MERGE_COUNT_DEFAULT = 64;
|
||||||
|
|
||||||
private final MasterServices masterServices;
|
private final MasterServices masterServices;
|
||||||
/**
|
/**
|
||||||
|
@ -247,6 +247,10 @@ class MetaFixer {
|
||||||
if (regionInfoWithlargestEndKey != null) {
|
if (regionInfoWithlargestEndKey != null) {
|
||||||
if (!isOverlap(regionInfoWithlargestEndKey, pair) ||
|
if (!isOverlap(regionInfoWithlargestEndKey, pair) ||
|
||||||
currentMergeSet.size() >= maxMergeCount) {
|
currentMergeSet.size() >= maxMergeCount) {
|
||||||
|
// Log when we cut-off-merge because we hit the configured maximum merge limit.
|
||||||
|
if (currentMergeSet.size() >= maxMergeCount) {
|
||||||
|
LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount);
|
||||||
|
}
|
||||||
merges.add(currentMergeSet);
|
merges.add(currentMergeSet);
|
||||||
currentMergeSet = new TreeSet<>();
|
currentMergeSet = new TreeSet<>();
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,7 +60,6 @@ import org.apache.hadoop.hbase.wal.WALSplitUtil;
|
||||||
import org.apache.yetus.audience.InterfaceAudience;
|
import org.apache.yetus.audience.InterfaceAudience;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
|
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
|
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
|
||||||
|
@ -134,8 +133,8 @@ public class MergeTableRegionsProcedure
|
||||||
throw new MergeRegionException(msg);
|
throw new MergeRegionException(msg);
|
||||||
}
|
}
|
||||||
if (!force && !ri.isAdjacent(previous) && !ri.isOverlap(previous)) {
|
if (!force && !ri.isAdjacent(previous) && !ri.isOverlap(previous)) {
|
||||||
String msg = "Unable to merge non-adjacent or non-overlapping regions " +
|
String msg = "Unable to merge non-adjacent or non-overlapping regions '" +
|
||||||
previous.getShortNameToLog() + ", " + ri.getShortNameToLog() + " when force=false";
|
previous.getShortNameToLog() + "', '" + ri.getShortNameToLog() + "' when force=false";
|
||||||
LOG.warn(msg);
|
LOG.warn(msg);
|
||||||
throw new MergeRegionException(msg);
|
throw new MergeRegionException(msg);
|
||||||
}
|
}
|
||||||
|
@ -478,16 +477,20 @@ public class MergeTableRegionsProcedure
|
||||||
for (RegionInfo ri: this.regionsToMerge) {
|
for (RegionInfo ri: this.regionsToMerge) {
|
||||||
if (!catalogJanitor.cleanMergeQualifier(ri)) {
|
if (!catalogJanitor.cleanMergeQualifier(ri)) {
|
||||||
String msg = "Skip merging " + RegionInfo.getShortNameToLog(regionsToMerge) +
|
String msg = "Skip merging " + RegionInfo.getShortNameToLog(regionsToMerge) +
|
||||||
", because parent " + RegionInfo.getShortNameToLog(ri) + " has a merge qualifier";
|
", because a parent, " + RegionInfo.getShortNameToLog(ri) + ", has a merge qualifier " +
|
||||||
|
"(if a 'merge column' in parent, it was recently merged but still has outstanding " +
|
||||||
|
"references to its parents that must be cleared before it can participate in merge -- " +
|
||||||
|
"major compact it to hurry clearing of its references)";
|
||||||
LOG.warn(msg);
|
LOG.warn(msg);
|
||||||
throw new MergeRegionException(msg);
|
throw new MergeRegionException(msg);
|
||||||
}
|
}
|
||||||
RegionState state = regionStates.getRegionState(ri.getEncodedName());
|
RegionState state = regionStates.getRegionState(ri.getEncodedName());
|
||||||
if (state == null) {
|
if (state == null) {
|
||||||
throw new UnknownRegionException("No state for " + RegionInfo.getShortNameToLog(ri));
|
throw new UnknownRegionException(RegionInfo.getShortNameToLog(ri) +
|
||||||
|
" UNKNOWN (Has it been garbage collected?)");
|
||||||
}
|
}
|
||||||
if (!state.isOpened()) {
|
if (!state.isOpened()) {
|
||||||
throw new MergeRegionException("Unable to merge regions that are not online: " + ri);
|
throw new MergeRegionException("Unable to merge regions that are NOT online: " + ri);
|
||||||
}
|
}
|
||||||
// Ask the remote regionserver if regions are mergeable. If we get an IOE, report it
|
// Ask the remote regionserver if regions are mergeable. If we get an IOE, report it
|
||||||
// along with the failure, so we can see why regions are not mergeable at this time.
|
// along with the failure, so we can see why regions are not mergeable at this time.
|
||||||
|
|
|
@ -166,7 +166,8 @@
|
||||||
<p>
|
<p>
|
||||||
<span>
|
<span>
|
||||||
The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster.
|
The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster.
|
||||||
First make sure <em>hbase:meta</em> is in a healthy state; run <em>hbck2 fixMeta</em> to be sure. Once this is done, per Region below, run a bulk
|
First make sure <em>hbase:meta</em> is in a healthy state, that there are not holes, overlaps or inconsistencies (else bulk load may complain);
|
||||||
|
run <em>hbck2 fixMeta</em>. Once this is done, per Region below, run a bulk
|
||||||
load -- <em>$ hbase completebulkload REGION_DIR_PATH TABLE_NAME</em> -- and then delete the desiccated directory content (HFiles are removed upon
|
load -- <em>$ hbase completebulkload REGION_DIR_PATH TABLE_NAME</em> -- and then delete the desiccated directory content (HFiles are removed upon
|
||||||
successful load; all that is left are empty directories and occasionally a seqid marking file).
|
successful load; all that is left are empty directories and occasionally a seqid marking file).
|
||||||
</span>
|
</span>
|
||||||
|
@ -223,8 +224,8 @@
|
||||||
</tr>
|
</tr>
|
||||||
<% for (Pair<RegionInfo, RegionInfo> p : report.getHoles()) { %>
|
<% for (Pair<RegionInfo, RegionInfo> p : report.getHoles()) { %>
|
||||||
<tr>
|
<tr>
|
||||||
<td><%= p.getFirst() %></td>
|
<td><span title="<%= p.getFirst() %>"><%= p.getFirst().getEncodedName() %></span></td>
|
||||||
<td><%= p.getSecond() %></td>
|
<td><span title="<%= p.getSecond() %>"><%= p.getSecond().getEncodedName() %></span></td>
|
||||||
</tr>
|
</tr>
|
||||||
<% } %>
|
<% } %>
|
||||||
|
|
||||||
|
@ -244,8 +245,8 @@
|
||||||
</tr>
|
</tr>
|
||||||
<% for (Pair<RegionInfo, RegionInfo> p : report.getOverlaps()) { %>
|
<% for (Pair<RegionInfo, RegionInfo> p : report.getOverlaps()) { %>
|
||||||
<tr>
|
<tr>
|
||||||
<td><%= p.getFirst() %></td>
|
<td><span title="<%= p.getFirst() %>"><%= p.getFirst().getEncodedName() %></span></td>
|
||||||
<td><%= p.getSecond() %></td>
|
<td><span title="<%= p.getSecond() %>"><%= p.getSecond().getEncodedName() %></span></td>
|
||||||
</tr>
|
</tr>
|
||||||
<% } %>
|
<% } %>
|
||||||
|
|
||||||
|
@ -265,7 +266,7 @@
|
||||||
</tr>
|
</tr>
|
||||||
<% for (Pair<RegionInfo, ServerName> p: report.getUnknownServers()) { %>
|
<% for (Pair<RegionInfo, ServerName> p: report.getUnknownServers()) { %>
|
||||||
<tr>
|
<tr>
|
||||||
<td><%= p.getFirst() %></td>
|
<td><span title="<%= p.getFirst() %>"><%= p.getFirst().getEncodedName() %></span></td>
|
||||||
<td><%= p.getSecond() %></td>
|
<td><%= p.getSecond() %></td>
|
||||||
</tr>
|
</tr>
|
||||||
<% } %>
|
<% } %>
|
||||||
|
|
Loading…
Reference in New Issue