From 1dd4b13e175a59756e812b154ab1a9399f885190 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Mon, 4 May 2020 08:21:52 -0700 Subject: [PATCH] HBASE-24246 Miscellaneous hbck2 fixMeta bulk merge fixes: better logging around merges/overlap-fixing, 'HBCK Report' overlap listing, and configuration (#1572) hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java Fix weird brackets around each region name when logging. hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java Log when we hit the max merge limit. Also up limit to 64. hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java Make logs make more sense to operator. hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp Make RegionName show when you mouseover so long names don't mess up display of holes and overlaps. Address Mingliang Liu liuml07 feedback Signed-off-by: Peter Somogyi Signed-off-by: Mingliang Liu --- .../apache/hadoop/hbase/client/RegionInfo.java | 7 +++---- .../org/apache/hadoop/hbase/master/HMaster.java | 4 ++-- .../org/apache/hadoop/hbase/master/MetaFixer.java | 6 +++++- .../assignment/MergeTableRegionsProcedure.java | 15 +++++++++------ .../main/resources/hbase-webapps/master/hbck.jsp | 13 +++++++------ 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionInfo.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionInfo.java index 538d744d676..a6d16528cb2 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionInfo.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RegionInfo.java @@ -304,12 +304,11 @@ public interface RegionInfo extends Comparable { } /** - * @return Return a String of short, printable names for hris - * (usually encoded name) for us logging. + * @return Return a String of short, printable names for hris (usually encoded name) + * for us logging. */ static String getShortNameToLog(final List ris) { - return ris.stream().map(ri -> ri.getShortNameToLog()). - collect(Collectors.toList()).toString(); + return ris.stream().map(RegionInfo::getEncodedName).collect(Collectors.toList()).toString(); } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 00bf0f7b151..482e9bb5cb4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1942,8 +1942,8 @@ public class HMaster extends HRegionServer implements MasterServices { " failed because merge switch is off"); } - final String mergeRegionsStr = Arrays.stream(regionsToMerge). - map(r -> RegionInfo.getShortNameToLog(r)).collect(Collectors.joining(", ")); + final String mergeRegionsStr = Arrays.stream(regionsToMerge).map(r -> r.getEncodedName()). + collect(Collectors.joining(", ")); return MasterProcedureUtil.submitProcedure(new NonceProcedureRunnable(this, ng, nonce) { @Override protected void run() throws IOException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java index ae29c01e573..ed96b0168f7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MetaFixer.java @@ -52,7 +52,7 @@ import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesti class MetaFixer { private static final Logger LOG = LoggerFactory.getLogger(MetaFixer.class); private static final String MAX_MERGE_COUNT_KEY = "hbase.master.metafixer.max.merge.count"; - private static final int MAX_MERGE_COUNT_DEFAULT = 10; + private static final int MAX_MERGE_COUNT_DEFAULT = 64; private final MasterServices masterServices; /** @@ -247,6 +247,10 @@ class MetaFixer { if (regionInfoWithlargestEndKey != null) { if (!isOverlap(regionInfoWithlargestEndKey, pair) || currentMergeSet.size() >= maxMergeCount) { + // Log when we cut-off-merge because we hit the configured maximum merge limit. + if (currentMergeSet.size() >= maxMergeCount) { + LOG.warn("Ran into maximum-at-a-time merges limit={}", maxMergeCount); + } merges.add(currentMergeSet); currentMergeSet = new TreeSet<>(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index 6a241c6c4b4..13b68c15b7c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -60,7 +60,6 @@ import org.apache.hadoop.hbase.wal.WALSplitUtil; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; @@ -134,8 +133,8 @@ public class MergeTableRegionsProcedure throw new MergeRegionException(msg); } if (!force && !ri.isAdjacent(previous) && !ri.isOverlap(previous)) { - String msg = "Unable to merge non-adjacent or non-overlapping regions " + - previous.getShortNameToLog() + ", " + ri.getShortNameToLog() + " when force=false"; + String msg = "Unable to merge non-adjacent or non-overlapping regions '" + + previous.getShortNameToLog() + "', '" + ri.getShortNameToLog() + "' when force=false"; LOG.warn(msg); throw new MergeRegionException(msg); } @@ -478,16 +477,20 @@ public class MergeTableRegionsProcedure for (RegionInfo ri: this.regionsToMerge) { if (!catalogJanitor.cleanMergeQualifier(ri)) { String msg = "Skip merging " + RegionInfo.getShortNameToLog(regionsToMerge) + - ", because parent " + RegionInfo.getShortNameToLog(ri) + " has a merge qualifier"; + ", because a parent, " + RegionInfo.getShortNameToLog(ri) + ", has a merge qualifier " + + "(if a 'merge column' in parent, it was recently merged but still has outstanding " + + "references to its parents that must be cleared before it can participate in merge -- " + + "major compact it to hurry clearing of its references)"; LOG.warn(msg); throw new MergeRegionException(msg); } RegionState state = regionStates.getRegionState(ri.getEncodedName()); if (state == null) { - throw new UnknownRegionException("No state for " + RegionInfo.getShortNameToLog(ri)); + throw new UnknownRegionException(RegionInfo.getShortNameToLog(ri) + + " UNKNOWN (Has it been garbage collected?)"); } if (!state.isOpened()) { - throw new MergeRegionException("Unable to merge regions that are not online: " + ri); + throw new MergeRegionException("Unable to merge regions that are NOT online: " + ri); } // Ask the remote regionserver if regions are mergeable. If we get an IOE, report it // along with the failure, so we can see why regions are not mergeable at this time. diff --git a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp index e9a8658286a..85d44e29f26 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp @@ -166,7 +166,8 @@

The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster. - First make sure hbase:meta is in a healthy state; run hbck2 fixMeta to be sure. Once this is done, per Region below, run a bulk + First make sure hbase:meta is in a healthy state, that there are not holes, overlaps or inconsistencies (else bulk load may complain); + run hbck2 fixMeta. Once this is done, per Region below, run a bulk load -- $ hbase completebulkload REGION_DIR_PATH TABLE_NAME -- and then delete the desiccated directory content (HFiles are removed upon successful load; all that is left are empty directories and occasionally a seqid marking file). @@ -223,8 +224,8 @@ <% for (Pair p : report.getHoles()) { %> - <%= p.getFirst() %> - <%= p.getSecond() %> + <%= p.getFirst().getEncodedName() %> + <%= p.getSecond().getEncodedName() %> <% } %> @@ -244,8 +245,8 @@ <% for (Pair p : report.getOverlaps()) { %> - <%= p.getFirst() %> - <%= p.getSecond() %> + <%= p.getFirst().getEncodedName() %> + <%= p.getSecond().getEncodedName() %> <% } %> @@ -265,7 +266,7 @@ <% for (Pair p: report.getUnknownServers()) { %> - <%= p.getFirst() %> + <%= p.getFirst().getEncodedName() %> <%= p.getSecond() %> <% } %>