From 9799e0b6b28cc12a11511b9cf33792a7a224482e Mon Sep 17 00:00:00 2001 From: stack Date: Mon, 29 Jul 2019 17:10:58 -0700 Subject: [PATCH] HBASE-22741 Show catalogjanitor consistency complaints in new 'HBCK Report' page Signed-off-by: huzheng Signed-off-by: Guanghao Zhang --- .../hadoop/hbase/master/CatalogJanitor.java | 84 +++++++--- .../resources/hbase-webapps/master/hbck.jsp | 151 ++++++++++++++++-- .../master/TestCatalogJanitorCluster.java | 22 +-- 3 files changed, 209 insertions(+), 48 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java index 68c4bed6d2e..7b291429042 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Comparator; -import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -30,6 +29,7 @@ import java.util.Properties; import java.util.TreeMap; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; @@ -42,8 +42,11 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableState; import 
org.apache.hadoop.hbase.master.assignment.AssignmentManager; @@ -239,7 +242,7 @@ public class CatalogJanitor extends ScheduledChore { * @return Returns last published Report that comes of last successful scan * of hbase:meta. */ - Report getLastReport() { + public Report getLastReport() { return this.lastReport; } @@ -443,29 +446,49 @@ public class CatalogJanitor extends ScheduledChore { } /** - * Report made by {@link ReportMakingVisitor}. + * Report made by ReportMakingVisitor */ - static class Report { + public static class Report { private final long now = EnvironmentEdgeManager.currentTime(); // Keep Map of found split parents. These are candidates for cleanup. // Use a comparator that has split parents come before its daughters. final Map splitParents = new TreeMap<>(new SplitParentFirstComparator()); final Map mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR); - - final List> holes = new ArrayList<>(); - final List> overlaps = new ArrayList<>(); - final Map unknownServers = new HashMap(); - final List emptyRegionInfo = new ArrayList<>(); int count = 0; + private final List> holes = new ArrayList<>(); + private final List> overlaps = new ArrayList<>(); + private final List> unknownServers = new ArrayList<>(); + private final List emptyRegionInfo = new ArrayList<>(); + @VisibleForTesting Report() {} + public long getCreateTime() { + return this.now; + } + + public List> getHoles() { + return this.holes; + } + + public List> getOverlaps() { + return this.overlaps; + } + + public List> getUnknownServers() { + return unknownServers; + } + + public List getEmptyRegionInfo() { + return emptyRegionInfo; + } + /** * @return True if an 'empty' lastReport -- no problems found. 
*/ - boolean isEmpty() { + public boolean isEmpty() { return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() && this.emptyRegionInfo.isEmpty(); } @@ -477,28 +500,28 @@ public class CatalogJanitor extends ScheduledChore { if (sb.length() > 0) { sb.append(", "); } - sb.append("hole=" + Bytes.toString(p.getFirst().metaRow) + "/" + - Bytes.toString(p.getSecond().metaRow)); + sb.append("hole=" + Bytes.toStringBinary(p.getFirst().metaRow) + "/" + + Bytes.toStringBinary(p.getSecond().metaRow)); } for (Pair p: this.overlaps) { if (sb.length() > 0) { sb.append(", "); } - sb.append("overlap=").append(Bytes.toString(p.getFirst().metaRow)).append("/"). - append(Bytes.toString(p.getSecond().metaRow)); + sb.append("overlap=").append(Bytes.toStringBinary(p.getFirst().metaRow)).append("/"). + append(Bytes.toStringBinary(p.getSecond().metaRow)); } for (byte [] r: this.emptyRegionInfo) { if (sb.length() > 0) { sb.append(", "); } - sb.append("empty=").append(Bytes.toString(r)); + sb.append("empty=").append(Bytes.toStringBinary(r)); } - for (Map.Entry e: this.unknownServers.entrySet()) { + for (Pair p: this.unknownServers) { if (sb.length() > 0) { sb.append(", "); } - sb.append("unknown_server=").append(e.getKey()).append("/"). - append(e.getValue().getRegionNameAsString()); + sb.append("unknown_server=").append(p.getSecond()).append("/"). + append(Bytes.toStringBinary(p.getFirst().metaRow)); } return sb.toString(); } @@ -507,7 +530,7 @@ public class CatalogJanitor extends ScheduledChore { /** * Simple datastructure to hold a MetaRow content. */ - static class MetaRow { + public static class MetaRow { /** * A marker for use in case where there is a hole at the very * first row in hbase:meta. Should never happen. @@ -518,17 +541,25 @@ public class CatalogJanitor extends ScheduledChore { /** * Row from hbase:meta table. */ - final byte [] metaRow; + private final byte [] metaRow; /** * The decoded RegionInfo gotten from hbase:meta. 
*/ - final RegionInfo regionInfo; + private final RegionInfo regionInfo; MetaRow(byte [] metaRow, RegionInfo regionInfo) { this.metaRow = metaRow; this.regionInfo = regionInfo; } + + public RegionInfo getRegionInfo() { + return regionInfo; + } + + public byte[] getMetaRow() { + return metaRow; + } } /** @@ -609,6 +640,7 @@ public class CatalogJanitor extends ScheduledChore { this.report.emptyRegionInfo.add(metaTableRow.getRow()); return ri; } + MetaRow mrri = new MetaRow(metaTableRow.getRow(), ri); // If table is disabled, skip integrity check. if (!isTableDisabled(ri)) { @@ -700,20 +732,22 @@ public class CatalogJanitor extends ScheduledChore { public static void main(String [] args) throws IOException { checkLog4jProperties(); ReportMakingVisitor visitor = new ReportMakingVisitor(null); - try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create())) { + Configuration configuration = HBaseConfiguration.create(); + configuration.setBoolean("hbase.defaults.for.version.skip", true); + try (Connection connection = ConnectionFactory.createConnection(configuration)) { /* Used to generate an overlap. - Get g = new Get(Bytes.toBytes("t2,40,1563939166317.5a8be963741d27e9649e5c67a34259d9.")); + */ + Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0.")); g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); try (Table t = connection.getTable(TableName.META_TABLE_NAME)) { Result r = t.get(g); byte [] row = g.getRow(); - row[row.length - 3] <<= ((byte)row[row.length -3]); + row[row.length - 2] <<= ((byte)row[row.length - 2]); Put p = new Put(g.getRow()); p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER, r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER)); t.put(p); } - */ MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null); Report report = visitor.getReport(); LOG.info(report != null? 
report.toString(): "empty"); diff --git a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp index 0245d4771da..a2adeb09c73 100644 --- a/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp +++ b/hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp @@ -18,15 +18,23 @@ */ --%> <%@ page contentType="text/html;charset=UTF-8" + import="java.time.Instant" + import="java.time.ZoneId" import="java.util.Date" import="java.util.List" import="java.util.Map" import="java.util.stream.Collectors" + import="java.time.ZonedDateTime" + import="java.time.format.DateTimeFormatter" %> <%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %> <%@ page import="org.apache.hadoop.hbase.master.HMaster" %> <%@ page import="org.apache.hadoop.hbase.ServerName" %> +<%@ page import="org.apache.hadoop.hbase.util.Bytes" %> <%@ page import="org.apache.hadoop.hbase.util.Pair" %> +<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor" %> +<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.Report" %> +<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.MetaRow" %> <% HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER); pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName()); @@ -43,6 +51,14 @@ startTimestamp = hbckChecker.getCheckingStartTimestamp(); endTimestamp = hbckChecker.getCheckingEndTimestamp(); } + ZonedDateTime zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(startTimestamp), + ZoneId.systemDefault()); + String iso8601start = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); + zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(endTimestamp), + ZoneId.systemDefault()); + String iso8601end = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); + CatalogJanitor cj = master.getCatalogJanitor(); + CatalogJanitor.Report report = cj == null? 
null: cj.getLastReport(); %> @@ -61,29 +77,32 @@
+
<% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %> +

+ + There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use the assigns + command); 2. Master thought this region opened on Server1, but a regionserver reported Server2 (Fix: + check whether the server still exists. If not, schedule an SCP for it. If it exists, restart Server2 and Server1); + 3. More than one regionserver reported this region as opened (Fix: restart the RegionServers). + Notice: the reported online regionservers may not be right when there are regions in transition. + Please check them in the regionserver's web UI. + +

+ @@ -147,7 +166,115 @@
Region
<% } %> +
+ +
+ <% if (report != null && !report.isEmpty()) { + zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(report.getCreateTime()), + ZoneId.systemDefault()); + String iso8601reportTime = zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); + zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(System.currentTimeMillis()), + ZoneId.systemDefault()); + String iso8601Now = zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); + %> +

Report created: <%= iso8601reportTime %> (now=<%= iso8601Now %>). Run catalogjanitor_run in hbase shell to generate a new sub-report.

+ <% if (!report.getHoles().isEmpty()) { %> +
+ +
+ + + + + + + + <% for (Pair p : report.getHoles()) { %> + + + + + + + <% } %> + +

<%= report.getHoles().size() %> hole(s).

+
Row before holeRegionInfoRow after holeRegionInfo
<%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %><%= p.getFirst().getRegionInfo() %><%= Bytes.toStringBinary(p.getSecond().getMetaRow()) %><%= p.getSecond().getRegionInfo() %>
+ <% } %> + <% if (!report.getOverlaps().isEmpty()) { %> +
+ +
+ + + + + + + + <% for (Pair p : report.getOverlaps()) { %> + + + + + + + <% } %> + +

<%= report.getOverlaps().size() %> overlap(s).

+
RowRegionInfoOther RowOther RegionInfo
<%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %><%= p.getFirst().getRegionInfo() %><%= Bytes.toStringBinary(p.getSecond().getMetaRow()) %><%= p.getSecond().getRegionInfo() %>
+ <% } %> + <% if (!report.getUnknownServers().isEmpty()) { %> +
+ +
+ + + + + + + <% for (Pair p: report.getUnknownServers()) { %> + + + + + + <% } %> + +

<%= report.getUnknownServers().size() %> unknown server(s).

+
RowServerNameRegionInfo
<%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %><%= p.getSecond() %><%= p.getFirst().getRegionInfo() %>
+ <% } %> + <% if (!report.getEmptyRegionInfo().isEmpty()) { %> +
+ +
+ + + + + <% for (byte [] row: report.getEmptyRegionInfo()) { %> + + + + <% } %> + +

<%= report.getEmptyRegionInfo().size() %> emptyRegionInfo(s).

+
Row
<%= Bytes.toStringBinary(row) %>
+ <% } %> + <% } %> + <% } %> - \ No newline at end of file + diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java index d9fb07369e0..090690bfff5 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitorCluster.java @@ -92,11 +92,11 @@ public class TestCatalogJanitorCluster { gc = janitor.scan(); report = janitor.getLastReport(); assertFalse(report.isEmpty()); - assertEquals(1, report.holes.size()); - assertTrue(report.holes.get(0).getFirst().regionInfo.getTable().equals(T1)); - assertTrue(report.holes.get(0).getFirst().regionInfo.isLast()); - assertTrue(report.holes.get(0).getSecond().regionInfo.getTable().equals(T2)); - assertEquals(0, report.overlaps.size()); + assertEquals(1, report.getHoles().size()); + assertTrue(report.getHoles().get(0).getFirst().getRegionInfo().getTable().equals(T1)); + assertTrue(report.getHoles().get(0).getFirst().getRegionInfo().isLast()); + assertTrue(report.getHoles().get(0).getSecond().getRegionInfo().getTable().equals(T2)); + assertEquals(0, report.getOverlaps().size()); // Next, add overlaps to first row in t3 List t3Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T3); RegionInfo ri = t3Ris.get(0); @@ -113,12 +113,12 @@ public class TestCatalogJanitorCluster { report = janitor.getLastReport(); assertFalse(report.isEmpty()); // We added two overlaps so total three. - assertEquals(3, report.overlaps.size()); + assertEquals(3, report.getOverlaps().size()); // Assert hole is still there. - assertEquals(1, report.holes.size()); + assertEquals(1, report.getHoles().size()); // Assert other attributes are empty still. 
- assertTrue(report.emptyRegionInfo.isEmpty()); - assertTrue(report.unknownServers.isEmpty()); + assertTrue(report.getEmptyRegionInfo().isEmpty()); + assertTrue(report.getUnknownServers().isEmpty()); // Now make bad server in t1. List t1Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T1); RegionInfo t1Ri1 = t1Ris.get(1); @@ -129,7 +129,7 @@ public class TestCatalogJanitorCluster { gc = janitor.scan(); report = janitor.getLastReport(); assertFalse(report.isEmpty()); - assertEquals(1, report.unknownServers.size()); + assertEquals(1, report.getUnknownServers().size()); // Finally, make an empty regioninfo in t1. RegionInfo t1Ri2 = t1Ris.get(2); Put pEmptyRI = new Put(t1Ri2.getRegionName()); @@ -138,7 +138,7 @@ public class TestCatalogJanitorCluster { MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pEmptyRI)); gc = janitor.scan(); report = janitor.getLastReport(); - assertEquals(1, report.emptyRegionInfo.size()); + assertEquals(1, report.getEmptyRegionInfo().size()); } /**