HBASE-22741 Show catalogjanitor consistency complaints in new 'HBCK Report' page
Signed-off-by: huzheng <openinx@gmail.com> Signed-off-by: Guanghao Zhang <zghao@apache.org>
This commit is contained in:
parent
4587b39e63
commit
9799e0b6b2
|
@ -22,7 +22,6 @@ import java.io.IOException;
|
|||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -30,6 +29,7 @@ import java.util.Properties;
|
|||
import java.util.TreeMap;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.HBaseConfiguration;
|
||||
|
@ -42,8 +42,11 @@ import org.apache.hadoop.hbase.TableName;
|
|||
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
|
||||
import org.apache.hadoop.hbase.client.Connection;
|
||||
import org.apache.hadoop.hbase.client.ConnectionFactory;
|
||||
import org.apache.hadoop.hbase.client.Get;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.Table;
|
||||
import org.apache.hadoop.hbase.client.TableDescriptor;
|
||||
import org.apache.hadoop.hbase.client.TableState;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
|
||||
|
@ -239,7 +242,7 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
* @return Returns last published Report that comes of last successful scan
|
||||
* of hbase:meta.
|
||||
*/
|
||||
Report getLastReport() {
|
||||
public Report getLastReport() {
|
||||
return this.lastReport;
|
||||
}
|
||||
|
||||
|
@ -443,29 +446,49 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
}
|
||||
|
||||
/**
|
||||
* Report made by {@link ReportMakingVisitor}.
|
||||
* Report made by ReportMakingVisitor
|
||||
*/
|
||||
static class Report {
|
||||
public static class Report {
|
||||
private final long now = EnvironmentEdgeManager.currentTime();
|
||||
|
||||
// Keep Map of found split parents. These are candidates for cleanup.
|
||||
// Use a comparator that has split parents come before its daughters.
|
||||
final Map<RegionInfo, Result> splitParents = new TreeMap<>(new SplitParentFirstComparator());
|
||||
final Map<RegionInfo, Result> mergedRegions = new TreeMap<>(RegionInfo.COMPARATOR);
|
||||
|
||||
final List<Pair<MetaRow, MetaRow>> holes = new ArrayList<>();
|
||||
final List<Pair<MetaRow, MetaRow>> overlaps = new ArrayList<>();
|
||||
final Map<ServerName, RegionInfo> unknownServers = new HashMap<ServerName, RegionInfo>();
|
||||
final List<byte []> emptyRegionInfo = new ArrayList<>();
|
||||
int count = 0;
|
||||
|
||||
private final List<Pair<MetaRow, MetaRow>> holes = new ArrayList<>();
|
||||
private final List<Pair<MetaRow, MetaRow>> overlaps = new ArrayList<>();
|
||||
private final List<Pair<MetaRow, ServerName>> unknownServers = new ArrayList<>();
|
||||
private final List<byte []> emptyRegionInfo = new ArrayList<>();
|
||||
|
||||
@VisibleForTesting
|
||||
Report() {}
|
||||
|
||||
public long getCreateTime() {
|
||||
return this.now;
|
||||
}
|
||||
|
||||
public List<Pair<MetaRow, MetaRow>> getHoles() {
|
||||
return this.holes;
|
||||
}
|
||||
|
||||
public List<Pair<MetaRow, MetaRow>> getOverlaps() {
|
||||
return this.overlaps;
|
||||
}
|
||||
|
||||
public List<Pair<MetaRow, ServerName>> getUnknownServers() {
|
||||
return unknownServers;
|
||||
}
|
||||
|
||||
public List<byte[]> getEmptyRegionInfo() {
|
||||
return emptyRegionInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if an 'empty' lastReport -- no problems found.
|
||||
*/
|
||||
boolean isEmpty() {
|
||||
public boolean isEmpty() {
|
||||
return this.holes.isEmpty() && this.overlaps.isEmpty() && this.unknownServers.isEmpty() &&
|
||||
this.emptyRegionInfo.isEmpty();
|
||||
}
|
||||
|
@ -477,28 +500,28 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
if (sb.length() > 0) {
|
||||
sb.append(", ");
|
||||
}
|
||||
sb.append("hole=" + Bytes.toString(p.getFirst().metaRow) + "/" +
|
||||
Bytes.toString(p.getSecond().metaRow));
|
||||
sb.append("hole=" + Bytes.toStringBinary(p.getFirst().metaRow) + "/" +
|
||||
Bytes.toStringBinary(p.getSecond().metaRow));
|
||||
}
|
||||
for (Pair<MetaRow, MetaRow> p: this.overlaps) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append(", ");
|
||||
}
|
||||
sb.append("overlap=").append(Bytes.toString(p.getFirst().metaRow)).append("/").
|
||||
append(Bytes.toString(p.getSecond().metaRow));
|
||||
sb.append("overlap=").append(Bytes.toStringBinary(p.getFirst().metaRow)).append("/").
|
||||
append(Bytes.toStringBinary(p.getSecond().metaRow));
|
||||
}
|
||||
for (byte [] r: this.emptyRegionInfo) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append(", ");
|
||||
}
|
||||
sb.append("empty=").append(Bytes.toString(r));
|
||||
sb.append("empty=").append(Bytes.toStringBinary(r));
|
||||
}
|
||||
for (Map.Entry<ServerName, RegionInfo> e: this.unknownServers.entrySet()) {
|
||||
for (Pair<MetaRow, ServerName> p: this.unknownServers) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append(", ");
|
||||
}
|
||||
sb.append("unknown_server=").append(e.getKey()).append("/").
|
||||
append(e.getValue().getRegionNameAsString());
|
||||
sb.append("unknown_server=").append(p.getSecond()).append("/").
|
||||
append(Bytes.toStringBinary(p.getFirst().metaRow));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
@ -507,7 +530,7 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
/**
|
||||
* Simple datastructure to hold a MetaRow content.
|
||||
*/
|
||||
static class MetaRow {
|
||||
public static class MetaRow {
|
||||
/**
|
||||
* A marker for use in case where there is a hole at the very
|
||||
* first row in hbase:meta. Should never happen.
|
||||
|
@ -518,17 +541,25 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
/**
|
||||
* Row from hbase:meta table.
|
||||
*/
|
||||
final byte [] metaRow;
|
||||
private final byte [] metaRow;
|
||||
|
||||
/**
|
||||
* The decoded RegionInfo gotten from hbase:meta.
|
||||
*/
|
||||
final RegionInfo regionInfo;
|
||||
private final RegionInfo regionInfo;
|
||||
|
||||
MetaRow(byte [] metaRow, RegionInfo regionInfo) {
|
||||
this.metaRow = metaRow;
|
||||
this.regionInfo = regionInfo;
|
||||
}
|
||||
|
||||
public RegionInfo getRegionInfo() {
|
||||
return regionInfo;
|
||||
}
|
||||
|
||||
public byte[] getMetaRow() {
|
||||
return metaRow;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -609,6 +640,7 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
this.report.emptyRegionInfo.add(metaTableRow.getRow());
|
||||
return ri;
|
||||
}
|
||||
|
||||
MetaRow mrri = new MetaRow(metaTableRow.getRow(), ri);
|
||||
// If table is disabled, skip integrity check.
|
||||
if (!isTableDisabled(ri)) {
|
||||
|
@ -700,20 +732,22 @@ public class CatalogJanitor extends ScheduledChore {
|
|||
public static void main(String [] args) throws IOException {
|
||||
checkLog4jProperties();
|
||||
ReportMakingVisitor visitor = new ReportMakingVisitor(null);
|
||||
try (Connection connection = ConnectionFactory.createConnection(HBaseConfiguration.create())) {
|
||||
Configuration configuration = HBaseConfiguration.create();
|
||||
configuration.setBoolean("hbase.defaults.for.version.skip", true);
|
||||
try (Connection connection = ConnectionFactory.createConnection(configuration)) {
|
||||
/* Used to generate an overlap.
|
||||
Get g = new Get(Bytes.toBytes("t2,40,1563939166317.5a8be963741d27e9649e5c67a34259d9."));
|
||||
*/
|
||||
Get g = new Get(Bytes.toBytes("t2,40,1564119846424.1db8c57d64e0733e0f027aaeae7a0bf0."));
|
||||
g.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
|
||||
try (Table t = connection.getTable(TableName.META_TABLE_NAME)) {
|
||||
Result r = t.get(g);
|
||||
byte [] row = g.getRow();
|
||||
row[row.length - 3] <<= ((byte)row[row.length -3]);
|
||||
row[row.length - 2] <<= ((byte)row[row.length - 2]);
|
||||
Put p = new Put(g.getRow());
|
||||
p.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER,
|
||||
r.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER));
|
||||
t.put(p);
|
||||
}
|
||||
*/
|
||||
MetaTableAccessor.scanMetaForTableRegions(connection, visitor, null);
|
||||
Report report = visitor.getReport();
|
||||
LOG.info(report != null? report.toString(): "empty");
|
||||
|
|
|
@ -18,15 +18,23 @@
|
|||
*/
|
||||
--%>
|
||||
<%@ page contentType="text/html;charset=UTF-8"
|
||||
import="java.time.Instant"
|
||||
import="java.time.ZoneId"
|
||||
import="java.util.Date"
|
||||
import="java.util.List"
|
||||
import="java.util.Map"
|
||||
import="java.util.stream.Collectors"
|
||||
import="java.time.ZonedDateTime"
|
||||
import="java.time.format.DateTimeFormatter"
|
||||
%>
|
||||
<%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.ServerName" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.util.Bytes" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.util.Pair" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.Report" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.MetaRow" %>
|
||||
<%
|
||||
HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
|
||||
pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
|
||||
|
@ -43,6 +51,14 @@
|
|||
startTimestamp = hbckChecker.getCheckingStartTimestamp();
|
||||
endTimestamp = hbckChecker.getCheckingEndTimestamp();
|
||||
}
|
||||
ZonedDateTime zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(startTimestamp),
|
||||
ZoneId.systemDefault());
|
||||
String iso8601start = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
|
||||
zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(endTimestamp),
|
||||
ZoneId.systemDefault());
|
||||
// Guard on the END timestamp: zdt now wraps endTimestamp, so an unset (0) end time must
// render as "-1" instead of the epoch, independent of whether the start time is set.
String iso8601end = endTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
|
||||
CatalogJanitor cj = master.getCatalogJanitor();
|
||||
CatalogJanitor.Report report = cj == null? null: cj.getLastReport();
|
||||
%>
|
||||
<jsp:include page="header.jsp">
|
||||
<jsp:param name="pageTitle" value="${pageTitle}"/>
|
||||
|
@ -61,29 +77,32 @@
|
|||
|
||||
<div class="row">
|
||||
<div class="page-header">
|
||||
<h1>HBCK Report</h1>
|
||||
<h1>HBCK Chore Report</h1>
|
||||
<p>
|
||||
<span>Checking started at <%= new Date(startTimestamp) %> and generated report at <%= new Date(endTimestamp) %></span>
|
||||
<span>Checking started at <%= iso8601start %> and generated report at <%= iso8601end %>. Execute 'hbck_chore_run' in hbase shell to generate a new sub-report.</span>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="row">
|
||||
<div class="page-header">
|
||||
<h2>Inconsistent Regions</h2>
|
||||
<p>
|
||||
<span>
|
||||
There are three case: 1. Master thought this region opened, but no regionserver reported it.
|
||||
2. Master thought this region opened on Server1, but regionserver reported Server2.
|
||||
3. More than one regionservers reported opened this region.
|
||||
Notice: the reported online regionservers may be not right when there are regions in transition.
|
||||
Please check them in regionserver's web UI.
|
||||
</span>
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
|
||||
<p>
|
||||
<span>
|
||||
There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use assigns
|
||||
command); 2. Master thought this region opened on Server1, but regionserver reported Server2 (Fix:
|
||||
check whether the server still exists. If not, schedule an SCP for it; if it does, restart Server2 and Server1);
|
||||
3. More than one regionserver reported this region as open (Fix: restart the RegionServers).
|
||||
Note: the reported online regionservers may not be accurate while there are regions in transition.
|
||||
Please check them in regionserver's web UI.
|
||||
</span>
|
||||
</p>
|
||||
|
||||
<table class="table table-striped">
|
||||
<tr>
|
||||
<th>Region</th>
|
||||
|
@ -147,6 +166,114 @@
|
|||
</table>
|
||||
<% } %>
|
||||
|
||||
<div class="row inner_header">
|
||||
<div class="page-header">
|
||||
<h1>CatalogJanitor <em>hbase:meta</em> Consistency Issues</h1>
|
||||
</div>
|
||||
</div>
|
||||
<% if (report != null && !report.isEmpty()) {
|
||||
zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(report.getCreateTime()),
|
||||
ZoneId.systemDefault());
|
||||
String iso8601reportTime = zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
|
||||
zdt = ZonedDateTime.ofInstant(Instant.ofEpochMilli(System.currentTimeMillis()),
|
||||
ZoneId.systemDefault());
|
||||
String iso8601Now = zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
|
||||
%>
|
||||
<p>Report created: <%= iso8601reportTime %> (now=<%= iso8601Now %>). Run <i>catalogjanitor_run</i> in hbase shell to generate a new sub-report.</p>
|
||||
<% if (!report.getHoles().isEmpty()) { %>
|
||||
<div class="row inner_header">
|
||||
<div class="page-header">
|
||||
<h2>Holes</h2>
|
||||
</div>
|
||||
</div>
|
||||
<table class="table table-striped">
|
||||
<tr>
|
||||
<th>Row before hole</th>
|
||||
<th>RegionInfo</th>
|
||||
<th>Row after hole</th>
|
||||
<th>RegionInfo</th>
|
||||
</tr>
|
||||
<% for (Pair<MetaRow, MetaRow> p : report.getHoles()) { %>
|
||||
<tr>
|
||||
<td><%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %></td>
|
||||
<td><%= p.getFirst().getRegionInfo() %></td>
|
||||
<td><%= Bytes.toStringBinary(p.getSecond().getMetaRow()) %></td>
|
||||
<td><%= p.getSecond().getRegionInfo() %></td>
|
||||
</tr>
|
||||
<% } %>
|
||||
|
||||
<p><%= report.getHoles().size() %> hole(s).</p>
|
||||
</table>
|
||||
<% } %>
|
||||
<% if (!report.getOverlaps().isEmpty()) { %>
|
||||
<div class="row inner_header">
|
||||
<div class="page-header">
|
||||
<h2>Overlaps</h2>
|
||||
</div>
|
||||
</div>
|
||||
<table class="table table-striped">
|
||||
<tr>
|
||||
<th>Row</th>
|
||||
<th>RegionInfo</th>
|
||||
<th>Other Row</th>
|
||||
<th>Other RegionInfo</th>
|
||||
</tr>
|
||||
<% for (Pair<MetaRow, MetaRow> p : report.getOverlaps()) { %>
|
||||
<tr>
|
||||
<td><%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %></td>
|
||||
<td><%= p.getFirst().getRegionInfo() %></td>
|
||||
<td><%= Bytes.toStringBinary(p.getSecond().getMetaRow()) %></td>
|
||||
<td><%= p.getSecond().getRegionInfo() %></td>
|
||||
</tr>
|
||||
<% } %>
|
||||
|
||||
<p><%= report.getOverlaps().size() %> overlap(s).</p>
|
||||
</table>
|
||||
<% } %>
|
||||
<% if (!report.getUnknownServers().isEmpty()) { %>
|
||||
<div class="row inner_header">
|
||||
<div class="page-header">
|
||||
<h2>Unknown Servers</h2>
|
||||
</div>
|
||||
</div>
|
||||
<table class="table table-striped">
|
||||
<tr>
|
||||
<th>Row</th>
|
||||
<th>ServerName</th>
|
||||
<th>RegionInfo</th>
|
||||
</tr>
|
||||
<% for (Pair<MetaRow, ServerName> p: report.getUnknownServers()) { %>
|
||||
<tr>
|
||||
<td><%= Bytes.toStringBinary(p.getFirst().getMetaRow()) %></td>
|
||||
<td><%= p.getSecond() %></td>
|
||||
<td><%= p.getFirst().getRegionInfo() %></td>
|
||||
</tr>
|
||||
<% } %>
|
||||
|
||||
<p><%= report.getUnknownServers().size() %> unknown server(s).</p>
|
||||
</table>
|
||||
<% } %>
|
||||
<% if (!report.getEmptyRegionInfo().isEmpty()) { %>
|
||||
<div class="row inner_header">
|
||||
<div class="page-header">
|
||||
<h2>Empty <em>info:regioninfo</em></h2>
|
||||
</div>
|
||||
</div>
|
||||
<table class="table table-striped">
|
||||
<tr>
|
||||
<th>Row</th>
|
||||
</tr>
|
||||
<% for (byte [] row: report.getEmptyRegionInfo()) { %>
|
||||
<tr>
|
||||
<td><%= Bytes.toStringBinary(row) %></td>
|
||||
</tr>
|
||||
<% } %>
|
||||
|
||||
<p><%= report.getEmptyRegionInfo().size() %> emptyRegionInfo(s).</p>
|
||||
</table>
|
||||
<% } %>
|
||||
<% } %>
|
||||
|
||||
<% } %>
|
||||
</div>
|
||||
|
||||
|
|
|
@ -92,11 +92,11 @@ public class TestCatalogJanitorCluster {
|
|||
gc = janitor.scan();
|
||||
report = janitor.getLastReport();
|
||||
assertFalse(report.isEmpty());
|
||||
assertEquals(1, report.holes.size());
|
||||
assertTrue(report.holes.get(0).getFirst().regionInfo.getTable().equals(T1));
|
||||
assertTrue(report.holes.get(0).getFirst().regionInfo.isLast());
|
||||
assertTrue(report.holes.get(0).getSecond().regionInfo.getTable().equals(T2));
|
||||
assertEquals(0, report.overlaps.size());
|
||||
assertEquals(1, report.getHoles().size());
|
||||
assertTrue(report.getHoles().get(0).getFirst().getRegionInfo().getTable().equals(T1));
|
||||
assertTrue(report.getHoles().get(0).getFirst().getRegionInfo().isLast());
|
||||
assertTrue(report.getHoles().get(0).getSecond().getRegionInfo().getTable().equals(T2));
|
||||
assertEquals(0, report.getOverlaps().size());
|
||||
// Next, add overlaps to first row in t3
|
||||
List<RegionInfo> t3Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T3);
|
||||
RegionInfo ri = t3Ris.get(0);
|
||||
|
@ -113,12 +113,12 @@ public class TestCatalogJanitorCluster {
|
|||
report = janitor.getLastReport();
|
||||
assertFalse(report.isEmpty());
|
||||
// We added two overlaps so total three.
|
||||
assertEquals(3, report.overlaps.size());
|
||||
assertEquals(3, report.getOverlaps().size());
|
||||
// Assert hole is still there.
|
||||
assertEquals(1, report.holes.size());
|
||||
assertEquals(1, report.getHoles().size());
|
||||
// Assert other attributes are empty still.
|
||||
assertTrue(report.emptyRegionInfo.isEmpty());
|
||||
assertTrue(report.unknownServers.isEmpty());
|
||||
assertTrue(report.getEmptyRegionInfo().isEmpty());
|
||||
assertTrue(report.getUnknownServers().isEmpty());
|
||||
// Now make bad server in t1.
|
||||
List<RegionInfo> t1Ris = MetaTableAccessor.getTableRegions(TEST_UTIL.getConnection(), T1);
|
||||
RegionInfo t1Ri1 = t1Ris.get(1);
|
||||
|
@ -129,7 +129,7 @@ public class TestCatalogJanitorCluster {
|
|||
gc = janitor.scan();
|
||||
report = janitor.getLastReport();
|
||||
assertFalse(report.isEmpty());
|
||||
assertEquals(1, report.unknownServers.size());
|
||||
assertEquals(1, report.getUnknownServers().size());
|
||||
// Finally, make an empty regioninfo in t1.
|
||||
RegionInfo t1Ri2 = t1Ris.get(2);
|
||||
Put pEmptyRI = new Put(t1Ri2.getRegionName());
|
||||
|
@ -138,7 +138,7 @@ public class TestCatalogJanitorCluster {
|
|||
MetaTableAccessor.putsToMetaTable(TEST_UTIL.getConnection(), Arrays.asList(pEmptyRI));
|
||||
gc = janitor.scan();
|
||||
report = janitor.getLastReport();
|
||||
assertEquals(1, report.emptyRegionInfo.size());
|
||||
assertEquals(1, report.getEmptyRegionInfo().size());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in New Issue