HBASE-23572 In 'HBCK Report', distringush between live, dead, and unknown servers

Signed-off-by: Sean Busbey <busbey@apache.org>
This commit is contained in:
stack 2019-12-12 13:51:35 -08:00
parent ab40b9648b
commit 4b6ce0fcbf
1 changed files with 46 additions and 24 deletions

View File

@ -31,13 +31,14 @@
<%@ page import="org.apache.hadoop.hbase.client.RegionInfo" %> <%@ page import="org.apache.hadoop.hbase.client.RegionInfo" %>
<%@ page import="org.apache.hadoop.hbase.master.HbckChore" %> <%@ page import="org.apache.hadoop.hbase.master.HbckChore" %>
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %> <%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
<%@ page import="org.apache.hadoop.hbase.master.ServerManager" %>
<%@ page import="org.apache.hadoop.hbase.ServerName" %> <%@ page import="org.apache.hadoop.hbase.ServerName" %>
<%@ page import="org.apache.hadoop.hbase.util.Bytes" %> <%@ page import="org.apache.hadoop.hbase.util.Bytes" %>
<%@ page import="org.apache.hadoop.hbase.util.Pair" %> <%@ page import="org.apache.hadoop.hbase.util.Pair" %>
<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor" %> <%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor" %>
<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.Report" %> <%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.Report" %>
<% <%
HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER); final HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName()); pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
HbckChore hbckChore = master.getHbckChore(); HbckChore hbckChore = master.getHbckChore();
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null; Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null;
@ -60,13 +61,13 @@
String iso8601end = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME); String iso8601end = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
CatalogJanitor cj = master.getCatalogJanitor(); CatalogJanitor cj = master.getCatalogJanitor();
CatalogJanitor.Report report = cj == null? null: cj.getLastReport(); CatalogJanitor.Report report = cj == null? null: cj.getLastReport();
final ServerManager serverManager = master.getServerManager();
%> %>
<jsp:include page="header.jsp"> <jsp:include page="header.jsp">
<jsp:param name="pageTitle" value="${pageTitle}"/> <jsp:param name="pageTitle" value="${pageTitle}"/>
</jsp:include> </jsp:include>
<div class="container-fluid content"> <div class="container-fluid content">
<% if (!master.isInitialized()) { %> <% if (!master.isInitialized()) { %>
<div class="row"> <div class="row">
<div class="page-header"> <div class="page-header">
@ -78,7 +79,7 @@
<div class="row"> <div class="row">
<div class="page-header"> <div class="page-header">
<p><span>This page displays two reports: the 'HBCK Chore Report' and the 'CatalogJanitor Consistency Issues' report. Only titles show if there are no problems to report. Note some conditions are <em>transitory</em> as regions migrate.</span></p> <p><span>This page displays two reports: the <em>HBCK Chore Report</em> and the <em>CatalogJanitor Consistency Issues</em> report. Only report titles show if there are no problems to list. Note some conditions are <strong>transitory</strong> as regions migrate. See below for how to run reports. ServerNames will be links if server is live, italic if dead, and plain if unknown.</span></p>
</div> </div>
</div> </div>
<div class="row"> <div class="row">
@ -98,8 +99,6 @@
</div> </div>
</div> </div>
<% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %> <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
<div class="row"> <div class="row">
<div class="page-header"> <div class="page-header">
@ -108,10 +107,11 @@
</div> </div>
<p> <p>
<span> <span>
There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use assigns There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use assign
command); 2. Master thought this region opened on Server1, but regionserver reported Server2 (Fix: command); 2. Master thought this region opened on Server1, but regionserver reported Server2 (Fix:
need to check the server is still exist. If not, schedule SCP for it. If exist, restart Server2 and Server1): need to check the server still exists. If not, schedule <em>ServerCrashProcedure</em> for it. If exists,
3. More than one regionservers reported opened this region (Fix: restart the RegionServers). restart Server2 and Server1):
3. More than one regionserver reports opened this region (Fix: restart the RegionServers).
Notice: the reported online regionservers may be not right when there are regions in transition. Notice: the reported online regionservers may be not right when there are regions in transition.
Please check them in regionserver's web UI. Please check them in regionserver's web UI.
</span> </span>
@ -123,15 +123,14 @@
<th>Location in META</th> <th>Location in META</th>
<th>Reported Online RegionServers</th> <th>Reported Online RegionServers</th>
</tr> </tr>
<% for (Map.Entry<String, Pair<ServerName, List<ServerName>>> entry : inconsistentRegions.entrySet()) { %> <% for (Map.Entry<String, Pair<ServerName, List<ServerName>>> entry : inconsistentRegions.entrySet()) {%>
<tr> <tr>
<td><%= entry.getKey() %></td> <td><%= entry.getKey() %></td>
<td><%= entry.getValue().getFirst() %></td> <td><%= formatServerName(master, serverManager, entry.getValue().getFirst()) %></td>
<td><%= entry.getValue().getSecond().stream().map(ServerName::getServerName) <td><%= entry.getValue().getSecond().stream().map(s -> formatServerName(master, serverManager, s)).
.collect(Collectors.joining(", ")) %></td> collect(Collectors.joining(", ")) %></td>
</tr> </tr>
<% } %> <% } %>
<p><%= inconsistentRegions.size() %> region(s) in set.</p> <p><%= inconsistentRegions.size() %> region(s) in set.</p>
</table> </table>
<% } %> <% } %>
@ -142,14 +141,6 @@
<h2>Orphan Regions on RegionServer</h2> <h2>Orphan Regions on RegionServer</h2>
</div> </div>
</div> </div>
<p>
<span>
The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster.
First make sure hbase:meta is in healthy state; run 'hbkc2 fixMeta' to be sure. Once this is done, per Region below, run a bulk
load -- '$ hbase completebulkload REGION_DIR_PATH TABLE_NAME' -- and then delete the desiccated directory content (HFiles are removed upon successful load; all that is left are empty directories
and occasionally a seqid marking file).
</span>
</p>
<table class="table table-striped"> <table class="table table-striped">
<tr> <tr>
@ -159,10 +150,9 @@
<% for (Map.Entry<String, ServerName> entry : orphanRegionsOnRS.entrySet()) { %> <% for (Map.Entry<String, ServerName> entry : orphanRegionsOnRS.entrySet()) { %>
<tr> <tr>
<td><%= entry.getKey() %></td> <td><%= entry.getKey() %></td>
<td><%= entry.getValue() %></td> <td><%= formatServerName(master, serverManager, entry.getValue()) %></td>
</tr> </tr>
<% } %> <% } %>
<p><%= orphanRegionsOnRS.size() %> region(s) in set.</p> <p><%= orphanRegionsOnRS.size() %> region(s) in set.</p>
</table> </table>
<% } %> <% } %>
@ -173,7 +163,14 @@
<h2>Orphan Regions on FileSystem</h2> <h2>Orphan Regions on FileSystem</h2>
</div> </div>
</div> </div>
<p>
<span>
The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster.
First make sure <em>hbase:meta</em> is in a healthy state; run <em>hbck2 fixMeta</em> to be sure. Once this is done, per Region below, run a bulk
load -- <em>$ hbase completebulkload REGION_DIR_PATH TABLE_NAME</em> -- and then delete the desiccated directory content (HFiles are removed upon
successful load; all that is left are empty directories and occasionally a seqid marking file).
</span>
</p>
<table class="table table-striped"> <table class="table table-striped">
<tr> <tr>
<th>Region Encoded Name</th> <th>Region Encoded Name</th>
@ -301,3 +298,28 @@
</div> </div>
<jsp:include page="footer.jsp"/> <jsp:include page="footer.jsp"/>
<%!
/**
* Format serverName for display.
* If a live server reference, make it a link.
* If dead, make it italic.
* If unknown, make it plain.
*/
private static String formatServerName(HMaster master,
ServerManager serverManager, ServerName serverName) {
String sn = serverName.toString();
if (serverManager.isServerOnline(serverName)) {
int infoPort = master.getRegionServerInfoPort(serverName);
if (infoPort > 0) {
return "<a href=" + "//" + serverName.getHostname() + ":" +
infoPort + "/rs-status>" + sn + "</a>";
} else {
return "<b>" + sn + "</b>";
}
} else if (serverManager.isServerDead(serverName)) {
return "<i>" + sn + "</i>";
}
return sn;
}
%>