HBASE-23572 In 'HBCK Report', distringush between live, dead, and unknown servers
Signed-off-by: Sean Busbey <busbey@apache.org>
This commit is contained in:
parent
ab40b9648b
commit
4b6ce0fcbf
|
@ -31,13 +31,14 @@
|
|||
<%@ page import="org.apache.hadoop.hbase.client.RegionInfo" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.master.HbckChore" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.master.ServerManager" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.ServerName" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.util.Bytes" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.util.Pair" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor" %>
|
||||
<%@ page import="org.apache.hadoop.hbase.master.CatalogJanitor.Report" %>
|
||||
<%
|
||||
HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
|
||||
final HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
|
||||
pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
|
||||
HbckChore hbckChore = master.getHbckChore();
|
||||
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null;
|
||||
|
@ -60,13 +61,13 @@
|
|||
String iso8601end = startTimestamp == 0? "-1": zdt.format(DateTimeFormatter.ISO_OFFSET_DATE_TIME);
|
||||
CatalogJanitor cj = master.getCatalogJanitor();
|
||||
CatalogJanitor.Report report = cj == null? null: cj.getLastReport();
|
||||
final ServerManager serverManager = master.getServerManager();
|
||||
%>
|
||||
<jsp:include page="header.jsp">
|
||||
<jsp:param name="pageTitle" value="${pageTitle}"/>
|
||||
</jsp:include>
|
||||
|
||||
<div class="container-fluid content">
|
||||
|
||||
<% if (!master.isInitialized()) { %>
|
||||
<div class="row">
|
||||
<div class="page-header">
|
||||
|
@ -78,7 +79,7 @@
|
|||
|
||||
<div class="row">
|
||||
<div class="page-header">
|
||||
<p><span>This page displays two reports: the 'HBCK Chore Report' and the 'CatalogJanitor Consistency Issues' report. Only titles show if there are no problems to report. Note some conditions are <em>transitory</em> as regions migrate.</span></p>
|
||||
<p><span>This page displays two reports: the <em>HBCK Chore Report</em> and the <em>CatalogJanitor Consistency Issues</em> report. Only report titles show if there are no problems to list. Note some conditions are <strong>transitory</strong> as regions migrate. See below for how to run reports. ServerNames will be links if server is live, italic if dead, and plain if unknown.</span></p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="row">
|
||||
|
@ -98,8 +99,6 @@
|
|||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
|
||||
<% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
|
||||
<div class="row">
|
||||
<div class="page-header">
|
||||
|
@ -108,10 +107,11 @@
|
|||
</div>
|
||||
<p>
|
||||
<span>
|
||||
There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use assigns
|
||||
There are three cases: 1. Master thought this region opened, but no regionserver reported it (Fix: use assign
|
||||
command); 2. Master thought this region opened on Server1, but regionserver reported Server2 (Fix:
|
||||
need to check the server is still exist. If not, schedule SCP for it. If exist, restart Server2 and Server1):
|
||||
3. More than one regionservers reported opened this region (Fix: restart the RegionServers).
|
||||
need to check the server still exists. If not, schedule <em>ServerCrashProcedure</em> for it. If exists,
|
||||
restart Server2 and Server1):
|
||||
3. More than one regionserver reports opened this region (Fix: restart the RegionServers).
|
||||
Notice: the reported online regionservers may be not right when there are regions in transition.
|
||||
Please check them in regionserver's web UI.
|
||||
</span>
|
||||
|
@ -126,12 +126,11 @@
|
|||
<% for (Map.Entry<String, Pair<ServerName, List<ServerName>>> entry : inconsistentRegions.entrySet()) {%>
|
||||
<tr>
|
||||
<td><%= entry.getKey() %></td>
|
||||
<td><%= entry.getValue().getFirst() %></td>
|
||||
<td><%= entry.getValue().getSecond().stream().map(ServerName::getServerName)
|
||||
.collect(Collectors.joining(", ")) %></td>
|
||||
<td><%= formatServerName(master, serverManager, entry.getValue().getFirst()) %></td>
|
||||
<td><%= entry.getValue().getSecond().stream().map(s -> formatServerName(master, serverManager, s)).
|
||||
collect(Collectors.joining(", ")) %></td>
|
||||
</tr>
|
||||
<% } %>
|
||||
|
||||
<p><%= inconsistentRegions.size() %> region(s) in set.</p>
|
||||
</table>
|
||||
<% } %>
|
||||
|
@ -142,14 +141,6 @@
|
|||
<h2>Orphan Regions on RegionServer</h2>
|
||||
</div>
|
||||
</div>
|
||||
<p>
|
||||
<span>
|
||||
The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster.
|
||||
First make sure hbase:meta is in healthy state; run 'hbkc2 fixMeta' to be sure. Once this is done, per Region below, run a bulk
|
||||
load -- '$ hbase completebulkload REGION_DIR_PATH TABLE_NAME' -- and then delete the desiccated directory content (HFiles are removed upon successful load; all that is left are empty directories
|
||||
and occasionally a seqid marking file).
|
||||
</span>
|
||||
</p>
|
||||
|
||||
<table class="table table-striped">
|
||||
<tr>
|
||||
|
@ -159,10 +150,9 @@
|
|||
<% for (Map.Entry<String, ServerName> entry : orphanRegionsOnRS.entrySet()) { %>
|
||||
<tr>
|
||||
<td><%= entry.getKey() %></td>
|
||||
<td><%= entry.getValue() %></td>
|
||||
<td><%= formatServerName(master, serverManager, entry.getValue()) %></td>
|
||||
</tr>
|
||||
<% } %>
|
||||
|
||||
<p><%= orphanRegionsOnRS.size() %> region(s) in set.</p>
|
||||
</table>
|
||||
<% } %>
|
||||
|
@ -173,7 +163,14 @@
|
|||
<h2>Orphan Regions on FileSystem</h2>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p>
|
||||
<span>
|
||||
The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster.
|
||||
First make sure <em>hbase:meta</em> is in a healthy state; run <em>hbck2 fixMeta</em> to be sure. Once this is done, per Region below, run a bulk
|
||||
load -- <em>$ hbase completebulkload REGION_DIR_PATH TABLE_NAME</em> -- and then delete the desiccated directory content (HFiles are removed upon
|
||||
successful load; all that is left are empty directories and occasionally a seqid marking file).
|
||||
</span>
|
||||
</p>
|
||||
<table class="table table-striped">
|
||||
<tr>
|
||||
<th>Region Encoded Name</th>
|
||||
|
@ -301,3 +298,28 @@
|
|||
</div>
|
||||
|
||||
<jsp:include page="footer.jsp"/>
|
||||
|
||||
<%!
|
||||
/**
|
||||
* Format serverName for display.
|
||||
* If a live server reference, make it a link.
|
||||
* If dead, make it italic.
|
||||
* If unknown, make it plain.
|
||||
*/
|
||||
private static String formatServerName(HMaster master,
|
||||
ServerManager serverManager, ServerName serverName) {
|
||||
String sn = serverName.toString();
|
||||
if (serverManager.isServerOnline(serverName)) {
|
||||
int infoPort = master.getRegionServerInfoPort(serverName);
|
||||
if (infoPort > 0) {
|
||||
return "<a href=" + "//" + serverName.getHostname() + ":" +
|
||||
infoPort + "/rs-status>" + sn + "</a>";
|
||||
} else {
|
||||
return "<b>" + sn + "</b>";
|
||||
}
|
||||
} else if (serverManager.isServerDead(serverName)) {
|
||||
return "<i>" + sn + "</i>";
|
||||
}
|
||||
return sn;
|
||||
}
|
||||
%>
|
||||
|
|
Loading…
Reference in New Issue