HBASE-22709 Add a chore thread in master to do hbck checking (#404)
Signed-off-by: stack <stack@apache.org>
This commit is contained in:
parent
1cb37f18aa
commit
cf8114a82e
|
@ -42,84 +42,8 @@ int limit = 100;
|
|||
<%java>
|
||||
SortedSet<RegionState> rit = assignmentManager.getRegionStates()
|
||||
.getRegionsInTransitionOrderedByTimestamp();
|
||||
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = assignmentManager
|
||||
.getProblematicRegions();
|
||||
</%java>
|
||||
|
||||
<%if !problematicRegions.isEmpty() %>
|
||||
<%java>
|
||||
int totalSize = problematicRegions.size();
|
||||
int sizePerPage = Math.min(10, totalSize);
|
||||
int numOfPages = (int) Math.ceil(totalSize * 1.0 / sizePerPage);
|
||||
</%java>
|
||||
<section>
|
||||
<h2><a name="problem-regions">Problematic Regions</a></h2>
|
||||
<p>
|
||||
<span>
|
||||
<% problematicRegions.size() %> problematic region(s). There are three case: 1. Master
|
||||
thought this region opened, but no regionserver reported it. 2. Master thought this
|
||||
region opened on Server1, but regionserver reported Server2. 3. More than one
|
||||
regionservers reported opened this region. Notice: the reported online regionservers
|
||||
may be not right when there are regions in transition. Please check them in
|
||||
regionserver's web UI.
|
||||
</span>
|
||||
</p>
|
||||
<div class="tabbable">
|
||||
<div class="tab-content">
|
||||
<%java int recordItr = 0; %>
|
||||
<%for Map.Entry<String, Pair<ServerName, Set<ServerName>>> entry : problematicRegions.entrySet() %>
|
||||
<%if (recordItr % sizePerPage) == 0 %>
|
||||
<%if recordItr == 0 %>
|
||||
<div class="tab-pane active" id="tab_prs<% (recordItr / sizePerPage) + 1 %>">
|
||||
<%else>
|
||||
<div class="tab-pane" id="tab_prs<% (recordItr / sizePerPage) + 1 %>">
|
||||
</%if>
|
||||
<table class="table table-striped" style="margin-bottom:0px;">
|
||||
<tr>
|
||||
<th>Region</th>
|
||||
<th>Location in META</th>
|
||||
<th>Reported Online Region Servers</th>
|
||||
</tr>
|
||||
</%if>
|
||||
|
||||
<tr>
|
||||
<td><% entry.getKey() %></td>
|
||||
<td><% entry.getValue().getFirst() %></td>
|
||||
<td><% entry.getValue().getSecond().stream().map(ServerName::getServerName)
|
||||
.collect(Collectors.joining(", ")) %></td>
|
||||
</tr>
|
||||
<%java recordItr++; %>
|
||||
<%if (recordItr % sizePerPage) == 0 %>
|
||||
</table>
|
||||
</div>
|
||||
</%if>
|
||||
</%for>
|
||||
|
||||
<%if (recordItr % sizePerPage) != 0 %>
|
||||
<%for ; (recordItr % sizePerPage) != 0 ; recordItr++ %>
|
||||
<tr><td colspan="3" style="height:61px"></td></tr>
|
||||
</%for>
|
||||
</table>
|
||||
</div>
|
||||
</%if>
|
||||
|
||||
</div>
|
||||
<nav>
|
||||
<ul class="nav nav-pills pagination">
|
||||
<%for int i = 1 ; i <= numOfPages; i++ %>
|
||||
<%if i == 1 %>
|
||||
<li class="active">
|
||||
<%else>
|
||||
<li>
|
||||
</%if>
|
||||
<a href="#tab_prs<% i %>"><% i %></a></li>
|
||||
</%for>
|
||||
</ul>
|
||||
</nav>
|
||||
</div>
|
||||
</section>
|
||||
</%if>
|
||||
|
||||
<%if !rit.isEmpty() %>
|
||||
<%java>
|
||||
long currentTime = System.currentTimeMillis();
|
||||
|
|
|
@ -149,7 +149,8 @@ AssignmentManager assignmentManager = master.getAssignmentManager();
|
|||
<li class="active"><a href="/master-status">Home</a></li>
|
||||
<li><a href="/tablesDetailed.jsp">Table Details</a></li>
|
||||
<%if master.isActiveMaster() %>
|
||||
<li><a href="/procedures.jsp">Procedures & Locks</a></li>
|
||||
<li><a href="/procedures.jsp">Procedures & Locks</a></li>
|
||||
<li><a href="/hbck.jsp">HBCK Report</a></li>
|
||||
</%if>
|
||||
<li><a href="/processMaster.jsp">Process Metrics</a></li>
|
||||
<li><a href="/logs/">Local Logs</a></li>
|
||||
|
|
|
@ -385,6 +385,7 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
private ClusterStatusPublisher clusterStatusPublisherChore = null;
|
||||
private SnapshotCleanerChore snapshotCleanerChore = null;
|
||||
|
||||
private HbckChecker hbckChecker;
|
||||
CatalogJanitor catalogJanitorChore;
|
||||
private LogCleaner logCleaner;
|
||||
private HFileCleaner hfileCleaner;
|
||||
|
@ -1108,6 +1109,8 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
getChoreService().scheduleChore(normalizerChore);
|
||||
this.catalogJanitorChore = new CatalogJanitor(this);
|
||||
getChoreService().scheduleChore(catalogJanitorChore);
|
||||
this.hbckChecker = new HbckChecker(this);
|
||||
getChoreService().scheduleChore(hbckChecker);
|
||||
this.serverManager.startChore();
|
||||
|
||||
// Only for rolling upgrade, where we need to migrate the data in namespace table to meta table.
|
||||
|
@ -1587,6 +1590,7 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
choreService.cancelChore(this.hfileCleaner);
|
||||
choreService.cancelChore(this.replicationBarrierCleaner);
|
||||
choreService.cancelChore(this.snapshotCleanerChore);
|
||||
choreService.cancelChore(this.hbckChecker);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3756,4 +3760,8 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
}
|
||||
return super.getWalGroupsReplicationStatus();
|
||||
}
|
||||
|
||||
public HbckChecker getHbckChecker() {
|
||||
return this.hbckChecker;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,282 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.ScheduledChore;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
import org.apache.hadoop.hbase.util.HbckRegionInfo;
|
||||
import org.apache.hadoop.hbase.util.Pair;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
import org.apache.yetus.audience.InterfaceStability;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* Used to do the hbck checking job at master side.
|
||||
*/
|
||||
@InterfaceAudience.Private
|
||||
@InterfaceStability.Evolving
|
||||
public class HbckChecker extends ScheduledChore {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(HbckChecker.class.getName());
|
||||
|
||||
private static final String HBCK_CHECKER_INTERVAL = "hbase.master.hbck.checker.interval";
|
||||
private static final int DEFAULT_HBCK_CHECKER_INTERVAL = 60 * 60 * 1000;
|
||||
|
||||
private final MasterServices master;
|
||||
|
||||
/**
|
||||
* This map contains the state of all hbck items. It maps from encoded region
|
||||
* name to HbckRegionInfo structure. The information contained in HbckRegionInfo is used
|
||||
* to detect and correct consistency (hdfs/meta/deployment) problems.
|
||||
*/
|
||||
private final Map<String, HbckRegionInfo> regionInfoMap = new HashMap<>();
|
||||
|
||||
/**
|
||||
* The regions only opened on RegionServers, but no region info in meta.
|
||||
*/
|
||||
private final Map<String, ServerName> orphanRegionsOnRS = new HashMap<>();
|
||||
/**
|
||||
* The regions have directory on FileSystem, but no region info in meta.
|
||||
*/
|
||||
private final List<String> orphanRegionsOnFS = new LinkedList<>();
|
||||
/**
|
||||
* The inconsistent regions. There are three case:
|
||||
* case 1. Master thought this region opened, but no regionserver reported it.
|
||||
* case 2. Master thought this region opened on Server1, but regionserver reported Server2
|
||||
* case 3. More than one regionservers reported opened this region
|
||||
*/
|
||||
private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
|
||||
new HashMap<>();
|
||||
|
||||
/**
|
||||
* The "snapshot" is used to save the last round's HBCK checking report.
|
||||
*/
|
||||
private final Map<String, ServerName> orphanRegionsOnRSSnapshot = new HashMap<>();
|
||||
private final List<String> orphanRegionsOnFSSnapshot = new LinkedList<>();
|
||||
private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegionsSnapshot =
|
||||
new HashMap<>();
|
||||
|
||||
/**
|
||||
* The "snapshot" may be changed after checking. And this checking report "snapshot" may be
|
||||
* accessed by web ui. Use this rwLock to synchronize.
|
||||
*/
|
||||
ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
|
||||
|
||||
/**
|
||||
* When running, the "snapshot" may be changed when this round's checking finish.
|
||||
*/
|
||||
private volatile boolean running = false;
|
||||
private volatile long checkingStartTimestamp = 0;
|
||||
private volatile long checkingEndTimestamp = 0;
|
||||
|
||||
public HbckChecker(MasterServices master) {
|
||||
super("HbckChecker-", master,
|
||||
master.getConfiguration().getInt(HBCK_CHECKER_INTERVAL, DEFAULT_HBCK_CHECKER_INTERVAL));
|
||||
this.master = master;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void chore() {
|
||||
running = true;
|
||||
regionInfoMap.clear();
|
||||
orphanRegionsOnRS.clear();
|
||||
orphanRegionsOnFS.clear();
|
||||
inconsistentRegions.clear();
|
||||
checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
|
||||
loadRegionsFromInMemoryState();
|
||||
loadRegionsFromRSReport();
|
||||
try {
|
||||
loadRegionsFromFS();
|
||||
} catch (IOException e) {
|
||||
LOG.warn("Failed to load the regions from filesystem", e);
|
||||
}
|
||||
saveCheckResultToSnapshot();
|
||||
running = false;
|
||||
}
|
||||
|
||||
private void saveCheckResultToSnapshot() {
|
||||
// Need synchronized here, as this "snapshot" may be access by web ui.
|
||||
rwLock.writeLock().lock();
|
||||
try {
|
||||
orphanRegionsOnRSSnapshot.clear();
|
||||
orphanRegionsOnRS.entrySet()
|
||||
.forEach(e -> orphanRegionsOnRSSnapshot.put(e.getKey(), e.getValue()));
|
||||
orphanRegionsOnFSSnapshot.clear();
|
||||
orphanRegionsOnFSSnapshot.addAll(orphanRegionsOnFS);
|
||||
inconsistentRegionsSnapshot.clear();
|
||||
inconsistentRegions.entrySet()
|
||||
.forEach(e -> inconsistentRegionsSnapshot.put(e.getKey(), e.getValue()));
|
||||
checkingEndTimestamp = EnvironmentEdgeManager.currentTime();
|
||||
} finally {
|
||||
rwLock.writeLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
private void loadRegionsFromInMemoryState() {
|
||||
List<RegionState> regionStates =
|
||||
master.getAssignmentManager().getRegionStates().getRegionStates();
|
||||
for (RegionState regionState : regionStates) {
|
||||
RegionInfo regionInfo = regionState.getRegion();
|
||||
HbckRegionInfo.MetaEntry metaEntry =
|
||||
new HbckRegionInfo.MetaEntry(regionInfo, regionState.getServerName(),
|
||||
regionState.getStamp());
|
||||
regionInfoMap.put(regionInfo.getEncodedName(), new HbckRegionInfo(metaEntry));
|
||||
}
|
||||
}
|
||||
|
||||
private void loadRegionsFromRSReport() {
|
||||
Map<ServerName, Set<byte[]>> rsReports = master.getAssignmentManager().getRSReports();
|
||||
for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
|
||||
ServerName serverName = entry.getKey();
|
||||
for (byte[] regionName : entry.getValue()) {
|
||||
String encodedRegionName = RegionInfo.encodeRegionName(regionName);
|
||||
HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
|
||||
if (hri == null) {
|
||||
orphanRegionsOnRS.put(encodedRegionName, serverName);
|
||||
continue;
|
||||
}
|
||||
hri.addServer(hri.getMetaEntry(), serverName);
|
||||
}
|
||||
}
|
||||
|
||||
for (Map.Entry<String, HbckRegionInfo> entry : regionInfoMap.entrySet()) {
|
||||
String encodedRegionName = entry.getKey();
|
||||
HbckRegionInfo hri = entry.getValue();
|
||||
ServerName locationInMeta = hri.getMetaEntry().getRegionServer();
|
||||
if (hri.getDeployedOn().size() == 0) {
|
||||
// Master thought this region opened, but no regionserver reported it.
|
||||
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, new LinkedList<>()));
|
||||
} else if (hri.getDeployedOn().size() > 1) {
|
||||
// More than one regionserver reported opened this region
|
||||
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn()));
|
||||
} else if (!hri.getDeployedOn().get(0).equals(locationInMeta)) {
|
||||
// Master thought this region opened on Server1, but regionserver reported Server2
|
||||
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void loadRegionsFromFS() throws IOException {
|
||||
Path rootDir = master.getMasterFileSystem().getRootDir();
|
||||
FileSystem fs = master.getMasterFileSystem().getFileSystem();
|
||||
|
||||
// list all tables from HDFS
|
||||
List<FileStatus> tableDirs = Lists.newArrayList();
|
||||
List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
|
||||
for (Path path : paths) {
|
||||
tableDirs.add(fs.getFileStatus(path));
|
||||
}
|
||||
|
||||
for (FileStatus tableDir : tableDirs) {
|
||||
FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
|
||||
for (FileStatus regionDir : regionDirs) {
|
||||
String encodedRegionName = regionDir.getPath().getName();
|
||||
HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
|
||||
if (hri == null) {
|
||||
orphanRegionsOnFS.add(encodedRegionName);
|
||||
continue;
|
||||
}
|
||||
HbckRegionInfo.HdfsEntry hdfsEntry =
|
||||
new HbckRegionInfo.HdfsEntry(regionDir.getPath(), regionDir.getModificationTime());
|
||||
hri.setHdfsEntry(hdfsEntry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* When running, the HBCK report may be changed later.
|
||||
*/
|
||||
public boolean isRunning() {
|
||||
return running;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the regions only opened on RegionServers, but no region info in meta.
|
||||
*/
|
||||
public Map<String, ServerName> getOrphanRegionsOnRS() {
|
||||
// Need synchronized here, as this "snapshot" may be changed after checking.
|
||||
rwLock.readLock().lock();
|
||||
try {
|
||||
return this.orphanRegionsOnRSSnapshot;
|
||||
} finally {
|
||||
rwLock.readLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the regions have directory on FileSystem, but no region info in meta.
|
||||
*/
|
||||
public List<String> getOrphanRegionsOnFS() {
|
||||
// Need synchronized here, as this "snapshot" may be changed after checking.
|
||||
rwLock.readLock().lock();
|
||||
try {
|
||||
return this.orphanRegionsOnFSSnapshot;
|
||||
} finally {
|
||||
rwLock.readLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Found the inconsistent regions. There are three case:
|
||||
* case 1. Master thought this region opened, but no regionserver reported it.
|
||||
* case 2. Master thought this region opened on Server1, but regionserver reported Server2
|
||||
* case 3. More than one regionservers reported opened this region
|
||||
*
|
||||
* @return the map of inconsistent regions. Key is the region name. Value is a pair of location in
|
||||
* meta and the regionservers which reported opened this region.
|
||||
*/
|
||||
public Map<String, Pair<ServerName, List<ServerName>>> getInconsistentRegions() {
|
||||
// Need synchronized here, as this "snapshot" may be changed after checking.
|
||||
rwLock.readLock().lock();
|
||||
try {
|
||||
return this.inconsistentRegionsSnapshot;
|
||||
} finally {
|
||||
rwLock.readLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Used for web ui to show when the HBCK checking started.
|
||||
*/
|
||||
public long getCheckingStartTimestamp() {
|
||||
return this.checkingStartTimestamp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Used for web ui to show when the HBCK checking report generated.
|
||||
*/
|
||||
public long getCheckingEndTimestamp() {
|
||||
return this.checkingStartTimestamp;
|
||||
}
|
||||
}
|
|
@ -1467,6 +1467,12 @@ public class AssignmentManager {
|
|||
LOG.info("Skip to add SCP for {} since this server should be OFFLINE already", serverName);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Remove the in-memory rsReports result
|
||||
synchronized (rsReports) {
|
||||
rsReports.remove(serverName);
|
||||
}
|
||||
|
||||
// we hold the write lock here for fencing on reportRegionStateTransition. Once we set the
|
||||
// server state to CRASHED, we will no longer accept the reportRegionStateTransition call from
|
||||
// this server. This is used to simplify the implementation for TRSP and SCP, where we can make
|
||||
|
@ -2037,51 +2043,13 @@ public class AssignmentManager {
|
|||
}
|
||||
|
||||
/**
|
||||
* Found the potentially problematic opened regions. There are three case:
|
||||
* case 1. Master thought this region opened, but no regionserver reported it.
|
||||
* case 2. Master thought this region opened on Server1, but regionserver reported Server2
|
||||
* case 3. More than one regionservers reported opened this region
|
||||
*
|
||||
* @return the map of potentially problematic opened regions. Key is the region name. Value is
|
||||
* a pair of location in meta and the regionservers which reported opened this region.
|
||||
* @return a snapshot of rsReports
|
||||
*/
|
||||
public Map<String, Pair<ServerName, Set<ServerName>>> getProblematicRegions() {
|
||||
Map<String, Set<ServerName>> reportedOnlineRegions = new HashMap<>();
|
||||
public Map<ServerName, Set<byte[]>> getRSReports() {
|
||||
Map<ServerName, Set<byte[]>> rsReportsSnapshot = new HashMap<>();
|
||||
synchronized (rsReports) {
|
||||
for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
|
||||
for (byte[] regionName : entry.getValue()) {
|
||||
reportedOnlineRegions
|
||||
.computeIfAbsent(RegionInfo.getRegionNameAsString(regionName), r -> new HashSet<>())
|
||||
.add(entry.getKey());
|
||||
}
|
||||
}
|
||||
rsReports.entrySet().forEach(e -> rsReportsSnapshot.put(e.getKey(), e.getValue()));
|
||||
}
|
||||
|
||||
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = new HashMap<>();
|
||||
List<RegionState> rits = regionStates.getRegionsStateInTransition();
|
||||
for (RegionState regionState : regionStates.getRegionStates()) {
|
||||
// Only consider the opened region and not in transition
|
||||
if (!rits.contains(regionState) && regionState.isOpened()) {
|
||||
String regionName = regionState.getRegion().getRegionNameAsString();
|
||||
ServerName serverName = regionState.getServerName();
|
||||
if (reportedOnlineRegions.containsKey(regionName)) {
|
||||
Set<ServerName> reportedServers = reportedOnlineRegions.get(regionName);
|
||||
if (reportedServers.contains(serverName)) {
|
||||
if (reportedServers.size() > 1) {
|
||||
// More than one regionserver reported opened this region
|
||||
problematicRegions.put(regionName, new Pair<>(serverName, reportedServers));
|
||||
}
|
||||
} else {
|
||||
// Master thought this region opened on Server1, but regionserver reported Server2
|
||||
problematicRegions.put(regionName, new Pair<>(serverName, reportedServers));
|
||||
}
|
||||
} else {
|
||||
// Master thought this region opened, but no regionserver reported it.
|
||||
problematicRegions.put(regionName, new Pair<>(serverName, new HashSet<>()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return problematicRegions;
|
||||
return rsReportsSnapshot;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,153 @@
|
|||
<%--
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
--%>
<%@ page contentType="text/html;charset=UTF-8"
  import="java.util.Date"
  import="java.util.List"
  import="java.util.Map"
  import="java.util.stream.Collectors"
%>
<%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %>
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
<%@ page import="org.apache.hadoop.hbase.ServerName" %>
<%@ page import="org.apache.hadoop.hbase.util.Pair" %>
<%
  HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
  pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
  // Pull the last published HBCK report snapshot; the checker may be null before master init.
  HbckChecker hbckChecker = master.getHbckChecker();
  Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null;
  Map<String, ServerName> orphanRegionsOnRS = null;
  List<String> orphanRegionsOnFS = null;
  long startTimestamp = 0;
  long endTimestamp = 0;
  if (hbckChecker != null) {
    inconsistentRegions = hbckChecker.getInconsistentRegions();
    orphanRegionsOnRS = hbckChecker.getOrphanRegionsOnRS();
    orphanRegionsOnFS = hbckChecker.getOrphanRegionsOnFS();
    startTimestamp = hbckChecker.getCheckingStartTimestamp();
    endTimestamp = hbckChecker.getCheckingEndTimestamp();
  }
%>
<jsp:include page="header.jsp">
  <jsp:param name="pageTitle" value="${pageTitle}"/>
</jsp:include>

<div class="container-fluid content">

  <% if (!master.isInitialized()) { %>
  <div class="row">
    <div class="page-header">
      <h1>Master is not initialized</h1>
    </div>
  </div>
  <jsp:include page="redirect.jsp" />
  <% } else { %>

  <div class="row">
    <div class="page-header">
      <h1>HBCK Report</h1>
      <p>
        <span>Checking started at <%= new Date(startTimestamp) %> and generated report at <%= new Date(endTimestamp) %></span>
      </p>
    </div>
  </div>

  <div class="row">
    <div class="page-header">
      <h2>Inconsistent Regions</h2>
      <p>
        <span>
          There are three case: 1. Master thought this region opened, but no regionserver reported it.
          2. Master thought this region opened on Server1, but regionserver reported Server2.
          3. More than one regionservers reported opened this region.
          Notice: the reported online regionservers may be not right when there are regions in transition.
          Please check them in regionserver's web UI.
        </span>
      </p>
    </div>
  </div>

  <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
  <table class="table table-striped">
    <tr>
      <th>Region</th>
      <th>Location in META</th>
      <th>Reported Online RegionServers</th>
    </tr>
    <% for (Map.Entry<String, Pair<ServerName, List<ServerName>>> entry : inconsistentRegions.entrySet()) { %>
    <tr>
      <td><%= entry.getKey() %></td>
      <td><%= entry.getValue().getFirst() %></td>
      <td><%= entry.getValue().getSecond().stream().map(ServerName::getServerName)
             .collect(Collectors.joining(", ")) %></td>
    </tr>
    <% } %>
  </table>
  <%-- BUGFIX: the row-count paragraph used to sit directly inside <table>, which is
       invalid HTML (flow content is not allowed as a direct child of table). --%>
  <p><%= inconsistentRegions.size() %> region(s) in set.</p>
  <% } %>

  <div class="row">
    <div class="page-header">
      <h2>Orphan Regions on RegionServer</h2>
    </div>
  </div>

  <% if (orphanRegionsOnRS != null && orphanRegionsOnRS.size() > 0) { %>
  <table class="table table-striped">
    <tr>
      <th>Region</th>
      <th>Reported Online RegionServer</th>
    </tr>
    <% for (Map.Entry<String, ServerName> entry : orphanRegionsOnRS.entrySet()) { %>
    <tr>
      <td><%= entry.getKey() %></td>
      <td><%= entry.getValue() %></td>
    </tr>
    <% } %>
  </table>
  <p><%= orphanRegionsOnRS.size() %> region(s) in set.</p>
  <% } %>

  <div class="row">
    <div class="page-header">
      <h2>Orphan Regions on FileSystem</h2>
    </div>
  </div>

  <% if (orphanRegionsOnFS != null && orphanRegionsOnFS.size() > 0) { %>
  <table class="table table-striped">
    <tr>
      <th>Region</th>
    </tr>
    <% for (String region : orphanRegionsOnFS) { %>
    <tr>
      <td><%= region %></td>
    </tr>
    <% } %>
  </table>
  <p><%= orphanRegionsOnFS.size() %> region(s) in set.</p>
  <% } %>

  <% } %>
</div>

<jsp:include page="footer.jsp"/>
|
|
@ -24,7 +24,6 @@ import static org.junit.Assert.assertTrue;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
|
@ -32,9 +31,11 @@ import org.apache.hadoop.hbase.ServerName;
|
|||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
||||
import org.apache.hadoop.hbase.master.HbckChecker;
|
||||
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||
import org.apache.hadoop.hbase.util.Pair;
|
||||
import org.junit.Before;
|
||||
import org.junit.ClassRule;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
|
@ -42,41 +43,52 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@Category({ MasterTests.class, MediumTests.class })
|
||||
public class TestAMProblematicRegions extends TestAssignmentManagerBase {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(TestAMProblematicRegions.class);
|
||||
public class TestHbckChecker extends TestAssignmentManagerBase {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(TestHbckChecker.class);
|
||||
|
||||
@ClassRule
|
||||
public static final HBaseClassTestRule CLASS_RULE =
|
||||
HBaseClassTestRule.forClass(TestAMProblematicRegions.class);
|
||||
HBaseClassTestRule.forClass(TestHbckChecker.class);
|
||||
|
||||
private HbckChecker hbckChecker;
|
||||
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
hbckChecker = new HbckChecker(master);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForMeta() {
|
||||
byte[] metaRegionNameAsBytes = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName();
|
||||
String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionNameAsString();
|
||||
String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedName();
|
||||
List<ServerName> serverNames = master.getServerManager().getOnlineServersList();
|
||||
assertEquals(NSERVERS, serverNames.size());
|
||||
|
||||
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = am.getProblematicRegions();
|
||||
hbckChecker.choreForTesting();
|
||||
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
|
||||
hbckChecker.getInconsistentRegions();
|
||||
|
||||
// Test for case1: Master thought this region opened, but no regionserver reported it.
|
||||
assertTrue(problematicRegions.containsKey(metaRegionName));
|
||||
Pair<ServerName, Set<ServerName>> pair = problematicRegions.get(metaRegionName);
|
||||
assertTrue(inconsistentRegions.containsKey(metaRegionName));
|
||||
Pair<ServerName, List<ServerName>> pair = inconsistentRegions.get(metaRegionName);
|
||||
ServerName locationInMeta = pair.getFirst();
|
||||
Set<ServerName> reportedRegionServers = pair.getSecond();
|
||||
List<ServerName> reportedRegionServers = pair.getSecond();
|
||||
assertTrue(serverNames.contains(locationInMeta));
|
||||
assertEquals(0, reportedRegionServers.size());
|
||||
|
||||
// Reported right region location. Then not in problematic regions.
|
||||
am.reportOnlineRegions(locationInMeta, Collections.singleton(metaRegionNameAsBytes));
|
||||
problematicRegions = am.getProblematicRegions();
|
||||
assertFalse(problematicRegions.containsKey(metaRegionName));
|
||||
hbckChecker.choreForTesting();
|
||||
inconsistentRegions = hbckChecker.getInconsistentRegions();
|
||||
assertFalse(inconsistentRegions.containsKey(metaRegionName));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForUserTable() throws Exception {
|
||||
TableName tableName = TableName.valueOf("testForUserTable");
|
||||
RegionInfo hri = createRegionInfo(tableName, 1);
|
||||
String regionName = hri.getRegionNameAsString();
|
||||
String regionName = hri.getEncodedName();
|
||||
rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
|
||||
Future<byte[]> future = submitProcedure(createAssignProcedure(hri));
|
||||
waitOnFuture(future);
|
||||
|
@ -85,11 +97,13 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
|
|||
assertEquals(NSERVERS, serverNames.size());
|
||||
|
||||
// Test for case1: Master thought this region opened, but no regionserver reported it.
|
||||
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = am.getProblematicRegions();
|
||||
assertTrue(problematicRegions.containsKey(regionName));
|
||||
Pair<ServerName, Set<ServerName>> pair = problematicRegions.get(regionName);
|
||||
hbckChecker.choreForTesting();
|
||||
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
|
||||
hbckChecker.getInconsistentRegions();
|
||||
assertTrue(inconsistentRegions.containsKey(regionName));
|
||||
Pair<ServerName, List<ServerName>> pair = inconsistentRegions.get(regionName);
|
||||
ServerName locationInMeta = pair.getFirst();
|
||||
Set<ServerName> reportedRegionServers = pair.getSecond();
|
||||
List<ServerName> reportedRegionServers = pair.getSecond();
|
||||
assertTrue(serverNames.contains(locationInMeta));
|
||||
assertEquals(0, reportedRegionServers.size());
|
||||
|
||||
|
@ -99,9 +113,10 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
|
|||
final ServerName anotherServer =
|
||||
serverNames.stream().filter(s -> !s.equals(tempLocationInMeta)).findFirst().get();
|
||||
am.reportOnlineRegions(anotherServer, Collections.singleton(hri.getRegionName()));
|
||||
problematicRegions = am.getProblematicRegions();
|
||||
assertTrue(problematicRegions.containsKey(regionName));
|
||||
pair = problematicRegions.get(regionName);
|
||||
hbckChecker.choreForTesting();
|
||||
inconsistentRegions = hbckChecker.getInconsistentRegions();
|
||||
assertTrue(inconsistentRegions.containsKey(regionName));
|
||||
pair = inconsistentRegions.get(regionName);
|
||||
locationInMeta = pair.getFirst();
|
||||
reportedRegionServers = pair.getSecond();
|
||||
assertEquals(1, reportedRegionServers.size());
|
||||
|
@ -110,9 +125,10 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
|
|||
|
||||
// Test for case3: More than one regionservers reported opened this region.
|
||||
am.reportOnlineRegions(locationInMeta, Collections.singleton(hri.getRegionName()));
|
||||
problematicRegions = am.getProblematicRegions();
|
||||
assertTrue(problematicRegions.containsKey(regionName));
|
||||
pair = problematicRegions.get(regionName);
|
||||
hbckChecker.choreForTesting();
|
||||
inconsistentRegions = hbckChecker.getInconsistentRegions();
|
||||
assertTrue(inconsistentRegions.containsKey(regionName));
|
||||
pair = inconsistentRegions.get(regionName);
|
||||
locationInMeta = pair.getFirst();
|
||||
reportedRegionServers = pair.getSecond();
|
||||
assertEquals(2, reportedRegionServers.size());
|
||||
|
@ -121,7 +137,8 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
|
|||
|
||||
// Reported right region location. Then not in problematic regions.
|
||||
am.reportOnlineRegions(anotherServer, Collections.EMPTY_SET);
|
||||
problematicRegions = am.getProblematicRegions();
|
||||
assertFalse(problematicRegions.containsKey(regionName));
|
||||
hbckChecker.choreForTesting();
|
||||
inconsistentRegions = hbckChecker.getInconsistentRegions();
|
||||
assertFalse(inconsistentRegions.containsKey(regionName));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue