HBASE-22709 Add a chore thread in master to do hbck checking (#404)

Signed-off-by: stack <stack@apache.org>
This commit is contained in:
Guanghao Zhang 2019-07-27 18:23:37 +08:00 committed by GitHub
parent 1cb37f18aa
commit cf8114a82e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 497 additions and 144 deletions

View File

@ -42,84 +42,8 @@ int limit = 100;
<%java>
SortedSet<RegionState> rit = assignmentManager.getRegionStates()
.getRegionsInTransitionOrderedByTimestamp();
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = assignmentManager
.getProblematicRegions();
</%java>
<%if !problematicRegions.isEmpty() %>
<%java>
int totalSize = problematicRegions.size();
int sizePerPage = Math.min(10, totalSize);
int numOfPages = (int) Math.ceil(totalSize * 1.0 / sizePerPage);
</%java>
<section>
<h2><a name="problem-regions">Problematic Regions</a></h2>
<p>
<span>
<% problematicRegions.size() %> problematic region(s). There are three case: 1. Master
thought this region opened, but no regionserver reported it. 2. Master thought this
region opened on Server1, but regionserver reported Server2. 3. More than one
regionservers reported opened this region. Notice: the reported online regionservers
may be not right when there are regions in transition. Please check them in
regionserver's web UI.
</span>
</p>
<div class="tabbable">
<div class="tab-content">
<%java int recordItr = 0; %>
<%for Map.Entry<String, Pair<ServerName, Set<ServerName>>> entry : problematicRegions.entrySet() %>
<%if (recordItr % sizePerPage) == 0 %>
<%if recordItr == 0 %>
<div class="tab-pane active" id="tab_prs<% (recordItr / sizePerPage) + 1 %>">
<%else>
<div class="tab-pane" id="tab_prs<% (recordItr / sizePerPage) + 1 %>">
</%if>
<table class="table table-striped" style="margin-bottom:0px;">
<tr>
<th>Region</th>
<th>Location in META</th>
<th>Reported Online Region Servers</th>
</tr>
</%if>
<tr>
<td><% entry.getKey() %></td>
<td><% entry.getValue().getFirst() %></td>
<td><% entry.getValue().getSecond().stream().map(ServerName::getServerName)
.collect(Collectors.joining(", ")) %></td>
</tr>
<%java recordItr++; %>
<%if (recordItr % sizePerPage) == 0 %>
</table>
</div>
</%if>
</%for>
<%if (recordItr % sizePerPage) != 0 %>
<%for ; (recordItr % sizePerPage) != 0 ; recordItr++ %>
<tr><td colspan="3" style="height:61px"></td></tr>
</%for>
</table>
</div>
</%if>
</div>
<nav>
<ul class="nav nav-pills pagination">
<%for int i = 1 ; i <= numOfPages; i++ %>
<%if i == 1 %>
<li class="active">
<%else>
<li>
</%if>
<a href="#tab_prs<% i %>"><% i %></a></li>
</%for>
</ul>
</nav>
</div>
</section>
</%if>
<%if !rit.isEmpty() %>
<%java>
long currentTime = System.currentTimeMillis();

View File

@ -149,7 +149,8 @@ AssignmentManager assignmentManager = master.getAssignmentManager();
<li class="active"><a href="/master-status">Home</a></li>
<li><a href="/tablesDetailed.jsp">Table Details</a></li>
<%if master.isActiveMaster() %>
<li><a href="/procedures.jsp">Procedures &amp; Locks</a></li>
<li><a href="/procedures.jsp">Procedures &amp; Locks</a></li>
<li><a href="/hbck.jsp">HBCK Report</a></li>
</%if>
<li><a href="/processMaster.jsp">Process Metrics</a></li>
<li><a href="/logs/">Local Logs</a></li>

View File

@ -385,6 +385,7 @@ public class HMaster extends HRegionServer implements MasterServices {
private ClusterStatusPublisher clusterStatusPublisherChore = null;
private SnapshotCleanerChore snapshotCleanerChore = null;
private HbckChecker hbckChecker;
CatalogJanitor catalogJanitorChore;
private LogCleaner logCleaner;
private HFileCleaner hfileCleaner;
@ -1108,6 +1109,8 @@ public class HMaster extends HRegionServer implements MasterServices {
getChoreService().scheduleChore(normalizerChore);
this.catalogJanitorChore = new CatalogJanitor(this);
getChoreService().scheduleChore(catalogJanitorChore);
this.hbckChecker = new HbckChecker(this);
getChoreService().scheduleChore(hbckChecker);
this.serverManager.startChore();
// Only for rolling upgrade, where we need to migrate the data in namespace table to meta table.
@ -1587,6 +1590,7 @@ public class HMaster extends HRegionServer implements MasterServices {
choreService.cancelChore(this.hfileCleaner);
choreService.cancelChore(this.replicationBarrierCleaner);
choreService.cancelChore(this.snapshotCleanerChore);
choreService.cancelChore(this.hbckChecker);
}
}
@ -3756,4 +3760,8 @@ public class HMaster extends HRegionServer implements MasterServices {
}
return super.getWalGroupsReplicationStatus();
}
public HbckChecker getHbckChecker() {
return this.hbckChecker;
}
}

View File

@ -0,0 +1,282 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HbckRegionInfo;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
/**
* Used to do the hbck checking job at master side.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class HbckChecker extends ScheduledChore {
private static final Logger LOG = LoggerFactory.getLogger(HbckChecker.class.getName());
private static final String HBCK_CHECKER_INTERVAL = "hbase.master.hbck.checker.interval";
private static final int DEFAULT_HBCK_CHECKER_INTERVAL = 60 * 60 * 1000;
private final MasterServices master;
/**
* This map contains the state of all hbck items. It maps from encoded region
* name to HbckRegionInfo structure. The information contained in HbckRegionInfo is used
* to detect and correct consistency (hdfs/meta/deployment) problems.
*/
private final Map<String, HbckRegionInfo> regionInfoMap = new HashMap<>();
/**
* The regions only opened on RegionServers, but no region info in meta.
*/
private final Map<String, ServerName> orphanRegionsOnRS = new HashMap<>();
/**
* The regions have directory on FileSystem, but no region info in meta.
*/
private final List<String> orphanRegionsOnFS = new LinkedList<>();
/**
* The inconsistent regions. There are three case:
* case 1. Master thought this region opened, but no regionserver reported it.
* case 2. Master thought this region opened on Server1, but regionserver reported Server2
* case 3. More than one regionservers reported opened this region
*/
private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
new HashMap<>();
/**
* The "snapshot" is used to save the last round's HBCK checking report.
*/
private final Map<String, ServerName> orphanRegionsOnRSSnapshot = new HashMap<>();
private final List<String> orphanRegionsOnFSSnapshot = new LinkedList<>();
private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegionsSnapshot =
new HashMap<>();
/**
* The "snapshot" may be changed after checking. And this checking report "snapshot" may be
* accessed by web ui. Use this rwLock to synchronize.
*/
ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
/**
* When running, the "snapshot" may be changed when this round's checking finish.
*/
private volatile boolean running = false;
private volatile long checkingStartTimestamp = 0;
private volatile long checkingEndTimestamp = 0;
public HbckChecker(MasterServices master) {
super("HbckChecker-", master,
master.getConfiguration().getInt(HBCK_CHECKER_INTERVAL, DEFAULT_HBCK_CHECKER_INTERVAL));
this.master = master;
}
@Override
protected void chore() {
running = true;
regionInfoMap.clear();
orphanRegionsOnRS.clear();
orphanRegionsOnFS.clear();
inconsistentRegions.clear();
checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
loadRegionsFromInMemoryState();
loadRegionsFromRSReport();
try {
loadRegionsFromFS();
} catch (IOException e) {
LOG.warn("Failed to load the regions from filesystem", e);
}
saveCheckResultToSnapshot();
running = false;
}
private void saveCheckResultToSnapshot() {
// Need synchronized here, as this "snapshot" may be access by web ui.
rwLock.writeLock().lock();
try {
orphanRegionsOnRSSnapshot.clear();
orphanRegionsOnRS.entrySet()
.forEach(e -> orphanRegionsOnRSSnapshot.put(e.getKey(), e.getValue()));
orphanRegionsOnFSSnapshot.clear();
orphanRegionsOnFSSnapshot.addAll(orphanRegionsOnFS);
inconsistentRegionsSnapshot.clear();
inconsistentRegions.entrySet()
.forEach(e -> inconsistentRegionsSnapshot.put(e.getKey(), e.getValue()));
checkingEndTimestamp = EnvironmentEdgeManager.currentTime();
} finally {
rwLock.writeLock().unlock();
}
}
private void loadRegionsFromInMemoryState() {
List<RegionState> regionStates =
master.getAssignmentManager().getRegionStates().getRegionStates();
for (RegionState regionState : regionStates) {
RegionInfo regionInfo = regionState.getRegion();
HbckRegionInfo.MetaEntry metaEntry =
new HbckRegionInfo.MetaEntry(regionInfo, regionState.getServerName(),
regionState.getStamp());
regionInfoMap.put(regionInfo.getEncodedName(), new HbckRegionInfo(metaEntry));
}
}
private void loadRegionsFromRSReport() {
Map<ServerName, Set<byte[]>> rsReports = master.getAssignmentManager().getRSReports();
for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
ServerName serverName = entry.getKey();
for (byte[] regionName : entry.getValue()) {
String encodedRegionName = RegionInfo.encodeRegionName(regionName);
HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
if (hri == null) {
orphanRegionsOnRS.put(encodedRegionName, serverName);
continue;
}
hri.addServer(hri.getMetaEntry(), serverName);
}
}
for (Map.Entry<String, HbckRegionInfo> entry : regionInfoMap.entrySet()) {
String encodedRegionName = entry.getKey();
HbckRegionInfo hri = entry.getValue();
ServerName locationInMeta = hri.getMetaEntry().getRegionServer();
if (hri.getDeployedOn().size() == 0) {
// Master thought this region opened, but no regionserver reported it.
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, new LinkedList<>()));
} else if (hri.getDeployedOn().size() > 1) {
// More than one regionserver reported opened this region
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn()));
} else if (!hri.getDeployedOn().get(0).equals(locationInMeta)) {
// Master thought this region opened on Server1, but regionserver reported Server2
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn()));
}
}
}
private void loadRegionsFromFS() throws IOException {
Path rootDir = master.getMasterFileSystem().getRootDir();
FileSystem fs = master.getMasterFileSystem().getFileSystem();
// list all tables from HDFS
List<FileStatus> tableDirs = Lists.newArrayList();
List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
for (Path path : paths) {
tableDirs.add(fs.getFileStatus(path));
}
for (FileStatus tableDir : tableDirs) {
FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
for (FileStatus regionDir : regionDirs) {
String encodedRegionName = regionDir.getPath().getName();
HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
if (hri == null) {
orphanRegionsOnFS.add(encodedRegionName);
continue;
}
HbckRegionInfo.HdfsEntry hdfsEntry =
new HbckRegionInfo.HdfsEntry(regionDir.getPath(), regionDir.getModificationTime());
hri.setHdfsEntry(hdfsEntry);
}
}
}
/**
* When running, the HBCK report may be changed later.
*/
public boolean isRunning() {
return running;
}
/**
* @return the regions only opened on RegionServers, but no region info in meta.
*/
public Map<String, ServerName> getOrphanRegionsOnRS() {
// Need synchronized here, as this "snapshot" may be changed after checking.
rwLock.readLock().lock();
try {
return this.orphanRegionsOnRSSnapshot;
} finally {
rwLock.readLock().unlock();
}
}
/**
* @return the regions have directory on FileSystem, but no region info in meta.
*/
public List<String> getOrphanRegionsOnFS() {
// Need synchronized here, as this "snapshot" may be changed after checking.
rwLock.readLock().lock();
try {
return this.orphanRegionsOnFSSnapshot;
} finally {
rwLock.readLock().unlock();
}
}
/**
* Found the inconsistent regions. There are three case:
* case 1. Master thought this region opened, but no regionserver reported it.
* case 2. Master thought this region opened on Server1, but regionserver reported Server2
* case 3. More than one regionservers reported opened this region
*
* @return the map of inconsistent regions. Key is the region name. Value is a pair of location in
* meta and the regionservers which reported opened this region.
*/
public Map<String, Pair<ServerName, List<ServerName>>> getInconsistentRegions() {
// Need synchronized here, as this "snapshot" may be changed after checking.
rwLock.readLock().lock();
try {
return this.inconsistentRegionsSnapshot;
} finally {
rwLock.readLock().unlock();
}
}
/**
* Used for web ui to show when the HBCK checking started.
*/
public long getCheckingStartTimestamp() {
return this.checkingStartTimestamp;
}
/**
* Used for web ui to show when the HBCK checking report generated.
*/
public long getCheckingEndTimestamp() {
return this.checkingStartTimestamp;
}
}

View File

@ -1467,6 +1467,12 @@ public class AssignmentManager {
LOG.info("Skip to add SCP for {} since this server should be OFFLINE already", serverName);
return -1;
}
// Remove the in-memory rsReports result
synchronized (rsReports) {
rsReports.remove(serverName);
}
// we hold the write lock here for fencing on reportRegionStateTransition. Once we set the
// server state to CRASHED, we will no longer accept the reportRegionStateTransition call from
// this server. This is used to simplify the implementation for TRSP and SCP, where we can make
@ -2037,51 +2043,13 @@ public class AssignmentManager {
}
/**
* Found the potentially problematic opened regions. There are three case:
* case 1. Master thought this region opened, but no regionserver reported it.
* case 2. Master thought this region opened on Server1, but regionserver reported Server2
* case 3. More than one regionservers reported opened this region
*
* @return the map of potentially problematic opened regions. Key is the region name. Value is
* a pair of location in meta and the regionservers which reported opened this region.
* @return a snapshot of rsReports
*/
public Map<String, Pair<ServerName, Set<ServerName>>> getProblematicRegions() {
Map<String, Set<ServerName>> reportedOnlineRegions = new HashMap<>();
public Map<ServerName, Set<byte[]>> getRSReports() {
Map<ServerName, Set<byte[]>> rsReportsSnapshot = new HashMap<>();
synchronized (rsReports) {
for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
for (byte[] regionName : entry.getValue()) {
reportedOnlineRegions
.computeIfAbsent(RegionInfo.getRegionNameAsString(regionName), r -> new HashSet<>())
.add(entry.getKey());
}
}
rsReports.entrySet().forEach(e -> rsReportsSnapshot.put(e.getKey(), e.getValue()));
}
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = new HashMap<>();
List<RegionState> rits = regionStates.getRegionsStateInTransition();
for (RegionState regionState : regionStates.getRegionStates()) {
// Only consider the opened region and not in transition
if (!rits.contains(regionState) && regionState.isOpened()) {
String regionName = regionState.getRegion().getRegionNameAsString();
ServerName serverName = regionState.getServerName();
if (reportedOnlineRegions.containsKey(regionName)) {
Set<ServerName> reportedServers = reportedOnlineRegions.get(regionName);
if (reportedServers.contains(serverName)) {
if (reportedServers.size() > 1) {
// More than one regionserver reported opened this region
problematicRegions.put(regionName, new Pair<>(serverName, reportedServers));
}
} else {
// Master thought this region opened on Server1, but regionserver reported Server2
problematicRegions.put(regionName, new Pair<>(serverName, reportedServers));
}
} else {
// Master thought this region opened, but no regionserver reported it.
problematicRegions.put(regionName, new Pair<>(serverName, new HashSet<>()));
}
}
}
return problematicRegions;
return rsReportsSnapshot;
}
}

View File

@ -0,0 +1,153 @@
<%--
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
--%>
<%@ page contentType="text/html;charset=UTF-8"
import="java.util.Date"
import="java.util.List"
import="java.util.Map"
import="java.util.stream.Collectors"
%>
<%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %>
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
<%@ page import="org.apache.hadoop.hbase.ServerName" %>
<%@ page import="org.apache.hadoop.hbase.util.Pair" %>
<%
HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
HbckChecker hbckChecker = master.getHbckChecker();
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null;
Map<String, ServerName> orphanRegionsOnRS = null;
List<String> orphanRegionsOnFS = null;
long startTimestamp = 0;
long endTimestamp = 0;
if (hbckChecker != null) {
inconsistentRegions = hbckChecker.getInconsistentRegions();
orphanRegionsOnRS = hbckChecker.getOrphanRegionsOnRS();
orphanRegionsOnFS = hbckChecker.getOrphanRegionsOnFS();
startTimestamp = hbckChecker.getCheckingStartTimestamp();
endTimestamp = hbckChecker.getCheckingEndTimestamp();
}
%>
<jsp:include page="header.jsp">
<jsp:param name="pageTitle" value="${pageTitle}"/>
</jsp:include>
<div class="container-fluid content">
<% if (!master.isInitialized()) { %>
<div class="row">
<div class="page-header">
<h1>Master is not initialized</h1>
</div>
</div>
<jsp:include page="redirect.jsp" />
<% } else { %>
<div class="row">
<div class="page-header">
<h1>HBCK Report</h1>
<p>
<span>Checking started at <%= new Date(startTimestamp) %> and generated report at <%= new Date(endTimestamp) %></span>
</p>
</div>
</div>
<div class="row">
<div class="page-header">
<h2>Inconsistent Regions</h2>
<p>
<span>
There are three case: 1. Master thought this region opened, but no regionserver reported it.
2. Master thought this region opened on Server1, but regionserver reported Server2.
3. More than one regionservers reported opened this region.
Notice: the reported online regionservers may be not right when there are regions in transition.
Please check them in regionserver's web UI.
</span>
</p>
</div>
</div>
<% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
<table class="table table-striped">
<tr>
<th>Region</th>
<th>Location in META</th>
<th>Reported Online RegionServers</th>
</tr>
<% for (Map.Entry<String, Pair<ServerName, List<ServerName>>> entry : inconsistentRegions.entrySet()) { %>
<tr>
<td><%= entry.getKey() %></td>
<td><%= entry.getValue().getFirst() %></td>
<td><%= entry.getValue().getSecond().stream().map(ServerName::getServerName)
.collect(Collectors.joining(", ")) %></td>
</tr>
<% } %>
<p><%= inconsistentRegions.size() %> region(s) in set.</p>
</table>
<% } %>
<div class="row">
<div class="page-header">
<h2>Orphan Regions on RegionServer</h2>
</div>
</div>
<% if (orphanRegionsOnRS != null && orphanRegionsOnRS.size() > 0) { %>
<table class="table table-striped">
<tr>
<th>Region</th>
<th>Reported Online RegionServer</th>
</tr>
<% for (Map.Entry<String, ServerName> entry : orphanRegionsOnRS.entrySet()) { %>
<tr>
<td><%= entry.getKey() %></td>
<td><%= entry.getValue() %></td>
</tr>
<% } %>
<p><%= orphanRegionsOnRS.size() %> region(s) in set.</p>
</table>
<% } %>
<div class="row">
<div class="page-header">
<h2>Orphan Regions on FileSystem</h2>
</div>
</div>
<% if (orphanRegionsOnFS != null && orphanRegionsOnFS.size() > 0) { %>
<table class="table table-striped">
<tr>
<th>Region</th>
</tr>
<% for (String region : orphanRegionsOnFS) { %>
<tr>
<td><%= region %></td>
</tr>
<% } %>
<p><%= orphanRegionsOnFS.size() %> region(s) in set.</p>
</table>
<% } %>
<% } %>
</div>
<jsp:include page="footer.jsp"/>

View File

@ -24,7 +24,6 @@ import static org.junit.Assert.assertTrue;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Future;
import org.apache.hadoop.hbase.HBaseClassTestRule;
@ -32,9 +31,11 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.master.HbckChecker;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Pair;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@ -42,41 +43,52 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@Category({ MasterTests.class, MediumTests.class })
public class TestAMProblematicRegions extends TestAssignmentManagerBase {
private static final Logger LOG = LoggerFactory.getLogger(TestAMProblematicRegions.class);
public class TestHbckChecker extends TestAssignmentManagerBase {
private static final Logger LOG = LoggerFactory.getLogger(TestHbckChecker.class);
@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestAMProblematicRegions.class);
HBaseClassTestRule.forClass(TestHbckChecker.class);
private HbckChecker hbckChecker;
@Before
public void setUp() throws Exception {
super.setUp();
hbckChecker = new HbckChecker(master);
}
@Test
public void testForMeta() {
byte[] metaRegionNameAsBytes = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName();
String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionNameAsString();
String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedName();
List<ServerName> serverNames = master.getServerManager().getOnlineServersList();
assertEquals(NSERVERS, serverNames.size());
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = am.getProblematicRegions();
hbckChecker.choreForTesting();
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
hbckChecker.getInconsistentRegions();
// Test for case1: Master thought this region opened, but no regionserver reported it.
assertTrue(problematicRegions.containsKey(metaRegionName));
Pair<ServerName, Set<ServerName>> pair = problematicRegions.get(metaRegionName);
assertTrue(inconsistentRegions.containsKey(metaRegionName));
Pair<ServerName, List<ServerName>> pair = inconsistentRegions.get(metaRegionName);
ServerName locationInMeta = pair.getFirst();
Set<ServerName> reportedRegionServers = pair.getSecond();
List<ServerName> reportedRegionServers = pair.getSecond();
assertTrue(serverNames.contains(locationInMeta));
assertEquals(0, reportedRegionServers.size());
// Reported right region location. Then not in problematic regions.
am.reportOnlineRegions(locationInMeta, Collections.singleton(metaRegionNameAsBytes));
problematicRegions = am.getProblematicRegions();
assertFalse(problematicRegions.containsKey(metaRegionName));
hbckChecker.choreForTesting();
inconsistentRegions = hbckChecker.getInconsistentRegions();
assertFalse(inconsistentRegions.containsKey(metaRegionName));
}
@Test
public void testForUserTable() throws Exception {
TableName tableName = TableName.valueOf("testForUserTable");
RegionInfo hri = createRegionInfo(tableName, 1);
String regionName = hri.getRegionNameAsString();
String regionName = hri.getEncodedName();
rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
Future<byte[]> future = submitProcedure(createAssignProcedure(hri));
waitOnFuture(future);
@ -85,11 +97,13 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
assertEquals(NSERVERS, serverNames.size());
// Test for case1: Master thought this region opened, but no regionserver reported it.
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = am.getProblematicRegions();
assertTrue(problematicRegions.containsKey(regionName));
Pair<ServerName, Set<ServerName>> pair = problematicRegions.get(regionName);
hbckChecker.choreForTesting();
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
hbckChecker.getInconsistentRegions();
assertTrue(inconsistentRegions.containsKey(regionName));
Pair<ServerName, List<ServerName>> pair = inconsistentRegions.get(regionName);
ServerName locationInMeta = pair.getFirst();
Set<ServerName> reportedRegionServers = pair.getSecond();
List<ServerName> reportedRegionServers = pair.getSecond();
assertTrue(serverNames.contains(locationInMeta));
assertEquals(0, reportedRegionServers.size());
@ -99,9 +113,10 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
final ServerName anotherServer =
serverNames.stream().filter(s -> !s.equals(tempLocationInMeta)).findFirst().get();
am.reportOnlineRegions(anotherServer, Collections.singleton(hri.getRegionName()));
problematicRegions = am.getProblematicRegions();
assertTrue(problematicRegions.containsKey(regionName));
pair = problematicRegions.get(regionName);
hbckChecker.choreForTesting();
inconsistentRegions = hbckChecker.getInconsistentRegions();
assertTrue(inconsistentRegions.containsKey(regionName));
pair = inconsistentRegions.get(regionName);
locationInMeta = pair.getFirst();
reportedRegionServers = pair.getSecond();
assertEquals(1, reportedRegionServers.size());
@ -110,9 +125,10 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
// Test for case3: More than one regionservers reported opened this region.
am.reportOnlineRegions(locationInMeta, Collections.singleton(hri.getRegionName()));
problematicRegions = am.getProblematicRegions();
assertTrue(problematicRegions.containsKey(regionName));
pair = problematicRegions.get(regionName);
hbckChecker.choreForTesting();
inconsistentRegions = hbckChecker.getInconsistentRegions();
assertTrue(inconsistentRegions.containsKey(regionName));
pair = inconsistentRegions.get(regionName);
locationInMeta = pair.getFirst();
reportedRegionServers = pair.getSecond();
assertEquals(2, reportedRegionServers.size());
@ -121,7 +137,8 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
// Reported right region location. Then not in problematic regions.
am.reportOnlineRegions(anotherServer, Collections.EMPTY_SET);
problematicRegions = am.getProblematicRegions();
assertFalse(problematicRegions.containsKey(regionName));
hbckChecker.choreForTesting();
inconsistentRegions = hbckChecker.getInconsistentRegions();
assertFalse(inconsistentRegions.containsKey(regionName));
}
}