HBASE-22709 Add a chore thread in master to do hbck checking (#404)

Signed-off-by: stack <stack@apache.org>
This commit is contained in:
Guanghao Zhang 2019-07-27 18:23:37 +08:00
parent f8b592e9f8
commit e54c4018e7
7 changed files with 497 additions and 144 deletions

View File

@ -42,84 +42,8 @@ int limit = 100;
<%java> <%java>
SortedSet<RegionState> rit = assignmentManager.getRegionStates() SortedSet<RegionState> rit = assignmentManager.getRegionStates()
.getRegionsInTransitionOrderedByTimestamp(); .getRegionsInTransitionOrderedByTimestamp();
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = assignmentManager
.getProblematicRegions();
</%java> </%java>
<%if !problematicRegions.isEmpty() %>
<%java>
int totalSize = problematicRegions.size();
int sizePerPage = Math.min(10, totalSize);
int numOfPages = (int) Math.ceil(totalSize * 1.0 / sizePerPage);
</%java>
<section>
<h2><a name="problem-regions">Problematic Regions</a></h2>
<p>
<span>
<% problematicRegions.size() %> problematic region(s). There are three case: 1. Master
thought this region opened, but no regionserver reported it. 2. Master thought this
region opened on Server1, but regionserver reported Server2. 3. More than one
regionservers reported opened this region. Notice: the reported online regionservers
may be not right when there are regions in transition. Please check them in
regionserver's web UI.
</span>
</p>
<div class="tabbable">
<div class="tab-content">
<%java int recordItr = 0; %>
<%for Map.Entry<String, Pair<ServerName, Set<ServerName>>> entry : problematicRegions.entrySet() %>
<%if (recordItr % sizePerPage) == 0 %>
<%if recordItr == 0 %>
<div class="tab-pane active" id="tab_prs<% (recordItr / sizePerPage) + 1 %>">
<%else>
<div class="tab-pane" id="tab_prs<% (recordItr / sizePerPage) + 1 %>">
</%if>
<table class="table table-striped" style="margin-bottom:0px;">
<tr>
<th>Region</th>
<th>Location in META</th>
<th>Reported Online Region Servers</th>
</tr>
</%if>
<tr>
<td><% entry.getKey() %></td>
<td><% entry.getValue().getFirst() %></td>
<td><% entry.getValue().getSecond().stream().map(ServerName::getServerName)
.collect(Collectors.joining(", ")) %></td>
</tr>
<%java recordItr++; %>
<%if (recordItr % sizePerPage) == 0 %>
</table>
</div>
</%if>
</%for>
<%if (recordItr % sizePerPage) != 0 %>
<%for ; (recordItr % sizePerPage) != 0 ; recordItr++ %>
<tr><td colspan="3" style="height:61px"></td></tr>
</%for>
</table>
</div>
</%if>
</div>
<nav>
<ul class="nav nav-pills pagination">
<%for int i = 1 ; i <= numOfPages; i++ %>
<%if i == 1 %>
<li class="active">
<%else>
<li>
</%if>
<a href="#tab_prs<% i %>"><% i %></a></li>
</%for>
</ul>
</nav>
</div>
</section>
</%if>
<%if !rit.isEmpty() %> <%if !rit.isEmpty() %>
<%java> <%java>
long currentTime = System.currentTimeMillis(); long currentTime = System.currentTimeMillis();

View File

@ -149,7 +149,8 @@ AssignmentManager assignmentManager = master.getAssignmentManager();
<li class="active"><a href="/master-status">Home</a></li> <li class="active"><a href="/master-status">Home</a></li>
<li><a href="/tablesDetailed.jsp">Table Details</a></li> <li><a href="/tablesDetailed.jsp">Table Details</a></li>
<%if master.isActiveMaster() %> <%if master.isActiveMaster() %>
<li><a href="/procedures.jsp">Procedures &amp; Locks</a></li> <li><a href="/procedures.jsp">Procedures &amp; Locks</a></li>
<li><a href="/hbck.jsp">HBCK Report</a></li>
</%if> </%if>
<li><a href="/processMaster.jsp">Process Metrics</a></li> <li><a href="/processMaster.jsp">Process Metrics</a></li>
<li><a href="/logs/">Local Logs</a></li> <li><a href="/logs/">Local Logs</a></li>

View File

@ -371,6 +371,7 @@ public class HMaster extends HRegionServer implements MasterServices {
private ClusterStatusChore clusterStatusChore; private ClusterStatusChore clusterStatusChore;
private ClusterStatusPublisher clusterStatusPublisherChore = null; private ClusterStatusPublisher clusterStatusPublisherChore = null;
private HbckChecker hbckChecker;
CatalogJanitor catalogJanitorChore; CatalogJanitor catalogJanitorChore;
private LogCleaner logCleaner; private LogCleaner logCleaner;
private HFileCleaner hfileCleaner; private HFileCleaner hfileCleaner;
@ -1030,6 +1031,8 @@ public class HMaster extends HRegionServer implements MasterServices {
getChoreService().scheduleChore(normalizerChore); getChoreService().scheduleChore(normalizerChore);
this.catalogJanitorChore = new CatalogJanitor(this); this.catalogJanitorChore = new CatalogJanitor(this);
getChoreService().scheduleChore(catalogJanitorChore); getChoreService().scheduleChore(catalogJanitorChore);
this.hbckChecker = new HbckChecker(this);
getChoreService().scheduleChore(hbckChecker);
// NAMESPACE READ!!!! // NAMESPACE READ!!!!
// Here we expect hbase:namespace to be online. See inside initClusterSchemaService. // Here we expect hbase:namespace to be online. See inside initClusterSchemaService.
@ -1495,6 +1498,7 @@ public class HMaster extends HRegionServer implements MasterServices {
choreService.cancelChore(this.logCleaner); choreService.cancelChore(this.logCleaner);
choreService.cancelChore(this.hfileCleaner); choreService.cancelChore(this.hfileCleaner);
choreService.cancelChore(this.replicationBarrierCleaner); choreService.cancelChore(this.replicationBarrierCleaner);
choreService.cancelChore(this.hbckChecker);
} }
} }
@ -3861,4 +3865,8 @@ public class HMaster extends HRegionServer implements MasterServices {
} }
return super.getWalGroupsReplicationStatus(); return super.getWalGroupsReplicationStatus();
} }
/**
 * Exposes the hbck checking chore held by this master, e.g. so the web UI can render its report.
 *
 * @return the {@link HbckChecker} instance stored in this master's {@code hbckChecker} field;
 *         presumably {@code null} until master initialization has created it (callers such as
 *         the HBCK report page null-check the result)
 */
public HbckChecker getHbckChecker() {
  return hbckChecker;
}
} }

View File

@ -0,0 +1,282 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HbckRegionInfo;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
/**
* Used to do the hbck checking job at master side.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class HbckChecker extends ScheduledChore {
private static final Logger LOG = LoggerFactory.getLogger(HbckChecker.class.getName());
private static final String HBCK_CHECKER_INTERVAL = "hbase.master.hbck.checker.interval";
private static final int DEFAULT_HBCK_CHECKER_INTERVAL = 60 * 60 * 1000;
private final MasterServices master;
/**
* This map contains the state of all hbck items. It maps from encoded region
* name to HbckRegionInfo structure. The information contained in HbckRegionInfo is used
* to detect and correct consistency (hdfs/meta/deployment) problems.
*/
private final Map<String, HbckRegionInfo> regionInfoMap = new HashMap<>();
/**
* The regions only opened on RegionServers, but no region info in meta.
*/
private final Map<String, ServerName> orphanRegionsOnRS = new HashMap<>();
/**
* The regions have directory on FileSystem, but no region info in meta.
*/
private final List<String> orphanRegionsOnFS = new LinkedList<>();
/**
* The inconsistent regions. There are three case:
* case 1. Master thought this region opened, but no regionserver reported it.
* case 2. Master thought this region opened on Server1, but regionserver reported Server2
* case 3. More than one regionservers reported opened this region
*/
private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
new HashMap<>();
/**
* The "snapshot" is used to save the last round's HBCK checking report.
*/
private final Map<String, ServerName> orphanRegionsOnRSSnapshot = new HashMap<>();
private final List<String> orphanRegionsOnFSSnapshot = new LinkedList<>();
private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegionsSnapshot =
new HashMap<>();
/**
* The "snapshot" may be changed after checking. And this checking report "snapshot" may be
* accessed by web ui. Use this rwLock to synchronize.
*/
ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
/**
* When running, the "snapshot" may be changed when this round's checking finish.
*/
private volatile boolean running = false;
private volatile long checkingStartTimestamp = 0;
private volatile long checkingEndTimestamp = 0;
public HbckChecker(MasterServices master) {
super("HbckChecker-", master,
master.getConfiguration().getInt(HBCK_CHECKER_INTERVAL, DEFAULT_HBCK_CHECKER_INTERVAL));
this.master = master;
}
@Override
protected void chore() {
running = true;
regionInfoMap.clear();
orphanRegionsOnRS.clear();
orphanRegionsOnFS.clear();
inconsistentRegions.clear();
checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
loadRegionsFromInMemoryState();
loadRegionsFromRSReport();
try {
loadRegionsFromFS();
} catch (IOException e) {
LOG.warn("Failed to load the regions from filesystem", e);
}
saveCheckResultToSnapshot();
running = false;
}
private void saveCheckResultToSnapshot() {
// Need synchronized here, as this "snapshot" may be access by web ui.
rwLock.writeLock().lock();
try {
orphanRegionsOnRSSnapshot.clear();
orphanRegionsOnRS.entrySet()
.forEach(e -> orphanRegionsOnRSSnapshot.put(e.getKey(), e.getValue()));
orphanRegionsOnFSSnapshot.clear();
orphanRegionsOnFSSnapshot.addAll(orphanRegionsOnFS);
inconsistentRegionsSnapshot.clear();
inconsistentRegions.entrySet()
.forEach(e -> inconsistentRegionsSnapshot.put(e.getKey(), e.getValue()));
checkingEndTimestamp = EnvironmentEdgeManager.currentTime();
} finally {
rwLock.writeLock().unlock();
}
}
private void loadRegionsFromInMemoryState() {
List<RegionState> regionStates =
master.getAssignmentManager().getRegionStates().getRegionStates();
for (RegionState regionState : regionStates) {
RegionInfo regionInfo = regionState.getRegion();
HbckRegionInfo.MetaEntry metaEntry =
new HbckRegionInfo.MetaEntry(regionInfo, regionState.getServerName(),
regionState.getStamp());
regionInfoMap.put(regionInfo.getEncodedName(), new HbckRegionInfo(metaEntry));
}
}
private void loadRegionsFromRSReport() {
Map<ServerName, Set<byte[]>> rsReports = master.getAssignmentManager().getRSReports();
for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
ServerName serverName = entry.getKey();
for (byte[] regionName : entry.getValue()) {
String encodedRegionName = RegionInfo.encodeRegionName(regionName);
HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
if (hri == null) {
orphanRegionsOnRS.put(encodedRegionName, serverName);
continue;
}
hri.addServer(hri.getMetaEntry(), serverName);
}
}
for (Map.Entry<String, HbckRegionInfo> entry : regionInfoMap.entrySet()) {
String encodedRegionName = entry.getKey();
HbckRegionInfo hri = entry.getValue();
ServerName locationInMeta = hri.getMetaEntry().getRegionServer();
if (hri.getDeployedOn().size() == 0) {
// Master thought this region opened, but no regionserver reported it.
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, new LinkedList<>()));
} else if (hri.getDeployedOn().size() > 1) {
// More than one regionserver reported opened this region
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn()));
} else if (!hri.getDeployedOn().get(0).equals(locationInMeta)) {
// Master thought this region opened on Server1, but regionserver reported Server2
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn()));
}
}
}
private void loadRegionsFromFS() throws IOException {
Path rootDir = master.getMasterFileSystem().getRootDir();
FileSystem fs = master.getMasterFileSystem().getFileSystem();
// list all tables from HDFS
List<FileStatus> tableDirs = Lists.newArrayList();
List<Path> paths = FSUtils.getTableDirs(fs, rootDir);
for (Path path : paths) {
tableDirs.add(fs.getFileStatus(path));
}
for (FileStatus tableDir : tableDirs) {
FileStatus[] regionDirs = fs.listStatus(tableDir.getPath());
for (FileStatus regionDir : regionDirs) {
String encodedRegionName = regionDir.getPath().getName();
HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
if (hri == null) {
orphanRegionsOnFS.add(encodedRegionName);
continue;
}
HbckRegionInfo.HdfsEntry hdfsEntry =
new HbckRegionInfo.HdfsEntry(regionDir.getPath(), regionDir.getModificationTime());
hri.setHdfsEntry(hdfsEntry);
}
}
}
/**
* When running, the HBCK report may be changed later.
*/
public boolean isRunning() {
return running;
}
/**
* @return the regions only opened on RegionServers, but no region info in meta.
*/
public Map<String, ServerName> getOrphanRegionsOnRS() {
// Need synchronized here, as this "snapshot" may be changed after checking.
rwLock.readLock().lock();
try {
return this.orphanRegionsOnRSSnapshot;
} finally {
rwLock.readLock().unlock();
}
}
/**
* @return the regions have directory on FileSystem, but no region info in meta.
*/
public List<String> getOrphanRegionsOnFS() {
// Need synchronized here, as this "snapshot" may be changed after checking.
rwLock.readLock().lock();
try {
return this.orphanRegionsOnFSSnapshot;
} finally {
rwLock.readLock().unlock();
}
}
/**
* Found the inconsistent regions. There are three case:
* case 1. Master thought this region opened, but no regionserver reported it.
* case 2. Master thought this region opened on Server1, but regionserver reported Server2
* case 3. More than one regionservers reported opened this region
*
* @return the map of inconsistent regions. Key is the region name. Value is a pair of location in
* meta and the regionservers which reported opened this region.
*/
public Map<String, Pair<ServerName, List<ServerName>>> getInconsistentRegions() {
// Need synchronized here, as this "snapshot" may be changed after checking.
rwLock.readLock().lock();
try {
return this.inconsistentRegionsSnapshot;
} finally {
rwLock.readLock().unlock();
}
}
/**
* Used for web ui to show when the HBCK checking started.
*/
public long getCheckingStartTimestamp() {
return this.checkingStartTimestamp;
}
/**
* Used for web ui to show when the HBCK checking report generated.
*/
public long getCheckingEndTimestamp() {
return this.checkingStartTimestamp;
}
}

View File

@ -1339,6 +1339,12 @@ public class AssignmentManager implements ServerListener {
public long submitServerCrash(final ServerName serverName, final boolean shouldSplitWal) { public long submitServerCrash(final ServerName serverName, final boolean shouldSplitWal) {
boolean carryingMeta = isCarryingMeta(serverName); boolean carryingMeta = isCarryingMeta(serverName);
// Remove the in-memory rsReports result
synchronized (rsReports) {
rsReports.remove(serverName);
}
ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor(); ProcedureExecutor<MasterProcedureEnv> procExec = this.master.getMasterProcedureExecutor();
long pid = procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(), long pid = procExec.submitProcedure(new ServerCrashProcedure(procExec.getEnvironment(),
serverName, shouldSplitWal, carryingMeta)); serverName, shouldSplitWal, carryingMeta));
@ -1892,51 +1898,13 @@ public class AssignmentManager implements ServerListener {
} }
/** /**
* Found the potentially problematic opened regions. There are three case: * @return a snapshot of rsReports
* case 1. Master thought this region opened, but no regionserver reported it.
* case 2. Master thought this region opened on Server1, but regionserver reported Server2
* case 3. More than one regionservers reported opened this region
*
* @return the map of potentially problematic opened regions. Key is the region name. Value is
* a pair of location in meta and the regionservers which reported opened this region.
*/ */
public Map<String, Pair<ServerName, Set<ServerName>>> getProblematicRegions() { public Map<ServerName, Set<byte[]>> getRSReports() {
Map<String, Set<ServerName>> reportedOnlineRegions = new HashMap<>(); Map<ServerName, Set<byte[]>> rsReportsSnapshot = new HashMap<>();
synchronized (rsReports) { synchronized (rsReports) {
for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) { rsReports.entrySet().forEach(e -> rsReportsSnapshot.put(e.getKey(), e.getValue()));
for (byte[] regionName : entry.getValue()) {
reportedOnlineRegions
.computeIfAbsent(RegionInfo.getRegionNameAsString(regionName), r -> new HashSet<>())
.add(entry.getKey());
}
}
} }
return rsReportsSnapshot;
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = new HashMap<>();
List<RegionState> rits = regionStates.getRegionsStateInTransition();
for (RegionState regionState : regionStates.getRegionStates()) {
// Only consider the opened region and not in transition
if (!rits.contains(regionState) && regionState.isOpened()) {
String regionName = regionState.getRegion().getRegionNameAsString();
ServerName serverName = regionState.getServerName();
if (reportedOnlineRegions.containsKey(regionName)) {
Set<ServerName> reportedServers = reportedOnlineRegions.get(regionName);
if (reportedServers.contains(serverName)) {
if (reportedServers.size() > 1) {
// More than one regionserver reported opened this region
problematicRegions.put(regionName, new Pair<>(serverName, reportedServers));
}
} else {
// Master thought this region opened on Server1, but regionserver reported Server2
problematicRegions.put(regionName, new Pair<>(serverName, reportedServers));
}
} else {
// Master thought this region opened, but no regionserver reported it.
problematicRegions.put(regionName, new Pair<>(serverName, new HashSet<>()));
}
}
}
return problematicRegions;
} }
} }

View File

@ -0,0 +1,153 @@
<%--
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
--%>
<%@ page contentType="text/html;charset=UTF-8"
import="java.util.Date"
import="java.util.List"
import="java.util.Map"
import="java.util.stream.Collectors"
%>
<%@ page import="org.apache.hadoop.hbase.master.HbckChecker" %>
<%@ page import="org.apache.hadoop.hbase.master.HMaster" %>
<%@ page import="org.apache.hadoop.hbase.ServerName" %>
<%@ page import="org.apache.hadoop.hbase.util.Pair" %>
<%
  // Look up the master this servlet context belongs to and pull the last HBCK report from its
  // checker chore. The checker may not have been created yet, so every report value has a
  // "nothing available" default (null collections, zero timestamps).
  HMaster master = (HMaster) getServletContext().getAttribute(HMaster.MASTER);
  pageContext.setAttribute("pageTitle", "HBase Master HBCK Report: " + master.getServerName());
  HbckChecker hbckChecker = master.getHbckChecker();
  Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions = null;
  Map<String, ServerName> orphanRegionsOnRS = null;
  List<String> orphanRegionsOnFS = null;
  long startTimestamp = 0;
  long endTimestamp = 0;
  if (hbckChecker != null) {
    inconsistentRegions = hbckChecker.getInconsistentRegions();
    orphanRegionsOnRS = hbckChecker.getOrphanRegionsOnRS();
    orphanRegionsOnFS = hbckChecker.getOrphanRegionsOnFS();
    startTimestamp = hbckChecker.getCheckingStartTimestamp();
    endTimestamp = hbckChecker.getCheckingEndTimestamp();
  }
%>
<jsp:include page="header.jsp">
  <jsp:param name="pageTitle" value="${pageTitle}"/>
</jsp:include>

<div class="container-fluid content">

  <% if (!master.isInitialized()) { %>
  <div class="row">
    <div class="page-header">
      <h1>Master is not initialized</h1>
    </div>
  </div>
  <jsp:include page="redirect.jsp" />
  <% } else { %>

  <div class="row">
    <div class="page-header">
      <h1>HBCK Report</h1>
      <p>
        <%-- A zero end timestamp means no report exists; avoid printing the 1970 epoch date. --%>
        <% if (endTimestamp == 0) { %>
        <span>No HBCK report has been generated yet.</span>
        <% } else { %>
        <span>Checking started at <%= new Date(startTimestamp) %> and generated report at <%= new Date(endTimestamp) %></span>
        <% } %>
      </p>
    </div>
  </div>

  <div class="row">
    <div class="page-header">
      <h2>Inconsistent Regions</h2>
      <p>
        <span>
        There are three case: 1. Master thought this region opened, but no regionserver reported it.
        2. Master thought this region opened on Server1, but regionserver reported Server2.
        3. More than one regionservers reported opened this region.
        Notice: the reported online regionservers may be not right when there are regions in transition.
        Please check them in regionserver's web UI.
        </span>
      </p>
    </div>
  </div>

  <% if (inconsistentRegions != null && inconsistentRegions.size() > 0) { %>
  <table class="table table-striped">
    <tr>
      <th>Region</th>
      <th>Location in META</th>
      <th>Reported Online RegionServers</th>
    </tr>
    <% for (Map.Entry<String, Pair<ServerName, List<ServerName>>> entry : inconsistentRegions.entrySet()) { %>
    <tr>
      <td><%= entry.getKey() %></td>
      <td><%= entry.getValue().getFirst() %></td>
      <td><%= entry.getValue().getSecond().stream().map(ServerName::getServerName)
        .collect(Collectors.joining(", ")) %></td>
    </tr>
    <% } %>
  </table>
  <%-- The summary goes after the table: a <p> is not valid content inside <table>. --%>
  <p><%= inconsistentRegions.size() %> region(s) in set.</p>
  <% } %>

  <div class="row">
    <div class="page-header">
      <h2>Orphan Regions on RegionServer</h2>
    </div>
  </div>

  <% if (orphanRegionsOnRS != null && orphanRegionsOnRS.size() > 0) { %>
  <table class="table table-striped">
    <tr>
      <th>Region</th>
      <th>Reported Online RegionServer</th>
    </tr>
    <% for (Map.Entry<String, ServerName> entry : orphanRegionsOnRS.entrySet()) { %>
    <tr>
      <td><%= entry.getKey() %></td>
      <td><%= entry.getValue() %></td>
    </tr>
    <% } %>
  </table>
  <p><%= orphanRegionsOnRS.size() %> region(s) in set.</p>
  <% } %>

  <div class="row">
    <div class="page-header">
      <h2>Orphan Regions on FileSystem</h2>
    </div>
  </div>

  <% if (orphanRegionsOnFS != null && orphanRegionsOnFS.size() > 0) { %>
  <table class="table table-striped">
    <tr>
      <th>Region</th>
    </tr>
    <% for (String region : orphanRegionsOnFS) { %>
    <tr>
      <td><%= region %></td>
    </tr>
    <% } %>
  </table>
  <p><%= orphanRegionsOnFS.size() %> region(s) in set.</p>
  <% } %>

  <% } %>
</div>

<jsp:include page="footer.jsp"/>

View File

@ -24,7 +24,6 @@ import static org.junit.Assert.assertTrue;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
import java.util.concurrent.Future; import java.util.concurrent.Future;
import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseClassTestRule;
@ -32,9 +31,11 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder; import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.master.HbckChecker;
import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.hbase.util.Pair;
import org.junit.Before;
import org.junit.ClassRule; import org.junit.ClassRule;
import org.junit.Test; import org.junit.Test;
import org.junit.experimental.categories.Category; import org.junit.experimental.categories.Category;
@ -42,41 +43,52 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@Category({ MasterTests.class, MediumTests.class }) @Category({ MasterTests.class, MediumTests.class })
public class TestAMProblematicRegions extends TestAssignmentManagerBase { public class TestHbckChecker extends TestAssignmentManagerBase {
private static final Logger LOG = LoggerFactory.getLogger(TestAMProblematicRegions.class); private static final Logger LOG = LoggerFactory.getLogger(TestHbckChecker.class);
@ClassRule @ClassRule
public static final HBaseClassTestRule CLASS_RULE = public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestAMProblematicRegions.class); HBaseClassTestRule.forClass(TestHbckChecker.class);
private HbckChecker hbckChecker;
@Before
public void setUp() throws Exception {
super.setUp();
hbckChecker = new HbckChecker(master);
}
@Test @Test
public void testForMeta() throws Exception { public void testForMeta() throws Exception {
byte[] metaRegionNameAsBytes = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName(); byte[] metaRegionNameAsBytes = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName();
String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionNameAsString(); String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedName();
List<ServerName> serverNames = master.getServerManager().getOnlineServersList(); List<ServerName> serverNames = master.getServerManager().getOnlineServersList();
assertEquals(NSERVERS, serverNames.size()); assertEquals(NSERVERS, serverNames.size());
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = am.getProblematicRegions(); hbckChecker.choreForTesting();
Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
hbckChecker.getInconsistentRegions();
// Test for case1: Master thought this region opened, but no regionserver reported it. // Test for case1: Master thought this region opened, but no regionserver reported it.
assertTrue(problematicRegions.containsKey(metaRegionName)); assertTrue(inconsistentRegions.containsKey(metaRegionName));
Pair<ServerName, Set<ServerName>> pair = problematicRegions.get(metaRegionName); Pair<ServerName, List<ServerName>> pair = inconsistentRegions.get(metaRegionName);
ServerName locationInMeta = pair.getFirst(); ServerName locationInMeta = pair.getFirst();
Set<ServerName> reportedRegionServers = pair.getSecond(); List<ServerName> reportedRegionServers = pair.getSecond();
assertTrue(serverNames.contains(locationInMeta)); assertTrue(serverNames.contains(locationInMeta));
assertEquals(0, reportedRegionServers.size()); assertEquals(0, reportedRegionServers.size());
// Reported right region location. Then not in problematic regions. // Reported right region location. Then not in problematic regions.
am.reportOnlineRegions(locationInMeta, Collections.singleton(metaRegionNameAsBytes)); am.reportOnlineRegions(locationInMeta, Collections.singleton(metaRegionNameAsBytes));
problematicRegions = am.getProblematicRegions(); hbckChecker.choreForTesting();
assertFalse(problematicRegions.containsKey(metaRegionName)); inconsistentRegions = hbckChecker.getInconsistentRegions();
assertFalse(inconsistentRegions.containsKey(metaRegionName));
} }
@Test @Test
public void testForUserTable() throws Exception { public void testForUserTable() throws Exception {
TableName tableName = TableName.valueOf("testForUserTable"); TableName tableName = TableName.valueOf("testForUserTable");
RegionInfo hri = createRegionInfo(tableName, 1); RegionInfo hri = createRegionInfo(tableName, 1);
String regionName = hri.getRegionNameAsString(); String regionName = hri.getEncodedName();
rsDispatcher.setMockRsExecutor(new GoodRsExecutor()); rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
Future<byte[]> future = submitProcedure(am.createAssignProcedure(hri)); Future<byte[]> future = submitProcedure(am.createAssignProcedure(hri));
waitOnFuture(future); waitOnFuture(future);
@ -85,11 +97,13 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
assertEquals(NSERVERS, serverNames.size()); assertEquals(NSERVERS, serverNames.size());
// Test for case1: Master thought this region opened, but no regionserver reported it. // Test for case1: Master thought this region opened, but no regionserver reported it.
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = am.getProblematicRegions(); hbckChecker.choreForTesting();
assertTrue(problematicRegions.containsKey(regionName)); Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
Pair<ServerName, Set<ServerName>> pair = problematicRegions.get(regionName); hbckChecker.getInconsistentRegions();
assertTrue(inconsistentRegions.containsKey(regionName));
Pair<ServerName, List<ServerName>> pair = inconsistentRegions.get(regionName);
ServerName locationInMeta = pair.getFirst(); ServerName locationInMeta = pair.getFirst();
Set<ServerName> reportedRegionServers = pair.getSecond(); List<ServerName> reportedRegionServers = pair.getSecond();
assertTrue(serverNames.contains(locationInMeta)); assertTrue(serverNames.contains(locationInMeta));
assertEquals(0, reportedRegionServers.size()); assertEquals(0, reportedRegionServers.size());
@ -99,9 +113,10 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
final ServerName anotherServer = final ServerName anotherServer =
serverNames.stream().filter(s -> !s.equals(tempLocationInMeta)).findFirst().get(); serverNames.stream().filter(s -> !s.equals(tempLocationInMeta)).findFirst().get();
am.reportOnlineRegions(anotherServer, Collections.singleton(hri.getRegionName())); am.reportOnlineRegions(anotherServer, Collections.singleton(hri.getRegionName()));
problematicRegions = am.getProblematicRegions(); hbckChecker.choreForTesting();
assertTrue(problematicRegions.containsKey(regionName)); inconsistentRegions = hbckChecker.getInconsistentRegions();
pair = problematicRegions.get(regionName); assertTrue(inconsistentRegions.containsKey(regionName));
pair = inconsistentRegions.get(regionName);
locationInMeta = pair.getFirst(); locationInMeta = pair.getFirst();
reportedRegionServers = pair.getSecond(); reportedRegionServers = pair.getSecond();
assertEquals(1, reportedRegionServers.size()); assertEquals(1, reportedRegionServers.size());
@ -110,9 +125,10 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
// Test for case3: More than one regionservers reported opened this region. // Test for case3: More than one regionservers reported opened this region.
am.reportOnlineRegions(locationInMeta, Collections.singleton(hri.getRegionName())); am.reportOnlineRegions(locationInMeta, Collections.singleton(hri.getRegionName()));
problematicRegions = am.getProblematicRegions(); hbckChecker.choreForTesting();
assertTrue(problematicRegions.containsKey(regionName)); inconsistentRegions = hbckChecker.getInconsistentRegions();
pair = problematicRegions.get(regionName); assertTrue(inconsistentRegions.containsKey(regionName));
pair = inconsistentRegions.get(regionName);
locationInMeta = pair.getFirst(); locationInMeta = pair.getFirst();
reportedRegionServers = pair.getSecond(); reportedRegionServers = pair.getSecond();
assertEquals(2, reportedRegionServers.size()); assertEquals(2, reportedRegionServers.size());
@ -121,7 +137,8 @@ public class TestAMProblematicRegions extends TestAssignmentManagerBase {
// Reported right region location. Then not in problematic regions. // Reported right region location. Then not in problematic regions.
am.reportOnlineRegions(anotherServer, Collections.EMPTY_SET); am.reportOnlineRegions(anotherServer, Collections.EMPTY_SET);
problematicRegions = am.getProblematicRegions(); hbckChecker.choreForTesting();
assertFalse(problematicRegions.containsKey(regionName)); inconsistentRegions = hbckChecker.getInconsistentRegions();
assertFalse(inconsistentRegions.containsKey(regionName));
} }
} }