HBASE-22527 [hbck2] Add a master web ui to show the problematic regions
This commit is contained in:
parent
2b7e33fe52
commit
f0cbdf8beb
|
@ -17,27 +17,105 @@ See the License for the specific language governing permissions and
|
||||||
limitations under the License.
|
limitations under the License.
|
||||||
</%doc>
|
</%doc>
|
||||||
<%import>
|
<%import>
|
||||||
org.apache.hadoop.hbase.master.assignment.AssignmentManager;
|
java.util.Map;
|
||||||
org.apache.hadoop.hbase.master.assignment.AssignmentManager.RegionInTransitionStat;
|
java.util.Set;
|
||||||
org.apache.hadoop.hbase.master.assignment.RegionStates.RegionFailedOpen;
|
java.util.SortedSet;
|
||||||
org.apache.hadoop.hbase.master.RegionState;
|
java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
java.util.stream.Collectors;
|
||||||
org.apache.hadoop.conf.Configuration;
|
org.apache.hadoop.conf.Configuration;
|
||||||
org.apache.hadoop.hbase.HBaseConfiguration;
|
org.apache.hadoop.hbase.HBaseConfiguration;
|
||||||
org.apache.hadoop.hbase.HConstants;
|
org.apache.hadoop.hbase.HConstants;
|
||||||
|
org.apache.hadoop.hbase.ServerName;
|
||||||
|
org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
org.apache.hadoop.hbase.client.RegionInfoDisplay;
|
org.apache.hadoop.hbase.client.RegionInfoDisplay;
|
||||||
java.util.HashSet;
|
org.apache.hadoop.hbase.master.RegionState;
|
||||||
java.util.SortedSet;
|
org.apache.hadoop.hbase.master.assignment.AssignmentManager;
|
||||||
java.util.Map;
|
org.apache.hadoop.hbase.master.assignment.AssignmentManager.RegionInTransitionStat;
|
||||||
java.util.concurrent.atomic.AtomicInteger;
|
org.apache.hadoop.hbase.master.assignment.RegionStates.RegionFailedOpen;
|
||||||
|
org.apache.hadoop.hbase.util.Pair;
|
||||||
</%import>
|
</%import>
|
||||||
<%args>
|
<%args>
|
||||||
AssignmentManager assignmentManager;
|
AssignmentManager assignmentManager;
|
||||||
int limit = 100;
|
int limit = 100;
|
||||||
</%args>
|
</%args>
|
||||||
|
|
||||||
<%java SortedSet<RegionState> rit = assignmentManager
|
<%java>
|
||||||
.getRegionStates().getRegionsInTransitionOrderedByTimestamp();
|
SortedSet<RegionState> rit = assignmentManager.getRegionStates()
|
||||||
%>
|
.getRegionsInTransitionOrderedByTimestamp();
|
||||||
|
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = assignmentManager
|
||||||
|
.getProblematicRegions();
|
||||||
|
</%java>
|
||||||
|
|
||||||
|
<%if !problematicRegions.isEmpty() %>
|
||||||
|
<%java>
|
||||||
|
// Total number of problematic regions to display.
int totalSize = problematicRegions.size();
|
||||||
|
// Rows per page, capped at 10. totalSize is at least 1 here because this block sits inside
// the enclosing "!problematicRegions.isEmpty()" check, so sizePerPage is never 0.
int sizePerPage = Math.min(10, totalSize);
|
||||||
|
// Number of pages, rounded up so that a partially filled last page is counted.
int numOfPages = (int) Math.ceil(totalSize * 1.0 / sizePerPage);
|
||||||
|
</%java>
|
||||||
|
<section>
|
||||||
|
<h2><a name="problem-regions">Problematic Regions</a></h2>
|
||||||
|
<p>
|
||||||
|
<span>
|
||||||
|
<% problematicRegions.size() %> problematic region(s). Notice: the reported online
|
||||||
|
regionservers may be not right when there are regions in transition. Please check them
|
||||||
|
in regionserver's web UI.
|
||||||
|
</span>
|
||||||
|
</p>
|
||||||
|
<div class="tabbable">
|
||||||
|
<div class="tab-content">
|
||||||
|
<%java int recordItr = 0; %>
|
||||||
|
<%for Map.Entry<String, Pair<ServerName, Set<ServerName>>> entry : problematicRegions.entrySet() %>
|
||||||
|
<%if (recordItr % sizePerPage) == 0 %>
|
||||||
|
<%if recordItr == 0 %>
|
||||||
|
<div class="tab-pane active" id="tab_prs<% (recordItr / sizePerPage) + 1 %>">
|
||||||
|
<%else>
|
||||||
|
<div class="tab-pane" id="tab_prs<% (recordItr / sizePerPage) + 1 %>">
|
||||||
|
</%if>
|
||||||
|
<table class="table table-striped" style="margin-bottom:0px;">
|
||||||
|
<tr>
|
||||||
|
<th>Region</th>
|
||||||
|
<th>Location in META</th>
|
||||||
|
<th>Reported Online Region Servers</th>
|
||||||
|
</tr>
|
||||||
|
</%if>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td><% entry.getKey() %></td>
|
||||||
|
<td><% entry.getValue().getFirst() %></td>
|
||||||
|
<td><% entry.getValue().getSecond().stream().map(ServerName::getServerName)
|
||||||
|
.collect(Collectors.joining(", ")) %></td>
|
||||||
|
</tr>
|
||||||
|
<%java recordItr++; %>
|
||||||
|
<%if (recordItr % sizePerPage) == 0 %>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</%if>
|
||||||
|
</%for>
|
||||||
|
|
||||||
|
<%if (recordItr % sizePerPage) != 0 %>
|
||||||
|
<%for ; (recordItr % sizePerPage) != 0 ; recordItr++ %>
|
||||||
|
<tr><td colspan="3" style="height:61px"></td></tr>
|
||||||
|
</%for>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</%if>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<nav>
|
||||||
|
<ul class="nav nav-pills pagination">
|
||||||
|
<%for int i = 1 ; i <= numOfPages; i++ %>
|
||||||
|
<%if i == 1 %>
|
||||||
|
<li class="active">
|
||||||
|
<%else>
|
||||||
|
<li>
|
||||||
|
</%if>
|
||||||
|
<a href="#tab_prs<% i %>"><% i %></a></li>
|
||||||
|
</%for>
|
||||||
|
</ul>
|
||||||
|
</nav>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</%if>
|
||||||
|
|
||||||
<%if !rit.isEmpty() %>
|
<%if !rit.isEmpty() %>
|
||||||
<%java>
|
<%java>
|
||||||
|
|
|
@ -158,6 +158,8 @@ public class AssignmentManager {
|
||||||
private final RegionStates regionStates = new RegionStates();
|
private final RegionStates regionStates = new RegionStates();
|
||||||
private final RegionStateStore regionStateStore;
|
private final RegionStateStore regionStateStore;
|
||||||
|
|
||||||
|
private final Map<ServerName, Set<byte[]>> rsReports = new HashMap<>();
|
||||||
|
|
||||||
private final boolean shouldAssignRegionsWithFavoredNodes;
|
private final boolean shouldAssignRegionsWithFavoredNodes;
|
||||||
private final int assignDispatchWaitQueueMaxSize;
|
private final int assignDispatchWaitQueueMaxSize;
|
||||||
private final int assignDispatchWaitMillis;
|
private final int assignDispatchWaitMillis;
|
||||||
|
@ -1065,13 +1067,18 @@ public class AssignmentManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
ServerStateNode serverNode = regionStates.getOrCreateServer(serverName);
|
ServerStateNode serverNode = regionStates.getOrCreateServer(serverName);
|
||||||
|
|
||||||
synchronized (serverNode) {
|
synchronized (serverNode) {
|
||||||
if (!serverNode.isInState(ServerState.ONLINE)) {
|
if (!serverNode.isInState(ServerState.ONLINE)) {
|
||||||
LOG.warn("Got a report from a server result in state " + serverNode.getState());
|
LOG.warn("Got a report from a server result in state " + serverNode.getState());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Track the regionserver reported online regions in memory.
|
||||||
|
synchronized (rsReports) {
|
||||||
|
rsReports.put(serverName, regionNames);
|
||||||
|
}
|
||||||
|
|
||||||
if (regionNames.isEmpty()) {
|
if (regionNames.isEmpty()) {
|
||||||
// nothing to do if we don't have regions
|
// nothing to do if we don't have regions
|
||||||
LOG.trace("no online region found on {}", serverName);
|
LOG.trace("no online region found on {}", serverName);
|
||||||
|
@ -2028,4 +2035,53 @@ public class AssignmentManager {
|
||||||
MasterServices getMaster() {
|
MasterServices getMaster() {
|
||||||
return master;
|
return master;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Found the potentially problematic opened regions. There are three case:
|
||||||
|
* case 1. Master thought this region opened, but no regionserver reported it.
|
||||||
|
* case 2. Master thought this region opened on Server1, but regionserver reported Server2
|
||||||
|
* case 3. More than one regionservers reported opened this region
|
||||||
|
*
|
||||||
|
* @return the map of potentially problematic opened regions. Key is the region name. Value is
|
||||||
|
* a pair of location in meta and the regionservers which reported opened this region.
|
||||||
|
*/
|
||||||
|
public Map<String, Pair<ServerName, Set<ServerName>>> getProblematicRegions() {
|
||||||
|
Map<String, Set<ServerName>> reportedOnlineRegions = new HashMap<>();
|
||||||
|
synchronized (rsReports) {
|
||||||
|
for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
|
||||||
|
for (byte[] regionName : entry.getValue()) {
|
||||||
|
reportedOnlineRegions
|
||||||
|
.computeIfAbsent(RegionInfo.getRegionNameAsString(regionName), r -> new HashSet<>())
|
||||||
|
.add(entry.getKey());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = new HashMap<>();
|
||||||
|
List<RegionState> rits = regionStates.getRegionsStateInTransition();
|
||||||
|
for (RegionState regionState : regionStates.getRegionStates()) {
|
||||||
|
// Only consider the opened region and not in transition
|
||||||
|
if (!rits.contains(regionState) && regionState.isOpened()) {
|
||||||
|
String regionName = regionState.getRegion().getRegionNameAsString();
|
||||||
|
ServerName serverName = regionState.getServerName();
|
||||||
|
if (reportedOnlineRegions.containsKey(regionName)) {
|
||||||
|
Set<ServerName> reportedServers = reportedOnlineRegions.get(regionName);
|
||||||
|
if (reportedServers.contains(serverName)) {
|
||||||
|
if (reportedServers.size() > 1) {
|
||||||
|
// More than one regionserver reported opened this region
|
||||||
|
problematicRegions.put(regionName, new Pair<>(serverName, reportedServers));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Master thought this region opened on Server1, but regionserver reported Server2
|
||||||
|
problematicRegions.put(regionName, new Pair<>(serverName, reportedServers));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Master thought this region opened, but no regionserver reported it.
|
||||||
|
problematicRegions.put(regionName, new Pair<>(serverName, new HashSet<>()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return problematicRegions;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,127 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.master.assignment;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
|
import org.apache.hadoop.hbase.TableName;
|
||||||
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||||
|
import org.apache.hadoop.hbase.util.Pair;
|
||||||
|
import org.junit.ClassRule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@Category({ MasterTests.class, MediumTests.class })
|
||||||
|
public class TestAMProblematicRegions extends TestAssignmentManagerBase {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(TestAMProblematicRegions.class);
|
||||||
|
|
||||||
|
@ClassRule
|
||||||
|
public static final HBaseClassTestRule CLASS_RULE =
|
||||||
|
HBaseClassTestRule.forClass(TestAMProblematicRegions.class);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testForMeta() {
|
||||||
|
byte[] metaRegionNameAsBytes = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName();
|
||||||
|
String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionNameAsString();
|
||||||
|
List<ServerName> serverNames = master.getServerManager().getOnlineServersList();
|
||||||
|
assertEquals(NSERVERS, serverNames.size());
|
||||||
|
|
||||||
|
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = am.getProblematicRegions();
|
||||||
|
|
||||||
|
// Test for case1: Master thought this region opened, but no regionserver reported it.
|
||||||
|
assertTrue(problematicRegions.containsKey(metaRegionName));
|
||||||
|
Pair<ServerName, Set<ServerName>> pair = problematicRegions.get(metaRegionName);
|
||||||
|
ServerName locationInMeta = pair.getFirst();
|
||||||
|
Set<ServerName> reportedRegionServers = pair.getSecond();
|
||||||
|
assertTrue(serverNames.contains(locationInMeta));
|
||||||
|
assertEquals(0, reportedRegionServers.size());
|
||||||
|
|
||||||
|
// Reported right region location. Then not in problematic regions.
|
||||||
|
am.reportOnlineRegions(locationInMeta, Collections.singleton(metaRegionNameAsBytes));
|
||||||
|
problematicRegions = am.getProblematicRegions();
|
||||||
|
assertFalse(problematicRegions.containsKey(metaRegionName));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testForUserTable() throws Exception {
|
||||||
|
TableName tableName = TableName.valueOf("testForUserTable");
|
||||||
|
RegionInfo hri = createRegionInfo(tableName, 1);
|
||||||
|
String regionName = hri.getRegionNameAsString();
|
||||||
|
rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
|
||||||
|
Future<byte[]> future = submitProcedure(createAssignProcedure(hri));
|
||||||
|
waitOnFuture(future);
|
||||||
|
|
||||||
|
List<ServerName> serverNames = master.getServerManager().getOnlineServersList();
|
||||||
|
assertEquals(NSERVERS, serverNames.size());
|
||||||
|
|
||||||
|
// Test for case1: Master thought this region opened, but no regionserver reported it.
|
||||||
|
Map<String, Pair<ServerName, Set<ServerName>>> problematicRegions = am.getProblematicRegions();
|
||||||
|
assertTrue(problematicRegions.containsKey(regionName));
|
||||||
|
Pair<ServerName, Set<ServerName>> pair = problematicRegions.get(regionName);
|
||||||
|
ServerName locationInMeta = pair.getFirst();
|
||||||
|
Set<ServerName> reportedRegionServers = pair.getSecond();
|
||||||
|
assertTrue(serverNames.contains(locationInMeta));
|
||||||
|
assertEquals(0, reportedRegionServers.size());
|
||||||
|
|
||||||
|
// Test for case2: Master thought this region opened on Server1, but regionserver reported
|
||||||
|
// Server2
|
||||||
|
final ServerName tempLocationInMeta = locationInMeta;
|
||||||
|
final ServerName anotherServer =
|
||||||
|
serverNames.stream().filter(s -> !s.equals(tempLocationInMeta)).findFirst().get();
|
||||||
|
am.reportOnlineRegions(anotherServer, Collections.singleton(hri.getRegionName()));
|
||||||
|
problematicRegions = am.getProblematicRegions();
|
||||||
|
assertTrue(problematicRegions.containsKey(regionName));
|
||||||
|
pair = problematicRegions.get(regionName);
|
||||||
|
locationInMeta = pair.getFirst();
|
||||||
|
reportedRegionServers = pair.getSecond();
|
||||||
|
assertEquals(1, reportedRegionServers.size());
|
||||||
|
assertFalse(reportedRegionServers.contains(locationInMeta));
|
||||||
|
assertTrue(reportedRegionServers.contains(anotherServer));
|
||||||
|
|
||||||
|
// Test for case3: More than one regionservers reported opened this region.
|
||||||
|
am.reportOnlineRegions(locationInMeta, Collections.singleton(hri.getRegionName()));
|
||||||
|
problematicRegions = am.getProblematicRegions();
|
||||||
|
assertTrue(problematicRegions.containsKey(regionName));
|
||||||
|
pair = problematicRegions.get(regionName);
|
||||||
|
locationInMeta = pair.getFirst();
|
||||||
|
reportedRegionServers = pair.getSecond();
|
||||||
|
assertEquals(2, reportedRegionServers.size());
|
||||||
|
assertTrue(reportedRegionServers.contains(locationInMeta));
|
||||||
|
assertTrue(reportedRegionServers.contains(anotherServer));
|
||||||
|
|
||||||
|
// Reported right region location. Then not in problematic regions.
|
||||||
|
am.reportOnlineRegions(anotherServer, Collections.EMPTY_SET);
|
||||||
|
problematicRegions = am.getProblematicRegions();
|
||||||
|
assertFalse(problematicRegions.containsKey(regionName));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue