HBASE-27104 Add a tool command list_unknownservers (#4523)
Signed-off-by: Duo Zhang <zhangduo@apache.org>
This commit is contained in:
parent
32c135d8a5
commit
61cd63c7b6
|
@ -76,6 +76,9 @@ public interface ClusterMetrics {
|
|||
/** Returns the names of region servers on the dead list */
|
||||
List<ServerName> getDeadServerNames();
|
||||
|
||||
/** Returns the names of region servers on the unknown list */
|
||||
List<ServerName> getUnknownServerNames();
|
||||
|
||||
/** Returns the names of region servers on the decommissioned list */
|
||||
List<ServerName> getDecommissionedServerNames();
|
||||
|
||||
|
@ -179,6 +182,10 @@ public interface ClusterMetrics {
|
|||
* metrics about dead region servers
|
||||
*/
|
||||
DEAD_SERVERS,
|
||||
/**
|
||||
* metrics about unknown region servers
|
||||
*/
|
||||
UNKNOWN_SERVERS,
|
||||
/**
|
||||
* metrics about master name
|
||||
*/
|
||||
|
|
|
@ -47,6 +47,8 @@ public final class ClusterMetricsBuilder {
|
|||
.collect(Collectors.toList()))
|
||||
.addAllDeadServers(metrics.getDeadServerNames().stream().map(ProtobufUtil::toServerName)
|
||||
.collect(Collectors.toList()))
|
||||
.addAllUnknownServers(metrics.getUnknownServerNames().stream()
|
||||
.map(ProtobufUtil::toServerName).collect(Collectors.toList()))
|
||||
.addAllLiveServers(metrics.getLiveServerMetrics().entrySet().stream()
|
||||
.map(s -> ClusterStatusProtos.LiveServerInfo.newBuilder()
|
||||
.setServer(ProtobufUtil.toServerName(s.getKey()))
|
||||
|
@ -89,6 +91,7 @@ public final class ClusterMetricsBuilder {
|
|||
builder.setHbaseVersion(
|
||||
FSProtos.HBaseVersionFileContent.newBuilder().setVersion(metrics.getHBaseVersion()));
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
|
@ -100,6 +103,8 @@ public final class ClusterMetricsBuilder {
|
|||
ServerMetricsBuilder::toServerMetrics)))
|
||||
.setDeadServerNames(proto.getDeadServersList().stream().map(ProtobufUtil::toServerName)
|
||||
.collect(Collectors.toList()))
|
||||
.setUnknownServerNames(proto.getUnknownServersList().stream().map(ProtobufUtil::toServerName)
|
||||
.collect(Collectors.toList()))
|
||||
.setBackerMasterNames(proto.getBackupMastersList().stream().map(ProtobufUtil::toServerName)
|
||||
.collect(Collectors.toList()))
|
||||
.setRegionsInTransition(proto.getRegionsInTransitionList().stream()
|
||||
|
@ -151,6 +156,8 @@ public final class ClusterMetricsBuilder {
|
|||
return ClusterMetrics.Option.LIVE_SERVERS;
|
||||
case DEAD_SERVERS:
|
||||
return ClusterMetrics.Option.DEAD_SERVERS;
|
||||
case UNKNOWN_SERVERS:
|
||||
return ClusterMetrics.Option.UNKNOWN_SERVERS;
|
||||
case REGIONS_IN_TRANSITION:
|
||||
return ClusterMetrics.Option.REGIONS_IN_TRANSITION;
|
||||
case CLUSTER_ID:
|
||||
|
@ -192,6 +199,8 @@ public final class ClusterMetricsBuilder {
|
|||
return ClusterStatusProtos.Option.LIVE_SERVERS;
|
||||
case DEAD_SERVERS:
|
||||
return ClusterStatusProtos.Option.DEAD_SERVERS;
|
||||
case UNKNOWN_SERVERS:
|
||||
return ClusterStatusProtos.Option.UNKNOWN_SERVERS;
|
||||
case REGIONS_IN_TRANSITION:
|
||||
return ClusterStatusProtos.Option.REGIONS_IN_TRANSITION;
|
||||
case CLUSTER_ID:
|
||||
|
@ -246,6 +255,7 @@ public final class ClusterMetricsBuilder {
|
|||
@Nullable
|
||||
private String hbaseVersion;
|
||||
private List<ServerName> deadServerNames = Collections.emptyList();
|
||||
private List<ServerName> unknownServerNames = Collections.emptyList();
|
||||
private Map<ServerName, ServerMetrics> liveServerMetrics = new TreeMap<>();
|
||||
@Nullable
|
||||
private ServerName masterName;
|
||||
|
@ -276,6 +286,11 @@ public final class ClusterMetricsBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
 * Sets the names of the unknown region servers that {@code build()} passes on to the resulting
 * metrics object. The given list reference is stored as-is; no copy is made here.
 * @param value list of unknown region server names
 * @return this builder, to allow call chaining
 */
public ClusterMetricsBuilder setUnknownServerNames(List<ServerName> value) {
|
||||
this.unknownServerNames = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ClusterMetricsBuilder setLiveServerMetrics(Map<ServerName, ServerMetrics> value) {
|
||||
liveServerMetrics.putAll(value);
|
||||
return this;
|
||||
|
@ -338,15 +353,17 @@ public final class ClusterMetricsBuilder {
|
|||
}
|
||||
|
||||
public ClusterMetrics build() {
|
||||
return new ClusterMetricsImpl(hbaseVersion, deadServerNames, liveServerMetrics, masterName,
|
||||
backupMasterNames, regionsInTransition, clusterId, masterCoprocessorNames, balancerOn,
|
||||
masterInfoPort, serversName, tableRegionStatesCount, masterTasks, decommissionedServerNames);
|
||||
return new ClusterMetricsImpl(hbaseVersion, deadServerNames, unknownServerNames,
|
||||
liveServerMetrics, masterName, backupMasterNames, regionsInTransition, clusterId,
|
||||
masterCoprocessorNames, balancerOn, masterInfoPort, serversName, tableRegionStatesCount,
|
||||
masterTasks, decommissionedServerNames);
|
||||
}
|
||||
|
||||
private static class ClusterMetricsImpl implements ClusterMetrics {
|
||||
@Nullable
|
||||
private final String hbaseVersion;
|
||||
private final List<ServerName> deadServerNames;
|
||||
private final List<ServerName> unknownServerNames;
|
||||
private final List<ServerName> decommissionedServerNames;
|
||||
private final Map<ServerName, ServerMetrics> liveServerMetrics;
|
||||
@Nullable
|
||||
|
@ -364,13 +381,15 @@ public final class ClusterMetricsBuilder {
|
|||
private final List<ServerTask> masterTasks;
|
||||
|
||||
ClusterMetricsImpl(String hbaseVersion, List<ServerName> deadServerNames,
|
||||
Map<ServerName, ServerMetrics> liveServerMetrics, ServerName masterName,
|
||||
List<ServerName> backupMasterNames, List<RegionState> regionsInTransition, String clusterId,
|
||||
List<String> masterCoprocessorNames, Boolean balancerOn, int masterInfoPort,
|
||||
List<ServerName> serversName, Map<TableName, RegionStatesCount> tableRegionStatesCount,
|
||||
List<ServerTask> masterTasks, List<ServerName> decommissionedServerNames) {
|
||||
List<ServerName> unknownServerNames, Map<ServerName, ServerMetrics> liveServerMetrics,
|
||||
ServerName masterName, List<ServerName> backupMasterNames,
|
||||
List<RegionState> regionsInTransition, String clusterId, List<String> masterCoprocessorNames,
|
||||
Boolean balancerOn, int masterInfoPort, List<ServerName> serversName,
|
||||
Map<TableName, RegionStatesCount> tableRegionStatesCount, List<ServerTask> masterTasks,
|
||||
List<ServerName> decommissionedServerNames) {
|
||||
this.hbaseVersion = hbaseVersion;
|
||||
this.deadServerNames = Preconditions.checkNotNull(deadServerNames);
|
||||
this.unknownServerNames = Preconditions.checkNotNull(unknownServerNames);
|
||||
this.decommissionedServerNames = Preconditions.checkNotNull(decommissionedServerNames);
|
||||
this.liveServerMetrics = Preconditions.checkNotNull(liveServerMetrics);
|
||||
this.masterName = masterName;
|
||||
|
@ -395,6 +414,11 @@ public final class ClusterMetricsBuilder {
|
|||
return Collections.unmodifiableList(deadServerNames);
|
||||
}
|
||||
|
||||
/** Returns the unknown server names wrapped in an unmodifiable list view. */
@Override
|
||||
public List<ServerName> getUnknownServerNames() {
|
||||
return Collections.unmodifiableList(unknownServerNames);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ServerName> getDecommissionedServerNames() {
|
||||
return Collections.unmodifiableList(decommissionedServerNames);
|
||||
|
@ -490,6 +514,14 @@ public final class ClusterMetricsBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
int unknownServerSize = getUnknownServerNames().size();
|
||||
sb.append("\nNumber of unknown region servers: " + unknownServerSize);
|
||||
if (unknownServerSize > 0) {
|
||||
for (ServerName serverName : getUnknownServerNames()) {
|
||||
sb.append("\n " + serverName);
|
||||
}
|
||||
}
|
||||
|
||||
sb.append("\nAverage load: " + getAverageLoad());
|
||||
sb.append("\nNumber of requests: " + getRequestCount());
|
||||
sb.append("\nNumber of regions: " + getRegionCount());
|
||||
|
|
|
@ -2228,6 +2228,14 @@ public interface Admin extends Abortable, Closeable {
|
|||
return getClusterMetrics(EnumSet.of(Option.DEAD_SERVERS)).getDeadServerNames();
|
||||
}
|
||||
|
||||
/**
|
||||
* List unknown region servers.
|
||||
* @return List of unknown region servers.
* @throws IOException if the underlying cluster metrics request fails
|
||||
*/
|
||||
default List<ServerName> listUnknownServers() throws IOException {
|
||||
// Request only the UNKNOWN_SERVERS option, then read that list from the returned metrics.
return getClusterMetrics(EnumSet.of(Option.UNKNOWN_SERVERS)).getUnknownServerNames();
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear dead region servers from master.
|
||||
* @param servers list of dead region servers.
|
||||
|
|
|
@ -1455,6 +1455,14 @@ public interface AsyncAdmin {
|
|||
.thenApply(ClusterMetrics::getDeadServerNames);
|
||||
}
|
||||
|
||||
/**
|
||||
* List all the unknown region servers.
* @return a {@link CompletableFuture} that yields the unknown region server names read from the
* cluster metrics requested with {@link Option#UNKNOWN_SERVERS}
|
||||
*/
|
||||
default CompletableFuture<List<ServerName>> listUnknownServers() {
|
||||
return this.getClusterMetrics(EnumSet.of(Option.UNKNOWN_SERVERS))
|
||||
.thenApply(ClusterMetrics::getUnknownServerNames);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear dead region servers from master.
|
||||
* @param servers list of dead region servers.
|
||||
|
|
|
@ -775,6 +775,11 @@ class AsyncHBaseAdmin implements AsyncAdmin {
|
|||
return wrap(rawAdmin.listDeadServers());
|
||||
}
|
||||
|
||||
/** Delegates to {@code rawAdmin.listUnknownServers()} and passes the future through wrap(). */
@Override
|
||||
public CompletableFuture<List<ServerName>> listUnknownServers() {
|
||||
return wrap(rawAdmin.listUnknownServers());
|
||||
}
|
||||
|
||||
@Override
|
||||
public CompletableFuture<List<ServerName>> clearDeadServers(List<ServerName> servers) {
|
||||
return wrap(rawAdmin.clearDeadServers(servers));
|
||||
|
|
|
@ -350,6 +350,7 @@ message ClusterStatus {
|
|||
repeated TableRegionStatesCount table_region_states_count = 12;
|
||||
repeated ServerTask master_tasks = 13;
|
||||
repeated ServerName decommissioned_servers = 14;
|
||||
repeated ServerName unknown_servers = 15;
|
||||
}
|
||||
|
||||
enum Option {
|
||||
|
@ -367,4 +368,5 @@ enum Option {
|
|||
TABLE_TO_REGIONS_COUNT = 11;
|
||||
TASKS = 12;
|
||||
DECOMMISSIONED_SERVERS = 13;
|
||||
UNKNOWN_SERVERS = 14;
|
||||
}
|
||||
|
|
|
@ -2863,6 +2863,12 @@ public class HMaster extends HBaseServerBase<MasterRpcServices> implements Maste
|
|||
}
|
||||
break;
|
||||
}
|
||||
case UNKNOWN_SERVERS: {
|
||||
if (serverManager != null) {
|
||||
builder.setUnknownServerNames(getUnknownServers());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case MASTER_COPROCESSORS: {
|
||||
if (cpHost != null) {
|
||||
builder.setMasterCoprocessorNames(Arrays.asList(getMasterCoprocessors()));
|
||||
|
@ -2928,6 +2934,17 @@ public class HMaster extends HBaseServerBase<MasterRpcServices> implements Maste
|
|||
return builder.build();
|
||||
}
|
||||
|
||||
private List<ServerName> getUnknownServers() {
|
||||
if (serverManager != null) {
|
||||
final Set<ServerName> serverNames = getAssignmentManager().getRegionStates().getRegionStates()
|
||||
.stream().map(RegionState::getServerName).collect(Collectors.toSet());
|
||||
final List<ServerName> unknownServerNames = serverNames.stream()
|
||||
.filter(sn -> sn != null && serverManager.isServerUnknown(sn)).collect(Collectors.toList());
|
||||
return unknownServerNames;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private Map<ServerName, ServerMetrics> getOnlineServers() {
|
||||
if (serverManager != null) {
|
||||
final Map<ServerName, ServerMetrics> map = new HashMap<>();
|
||||
|
|
|
@ -227,6 +227,11 @@ public class TestRegionsRecoveryChore {
|
|||
return null;
|
||||
}
|
||||
|
||||
// Stub implementation: always returns null.
@Override
|
||||
public List<ServerName> getUnknownServerNames() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ServerName> getDecommissionedServerNames() {
|
||||
return null;
|
||||
|
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.master;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtil;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.client.Admin;
|
||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.ClassRule;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
|
||||
@Category(MediumTests.class)
|
||||
public class TestUnknownServers {
|
||||
@ClassRule
|
||||
public static final HBaseClassTestRule CLASS_RULE =
|
||||
HBaseClassTestRule.forClass(TestUnknownServers.class);
|
||||
|
||||
private static HBaseTestingUtil UTIL;
|
||||
private static Admin ADMIN;
|
||||
private final static int SLAVES = 1;
|
||||
private static boolean IS_UNKNOWN_SERVER = true;
|
||||
|
||||
@BeforeClass
|
||||
public static void setUpBeforeClass() throws Exception {
|
||||
UTIL = new HBaseTestingUtil();
|
||||
UTIL.getConfiguration().setClass(HConstants.MASTER_IMPL,
|
||||
TestUnknownServers.HMasterForTest.class, HMaster.class);
|
||||
UTIL.startMiniCluster(SLAVES);
|
||||
ADMIN = UTIL.getAdmin();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testListUnknownServers() throws Exception {
|
||||
Assert.assertEquals(ADMIN.listUnknownServers().size(), SLAVES);
|
||||
IS_UNKNOWN_SERVER = false;
|
||||
Assert.assertEquals(ADMIN.listUnknownServers().size(), 0);
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDownAfterClass() throws Exception {
|
||||
if (ADMIN != null) {
|
||||
ADMIN.close();
|
||||
}
|
||||
if (UTIL != null) {
|
||||
UTIL.shutdownMiniCluster();
|
||||
}
|
||||
}
|
||||
|
||||
public static final class HMasterForTest extends HMaster {
|
||||
|
||||
public HMasterForTest(Configuration conf) throws IOException {
|
||||
super(conf);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected ServerManager createServerManager(MasterServices master, RegionServerList storage)
|
||||
throws IOException {
|
||||
setupClusterConnection();
|
||||
return new TestUnknownServers.ServerManagerForTest(master, storage);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class ServerManagerForTest extends ServerManager {
|
||||
|
||||
public ServerManagerForTest(MasterServices master, RegionServerList storage) {
|
||||
super(master, storage);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isServerUnknown(ServerName serverName) {
|
||||
return IS_UNKNOWN_SERVER;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1631,6 +1631,12 @@ module Hbase
|
|||
@admin.clearDeadServers(servers).to_a
|
||||
end
|
||||
|
||||
#----------------------------------------------------------------------------------------------
|
||||
# List unknown region servers.
# Calls listUnknownServers on @admin and converts the result with to_a.
|
||||
def list_unknownservers
|
||||
@admin.listUnknownServers.to_a
|
||||
end
|
||||
|
||||
#----------------------------------------------------------------------------------------------
|
||||
# List live region servers
|
||||
def list_liveservers
|
||||
|
|
|
@ -475,6 +475,7 @@ Shell.load_command_group(
|
|||
clear_compaction_queues
|
||||
list_deadservers
|
||||
list_liveservers
|
||||
list_unknownservers
|
||||
clear_deadservers
|
||||
clear_block_cache
|
||||
stop_master
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
#
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
module Shell
  module Commands
    # Shell command that lists the region servers returned by admin.list_unknownservers.
    class ListUnknownservers < Command
      # Returns the help text shown for this command.
      def help
        <<~EOF
          List all unknown region servers
          Examples:
          hbase> list_unknownservers
        EOF
      end

      # Emits a SERVERNAME header, one row per unknown server, and then hands the number of
      # servers to the formatter's footer.
      def command
        formatter.header(['SERVERNAME'])

        servers = admin.list_unknownservers
        servers.each do |server|
          formatter.row([server.toString])
        end

        formatter.footer(servers.size)
      end
    end
  end
end
|
Loading…
Reference in New Issue