HBASE-27036 Displays the number of decommissioned region server for status command (#4431)

add decommissionedServers to cluster metric

Signed-off-by: Pankaj Kumar <pankajkumar@apache.org>
Signed-off-by: Duo Zhang <zhangduo@apache.org>
Signed-off-by: Viraj Jasani <vjasani@apache.org>
This commit is contained in:
litao 2022-06-02 23:51:39 +08:00 committed by GitHub
parent 15002fccb4
commit ac8b51718f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 54 additions and 4 deletions

View File

@ -80,6 +80,11 @@ public interface ClusterMetrics {
*/
List<ServerName> getDeadServerNames();
/**
* @return the names of region servers on the decommissioned list
*/
List<ServerName> getDecommissionedServerNames();
/**
* @return the names of region servers on the live list
*/
@ -222,5 +227,9 @@ public interface ClusterMetrics {
* metrics about monitored tasks
*/
TASKS,
/**
* metrics about decommissioned region servers
*/
DECOMMISSIONED_SERVERS,
}
}

View File

@ -69,7 +69,9 @@ public final class ClusterMetricsBuilder {
.map(status -> ClusterStatusProtos.TableRegionStatesCount.newBuilder()
.setTableName(ProtobufUtil.toProtoTableName((status.getKey())))
.setRegionStatesCount(ProtobufUtil.toTableRegionStatesCount(status.getValue())).build())
.collect(Collectors.toList()));
.collect(Collectors.toList()))
.addAllDecommissionedServers(metrics.getDecommissionedServerNames().stream()
.map(ProtobufUtil::toServerName).collect(Collectors.toList()));
if (metrics.getMasterName() != null) {
builder.setMaster(ProtobufUtil.toServerName((metrics.getMasterName())));
}
@ -111,7 +113,9 @@ public final class ClusterMetricsBuilder {
.collect(Collectors.toMap(e -> ProtobufUtil.toTableName(e.getTableName()),
e -> ProtobufUtil.toTableRegionStatesCount(e.getRegionStatesCount()))))
.setMasterTasks(proto.getMasterTasksList().stream().map(t -> ProtobufUtil.getServerTask(t))
.collect(Collectors.toList()));
.collect(Collectors.toList()))
.setDecommissionedServerNames(proto.getDecommissionedServersList().stream()
.map(ProtobufUtil::toServerName).collect(Collectors.toList()));
if (proto.hasClusterId()) {
builder.setClusterId(ClusterId.convert(proto.getClusterId()).toString());
}
@ -167,6 +171,8 @@ public final class ClusterMetricsBuilder {
return ClusterMetrics.Option.TABLE_TO_REGIONS_COUNT;
case TASKS:
return ClusterMetrics.Option.TASKS;
case DECOMMISSIONED_SERVERS:
return ClusterMetrics.Option.DECOMMISSIONED_SERVERS;
// should not reach here
default:
throw new IllegalArgumentException("Invalid option: " + option);
@ -206,6 +212,8 @@ public final class ClusterMetricsBuilder {
return ClusterStatusProtos.Option.TABLE_TO_REGIONS_COUNT;
case TASKS:
return ClusterStatusProtos.Option.TASKS;
case DECOMMISSIONED_SERVERS:
return ClusterStatusProtos.Option.DECOMMISSIONED_SERVERS;
// should not reach here
default:
throw new IllegalArgumentException("Invalid option: " + option);
@ -253,6 +261,7 @@ public final class ClusterMetricsBuilder {
private Map<TableName, RegionStatesCount> tableRegionStatesCount = Collections.emptyMap();
@Nullable
private List<ServerTask> masterTasks;
private List<ServerName> decommissionedServerNames = Collections.emptyList();
private ClusterMetricsBuilder() {
}
@ -317,6 +326,11 @@ public final class ClusterMetricsBuilder {
return this;
}
/**
 * Sets the decommissioned region server names that {@link #build()} hands to the resulting
 * {@link ClusterMetrics}. If this is never called, the field keeps its initial value of
 * {@code Collections.emptyList()}.
 * @param value the decommissioned server names; the reference is stored as-is (no copy is made).
 *          The ClusterMetricsImpl constructor runs Preconditions.checkNotNull on it, so a
 *          {@code null} stored here is rejected when {@link #build()} is called.
 * @return this builder, to allow call chaining
 */
public ClusterMetricsBuilder setDecommissionedServerNames(List<ServerName> value) {
this.decommissionedServerNames = value;
return this;
}
public ClusterMetricsBuilder
setTableRegionStatesCount(Map<TableName, RegionStatesCount> tableRegionStatesCount) {
this.tableRegionStatesCount = tableRegionStatesCount;
@ -326,13 +340,14 @@ public final class ClusterMetricsBuilder {
public ClusterMetrics build() {
return new ClusterMetricsImpl(hbaseVersion, deadServerNames, liveServerMetrics, masterName,
backupMasterNames, regionsInTransition, clusterId, masterCoprocessorNames, balancerOn,
masterInfoPort, serversName, tableRegionStatesCount, masterTasks);
masterInfoPort, serversName, tableRegionStatesCount, masterTasks, decommissionedServerNames);
}
private static class ClusterMetricsImpl implements ClusterMetrics {
@Nullable
private final String hbaseVersion;
private final List<ServerName> deadServerNames;
private final List<ServerName> decommissionedServerNames;
private final Map<ServerName, ServerMetrics> liveServerMetrics;
@Nullable
private final ServerName masterName;
@ -353,9 +368,10 @@ public final class ClusterMetricsBuilder {
List<ServerName> backupMasterNames, List<RegionState> regionsInTransition, String clusterId,
List<String> masterCoprocessorNames, Boolean balancerOn, int masterInfoPort,
List<ServerName> serversName, Map<TableName, RegionStatesCount> tableRegionStatesCount,
List<ServerTask> masterTasks) {
List<ServerTask> masterTasks, List<ServerName> decommissionedServerNames) {
this.hbaseVersion = hbaseVersion;
this.deadServerNames = Preconditions.checkNotNull(deadServerNames);
this.decommissionedServerNames = Preconditions.checkNotNull(decommissionedServerNames);
this.liveServerMetrics = Preconditions.checkNotNull(liveServerMetrics);
this.masterName = masterName;
this.backupMasterNames = Preconditions.checkNotNull(backupMasterNames);
@ -379,6 +395,11 @@ public final class ClusterMetricsBuilder {
return Collections.unmodifiableList(deadServerNames);
}
/**
 * @return an unmodifiable view of the decommissioned region server names held by this instance;
 *         the view wraps the backing list rather than copying it
 */
@Override
public List<ServerName> getDecommissionedServerNames() {
// Wrap on every call so callers cannot mutate the list stored in this object.
return Collections.unmodifiableList(decommissionedServerNames);
}
@Override
public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
return Collections.unmodifiableMap(liveServerMetrics);

View File

@ -349,6 +349,7 @@ message ClusterStatus {
repeated ServerName servers_name = 11;
repeated TableRegionStatesCount table_region_states_count = 12;
repeated ServerTask master_tasks = 13;
repeated ServerName decommissioned_servers = 14;
}
enum Option {
@ -365,4 +366,5 @@ enum Option {
SERVERS_NAME = 10;
TABLE_TO_REGIONS_COUNT = 11;
TASKS = 12;
DECOMMISSIONED_SERVERS = 13;
}

View File

@ -2903,6 +2903,12 @@ public class HMaster extends HBaseServerBase<MasterRpcServices> implements Maste
}
break;
}
case DECOMMISSIONED_SERVERS: {
if (serverManager != null) {
builder.setDecommissionedServerNames(serverManager.getDrainingServersList());
}
break;
}
}
}

View File

@ -132,6 +132,12 @@ public class TestClientClusterMetrics {
Assert.assertEquals(origin.getMasterInfoPort(), defaults.getMasterInfoPort());
Assert.assertEquals(origin.getServersName().size(), defaults.getServersName().size());
Assert.assertEquals(ADMIN.getRegionServers().size(), defaults.getServersName().size());
// We decommission the first online region server and verify the metrics.
List<ServerName> serverNames = origin.getServersName().subList(0, 1);
ADMIN.decommissionRegionServers(serverNames, false);
Assert.assertEquals(1, ADMIN.getClusterMetrics().getDecommissionedServerNames().size());
Assert.assertEquals(ADMIN.getClusterMetrics().getDecommissionedServerNames().get(0),
serverNames.get(0));
}
@Test

View File

@ -227,6 +227,11 @@ public class TestRegionsRecoveryChore {
return null;
}
/**
 * Test stub that provides no decommissioned-server data.
 * @return always {@code null}
 */
@Override
public List<ServerName> getDecommissionedServerNames() {
// NOTE(review): ClusterMetricsBuilder's implementation returns a non-null list for this
// method; code that dereferences the result of this stub would hit an NPE. Presumably the
// test never calls it -- confirm before relying on it.
return null;
}
@Override
public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
Map<ServerName, ServerMetrics> liveServerMetrics = new HashMap<>();

View File

@ -1043,6 +1043,7 @@ module Hbase
else
puts "1 active master, #{cluster_metrics.getBackupMasterNames.size} backup masters,
#{cluster_metrics.getLiveServerMetrics.size} servers,
#{cluster_metrics.getDecommissionedServerNames.size} decommissioned,
#{cluster_metrics.getDeadServerNames.size} dead,
#{format('%.4f', cluster_metrics.getAverageLoad)} average load"
end