HBASE-27036 Displays the number of decommissioned region servers for status command (#4431)

Add decommissionedServers to cluster metrics

Signed-off-by: Pankaj Kumar <pankajkumar@apache.org>
Signed-off-by: Duo Zhang <zhangduo@apache.org>
Signed-off-by: Viraj Jasani <vjasani@apache.org>
This commit is contained in:
litao 2022-06-02 23:51:39 +08:00 committed by GitHub
parent 15002fccb4
commit ac8b51718f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 54 additions and 4 deletions

View File

@ -80,6 +80,11 @@ public interface ClusterMetrics {
*/ */
List<ServerName> getDeadServerNames(); List<ServerName> getDeadServerNames();
/**
* Lists the region servers that are on the decommissioned list. See
* {@link Option#DECOMMISSIONED_SERVERS} for the matching metrics option.
* @return the names of region servers on the decommissioned list
*/
List<ServerName> getDecommissionedServerNames();
/** /**
* @return the names of region servers on the live list * @return the names of region servers on the live list
*/ */
@ -222,5 +227,9 @@ public interface ClusterMetrics {
* metrics about monitored tasks * metrics about monitored tasks
*/ */
TASKS, TASKS,
/**
* metrics about decommissioned region servers
*/
DECOMMISSIONED_SERVERS,
} }
} }

View File

@ -69,7 +69,9 @@ public final class ClusterMetricsBuilder {
.map(status -> ClusterStatusProtos.TableRegionStatesCount.newBuilder() .map(status -> ClusterStatusProtos.TableRegionStatesCount.newBuilder()
.setTableName(ProtobufUtil.toProtoTableName((status.getKey()))) .setTableName(ProtobufUtil.toProtoTableName((status.getKey())))
.setRegionStatesCount(ProtobufUtil.toTableRegionStatesCount(status.getValue())).build()) .setRegionStatesCount(ProtobufUtil.toTableRegionStatesCount(status.getValue())).build())
.collect(Collectors.toList())); .collect(Collectors.toList()))
.addAllDecommissionedServers(metrics.getDecommissionedServerNames().stream()
.map(ProtobufUtil::toServerName).collect(Collectors.toList()));
if (metrics.getMasterName() != null) { if (metrics.getMasterName() != null) {
builder.setMaster(ProtobufUtil.toServerName((metrics.getMasterName()))); builder.setMaster(ProtobufUtil.toServerName((metrics.getMasterName())));
} }
@ -111,7 +113,9 @@ public final class ClusterMetricsBuilder {
.collect(Collectors.toMap(e -> ProtobufUtil.toTableName(e.getTableName()), .collect(Collectors.toMap(e -> ProtobufUtil.toTableName(e.getTableName()),
e -> ProtobufUtil.toTableRegionStatesCount(e.getRegionStatesCount())))) e -> ProtobufUtil.toTableRegionStatesCount(e.getRegionStatesCount()))))
.setMasterTasks(proto.getMasterTasksList().stream().map(t -> ProtobufUtil.getServerTask(t)) .setMasterTasks(proto.getMasterTasksList().stream().map(t -> ProtobufUtil.getServerTask(t))
.collect(Collectors.toList())); .collect(Collectors.toList()))
.setDecommissionedServerNames(proto.getDecommissionedServersList().stream()
.map(ProtobufUtil::toServerName).collect(Collectors.toList()));
if (proto.hasClusterId()) { if (proto.hasClusterId()) {
builder.setClusterId(ClusterId.convert(proto.getClusterId()).toString()); builder.setClusterId(ClusterId.convert(proto.getClusterId()).toString());
} }
@ -167,6 +171,8 @@ public final class ClusterMetricsBuilder {
return ClusterMetrics.Option.TABLE_TO_REGIONS_COUNT; return ClusterMetrics.Option.TABLE_TO_REGIONS_COUNT;
case TASKS: case TASKS:
return ClusterMetrics.Option.TASKS; return ClusterMetrics.Option.TASKS;
case DECOMMISSIONED_SERVERS:
return ClusterMetrics.Option.DECOMMISSIONED_SERVERS;
// should not reach here // should not reach here
default: default:
throw new IllegalArgumentException("Invalid option: " + option); throw new IllegalArgumentException("Invalid option: " + option);
@ -206,6 +212,8 @@ public final class ClusterMetricsBuilder {
return ClusterStatusProtos.Option.TABLE_TO_REGIONS_COUNT; return ClusterStatusProtos.Option.TABLE_TO_REGIONS_COUNT;
case TASKS: case TASKS:
return ClusterStatusProtos.Option.TASKS; return ClusterStatusProtos.Option.TASKS;
case DECOMMISSIONED_SERVERS:
return ClusterStatusProtos.Option.DECOMMISSIONED_SERVERS;
// should not reach here // should not reach here
default: default:
throw new IllegalArgumentException("Invalid option: " + option); throw new IllegalArgumentException("Invalid option: " + option);
@ -253,6 +261,7 @@ public final class ClusterMetricsBuilder {
private Map<TableName, RegionStatesCount> tableRegionStatesCount = Collections.emptyMap(); private Map<TableName, RegionStatesCount> tableRegionStatesCount = Collections.emptyMap();
@Nullable @Nullable
private List<ServerTask> masterTasks; private List<ServerTask> masterTasks;
private List<ServerName> decommissionedServerNames = Collections.emptyList();
private ClusterMetricsBuilder() { private ClusterMetricsBuilder() {
} }
@ -317,6 +326,11 @@ public final class ClusterMetricsBuilder {
return this; return this;
} }
/**
* Sets the names of the decommissioned region servers to include in the built metrics.
* The list reference is stored as-is; no defensive copy is made here.
* NOTE(review): build() passes this list to ClusterMetricsImpl, whose constructor applies
* Preconditions.checkNotNull to it, so a null value presumably fails at build() time rather
* than here -- confirm callers never pass null.
* @param value the decommissioned region server names
* @return this builder, so calls can be chained
*/
public ClusterMetricsBuilder setDecommissionedServerNames(List<ServerName> value) {
this.decommissionedServerNames = value;
return this;
}
public ClusterMetricsBuilder public ClusterMetricsBuilder
setTableRegionStatesCount(Map<TableName, RegionStatesCount> tableRegionStatesCount) { setTableRegionStatesCount(Map<TableName, RegionStatesCount> tableRegionStatesCount) {
this.tableRegionStatesCount = tableRegionStatesCount; this.tableRegionStatesCount = tableRegionStatesCount;
@ -326,13 +340,14 @@ public final class ClusterMetricsBuilder {
public ClusterMetrics build() { public ClusterMetrics build() {
return new ClusterMetricsImpl(hbaseVersion, deadServerNames, liveServerMetrics, masterName, return new ClusterMetricsImpl(hbaseVersion, deadServerNames, liveServerMetrics, masterName,
backupMasterNames, regionsInTransition, clusterId, masterCoprocessorNames, balancerOn, backupMasterNames, regionsInTransition, clusterId, masterCoprocessorNames, balancerOn,
masterInfoPort, serversName, tableRegionStatesCount, masterTasks); masterInfoPort, serversName, tableRegionStatesCount, masterTasks, decommissionedServerNames);
} }
private static class ClusterMetricsImpl implements ClusterMetrics { private static class ClusterMetricsImpl implements ClusterMetrics {
@Nullable @Nullable
private final String hbaseVersion; private final String hbaseVersion;
private final List<ServerName> deadServerNames; private final List<ServerName> deadServerNames;
private final List<ServerName> decommissionedServerNames;
private final Map<ServerName, ServerMetrics> liveServerMetrics; private final Map<ServerName, ServerMetrics> liveServerMetrics;
@Nullable @Nullable
private final ServerName masterName; private final ServerName masterName;
@ -353,9 +368,10 @@ public final class ClusterMetricsBuilder {
List<ServerName> backupMasterNames, List<RegionState> regionsInTransition, String clusterId, List<ServerName> backupMasterNames, List<RegionState> regionsInTransition, String clusterId,
List<String> masterCoprocessorNames, Boolean balancerOn, int masterInfoPort, List<String> masterCoprocessorNames, Boolean balancerOn, int masterInfoPort,
List<ServerName> serversName, Map<TableName, RegionStatesCount> tableRegionStatesCount, List<ServerName> serversName, Map<TableName, RegionStatesCount> tableRegionStatesCount,
List<ServerTask> masterTasks) { List<ServerTask> masterTasks, List<ServerName> decommissionedServerNames) {
this.hbaseVersion = hbaseVersion; this.hbaseVersion = hbaseVersion;
this.deadServerNames = Preconditions.checkNotNull(deadServerNames); this.deadServerNames = Preconditions.checkNotNull(deadServerNames);
this.decommissionedServerNames = Preconditions.checkNotNull(decommissionedServerNames);
this.liveServerMetrics = Preconditions.checkNotNull(liveServerMetrics); this.liveServerMetrics = Preconditions.checkNotNull(liveServerMetrics);
this.masterName = masterName; this.masterName = masterName;
this.backupMasterNames = Preconditions.checkNotNull(backupMasterNames); this.backupMasterNames = Preconditions.checkNotNull(backupMasterNames);
@ -379,6 +395,11 @@ public final class ClusterMetricsBuilder {
return Collections.unmodifiableList(deadServerNames); return Collections.unmodifiableList(deadServerNames);
} }
/**
* Returns a read-only view of the decommissioned region server names. The view wraps the
* list held by this instance rather than copying it.
*/
@Override
public List<ServerName> getDecommissionedServerNames() {
return Collections.unmodifiableList(decommissionedServerNames);
}
@Override @Override
public Map<ServerName, ServerMetrics> getLiveServerMetrics() { public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
return Collections.unmodifiableMap(liveServerMetrics); return Collections.unmodifiableMap(liveServerMetrics);

View File

@ -349,6 +349,7 @@ message ClusterStatus {
repeated ServerName servers_name = 11; repeated ServerName servers_name = 11;
repeated TableRegionStatesCount table_region_states_count = 12; repeated TableRegionStatesCount table_region_states_count = 12;
repeated ServerTask master_tasks = 13; repeated ServerTask master_tasks = 13;
repeated ServerName decommissioned_servers = 14;
} }
enum Option { enum Option {
@ -365,4 +366,5 @@ enum Option {
SERVERS_NAME = 10; SERVERS_NAME = 10;
TABLE_TO_REGIONS_COUNT = 11; TABLE_TO_REGIONS_COUNT = 11;
TASKS = 12; TASKS = 12;
DECOMMISSIONED_SERVERS = 13;
} }

View File

@ -2903,6 +2903,12 @@ public class HMaster extends HBaseServerBase<MasterRpcServices> implements Maste
} }
break; break;
} }
case DECOMMISSIONED_SERVERS: {
if (serverManager != null) {
builder.setDecommissionedServerNames(serverManager.getDrainingServersList());
}
break;
}
} }
} }

View File

@ -132,6 +132,12 @@ public class TestClientClusterMetrics {
Assert.assertEquals(origin.getMasterInfoPort(), defaults.getMasterInfoPort()); Assert.assertEquals(origin.getMasterInfoPort(), defaults.getMasterInfoPort());
Assert.assertEquals(origin.getServersName().size(), defaults.getServersName().size()); Assert.assertEquals(origin.getServersName().size(), defaults.getServersName().size());
Assert.assertEquals(ADMIN.getRegionServers().size(), defaults.getServersName().size()); Assert.assertEquals(ADMIN.getRegionServers().size(), defaults.getServersName().size());
// We decommission the first online region server and verify the metrics.
List<ServerName> serverNames = origin.getServersName().subList(0, 1);
ADMIN.decommissionRegionServers(serverNames, false);
Assert.assertEquals(1, ADMIN.getClusterMetrics().getDecommissionedServerNames().size());
Assert.assertEquals(ADMIN.getClusterMetrics().getDecommissionedServerNames().get(0),
serverNames.get(0));
} }
@Test @Test

View File

@ -227,6 +227,11 @@ public class TestRegionsRecoveryChore {
return null; return null;
} }
// Stub implementation: this test double supplies no decommissioned servers and returns null.
// NOTE(review): any caller that dereferences the result (for example by streaming over it)
// would hit a NullPointerException; presumably this test never reads it -- confirm.
@Override
public List<ServerName> getDecommissionedServerNames() {
return null;
}
@Override @Override
public Map<ServerName, ServerMetrics> getLiveServerMetrics() { public Map<ServerName, ServerMetrics> getLiveServerMetrics() {
Map<ServerName, ServerMetrics> liveServerMetrics = new HashMap<>(); Map<ServerName, ServerMetrics> liveServerMetrics = new HashMap<>();

View File

@ -1043,6 +1043,7 @@ module Hbase
else else
puts "1 active master, #{cluster_metrics.getBackupMasterNames.size} backup masters, puts "1 active master, #{cluster_metrics.getBackupMasterNames.size} backup masters,
#{cluster_metrics.getLiveServerMetrics.size} servers, #{cluster_metrics.getLiveServerMetrics.size} servers,
#{cluster_metrics.getDecommissionedServerNames.size} decommissioned,
#{cluster_metrics.getDeadServerNames.size} dead, #{cluster_metrics.getDeadServerNames.size} dead,
#{format('%.4f', cluster_metrics.getAverageLoad)} average load" #{format('%.4f', cluster_metrics.getAverageLoad)} average load"
end end