HBASE-24662 Update DumpClusterStatusAction to notice changes in region server count

Sometimes, while running ChaosMonkey, I've found that we lose track of
region servers. I've taken to manually checking the reported list of
region servers against a known reference. It occurs to me that
ChaosMonkey already has a known reference, so it can do this
accounting for me.

Signed-off-by: Viraj Jasani <vjasani@apache.org>
This commit is contained in:
Nick Dimiduk 2020-06-30 16:50:10 -07:00 committed by Nick Dimiduk
parent f35c5eaadd
commit a6e3db5ba5
1 changed file with 60 additions and 2 deletions

View File

@ -19,6 +19,13 @@
package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.net.Address;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -28,18 +35,69 @@ import org.slf4j.LoggerFactory;
public class DumpClusterStatusAction extends Action {
private static final Logger LOG = LoggerFactory.getLogger(DumpClusterStatusAction.class);
@Override protected Logger getLogger() {
private Set<Address> initialRegionServers;
@Override
protected Logger getLogger() {
return LOG;
}
@Override
public void init(ActionContext context) throws IOException {
super.init(context);
initialRegionServers = collectKnownRegionServers(initialStatus);
}
@Override
public void perform() throws Exception {
getLogger().debug("Performing action: Dump cluster status");
getLogger().info("Cluster status\n" + cluster.getClusterMetrics());
final ClusterMetrics currentMetrics = cluster.getClusterMetrics();
getLogger().info("Cluster status\n{}", currentMetrics);
reportMissingRegionServers(currentMetrics);
reportNewRegionServers(currentMetrics);
}
/**
* Build a set of all the host:port pairs of region servers known to this cluster.
*/
private static Set<Address> collectKnownRegionServers(final ClusterMetrics clusterMetrics) {
final Set<Address> regionServers = clusterMetrics.getLiveServerMetrics()
.keySet()
.stream()
.map(ServerName::getAddress)
.collect(Collectors.toSet());
clusterMetrics.getDeadServerNames()
.stream()
.map(ServerName::getAddress)
.forEach(regionServers::add);
return Collections.unmodifiableSet(regionServers);
}
private void reportMissingRegionServers(final ClusterMetrics clusterMetrics) {
final Set<Address> regionServers = collectKnownRegionServers(clusterMetrics);
final Set<Address> missingRegionServers = new HashSet<>(initialRegionServers);
missingRegionServers.removeAll(regionServers);
if (!missingRegionServers.isEmpty()) {
final StringBuilder stringBuilder = new StringBuilder()
.append("region server(s) are missing from this cluster report");
missingRegionServers.stream()
.sorted()
.forEach(address -> stringBuilder.append("\n ").append(address));
getLogger().warn(stringBuilder.toString());
}
}
private void reportNewRegionServers(final ClusterMetrics clusterMetrics) {
final Set<Address> regionServers = collectKnownRegionServers(clusterMetrics);
final Set<Address> newRegionServers = new HashSet<>(regionServers);
newRegionServers.removeAll(initialRegionServers);
if (!newRegionServers.isEmpty()) {
final StringBuilder stringBuilder = new StringBuilder()
.append("region server(s) are new for this cluster report");
newRegionServers.stream()
.sorted()
.forEach(address -> stringBuilder.append("\n ").append(address));
getLogger().warn(stringBuilder.toString());
}
}
}