HBASE-24662 Update DumpClusterStatusAction to notice changes in region server count
Sometimes, when running ChaosMonkey, I've found that we lose track of region servers. I've been checking the reported list against a known reference by hand. ChaosMonkey already has that known reference (the cluster status captured when the action is initialized), so it can do this accounting for me. Signed-off-by: Viraj Jasani <vjasani@apache.org>
This commit is contained in:
parent
f35c5eaadd
commit
a6e3db5ba5
|
@ -19,6 +19,13 @@
|
|||
package org.apache.hadoop.hbase.chaos.actions;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.hadoop.hbase.ClusterMetrics;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.net.Address;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -28,18 +35,69 @@ import org.slf4j.LoggerFactory;
|
|||
public class DumpClusterStatusAction extends Action {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(DumpClusterStatusAction.class);
|
||||
|
||||
@Override protected Logger getLogger() {
|
||||
private Set<Address> initialRegionServers;
|
||||
|
||||
@Override
|
||||
protected Logger getLogger() {
|
||||
return LOG;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(ActionContext context) throws IOException {
|
||||
super.init(context);
|
||||
initialRegionServers = collectKnownRegionServers(initialStatus);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void perform() throws Exception {
|
||||
getLogger().debug("Performing action: Dump cluster status");
|
||||
getLogger().info("Cluster status\n" + cluster.getClusterMetrics());
|
||||
final ClusterMetrics currentMetrics = cluster.getClusterMetrics();
|
||||
getLogger().info("Cluster status\n{}", currentMetrics);
|
||||
reportMissingRegionServers(currentMetrics);
|
||||
reportNewRegionServers(currentMetrics);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a set of all the host:port pairs of region servers known to this cluster.
|
||||
*/
|
||||
private static Set<Address> collectKnownRegionServers(final ClusterMetrics clusterMetrics) {
|
||||
final Set<Address> regionServers = clusterMetrics.getLiveServerMetrics()
|
||||
.keySet()
|
||||
.stream()
|
||||
.map(ServerName::getAddress)
|
||||
.collect(Collectors.toSet());
|
||||
clusterMetrics.getDeadServerNames()
|
||||
.stream()
|
||||
.map(ServerName::getAddress)
|
||||
.forEach(regionServers::add);
|
||||
return Collections.unmodifiableSet(regionServers);
|
||||
}
|
||||
|
||||
private void reportMissingRegionServers(final ClusterMetrics clusterMetrics) {
|
||||
final Set<Address> regionServers = collectKnownRegionServers(clusterMetrics);
|
||||
final Set<Address> missingRegionServers = new HashSet<>(initialRegionServers);
|
||||
missingRegionServers.removeAll(regionServers);
|
||||
if (!missingRegionServers.isEmpty()) {
|
||||
final StringBuilder stringBuilder = new StringBuilder()
|
||||
.append("region server(s) are missing from this cluster report");
|
||||
missingRegionServers.stream()
|
||||
.sorted()
|
||||
.forEach(address -> stringBuilder.append("\n ").append(address));
|
||||
getLogger().warn(stringBuilder.toString());
|
||||
}
|
||||
}
|
||||
|
||||
private void reportNewRegionServers(final ClusterMetrics clusterMetrics) {
|
||||
final Set<Address> regionServers = collectKnownRegionServers(clusterMetrics);
|
||||
final Set<Address> newRegionServers = new HashSet<>(regionServers);
|
||||
newRegionServers.removeAll(initialRegionServers);
|
||||
if (!newRegionServers.isEmpty()) {
|
||||
final StringBuilder stringBuilder = new StringBuilder()
|
||||
.append("region server(s) are new for this cluster report");
|
||||
newRegionServers.stream()
|
||||
.sorted()
|
||||
.forEach(address -> stringBuilder.append("\n ").append(address));
|
||||
getLogger().warn(stringBuilder.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue