HBASE-24662 Update DumpClusterStatusAction to notice changes in region server count

Sometimes when running chaos monkey, I've found that we lose accounting of
region servers. I've taken to a manual process of checking the
reported list against a known reference. It occurs to me that
ChaosMonkey has a known reference, and it can do this accounting for
me.

Signed-off-by: Viraj Jasani <vjasani@apache.org>
This commit is contained in:
Nick Dimiduk 2020-06-30 16:50:10 -07:00 committed by Nick Dimiduk
parent 2fd587384a
commit 714a6f53d8
1 changed files with 63 additions and 2 deletions

View File

@ -19,6 +19,14 @@
package org.apache.hadoop.hbase.chaos.actions;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.net.Address;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -28,18 +36,71 @@ import org.slf4j.LoggerFactory;
public class DumpClusterStatusAction extends Action {
private static final Logger LOG = LoggerFactory.getLogger(DumpClusterStatusAction.class);
@Override protected Logger getLogger() {
private Set<Address> initialRegionServers;
@Override
protected Logger getLogger() {
return LOG;
}
@Override
public void init(ActionContext context) throws IOException {
super.init(context);
initialRegionServers = collectKnownRegionServers(initialStatus);
}
@Override
public void perform() throws Exception {
getLogger().debug("Performing action: Dump cluster status");
getLogger().info("Cluster status\n" + cluster.getClusterStatus());
final ClusterStatus currentMetrics = cluster.getClusterStatus();
getLogger().info("Cluster status\n{}", currentMetrics);
reportMissingRegionServers(currentMetrics);
reportNewRegionServers(currentMetrics);
}
/**
* Build a set of all the host:port pairs of region servers known to this cluster.
*/
private static Set<Address> collectKnownRegionServers(final ClusterStatus clusterStatus) {
final Set<Address> regionServers = new HashSet<>();
final Set<ServerName> serverNames = clusterStatus.getLiveServersLoad().keySet();
serverNames.addAll(clusterStatus.getDeadServerNames());
for (final ServerName serverName : serverNames) {
regionServers.add(serverName.getAddress());
}
return Collections.unmodifiableSet(regionServers);
}
private void reportMissingRegionServers(final ClusterStatus clusterStatus) {
final Set<Address> regionServers = collectKnownRegionServers(clusterStatus);
final Set<Address> missingRegionServers = new HashSet<>(initialRegionServers);
missingRegionServers.removeAll(regionServers);
if (!missingRegionServers.isEmpty()) {
final StringBuilder stringBuilder = new StringBuilder()
.append("region server(s) are missing from this cluster report");
final List<Address> sortedAddresses = new ArrayList<>(missingRegionServers);
Collections.sort(sortedAddresses);
for (final Address address : sortedAddresses) {
stringBuilder.append("\n ").append(address);
}
getLogger().warn(stringBuilder.toString());
}
}
private void reportNewRegionServers(final ClusterStatus clusterStatus) {
final Set<Address> regionServers = collectKnownRegionServers(clusterStatus);
final Set<Address> newRegionServers = new HashSet<>(regionServers);
newRegionServers.removeAll(initialRegionServers);
if (!newRegionServers.isEmpty()) {
final StringBuilder stringBuilder = new StringBuilder()
.append("region server(s) are new for this cluster report");
final List<Address> sortedAddresses = new ArrayList<>(newRegionServers);
Collections.sort(sortedAddresses);
for (final Address address : sortedAddresses) {
stringBuilder.append("\n ").append(address);
}
getLogger().warn(stringBuilder.toString());
}
}
}