SOLR-13155: Add command-line option for testing autoscaling configurations.

Andrzej Bialecki 2019-02-12 11:53:15 +01:00
parent 55c061770e
commit 242ff88e02
5 changed files with 350 additions and 1 deletion


@@ -200,6 +200,8 @@ New Features
* SOLR-13147: Add movingMAD Stream Evaluator (Joel Bernstein)
* SOLR-13155: Add command-line option for testing autoscaling configurations. (ab)
Bug Fixes
----------------------


@@ -312,7 +312,7 @@ function print_usage() {
if [ -z "$CMD" ]; then
echo ""
echo "Usage: solr COMMAND OPTIONS"
echo " where COMMAND is one of: start, stop, restart, status, healthcheck, create, create_core, create_collection, delete, version, zk, auth, assert, config"
echo " where COMMAND is one of: start, stop, restart, status, healthcheck, create, create_core, create_collection, delete, version, zk, auth, assert, config, autoscaling"
echo ""
echo " Standalone server example (start Solr running in the background on port 8984):"
echo ""
@@ -632,6 +632,17 @@ function print_usage() {
echo ""
echo " -V Enable more verbose output."
echo ""
elif [ "$CMD" == "autoscaling" ]; then
echo ""
echo "Usage: solr autoscaling [-z zkHost] [-a <autoscaling.json.file>] [-s] [-d] [-n] [-r]"
echo ""
echo " Calculate autoscaling policy suggestions and diagnostic information, using either the deployed"
echo " autoscaling configuration or the one supplied on the command line. This calculation takes place"
echo " on the client-side without affecting the running cluster except for fetching the node and replica"
echo " metrics from the cluster. For detailed usage instructions, do:"
echo ""
echo " bin/solr autoscaling -help"
echo ""
fi
} # end print_usage
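
A hypothetical invocation following the usage line above (the ZooKeeper address and config file path are placeholders):

  bin/solr autoscaling -z localhost:9983 -a /path/to/autoscaling.json -s -d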
@@ -1343,6 +1354,12 @@ if [[ "$SCRIPT_CMD" == "zk" ]]; then
exit $?
fi
if [[ "$SCRIPT_CMD" == "autoscaling" ]]; then
run_tool autoscaling $@
exit $?
fi
if [[ "$SCRIPT_CMD" == "auth" ]]; then
VERBOSE=""


@@ -17,6 +17,10 @@
package org.apache.solr.util;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
public class RedactionUtils {
@@ -47,5 +51,30 @@ public class RedactionUtils {
RedactionUtils.redactSystemProperty = redactSystemProperty;
}
/**
* Replace actual names found in a string with meaningless randomized names.
* @param names actual names
* @param redactionPrefix prefix to use for redacted names
* @param data string to redact
* @return redacted string where all actual names have been replaced.
*/
public static String redactNames(Collection<String> names, String redactionPrefix, String data) {
Set<String> uniqueNames = new TreeSet<>(names);
Set<Integer> uniqueCode = new HashSet<>();
// minimal(ish) hash: widen the code space until every name maps to a unique code
int codeShift = 0;
int codeSpace = names.size();
for (String name : uniqueNames) {
int code = Math.abs(name.hashCode() % codeSpace);
while (uniqueCode.contains(code)) {
codeShift++;
codeSpace = names.size() << codeShift;
code = Math.abs(name.hashCode() % codeSpace);
}
uniqueCode.add(code);
data = data.replaceAll("\\Q" + name + "\\E", redactionPrefix + Integer.toString(code, Character.MAX_RADIX));
}
return data;
}
}
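
A minimal usage sketch for the new redactNames helper; the class name, node names, and input string below are illustrative only:

import java.util.Arrays;
import java.util.List;
import org.apache.solr.util.RedactionUtils;

public class RedactionExample {
  public static void main(String[] args) {
    // hypothetical node names as they might appear in diagnostics output
    List<String> nodeNames = Arrays.asList("host1:8983_solr", "host2:8983_solr");
    String report = "{\"node\":\"host1:8983_solr\", \"peer\":\"host2:8983_solr\"}";
    // every occurrence of each name is replaced by the prefix plus a short base-36 code
    String redacted = RedactionUtils.redactNames(nodeNames, "N_", report);
    System.out.println(redacted); // e.g. {"node":"N_0", "peer":"N_1"} (codes depend on hashCode)
  }
}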


@@ -19,12 +19,14 @@ package org.apache.solr.util;
import javax.net.ssl.SSLPeerUnverifiedException;
import java.io.Console;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.lang.invoke.MethodHandles;
import java.net.ConnectException;
import java.net.MalformedURLException;
import java.net.Socket;
import java.net.SocketException;
import java.net.URL;
@@ -43,6 +45,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
@@ -50,9 +53,11 @@ import java.util.Map;
import java.util.Optional;
import java.util.Scanner;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.zip.ZipEntry;
@@ -72,6 +77,7 @@ import org.apache.commons.exec.Executor;
import org.apache.commons.exec.OS;
import org.apache.commons.exec.environment.EnvironmentUtils;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.BooleanUtils;
import org.apache.commons.lang.SystemUtils;
import org.apache.http.HttpEntity;
@@ -93,15 +99,24 @@ import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.cloud.DistributedQueue;
import org.apache.solr.client.solrj.cloud.DistributedQueueFactory;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
import org.apache.solr.client.solrj.cloud.autoscaling.PolicyHelper;
import org.apache.solr.client.solrj.cloud.autoscaling.Row;
import org.apache.solr.client.solrj.cloud.autoscaling.Suggester;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient.Builder;
import org.apache.solr.client.solrj.impl.SolrClientCloudManager;
import org.apache.solr.client.solrj.impl.ZkClientClusterStateProvider;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;
import org.apache.solr.client.solrj.response.CollectionAdminResponse;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
@@ -117,6 +132,8 @@ import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils;
import org.apache.solr.security.Sha256AuthenticationProvider;
import org.apache.solr.util.configuration.SSLConfigurationsFactory;
import org.noggit.CharArr;
@@ -392,6 +409,8 @@ public class SolrCLI {
return new UtilsTool();
else if ("auth".equals(toolType))
return new AuthTool();
else if ("autoscaling".equals(toolType))
return new AutoscalingTool();
// If you add a built-in tool to this class, add it here to avoid
// classpath scanning
@@ -410,6 +429,7 @@
formatter.printHelp("healthcheck", getToolOptions(new HealthcheckTool()));
formatter.printHelp("status", getToolOptions(new StatusTool()));
formatter.printHelp("api", getToolOptions(new ApiTool()));
formatter.printHelp("autoscaling", getToolOptions(new AutoscalingTool()));
formatter.printHelp("create_collection", getToolOptions(new CreateCollectionTool()));
formatter.printHelp("create_core", getToolOptions(new CreateCoreTool()));
formatter.printHelp("create", getToolOptions(new CreateTool()));
@@ -832,6 +852,283 @@
}
public static class AutoscalingTool extends SolrCloudTool {
static final String NODE_REDACTION_PREFIX = "N_";
static final String COLL_REDACTION_PREFIX = "COLL_";
public AutoscalingTool() {
this(System.out);
}
public AutoscalingTool(PrintStream stdout) {
super(stdout);
}
@Override
public Option[] getOptions() {
return new Option[] {
OptionBuilder
.withArgName("HOST")
.hasArg()
.isRequired(false)
.withDescription("Address of the Zookeeper ensemble; defaults to: "+ZK_HOST)
.create("zkHost"),
OptionBuilder
.withArgName("CONFIG")
.hasArg()
.isRequired(false)
.withDescription("Autoscaling config file, defaults to the one deployed in the cluster.")
.withLongOpt("config")
.create("a"),
OptionBuilder
.withDescription("Show calculated suggestions")
.withLongOpt("suggestions")
.create("s"),
OptionBuilder
.withDescription("Show ClusterState (collections layout)")
.withLongOpt("clusterState")
.create("c"),
OptionBuilder
.withDescription("Show calculated diagnostics")
.withLongOpt("diagnostics")
.create("d"),
OptionBuilder
.withDescription("Show sorted nodes with diagnostics")
.withLongOpt("sortedNodes")
.create("n"),
OptionBuilder
.withDescription("Redact node and collection names (original names will be consistently randomized)")
.withLongOpt("redact")
.create("r"),
OptionBuilder
.withDescription("Show summarized collection & node statistics.")
.create("stats"),
OptionBuilder
.withDescription("Turn on all options to get all available information.")
.create("all")
};
}
@Override
public String getName() {
return "autoscaling";
}
@Override
protected void runCloudTool(CloudSolrClient cloudSolrClient, CommandLine cli) throws Exception {
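// queue operations are never needed for this read-only, client-side calculation, so the factory simply refuses them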
DistributedQueueFactory dummyFactory = new DistributedQueueFactory() {
@Override
public DistributedQueue makeQueue(String path) throws IOException {
throw new UnsupportedOperationException("makeQueue");
}
@Override
public void removeQueue(String path) throws IOException {
throw new UnsupportedOperationException("removeQueue");
}
};
try (SolrClientCloudManager clientCloudManager = new SolrClientCloudManager(dummyFactory, cloudSolrClient)) {
AutoScalingConfig config = null;
HashSet<String> liveNodes = new HashSet<>();
String configFile = cli.getOptionValue("a");
if (configFile != null) {
log.info("- reading autoscaling config from " + configFile);
config = new AutoScalingConfig(IOUtils.toByteArray(new FileInputStream(configFile)));
} else {
log.info("- reading autoscaling config from the cluster.");
config = clientCloudManager.getDistribStateManager().getAutoScalingConfig();
}
log.info("- calculating suggestions...");
long start = TimeSource.NANO_TIME.getTimeNs();
// collect live node names for optional redaction
liveNodes.addAll(clientCloudManager.getClusterStateProvider().getLiveNodes());
List<Suggester.SuggestionInfo> suggestions = PolicyHelper.getSuggestions(config, clientCloudManager);
long end = TimeSource.NANO_TIME.getTimeNs();
log.info(" (took " + TimeUnit.NANOSECONDS.toMillis(end - start) + " ms)");
log.info("- calculating diagnostics...");
start = TimeSource.NANO_TIME.getTimeNs();
// update the live nodes
liveNodes.addAll(clientCloudManager.getClusterStateProvider().getLiveNodes());
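// build a policy session from the current cluster state; diagnostics (and sorted nodes) are derived from it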
Policy.Session session = config.getPolicy().createSession(clientCloudManager);
MapWriter mw = PolicyHelper.getDiagnostics(session);
Map<String, Object> diagnostics = new LinkedHashMap<>();
mw.toMap(diagnostics);
end = TimeSource.NANO_TIME.getTimeNs();
log.info(" (took " + TimeUnit.NANOSECONDS.toMillis(end - start) + " ms)");
boolean withSuggestions = cli.hasOption("s");
boolean withDiagnostics = cli.hasOption("d") || cli.hasOption("n");
boolean withSortedNodes = cli.hasOption("n");
boolean withClusterState = cli.hasOption("c");
boolean withStats = cli.hasOption("stats");
boolean redact = cli.hasOption("r");
if (cli.hasOption("all")) {
withSuggestions = true;
withDiagnostics = true;
withSortedNodes = true;
withClusterState = true;
withStats = true;
}
// also prepare to redact host names / IPs in base_url and other properties
Set<String> redactNames = new HashSet<>();
for (String nodeName : liveNodes) {
String urlString = Utils.getBaseUrlForNodeName(nodeName, "http");
try {
URL u = new URL(urlString);
// host:port format (as it appears in base_url)
redactNames.add(u.getHost() + ":" + u.getPort());
// node name format
redactNames.add(u.getHost() + "_" + u.getPort() + "_");
} catch (MalformedURLException e) {
log.warn("Invalid URL for node name " + nodeName + ", replacing including protocol and path", e);
redactNames.add(urlString);
redactNames.add(Utils.getBaseUrlForNodeName(nodeName, "https"));
}
}
// redact collection names too
Set<String> redactCollections = new HashSet<>();
ClusterState clusterState = clientCloudManager.getClusterStateProvider().getClusterState();
clusterState.forEachCollection(coll -> redactCollections.add(coll.getName()));
if (!withSuggestions && !withDiagnostics) {
withSuggestions = true;
}
Map<String, Object> results = new LinkedHashMap<>();
if (withClusterState) {
Map<String, Object> map = new LinkedHashMap<>();
map.put("znodeVersion", clusterState.getZNodeVersion());
map.put("liveNodes", new TreeSet<>(clusterState.getLiveNodes()));
map.put("collections", clusterState.getCollectionsMap());
results.put("CLUSTERSTATE", map);
}
if (withStats) {
Map<String, Map<String, Number>> collStats = new TreeMap<>();
clusterState.forEachCollection(coll -> {
Map<String, Number> perColl = collStats.computeIfAbsent(coll.getName(), n -> new LinkedHashMap<>());
AtomicInteger numCores = new AtomicInteger();
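// nodeName -> shardName -> number of replicas of that shard placed on the node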
HashMap<String, Map<String, AtomicInteger>> nodes = new HashMap<>();
coll.getSlices().forEach(s -> {
numCores.addAndGet(s.getReplicas().size());
s.getReplicas().forEach(r -> {
nodes.computeIfAbsent(r.getNodeName(), n -> new HashMap<>())
.computeIfAbsent(s.getName(), slice -> new AtomicInteger()).incrementAndGet();
});
});
int maxCoresPerNode = 0;
int minCoresPerNode = 0;
int maxActualShardsPerNode = 0;
int minActualShardsPerNode = 0;
int maxShardReplicasPerNode = 0;
int minShardReplicasPerNode = 0;
if (!nodes.isEmpty()) {
minCoresPerNode = Integer.MAX_VALUE;
minActualShardsPerNode = Integer.MAX_VALUE;
minShardReplicasPerNode = Integer.MAX_VALUE;
for (Map<String, AtomicInteger> counts : nodes.values()) {
int total = counts.values().stream().mapToInt(c -> c.get()).sum();
for (AtomicInteger count : counts.values()) {
if (count.get() > maxShardReplicasPerNode) {
maxShardReplicasPerNode = count.get();
}
if (count.get() < minShardReplicasPerNode) {
minShardReplicasPerNode = count.get();
}
}
if (total > maxCoresPerNode) {
maxCoresPerNode = total;
}
if (total < minCoresPerNode) {
minCoresPerNode = total;
}
if (counts.size() > maxActualShardsPerNode) {
maxActualShardsPerNode = counts.size();
}
if (counts.size() < minActualShardsPerNode) {
minActualShardsPerNode = counts.size();
}
}
}
perColl.put("activeShards", coll.getActiveSlices().size());
perColl.put("inactiveShards", coll.getSlices().size() - coll.getActiveSlices().size());
perColl.put("rf", coll.getReplicationFactor());
perColl.put("maxShardsPerNode", coll.getMaxShardsPerNode());
perColl.put("maxActualShardsPerNode", maxActualShardsPerNode);
perColl.put("minActualShardsPerNode", minActualShardsPerNode);
perColl.put("maxShardReplicasPerNode", maxShardReplicasPerNode);
perColl.put("minShardReplicasPerNode", minShardReplicasPerNode);
perColl.put("numCores", numCores.get());
perColl.put("numNodes", nodes.size());
perColl.put("maxCoresPerNode", maxCoresPerNode);
perColl.put("minCoresPerNode", minCoresPerNode);
});
Map<String, Map<String, Object>> nodeStats = new TreeMap<>();
for (Row row : session.getSortedNodes()) {
Map<String, Object> nodeStat = nodeStats.computeIfAbsent(row.node, n -> new LinkedHashMap<>());
nodeStat.put("isLive", row.isLive());
nodeStat.put("freedisk", row.getVal("freedisk", 0));
nodeStat.put("totaldisk", row.getVal("totaldisk", 0));
nodeStat.put("cores", row.getVal("cores", 0));
Map<String, Map<String, Map<String, Object>>> collReplicas = new TreeMap<>();
row.forEachReplica(ri -> {
Map<String, Object> perReplica = collReplicas.computeIfAbsent(ri.getCollection(), c -> new TreeMap<>())
.computeIfAbsent(ri.getCore().substring(ri.getCollection().length() + 1), core -> new LinkedHashMap<>());
perReplica.put("INDEX.sizeInGB", ri.getVariable("INDEX.sizeInGB"));
perReplica.put("coreNode", ri.getName());
if (ri.getBool("leader", false)) {
perReplica.put("leader", true);
Double totalSize = (Double)collStats.computeIfAbsent(ri.getCollection(), c -> new HashMap<>())
.computeIfAbsent("avgShardSize", size -> 0.0);
Number riSize = (Number)ri.getVariable("INDEX.sizeInGB");
if (riSize != null) {
totalSize += riSize.doubleValue();
collStats.get(ri.getCollection()).put("avgShardSize", totalSize);
Double max = (Double)collStats.get(ri.getCollection()).get("maxShardSize");
if (max == null) max = 0.0;
if (riSize.doubleValue() > max) {
collStats.get(ri.getCollection()).put("maxShardSize", riSize.doubleValue());
}
Double min = (Double)collStats.get(ri.getCollection()).get("minShardSize");
if (min == null) min = Double.MAX_VALUE;
if (riSize.doubleValue() < min) {
collStats.get(ri.getCollection()).put("minShardSize", riSize.doubleValue());
}
}
}
nodeStat.put("replicas", collReplicas);
});
}
// turn the accumulated leader index sizes into an average shard size per collection
for (Map<String, Number> perColl : collStats.values()) {
Double avg = (Double)perColl.get("avgShardSize");
if (avg != null) {
avg = avg / ((Number)perColl.get("activeShards")).doubleValue();
perColl.put("avgShardSize", avg);
}
}
Map<String, Object> stats = new LinkedHashMap<>();
results.put("STATISTICS", stats);
stats.put("nodeStats", nodeStats);
stats.put("collectionStats", collStats);
}
if (withSuggestions) {
results.put("SUGGESTIONS", suggestions);
}
if (!withSortedNodes) {
diagnostics.remove("sortedNodes");
}
if (withDiagnostics) {
results.put("DIAGNOSTICS", diagnostics);
}
String data = Utils.toJSONString(results);
if (redact) {
data = RedactionUtils.redactNames(redactCollections, COLL_REDACTION_PREFIX, data);
data = RedactionUtils.redactNames(redactNames, NODE_REDACTION_PREFIX, data);
}
stdout.println(data);
}
}
}
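
With -all, the JSON printed at the end carries the sections assembled above, roughly of the form:

  {
    "CLUSTERSTATE": { "znodeVersion": ..., "liveNodes": [...], "collections": {...} },
    "STATISTICS":   { "nodeStats": {...}, "collectionStats": {...} },
    "SUGGESTIONS":  [ ... ],
    "DIAGNOSTICS":  { ... }
  }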
/**
* Get the status of a Solr server.
*/


@@ -351,6 +351,10 @@ public class Row implements MapWriter {
return cells;
}
public boolean isLive() {
return isLive;
}
public void forEachReplica(Consumer<ReplicaInfo> consumer) {
forEachReplica(collectionVsShardVsReplicas, consumer);
}