SOLR-12845: Add a default autoscaling cluster policy.

This commit is contained in:
Andrzej Bialecki 2020-04-22 15:34:12 +02:00
parent fbcb6ce64a
commit 5d60ff4613
16 changed files with 135 additions and 23 deletions

View File

@ -89,6 +89,8 @@ Improvements
* SOLR-14396: TaggerRequestHandler should not error on an empty index (Trey Grainger)
* SOLR-12845: Add a default autoscaling cluster policy. (shalin, varunthacker, ab)
Optimizations
---------------------
* SOLR-8306: Do not collect expand documents when expand.rows=0 (Marshall Sanders, Amelia Henderson)

View File

@ -298,7 +298,7 @@ public class Assign {
// if no autoscaling configuration exists then obviously we cannot use the policy framework
if (autoScalingConfig.getPolicy().isEmpty()) return false;
// do custom preferences exist
if (!autoScalingConfig.getPolicy().isEmptyPreferences()) return true;
if (!autoScalingConfig.getPolicy().hasEmptyPreferences()) return true;
// does a cluster policy exist
if (!autoScalingConfig.getPolicy().getClusterPolicy().isEmpty()) return true;
// finally we check if the current collection has a policy

View File

@ -33,6 +33,7 @@ import org.apache.solr.client.solrj.cloud.DistribStateManager;
import org.apache.solr.client.solrj.cloud.autoscaling.AutoScalingConfig;
import org.apache.solr.client.solrj.cloud.autoscaling.BadVersionException;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.Policy;
import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType;
import org.apache.solr.common.AlreadyClosedException;
import org.apache.solr.common.SolrCloseable;
@ -143,7 +144,8 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
break;
}
AutoScalingConfig autoScalingConfig = cloudManager.getDistribStateManager().getAutoScalingConfig();
AutoScalingConfig updatedConfig = withAutoAddReplicasTrigger(autoScalingConfig);
AutoScalingConfig updatedConfig = withDefaultPolicy(autoScalingConfig);
updatedConfig = withAutoAddReplicasTrigger(updatedConfig);
updatedConfig = withScheduledMaintenanceTrigger(updatedConfig);
if (updatedConfig.equals(autoScalingConfig)) break;
log.debug("Adding .auto_add_replicas and .scheduled_maintenance triggers");
@ -343,6 +345,15 @@ public class OverseerTriggerThread implements Runnable, SolrCloseable {
}
}
private AutoScalingConfig withDefaultPolicy(AutoScalingConfig autoScalingConfig) {
Policy policy = autoScalingConfig.getPolicy();
if (policy.hasEmptyClusterPolicy()) {
policy = policy.withClusterPolicy(Policy.DEFAULT_CLUSTER_POLICY);
autoScalingConfig = autoScalingConfig.withPolicy(policy);
}
return autoScalingConfig;
}
private AutoScalingConfig withAutoAddReplicasTrigger(AutoScalingConfig autoScalingConfig) {
Map<String, Object> triggerProps = AutoScaling.AUTO_ADD_REPLICAS_TRIGGER_PROPS;
return withDefaultTrigger(triggerProps, autoScalingConfig);

View File

@ -19,6 +19,7 @@ package org.apache.solr.cloud.autoscaling.sim;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@ -233,6 +234,11 @@ public class SimNodeStateProvider implements NodeStateProvider {
return nodeValues;
}
/** Get all values for a selected node. */
public Map<String, Object> simGetNodeValues(String node) {
return nodeValues.getOrDefault(node, Collections.emptyMap());
}
private void saveRoles() {
final Map<String, Set<String>> roles = new HashMap<>();
nodeValues.forEach((n, values) -> {
@ -319,7 +325,10 @@ public class SimNodeStateProvider implements NodeStateProvider {
if (values == null) {
return result;
}
result.putAll(values.entrySet().stream().filter(e -> tags.contains(e.getKey())).collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue())));
result.putAll(values.entrySet().stream()
.filter(e -> tags.contains(e.getKey()))
.filter(e -> e.getValue() != null)
.collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue())));
return result;
}

View File

@ -803,7 +803,9 @@ public class SimScenario implements AutoCloseable {
values.put(key, val);
}
for (String node : nodes) {
scenario.cluster.getSimNodeStateProvider().simSetNodeValues(node, values);
Map<String, Object> newValues = new HashMap<>(scenario.cluster.getSimNodeStateProvider().simGetNodeValues(node));
newValues.putAll(values);
scenario.cluster.getSimNodeStateProvider().simSetNodeValues(node, newValues);
}
}
}

View File

@ -72,12 +72,12 @@ public class TestUtilizeNode extends SolrCloudTestCase {
@Test
public void test() throws Exception {
int REPLICATION = 2;
cluster.waitForAllNodes(5);
String coll = "utilizenodecoll";
CloudSolrClient cloudClient = cluster.getSolrClient();
log.info("Creating Collection...");
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(coll, "conf1", 2, REPLICATION)
CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(coll, "conf1", 2, 2)
.setMaxShardsPerNode(2);
cloudClient.request(create);
@ -129,10 +129,6 @@ public class TestUtilizeNode extends SolrCloudTestCase {
cloudClient.request(new CollectionAdminRequest.UtilizeNode(jettyY.getNodeName()));
assertSomeReplicas("jettyY should now be utilized: ", coll, jettyY);
assertNoReplicas("jettyX should no longer be utilized: ", coll, jettyX);
}
/**

View File

@ -85,6 +85,9 @@ public class TestPolicyCloud extends SolrCloudTestCase {
cluster.deleteAllCollections();
cluster.getSolrClient().getZkStateReader().getZkClient().setData(ZkStateReader.SOLR_AUTOSCALING_CONF_PATH,
"{}".getBytes(StandardCharsets.UTF_8), true);
// remove default policy
String commands = "{set-cluster-policy : []}";
cluster.getSolrClient().request(AutoScalingRequest.create(SolrRequest.METHOD.POST, commands));
}
public void testCreateCollection() throws Exception {

View File

@ -100,6 +100,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
// disable metrics history collection
cluster.disableMetricsHistory();
// turn off the default policy to avoid slowdowns due to the costly #EQUAL rules
CloudTestUtils.assertAutoScalingRequest(cluster, "{'set-cluster-policy': []}");
// disable .scheduled_maintenance (once it exists)
CloudTestUtils.waitForTriggerToBeScheduled(cluster, ".scheduled_maintenance");
CloudTestUtils.suspendTrigger(cluster, ".scheduled_maintenance");
@ -297,6 +299,8 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
}
@Test
// impossible to complete due to the slowness of policy calculations
@AwaitsFix( bugUrl = "https://issues.apache.org/jira/browse/SOLR-14275")
public void testAddNode() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
assertAutoScalingRequest

View File

@ -63,6 +63,9 @@ public class TestSimPolicyCloud extends SimSolrCloudTestCase {
@Before
public void setupCluster() throws Exception {
configureCluster(5, TimeSource.get("simTime:50"));
// reset autoscaling policy to empty
String commands = "{set-cluster-policy : []}";
cluster.simGetSolrClient().request(AutoScalingRequest.create(SolrRequest.METHOD.POST, commands));
}
@After

View File

@ -64,6 +64,7 @@ public class TestSimScenario extends SimSolrCloudTestCase {
String testSuggestionsScenario =
"create_cluster numNodes=2\n" +
"load_autoscaling json={'cluster-policy':[]}\n" +
"solr_request /admin/collections?action=CREATE&autoAddReplicas=true&name=testCollection&numShards=2&replicationFactor=2&maxShardsPerNode=2\n" +
"wait_collection collection=testCollection&shards=2&replicas=2\n" +
"ctx_set key=myNode&value=${_random_node_}\n" +
@ -118,6 +119,7 @@ public class TestSimScenario extends SimSolrCloudTestCase {
String indexingScenario =
"create_cluster numNodes=100\n" +
"load_autoscaling json={'cluster-policy':[]}\n" +
"solr_request /admin/collections?action=CREATE&autoAddReplicas=true&name=testCollection&numShards=2&replicationFactor=2&maxShardsPerNode=2\n" +
"wait_collection collection=testCollection&shards=2&replicas=2\n" +
"solr_request /admin/autoscaling?httpMethod=POST&stream.body=" +
@ -141,6 +143,7 @@ public class TestSimScenario extends SimSolrCloudTestCase {
String splitShardScenario =
"create_cluster numNodes=2\n" +
"load_autoscaling json={'cluster-policy':[]}\n" +
"solr_request /admin/collections?action=CREATE&name=testCollection&numShards=2&replicationFactor=2&maxShardsPerNode=5\n" +
"wait_collection collection=testCollection&shards=2&replicas=2\n" +
"set_shard_metrics collection=testCollection&shard=shard1&INDEX.sizeInBytes=1000000000\n" +

View File

@ -65,6 +65,27 @@ There are many metrics on which the rule can be based, e.g., system load average
When a node, shard, or collection does not satisfy a policy rule, we call it a *violation*. By default, cluster management operations will fail if there is even one violation. You can allow operations to succeed in the face of a violation by marking the corresponding rule with <<solrcloud-autoscaling-policy-preferences.adoc#rule-strictness,`"strict":false`>>. When you do this, Solr ensures that cluster management operations minimize the number of violations.
The default cluster policy, if none is specified, is the following:
[source,json]
----
[
{ "replica" : "<2", "shard" : "#EACH", "node" : "#ANY", "strict" : false},
{ "replica" : "#EQUAL", "node" : "#ANY", "strict" : false},
{ "cores" : "#EQUAL", "node" : "#ANY", "strict" : false}
]
----
These rules mean that:
* Each shard should not have more than one replica on the same node, if possible (strict: false).
* Each collection's replicas should be equally distributed among nodes.
* All cores should be equally distributed among nodes.
NOTE: You can remove this default policy by specifying an empty rule-set in the autoscaling
admin request, like this: `{set-cluster-policy : []}`.
Solr also supports <<solrcloud-autoscaling-policy-preferences.adoc#collection-specific-policy,collection-specific policies>>, which operate in tandem with the cluster policy.
== Triggers

View File

@ -117,7 +117,7 @@ public class Clause implements MapWriter, Comparable<Clause> {
Optional<String> globalTagName = m.keySet().stream().filter(Policy.GLOBAL_ONLY_TAGS::contains).findFirst();
if (globalTagName.isPresent()) {
globalTag = parse(globalTagName.get(), m);
if (m.size() > 2) {
if (m.size() > 3) {
throw new RuntimeException("Only one extra tag supported for the tag " + globalTagName.get() + " in " + toJSONString(m));
}
tag = parse(m.keySet().stream()
@ -677,7 +677,8 @@ public class Clause implements MapWriter, Comparable<Clause> {
for (Row r : session.matrix) {
computedValueEvaluator.node = r.node;
SealedClause sealedClause = getSealedClause(computedValueEvaluator);
if (!sealedClause.getGlobalTag().isPass(r)) {
// check only live nodes
if (r.isLive() && !sealedClause.getGlobalTag().isPass(r)) {
ctx.resetAndAddViolation(r.node, null, new Violation(sealedClause, null, null, r.node, r.getVal(sealedClause.globalTag.name),
sealedClause.globalTag.delta(r.getVal(globalTag.name)), r.node));
addViolatingReplicasForGroup(sealedClause.globalTag, computedValueEvaluator, ctx, Type.CORES.tagName, r.node, ctx.currentViolation, session.matrix);

View File

@ -18,6 +18,7 @@
package org.apache.solr.client.solrj.cloud.autoscaling;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
@ -86,6 +87,18 @@ public class Policy implements MapWriter {
new Preference((Map<String, Object>) Utils.fromJSONString("{minimize : cores, precision:1}")),
new Preference((Map<String, Object>) Utils.fromJSONString("{maximize : freedisk}"))));
public static final List<Map<String, Object>> DEFAULT_CLUSTER_POLICY_JSON = Collections.unmodifiableList(
Arrays.asList(
Utils.makeMap("replica","<2", "shard","#EACH", "node", "#ANY", "strict", "false"),
Utils.makeMap("replica", "#EQUAL", "node", "#ANY", "strict", "false"),
Utils.makeMap("cores", "#EQUAL", "node","#ANY", "strict", "false")
)
);
public static final List<Clause> DEFAULT_CLUSTER_POLICY = DEFAULT_CLUSTER_POLICY_JSON.stream()
.map(Clause::create)
.collect(collectingAndThen(toList(), Collections::unmodifiableList));
/**
* These parameters are always fetched for all nodes regardless of whether they are used in preferences or not
*/
@ -107,6 +120,12 @@ public class Policy implements MapWriter {
*/
private final boolean emptyPreferences;
/**
* True if cluster policy was originally empty, false otherwise. It is used to figure out if the
* current policy was implicitly added or not.
*/
final boolean emptyClusterPolicy;
public Policy() {
this(Collections.emptyMap());
}
@ -134,7 +153,12 @@ public class Policy implements MapWriter {
final SortedSet<String> paramsOfInterest = new TreeSet<>(DEFAULT_PARAMS_OF_INTEREST);
clusterPreferences.forEach(preference -> paramsOfInterest.add(preference.name.toString()));
List<String> newParams = new ArrayList<>(paramsOfInterest);
clusterPolicy = ((List<Map<String, Object>>) jsonMap.getOrDefault(CLUSTER_POLICY, emptyList())).stream()
// if json map has CLUSTER_POLICY and even if its size is 0, we consider it as a custom cluster policy
// and do not add the implicit policy clauses
emptyClusterPolicy = !jsonMap.containsKey(CLUSTER_POLICY);
clusterPolicy = ((List<Map<String, Object>>) jsonMap.getOrDefault(CLUSTER_POLICY, DEFAULT_CLUSTER_POLICY_JSON)).stream()
.map(Clause::create)
.filter(clause -> {
clause.addTags(newParams);
@ -144,7 +168,7 @@ public class Policy implements MapWriter {
for (String newParam : new ArrayList<>(newParams)) {
Type t = VariableBase.getTagType(newParam);
if(t != null && !t.associatedPerNodeValues.isEmpty()){
if(t != null && !t.associatedPerNodeValues.isEmpty()) {
for (String s : t.associatedPerNodeValues) {
if(!newParams.contains(s)) newParams.add(s);
}
@ -173,6 +197,7 @@ public class Policy implements MapWriter {
this.empty = policies == null && clusterPolicy == null && clusterPreferences == null;
this.zkVersion = version;
this.policies = policies != null ? Collections.unmodifiableMap(policies) : Collections.emptyMap();
this.emptyClusterPolicy = clusterPolicy == null;
this.clusterPolicy = clusterPolicy != null ? Collections.unmodifiableList(clusterPolicy) : Collections.emptyList();
this.emptyPreferences = clusterPreferences == null;
this.clusterPreferences = emptyPreferences ? DEFAULT_PREFERENCES : Collections.unmodifiableList(clusterPreferences);
@ -240,7 +265,7 @@ public class Policy implements MapWriter {
for (Preference p : clusterPreferences) iw.add(p);
});
}
if (!clusterPolicy.isEmpty()) {
if (!emptyClusterPolicy) {
ew.put(CLUSTER_POLICY, (IteratorWriter) iw -> {
for (Clause c : clusterPolicy) {
iw.add(c);
@ -307,11 +332,13 @@ public class Policy implements MapWriter {
PolicyHelper.writeNodes(ew, matrixCopy);
ew.put("config", matrix.get(0).session.getPolicy());
});
log.error("Exception! prefs = {}, recent r1 = {}, r2 = {}, matrix = {}",
StringWriter exc = new StringWriter();
e.printStackTrace(new PrintWriter(exc));
log.error("Exception during matrix sorting! prefs = {}, recent r1 = {}, r2 = {}, matrix = {}, exception={}",
clusterPreferences,
lastComparison[0].node,
lastComparison[1].node,
Utils.writeJson(m, new StringWriter(), true).toString());
Utils.writeJson(m, new StringWriter(), true).toString(), exc.toString());
} catch (IOException e1) {
//
}
@ -503,10 +530,17 @@ public class Policy implements MapWriter {
/**
* @return true if no preferences were specified by the user, false otherwise
*/
public boolean isEmptyPreferences() {
public boolean hasEmptyPreferences() {
return emptyPreferences;
}
/**
* @return true if no cluster policy was specified by the user, false otherwise
*/
public boolean hasEmptyClusterPolicy() {
return emptyClusterPolicy;
}
/*This stores the logical state of the system, given a policy and
* a cluster state.
*

View File

@ -68,7 +68,6 @@ public class Suggestion {
public boolean hasTimedOut() {
return session.cloudManager.getTimeSource().getTimeNs() >= endTime;
}
public boolean needMore() {

View File

@ -127,4 +127,5 @@
"minimize":"cores",
"precision":1}
,{
"maximize":"freedisk"}]}}}
"maximize":"freedisk"}],
"cluster-policy": []}}}

View File

@ -76,6 +76,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.nio.charset.StandardCharsets.UTF_8;
import static org.apache.solr.client.solrj.cloud.autoscaling.Policy.CLUSTER_POLICY;
import static org.apache.solr.client.solrj.cloud.autoscaling.Policy.CLUSTER_PREFERENCES;
import static org.apache.solr.client.solrj.cloud.autoscaling.TestPolicy2.loadFromResource;
import static org.apache.solr.client.solrj.cloud.autoscaling.Variable.Type.CORES;
@ -1303,6 +1304,7 @@ public class TestPolicy extends SolrTestCaseJ4 {
Map<String, Map> nodeValues = (Map<String, Map>) Utils.fromJSONString("{" +
"node1:{cores:2}," +
"node3:{cores:4}" +
"node2:{cores:2}" +
"}");
Policy policy = new Policy(new HashMap<>());
Suggester suggester = policy.createSession(getSolrCloudManager(nodeValues,
@ -1310,7 +1312,8 @@ public class TestPolicy extends SolrTestCaseJ4 {
.getSuggester(MOVEREPLICA)
.hint(Hint.COLL, "collection1")
.hint(Hint.COLL, "collection2")
.hint(Suggester.Hint.SRC_NODE, "node2");
.hint(Suggester.Hint.SRC_NODE, "node2")
.forceOperation(true);
SolrRequest op = suggester.getSuggestion();
assertNotNull(op);
assertEquals("collection2", op.getParams().get("collection"));
@ -1322,7 +1325,8 @@ public class TestPolicy extends SolrTestCaseJ4 {
.getSuggester(MOVEREPLICA)
.hint(Hint.COLL, "collection1")
.hint(Hint.COLL, "collection2")
.hint(Suggester.Hint.SRC_NODE, "node2");
.hint(Suggester.Hint.SRC_NODE, "node2")
.forceOperation(true);
op = suggester.getSuggestion();
assertNotNull(op);
assertEquals("collection2", op.getParams().get("collection"));
@ -2049,7 +2053,7 @@ public class TestPolicy extends SolrTestCaseJ4 {
}
public void testEmptyClusterState() {
String autoScaleJson = " {'policies':{'c1':[{" +
String autoScaleJson = " {'cluster-policy':[], 'policies':{'c1':[{" +
" 'replica':1," +
" 'shard':'#EACH'," +
" 'port':'50096'}]}}";
@ -3088,5 +3092,24 @@ public class TestPolicy extends SolrTestCaseJ4 {
// since the user explicitly added those preferences, they should be written by MapWriter
assertEquals(1, writtenKeys.size());
assertTrue(writtenKeys.contains(CLUSTER_PREFERENCES));
// reset
writtenKeys.clear();
// now we create a cluster policy that is intentionally empty which should prevent the implicit
// cluster policy from being written but should emit an empty key/val pair for cluster policy
policy = new Policy(Utils.makeMap(CLUSTER_POLICY, Collections.emptyList()));
// sanity checks
assertFalse(policy.isEmpty());
assertTrue(policy.hasEmptyPreferences());
assertFalse(policy.hasEmptyClusterPolicy());
policy.writeMap(new MapWriter.EntryWriter() {
@Override
public MapWriter.EntryWriter put(CharSequence k, Object v) throws IOException {
writtenKeys.add(k.toString());
return this;
}
});
assertEquals(1, writtenKeys.size());
assertTrue(writtenKeys.contains(CLUSTER_POLICY));
}
}