From 2c5b86b67374fc9e6ff4f5f8b18b61adce4b3e3a Mon Sep 17 00:00:00 2001 From: Andrzej Bialecki Date: Mon, 1 Mar 2021 15:21:54 +0100 Subject: [PATCH] SOLR-15130: Support for per-collection replica placement node sets, a.k.a "node type" placements. --- solr/CHANGES.txt | 3 + .../api/collections/CreateCollectionCmd.java | 1 + .../apache/solr/cluster/SolrCollection.java | 2 +- .../placement/impl/AttributeFetcherImpl.java | 2 - .../impl/SimpleClusterAbstractionsImpl.java | 35 +++- .../plugins/AffinityPlacementConfig.java | 51 +++++- .../plugins/AffinityPlacementFactory.java | 95 ++++++----- .../solr/cluster/placement/Builders.java | 1 + .../placement/ClusterAbstractionsForTest.java | 12 ++ .../impl/PlacementPluginIntegrationTest.java | 49 +++++- .../plugins/AffinityPlacementFactoryTest.java | 152 ++++++++++++++++-- .../src/replica-placement-plugins.adoc | 49 +++++- 12 files changed, 389 insertions(+), 63 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 914dd5ed419..02a3b802da9 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -32,6 +32,9 @@ New Features * SOLR-14787: Payload check query parser now supports inequalities. (Kevin Watters, Gus Heck) +* SOLR-15130: Support for per-collection replica placement node sets, a.k.a "node type" + placements. 
(ab, ilan) + Improvements ---------------------- * LUCENE-8984: MoreLikeThis MLT is biased for uncommon fields (Andy Hind via Anshum Gupta) diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java index 51cb43b36d4..ecf6004ee99 100644 --- a/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/CreateCollectionCmd.java @@ -133,6 +133,7 @@ public class CreateCollectionCmd implements CollApiCmds.CollectionApiCommand { ZkStateReader zkStateReader = ccc.getZkStateReader(); + // this also creates the collection zk node as a side-effect CollectionHandlingUtils.createConfNode(stateManager, configName, collectionName); Map collectionParams = new HashMap<>(); diff --git a/solr/core/src/java/org/apache/solr/cluster/SolrCollection.java b/solr/core/src/java/org/apache/solr/cluster/SolrCollection.java index ea7ea459de2..62130c762e5 100644 --- a/solr/core/src/java/org/apache/solr/cluster/SolrCollection.java +++ b/solr/core/src/java/org/apache/solr/cluster/SolrCollection.java @@ -62,7 +62,7 @@ public interface SolrCollection { /** *

Returns the value of a custom property name set on the {@link SolrCollection} or {@code null} when no such - * property was set. Properties are set through the Collection API. See for example {@code COLLECTIONPROP} in the Solr reference guide. + * property was set. Properties are set through the Collection API. See for example {@code MODIFYCOLLECTION} in the Solr reference guide. * *

{@link PlacementPlugin} related note:

*

Using custom properties in conjunction with ad hoc {@link PlacementPlugin} code allows customizing placement diff --git a/solr/core/src/java/org/apache/solr/cluster/placement/impl/AttributeFetcherImpl.java b/solr/core/src/java/org/apache/solr/cluster/placement/impl/AttributeFetcherImpl.java index e3a7a3f9721..af8e72e461c 100644 --- a/solr/core/src/java/org/apache/solr/cluster/placement/impl/AttributeFetcherImpl.java +++ b/solr/core/src/java/org/apache/solr/cluster/placement/impl/AttributeFetcherImpl.java @@ -90,8 +90,6 @@ public class AttributeFetcherImpl implements AttributeFetcher { return this; } - private static final double GB = 1024 * 1024 * 1024; - @Override public AttributeValues fetchAttributes() { diff --git a/solr/core/src/java/org/apache/solr/cluster/placement/impl/SimpleClusterAbstractionsImpl.java b/solr/core/src/java/org/apache/solr/cluster/placement/impl/SimpleClusterAbstractionsImpl.java index e26a3744e70..48f3f506104 100644 --- a/solr/core/src/java/org/apache/solr/cluster/placement/impl/SimpleClusterAbstractionsImpl.java +++ b/solr/core/src/java/org/apache/solr/cluster/placement/impl/SimpleClusterAbstractionsImpl.java @@ -27,6 +27,7 @@ import org.apache.solr.cluster.*; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.params.CollectionAdminParams; import org.apache.solr.common.util.Pair; import javax.annotation.Nonnull; @@ -179,9 +180,18 @@ class SimpleClusterAbstractionsImpl { return shards.keySet(); } + @Override + public String toString() { + return "SolrCollectionImpl{" + + "collectionName='" + collectionName + '\'' + + ", shards=" + shards.keySet() + + ", docCollection=" + docCollection + + '}'; + } + @Override public String getCustomProperty(String customPropertyName) { - return docCollection.getStr(customPropertyName); + return docCollection.getStr(CollectionAdminParams.PROPERTY_PREFIX + customPropertyName); } } @@ 
-292,6 +302,17 @@ class SimpleClusterAbstractionsImpl { public int hashCode() { return Objects.hash(shardName, collection, shardState); } + + @Override + public String toString() { + return "ShardImpl{" + + "shardName='" + shardName + '\'' + + ", collection='" + collection.getName() + '\'' + + ", shardState=" + shardState + + ", replicas=" + replicas.size() + + ", leader=" + leader + + '}'; + } } @@ -432,5 +453,17 @@ class SimpleClusterAbstractionsImpl { public int hashCode() { return Objects.hash(replicaName, coreName, shard, replicaType, replicaState, node); } + + @Override + public String toString() { + return "ReplicaImpl{" + + "replicaName='" + replicaName + '\'' + + ", coreName='" + coreName + '\'' + + ", shard='" + shard.getShardName() + '\'' + + ", replicaType=" + replicaType + + ", replicaState=" + replicaState + + ", node='" + node + '\'' + + '}'; + } } } diff --git a/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementConfig.java b/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementConfig.java index d9579bcb83a..7e7643c34f0 100644 --- a/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementConfig.java +++ b/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementConfig.java @@ -34,6 +34,36 @@ public class AffinityPlacementConfig implements PlacementPluginConfig { public static final AffinityPlacementConfig DEFAULT = new AffinityPlacementConfig(DEFAULT_MINIMAL_FREE_DISK_GB, DEFAULT_PRIORITIZED_FREE_DISK_GB); + /** + *

Name of the system property on a node indicating which (public cloud) Availability Zone that node is in. The value + * is any string, different strings denote different availability zones. + * + *

Nodes on which this system property is not defined are considered being in the same Availability Zone + * {@link #UNDEFINED_AVAILABILITY_ZONE} (hopefully the value of this constant is not the name of a real Availability Zone :). + */ + public static final String AVAILABILITY_ZONE_SYSPROP = "availability_zone"; + + /** + *

Name of the system property on a node indicating the type of replicas allowed on that node. + * The value of that system property is a comma separated list or a single string of value names of + * {@link org.apache.solr.cluster.Replica.ReplicaType} (case insensitive). If that property is not defined, that node is + * considered accepting all replica types (i.e. undefined is equivalent to {@code "NRT,Pull,tlog"}). + */ + public static final String REPLICA_TYPE_SYSPROP = "replica_type"; + + /** + * Name of the system property on a node indicating the arbitrary "node type" (for example, a node + * more suitable for the indexing work load could be labeled as node_type: indexing). + * The value of this system property is a comma-separated list or a single label (labels must not + * contain commas), which represent a logical OR for the purpose of placement. + */ + public static final String NODE_TYPE_SYSPROP = "node_type"; + + /** + * This is the "AZ" name for nodes that do not define an AZ. Should not match a real AZ name (I think we're safe) + */ + public static final String UNDEFINED_AVAILABILITY_ZONE = "uNd3f1NeD"; + /** * If a node has strictly less GB of free disk than this value, the node is excluded from assignment decisions. * Set to 0 or less to disable. @@ -59,6 +89,16 @@ public class AffinityPlacementConfig implements PlacementPluginConfig { @JsonProperty public Map withCollection; + /** + * This property defines an additional constraint that the collection must be placed + * only on the nodes of the correct "node type". The nodes can specify what type they are (one or + * several types, using a comma-separated list) by defining the {@link #NODE_TYPE_SYSPROP} system property. + * Similarly, the plugin can be configured to specify that a collection (key in the map) must be placed on one or more node + * types (value in the map, using a comma-separated list of acceptable node types). 
+ */ + @JsonProperty + public Map collectionNodeType; + /** * Zero-arguments public constructor required for deserialization - don't use. */ @@ -72,7 +112,7 @@ public class AffinityPlacementConfig implements PlacementPluginConfig { * @param prioritizedFreeDiskGB prioritized free disk GB. */ public AffinityPlacementConfig(long minimalFreeDiskGB, long prioritizedFreeDiskGB) { - this(minimalFreeDiskGB, prioritizedFreeDiskGB, Map.of()); + this(minimalFreeDiskGB, prioritizedFreeDiskGB, Map.of(), Map.of()); } /** @@ -82,11 +122,18 @@ public class AffinityPlacementConfig implements PlacementPluginConfig { * @param withCollection configuration of co-located collections: keys are * primary collection names and values are secondary * collection names. + * @param collectionNodeType configuration of required node types per collection. + * Keys are collection names and values are comma-separated + * lists of required node types. */ - public AffinityPlacementConfig(long minimalFreeDiskGB, long prioritizedFreeDiskGB, Map withCollection) { + public AffinityPlacementConfig(long minimalFreeDiskGB, long prioritizedFreeDiskGB, + Map withCollection, + Map collectionNodeType) { this.minimalFreeDiskGB = minimalFreeDiskGB; this.prioritizedFreeDiskGB = prioritizedFreeDiskGB; Objects.requireNonNull(withCollection); + Objects.requireNonNull(collectionNodeType); this.withCollection = withCollection; + this.collectionNodeType = collectionNodeType; } } diff --git a/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactory.java b/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactory.java index eaec4ab2edd..6f1ddc9bff6 100644 --- a/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactory.java +++ b/solr/core/src/java/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactory.java @@ -23,6 +23,7 @@ import org.apache.solr.cluster.*; import org.apache.solr.cluster.placement.*; import 
org.apache.solr.cluster.placement.impl.NodeMetricImpl; import org.apache.solr.common.util.Pair; +import org.apache.solr.common.util.StrUtils; import org.apache.solr.common.util.SuppressForbidden; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -114,28 +115,6 @@ import java.util.stream.Collectors; public class AffinityPlacementFactory implements PlacementPluginFactory { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - /** - *

Name of the system property on a node indicating which (public cloud) Availability Zone that node is in. The value - * is any string, different strings denote different availability zones. - * - *

Nodes on which this system property is not defined are considered being in the same Availability Zone - * {@link #UNDEFINED_AVAILABILITY_ZONE} (hopefully the value of this constant is not the name of a real Availability Zone :). - */ - public static final String AVAILABILITY_ZONE_SYSPROP = "availability_zone"; - - /** - *

Name of the system property on a node indicating the type of replicas allowed on that node. - * The value of that system property is a comma separated list or a single string of value names of - * {@link org.apache.solr.cluster.Replica.ReplicaType} (case insensitive). If that property is not defined, that node is - * considered accepting all replica types (i.e. undefined is equivalent to {@code "NRT,Pull,tlog"}). - */ - public static final String REPLICA_TYPE_SYSPROP = "replica_type"; - - /** - * This is the "AZ" name for nodes that do not define an AZ. Should not match a real AZ name (I think we're safe) - */ - public static final String UNDEFINED_AVAILABILITY_ZONE = "uNd3f1NeD"; - private AffinityPlacementConfig config = AffinityPlacementConfig.DEFAULT; /** @@ -149,7 +128,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory> colocatedWith; + private final Map> nodeTypes; + private final Random replicaPlacementRandom = new Random(); // ok even if random sequence is predictable. /** * The factory has decoded the configuration for the plugin instance and passes it the parameters it needs. 
*/ - private AffinityPlacementPlugin(long minimalFreeDiskGB, long prioritizedFreeDiskGB, Map withCollections) { + private AffinityPlacementPlugin(long minimalFreeDiskGB, long prioritizedFreeDiskGB, + Map withCollections, + Map collectionNodeTypes) { this.minimalFreeDiskGB = minimalFreeDiskGB; this.prioritizedFreeDiskGB = prioritizedFreeDiskGB; Objects.requireNonNull(withCollections, "withCollections must not be null"); + Objects.requireNonNull(collectionNodeTypes, "collectionNodeTypes must not be null"); this.withCollections = withCollections; if (withCollections.isEmpty()) { colocatedWith = Map.of(); @@ -197,6 +181,18 @@ public class AffinityPlacementFactory implements PlacementPluginFactory(); + collectionNodeTypes.forEach((coll, typesString) -> { + List types = StrUtils.splitSmart(typesString, ',', true); + if (!types.isEmpty()) { + nodeTypes.put(coll, new HashSet<>(types)); + } + }); + } + // We make things reproducible in tests by using test seed if any String seed = System.getProperty("tests.seed"); if (seed != null) { @@ -210,16 +206,22 @@ public class AffinityPlacementFactory implements PlacementPluginFactory nodes = request.getTargetNodes(); SolrCollection solrCollection = request.getCollection(); - nodes = filterNodesWithCollection(placementContext.getCluster(), request, nodes); - // Request all needed attributes AttributeFetcher attributeFetcher = placementContext.getAttributeFetcher(); - attributeFetcher.requestNodeSystemProperty(AVAILABILITY_ZONE_SYSPROP).requestNodeSystemProperty(REPLICA_TYPE_SYSPROP); attributeFetcher - .requestNodeMetric(NodeMetricImpl.NUM_CORES) - .requestNodeMetric(NodeMetricImpl.FREE_DISK_GB); + .requestNodeSystemProperty(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP) + .requestNodeSystemProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP) + .requestNodeSystemProperty(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP); + attributeFetcher + .requestNodeMetric(NodeMetricImpl.NUM_CORES) + 
.requestNodeMetric(NodeMetricImpl.FREE_DISK_GB); attributeFetcher.fetchFrom(nodes); final AttributeValues attrValues = attributeFetcher.fetchAttributes(); + // filter out nodes that don't meet the `withCollection` constraint + nodes = filterNodesWithCollection(placementContext.getCluster(), request, attrValues, nodes); + // filter out nodes that don't match the "node types" specified in the collection props + nodes = filterNodesByNodeType(placementContext.getCluster(), request, attrValues, nodes); + // Split the set of nodes into 3 sets of nodes accepting each replica type (sets can overlap if nodes accept multiple replica types) // These subsets sets are actually maps, because we capture the number of cores (of any replica type) present on each node. @@ -359,13 +361,13 @@ public class AffinityPlacementFactory implements PlacementPluginFactory nodeAz = attrValues.getSystemProperty(n, AVAILABILITY_ZONE_SYSPROP); + Optional nodeAz = attrValues.getSystemProperty(n, AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP); // All nodes with undefined AZ will be considered part of the same AZ. 
This also works for deployments that do not care about AZ's - return nodeAz.orElse(UNDEFINED_AVAILABILITY_ZONE); + return nodeAz.orElse(AffinityPlacementConfig.UNDEFINED_AVAILABILITY_ZONE); } /** @@ -399,7 +401,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory>, Map> getNodesPerReplicaType(Set nodes, final AttributeValues attrValues) { @@ -437,7 +439,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory filterNodesWithCollection(Cluster cluster, PlacementRequest request, Set initialNodes) throws PlacementException { + private Set filterNodesWithCollection(Cluster cluster, PlacementRequest request, AttributeValues attributeValues, Set initialNodes) throws PlacementException { // if there's a `withCollection` constraint for this collection then remove nodes // that are not eligible String withCollectionName = withCollections.get(request.getCollection().getName()); @@ -658,6 +660,27 @@ public class AffinityPlacementFactory implements PlacementPluginFactory filterNodesByNodeType(Cluster cluster, PlacementRequest request, AttributeValues attributeValues, Set initialNodes) throws PlacementException { + Set collNodeTypes = nodeTypes.get(request.getCollection().getName()); + if (collNodeTypes == null) { + // no filtering by node type + return initialNodes; + } + Set filteredNodes = initialNodes.stream() + .filter(n -> { + Optional nodePropOpt = attributeValues.getSystemProperty(n, AffinityPlacementConfig.NODE_TYPE_SYSPROP); + if (!nodePropOpt.isPresent()) { + return false; + } + Set nodeTypes = new HashSet<>(StrUtils.splitSmart(nodePropOpt.get(), ',')); + nodeTypes.retainAll(collNodeTypes); + return !nodeTypes.isEmpty(); + }).collect(Collectors.toSet()); + if (filteredNodes.isEmpty()) { + throw new PlacementException("There are no nodes with types: " + collNodeTypes + " expected by collection " + request.getCollection().getName()); + } + return filteredNodes; + } /** * Comparator implementing the placement strategy based on 
free space and number of cores: we want to place new replicas * on nodes with the less number of cores, but only if they do have enough disk space (expressed as a threshold value). diff --git a/solr/core/src/test/org/apache/solr/cluster/placement/Builders.java b/solr/core/src/test/org/apache/solr/cluster/placement/Builders.java index 43de56ede0d..d31ba45afe0 100644 --- a/solr/core/src/test/org/apache/solr/cluster/placement/Builders.java +++ b/solr/core/src/test/org/apache/solr/cluster/placement/Builders.java @@ -55,6 +55,7 @@ public class Builders { NodeBuilder nodeBuilder = new NodeBuilder().setNodeName("node_" + n); // Default name, can be changed nodeBuilder.setTotalDiskGB(10000.0); nodeBuilder.setFreeDiskGB(5000.0); + nodeBuilder.setCoreCount(0); nodeBuilders.add(nodeBuilder); } return this; diff --git a/solr/core/src/test/org/apache/solr/cluster/placement/ClusterAbstractionsForTest.java b/solr/core/src/test/org/apache/solr/cluster/placement/ClusterAbstractionsForTest.java index 771f148e9ee..1260a1063c2 100644 --- a/solr/core/src/test/org/apache/solr/cluster/placement/ClusterAbstractionsForTest.java +++ b/solr/core/src/test/org/apache/solr/cluster/placement/ClusterAbstractionsForTest.java @@ -312,5 +312,17 @@ class ClusterAbstractionsForTest { public int hashCode() { return Objects.hash(replicaName, coreName, shard, replicaType, replicaState, node); } + + @Override + public String toString() { + return "ReplicaImpl{" + + "replicaName='" + replicaName + '\'' + + ", coreName='" + coreName + '\'' + + ", shard='" + shard + '\'' + + ", replicaType=" + replicaType + + ", replicaState=" + replicaState + + ", node=" + node + + '}'; + } } } diff --git a/solr/core/src/test/org/apache/solr/cluster/placement/impl/PlacementPluginIntegrationTest.java b/solr/core/src/test/org/apache/solr/cluster/placement/impl/PlacementPluginIntegrationTest.java index dd89bf9de6c..f2140e74a20 100644 --- 
a/solr/core/src/test/org/apache/solr/cluster/placement/impl/PlacementPluginIntegrationTest.java +++ b/solr/core/src/test/org/apache/solr/cluster/placement/impl/PlacementPluginIntegrationTest.java @@ -241,7 +241,7 @@ public class PlacementPluginIntegrationTest extends SolrCloudTestCase { PluginMeta plugin = new PluginMeta(); plugin.name = PlacementPluginFactory.PLUGIN_NAME; plugin.klass = AffinityPlacementFactory.class.getName(); - plugin.config = new AffinityPlacementConfig(1, 2, Map.of(COLLECTION, SECONDARY_COLLECTION)); + plugin.config = new AffinityPlacementConfig(1, 2, Map.of(COLLECTION, SECONDARY_COLLECTION), Map.of()); V2Request req = new V2Request.Builder("/cluster/plugin") .forceV2(true) .POST() @@ -302,6 +302,53 @@ public class PlacementPluginIntegrationTest extends SolrCloudTestCase { } } + // this functionality relies on System.getProperty which we cannot set on individual + // nodes in a mini cluster. For this reason this test is very basic - see + // AffinityPlacementFactoryTest for a more comprehensive test. + @Test + public void testNodeTypeIntegration() throws Exception { + // this functionality relies on System.getProperty which we cannot set on individual + // nodes in a mini cluster. 
+ System.clearProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP); + + String collectionName = "nodeTypeCollection"; + + PlacementPluginFactory pluginFactory = cc.getPlacementPluginFactory(); + assertTrue("wrong type " + pluginFactory.getClass().getName(), pluginFactory instanceof DelegatingPlacementPluginFactory); + DelegatingPlacementPluginFactory wrapper = (DelegatingPlacementPluginFactory) pluginFactory; + Phaser phaser = new Phaser(); + wrapper.setDelegationPhaser(phaser); + + int version = phaser.getPhase(); + + PluginMeta plugin = new PluginMeta(); + plugin.name = PlacementPluginFactory.PLUGIN_NAME; + plugin.klass = AffinityPlacementFactory.class.getName(); + plugin.config = new AffinityPlacementConfig(1, 2, Map.of(), Map.of(collectionName, "type_0")); + V2Request req = new V2Request.Builder("/cluster/plugin") + .forceV2(true) + .POST() + .withPayload(singletonMap("add", plugin)) + .build(); + req.process(cluster.getSolrClient()); + + phaser.awaitAdvanceInterruptibly(version, 10, TimeUnit.SECONDS); + + try { + CollectionAdminResponse rsp = CollectionAdminRequest.createCollection(collectionName, "conf", 1, 3) + .process(cluster.getSolrClient()); + fail("should have failed due to no nodes with the types: " + rsp); + } catch (Exception e) { + assertTrue("should contain 'no nodes with types':" + e.toString(), + e.toString().contains("no nodes with types")); + } + System.setProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_0"); + CollectionAdminResponse rsp = CollectionAdminRequest.createCollection(collectionName, "conf", 1, 3) + .process(cluster.getSolrClient()); + + System.clearProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP); + } + @Test public void testAttributeFetcherImpl() throws Exception { CollectionAdminResponse rsp = CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 2) diff --git a/solr/core/src/test/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactoryTest.java 
b/solr/core/src/test/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactoryTest.java index 2fd02a08acf..fd35b4cec30 100644 --- a/solr/core/src/test/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactoryTest.java +++ b/solr/core/src/test/org/apache/solr/cluster/placement/plugins/AffinityPlacementFactoryTest.java @@ -28,7 +28,8 @@ import org.apache.solr.cluster.placement.Builders; import org.apache.solr.cluster.placement.impl.ModificationRequestImpl; import org.apache.solr.cluster.placement.impl.PlacementRequestImpl; import org.apache.solr.common.util.Pair; -import org.junit.BeforeClass; +import org.apache.solr.common.util.StrUtils; +import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,19 +47,23 @@ import java.util.stream.StreamSupport; public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private static PlacementPlugin plugin; + private PlacementPlugin plugin; private final static long MINIMAL_FREE_DISK_GB = 10L; private final static long PRIORITIZED_FREE_DISK_GB = 50L; private final static String secondaryCollectionName = "withCollection_secondary"; private final static String primaryCollectionName = "withCollection_primary"; - @BeforeClass - public static void setupPlugin() { - AffinityPlacementConfig config = new AffinityPlacementConfig( - MINIMAL_FREE_DISK_GB, - PRIORITIZED_FREE_DISK_GB, - Map.of(primaryCollectionName, secondaryCollectionName)); + static AffinityPlacementConfig defaultConfig = new AffinityPlacementConfig( + MINIMAL_FREE_DISK_GB, + PRIORITIZED_FREE_DISK_GB); + + @Before + public void setupPlugin() { + configurePlugin(defaultConfig); + } + + private void configurePlugin(AffinityPlacementConfig config) { AffinityPlacementFactory factory = new AffinityPlacementFactory(); factory.configure(config); plugin = factory.createPluginInstance(); @@ -297,8 +302,8 @@ 
public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 { acceptedReplicaType = NRT_REPLICA_TYPE; } - nodeBuilders.get(i).setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, az) - .setSysprop(AffinityPlacementFactory.REPLICA_TYPE_SYSPROP, acceptedReplicaType) + nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, az) + .setSysprop(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP, acceptedReplicaType) .setCoreCount(numcores) .setFreeDiskGB(freedisk); } @@ -349,7 +354,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 { Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(9); LinkedList nodeBuilders = clusterBuilder.getLiveNodeBuilders(); for (int i = 0; i < 9; i++) { - nodeBuilders.get(i).setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, "AZ" + (i / 3)) + nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, "AZ" + (i / 3)) .setCoreCount(i) .setFreeDiskGB((double)(PRIORITIZED_FREE_DISK_GB + 10)); } @@ -499,9 +504,9 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 { nodeBuilder.setCoreCount(0); nodeBuilder.setFreeDiskGB(100.0); if (i < NUM_NODES / 2) { - nodeBuilder.setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, "az1"); + nodeBuilder.setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, "az1"); } else { - nodeBuilder.setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, "az2"); + nodeBuilder.setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, "az2"); } } @@ -526,7 +531,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 { Map>> replicas = new HashMap<>(); AttributeValues attributeValues = placementContext.getAttributeFetcher().fetchAttributes(); for (ReplicaPlacement rp : pp.getReplicaPlacements()) { - Optional azOptional = attributeValues.getSystemProperty(rp.getNode(), AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP); + Optional azOptional = 
attributeValues.getSystemProperty(rp.getNode(), AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP); if (!azOptional.isPresent()) { fail("missing AZ sysprop for node " + rp.getNode()); } @@ -556,10 +561,10 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 { nodeBuilder.setCoreCount(0); nodeBuilder.setFreeDiskGB(100.0); if (i < NUM_NODES / 3 * 2) { - nodeBuilder.setSysprop(AffinityPlacementFactory.REPLICA_TYPE_SYSPROP, "Nrt, TlOg"); + nodeBuilder.setSysprop(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP, "Nrt, TlOg"); nodeBuilder.setSysprop("group", "one"); } else { - nodeBuilder.setSysprop(AffinityPlacementFactory.REPLICA_TYPE_SYSPROP, "Pull,foobar"); + nodeBuilder.setSysprop(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP, "Pull,foobar"); nodeBuilder.setSysprop("group", "two"); } } @@ -653,6 +658,12 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 { @Test public void testWithCollectionPlacement() throws Exception { + AffinityPlacementConfig config = new AffinityPlacementConfig( + MINIMAL_FREE_DISK_GB, + PRIORITIZED_FREE_DISK_GB, + Map.of(primaryCollectionName, secondaryCollectionName), Map.of()); + configurePlugin(config); + int NUM_NODES = 3; Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(NUM_NODES); Builders.CollectionBuilder collectionBuilder = Builders.newCollectionBuilder(secondaryCollectionName); @@ -695,6 +706,12 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 { @Test public void testWithCollectionModificationRejected() throws Exception { + AffinityPlacementConfig config = new AffinityPlacementConfig( + MINIMAL_FREE_DISK_GB, + PRIORITIZED_FREE_DISK_GB, + Map.of(primaryCollectionName, secondaryCollectionName), Map.of()); + configurePlugin(config); + int NUM_NODES = 2; Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(NUM_NODES); Builders.CollectionBuilder collectionBuilder = 
Builders.newCollectionBuilder(secondaryCollectionName); @@ -742,6 +759,109 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 { } } + @Test + public void testNodeType() throws Exception { + Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(9); + LinkedList nodeBuilders = clusterBuilder.getLiveNodeBuilders(); + for (int i = 0; i < 9; i++) { + nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_" + (i % 3)); + } + + String collectionName = "nodeTypeCollection"; + Builders.CollectionBuilder collectionBuilder = Builders.newCollectionBuilder(collectionName); + collectionBuilder.initializeShardsReplicas(1, 0, 0, 0, clusterBuilder.getLiveNodeBuilders()); + + // test single node type in collection + AffinityPlacementConfig config = new AffinityPlacementConfig( + MINIMAL_FREE_DISK_GB, + PRIORITIZED_FREE_DISK_GB, + Map.of(), Map.of(collectionName, "type_0")); + configurePlugin(config); + + clusterBuilder.addCollection(collectionBuilder); + + PlacementContext placementContext = clusterBuilder.buildPlacementContext(); + Map<String, Set<String>> nodeNamesByType = new HashMap<>(); + Cluster cluster = placementContext.getCluster(); + AttributeValues attributeValues = placementContext.getAttributeFetcher() + .requestNodeSystemProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP) + .fetchAttributes(); + placementContext.getCluster().getLiveNodes().forEach(n -> + nodeNamesByType + .computeIfAbsent(attributeValues.getSystemProperty(n, AffinityPlacementConfig.NODE_TYPE_SYSPROP).get(), type -> new HashSet<>()) + .add(n.getName()) + ); + SolrCollection collection = placementContext.getCluster().getCollection(collectionName); + PlacementRequestImpl placementRequest = new PlacementRequestImpl(collection, + Set.of("shard1"), placementContext.getCluster().getLiveNodes(), 3, 0, 0); + + PlacementPlan pp = plugin.computePlacement(placementRequest, placementContext); + assertEquals("expected 3 placements: " + pp, 3, 
pp.getReplicaPlacements().size()); + Set<String> type0nodes = nodeNamesByType.get("type_0"); + Set<String> type1nodes = nodeNamesByType.get("type_1"); + Set<String> type2nodes = nodeNamesByType.get("type_2"); + + for (ReplicaPlacement p : pp.getReplicaPlacements()) { + assertTrue(type0nodes.contains(p.getNode().getName())); + } + + // test 2 node types in collection + config = new AffinityPlacementConfig( + MINIMAL_FREE_DISK_GB, + PRIORITIZED_FREE_DISK_GB, + Map.of(), Map.of(collectionName, "type_0,type_1")); + configurePlugin(config); + + placementContext = clusterBuilder.buildPlacementContext(); + collection = placementContext.getCluster().getCollection(collectionName); + placementRequest = new PlacementRequestImpl(collection, + Set.of("shard1"), placementContext.getCluster().getLiveNodes(), 6, 0, 0); + + pp = plugin.computePlacement(placementRequest, placementContext); + assertEquals("expected 6 placements: " + pp, 6, pp.getReplicaPlacements().size()); + for (ReplicaPlacement p : pp.getReplicaPlacements()) { + assertTrue(type0nodes.contains(p.getNode().getName()) || + type1nodes.contains(p.getNode().getName())); + } + + // test 2 node types in nodes + for (int i = 0; i < 9; i++) { + if (i < 3) { + nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_0,type_1"); + } else if (i < 6) { + nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_1,type_2"); + } else { + nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_2"); + } + } + + placementContext = clusterBuilder.buildPlacementContext(); + collection = placementContext.getCluster().getCollection(collectionName); + placementRequest = new PlacementRequestImpl(collection, + Set.of("shard1"), placementContext.getCluster().getLiveNodes(), 6, 0, 0); + pp = plugin.computePlacement(placementRequest, placementContext); + assertEquals("expected 6 placements: " + pp, 6, pp.getReplicaPlacements().size()); + nodeNamesByType.clear(); + AttributeValues attributeValues2 = 
placementContext.getAttributeFetcher() + .requestNodeSystemProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP) + .fetchAttributes(); + placementContext.getCluster().getLiveNodes().forEach(n -> { + String nodeTypesStr = attributeValues2.getSystemProperty(n, AffinityPlacementConfig.NODE_TYPE_SYSPROP).get(); + for (String nodeType : StrUtils.splitSmart(nodeTypesStr, ',')) { + nodeNamesByType + .computeIfAbsent(nodeType, type -> new HashSet<>()) + .add(n.getName()); + } + }); + type0nodes = nodeNamesByType.get("type_0"); + type1nodes = nodeNamesByType.get("type_1"); + + for (ReplicaPlacement p : pp.getReplicaPlacements()) { + assertTrue(type0nodes.contains(p.getNode().getName()) || + type1nodes.contains(p.getNode().getName())); + } + + } @Test @Slow public void testScalability() throws Exception { log.info("==== numNodes ===="); diff --git a/solr/solr-ref-guide/src/replica-placement-plugins.adoc b/solr/solr-ref-guide/src/replica-placement-plugins.adoc index 39369bc1d40..1dad0c26775 100644 --- a/solr/solr-ref-guide/src/replica-placement-plugins.adoc +++ b/solr/solr-ref-guide/src/replica-placement-plugins.adoc @@ -76,6 +76,10 @@ curl -X POST -H 'Content-type: application/json' -d '{ "withCollections": { "A_primary": "A_secondary", "B_primary": "B_secondary" + }, + "nodeType": { + "collection_A": "searchNode,indexNode", + "collection_B": "analyticsNode" } } }}' @@ -119,9 +123,14 @@ The autoscaling specification in the configuration linked above aimed to do the ** minimize cores per node, or ** minimize disk usage. -Additionally, it supports the `withCollection` constraint that enforces the placement of -co-located collections' replicas on the same nodes, and prevents deletions of collections and -replicas that would break this constraint. 
+It also supports additional per-collection constraints: + +* The `withCollection` constraint enforces the placement of co-located collections' replicas on the +same nodes, and prevents deletions of collections and replicas that would break this constraint. +* The `nodeType` constraint limits the nodes eligible for placement to only those that match one or +more of the specified node types. + +See below for more details on these constraints. Overall strategy of this plugin: @@ -170,6 +179,9 @@ The plugin preserves this co-location by rejecting delete operation of secondary removed from the co-located nodes, or the configuration must be changed to remove the co-location mapping for the primary collection. +===== `nodeType` constraint + + ===== Configuration This plugin supports the following configuration parameters: @@ -185,11 +197,19 @@ not an option, replicas can still be assigned to nodes with less than this amoun Default value is 100. `withCollection`:: -(optional, map) this property defines additional constraints that primary collections (keys) +(optional, map) this property defines an additional constraint that primary collections (keys) must be located on the same nodes as the secondary collections (values). The plugin will assume that the secondary collection replicas are already in place and ignore candidate nodes where they are not already present. Default value is none. +`nodeType`:: +(optional, map) this property defines an additional constraint that collections (keys) +must be located only on the nodes that are labeled with one or more of the matching +"node type" labels (values in the map are comma-separated labels). Nodes are labeled using the +`node_type` system property with the value being an arbitrary comma-separated list of labels. +Correspondingly, the plugin configuration can specify that a particular collection must be placed +only on the nodes that match at least one of the (comma-separated) labels defined here. 
+ === Example configurations This is a simple configuration that uses default values: @@ -237,3 +257,24 @@ curl -X POST -H 'Content-type: application/json' -d '{ }}' http://localhost:8983/api/cluster/plugin ---- + +This configuration defines that collection `collection_A` must be placed only on the nodes with +the `node_type` system property containing either `searchNode` or `indexNode` (for example, a node +may be labeled as `-Dnode_type=searchNode,indexNode,uiNode,zkNode`). Similarly, the +collection `collection_B` must be placed only on the nodes that contain the `analyticsNode` label: + +[source,bash] +---- +curl -X POST -H 'Content-type: application/json' -d '{ + "add":{ + "name": ".placement-plugin", + "class": "org.apache.solr.cluster.placement.plugins.AffinityPlacementFactory", + "config": { + "nodeType": { + "collection_A": "searchNode,indexNode", + "collection_B": "analyticsNode" + } + } + }}' + http://localhost:8983/api/cluster/plugin +----