SOLR-15130: Support for per-collection replica placement node sets, a.k.a "node type"

placements.
This commit is contained in:
Andrzej Bialecki 2021-03-01 15:21:54 +01:00
parent ef3a23b3d0
commit 2c5b86b673
12 changed files with 389 additions and 63 deletions

View File

@ -32,6 +32,9 @@ New Features
* SOLR-14787: Payload check query parser now supports inequalities. (Kevin Watters, Gus Heck)
* SOLR-15130: Support for per-collection replica placement node sets, a.k.a "node type"
placements. (ab, ilan)
Improvements
----------------------
* LUCENE-8984: MoreLikeThis MLT is biased for uncommon fields (Andy Hind via Anshum Gupta)

View File

@ -133,6 +133,7 @@ public class CreateCollectionCmd implements CollApiCmds.CollectionApiCommand {
ZkStateReader zkStateReader = ccc.getZkStateReader();
// this also creates the collection zk node as a side-effect
CollectionHandlingUtils.createConfNode(stateManager, configName, collectionName);
Map<String,String> collectionParams = new HashMap<>();

View File

@ -62,7 +62,7 @@ public interface SolrCollection {
/**
* <p>Returns the value of a custom property name set on the {@link SolrCollection} or {@code null} when no such
* property was set. Properties are set through the Collection API. See for example {@code COLLECTIONPROP} in the Solr reference guide.
* property was set. Properties are set through the Collection API. See for example {@code MODIFYCOLLECTION} in the Solr reference guide.
*
* <p><b>{@link PlacementPlugin} related note:</b></p>
* <p>Using custom properties in conjunction with ad hoc {@link PlacementPlugin} code allows customizing placement

View File

@ -90,8 +90,6 @@ public class AttributeFetcherImpl implements AttributeFetcher {
return this;
}
private static final double GB = 1024 * 1024 * 1024;
@Override
public AttributeValues fetchAttributes() {

View File

@ -27,6 +27,7 @@ import org.apache.solr.cluster.*;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.util.Pair;
import javax.annotation.Nonnull;
@ -179,9 +180,18 @@ class SimpleClusterAbstractionsImpl {
return shards.keySet();
}
@Override
public String toString() {
return "SolrCollectionImpl{" +
"collectionName='" + collectionName + '\'' +
", shards=" + shards.keySet() +
", docCollection=" + docCollection +
'}';
}
@Override
public String getCustomProperty(String customPropertyName) {
return docCollection.getStr(customPropertyName);
return docCollection.getStr(CollectionAdminParams.PROPERTY_PREFIX + customPropertyName);
}
}
@ -292,6 +302,17 @@ class SimpleClusterAbstractionsImpl {
public int hashCode() {
return Objects.hash(shardName, collection, shardState);
}
@Override
public String toString() {
return "ShardImpl{" +
"shardName='" + shardName + '\'' +
", collection='" + collection.getName() + '\'' +
", shardState=" + shardState +
", replicas=" + replicas.size() +
", leader=" + leader +
'}';
}
}
@ -432,5 +453,17 @@ class SimpleClusterAbstractionsImpl {
public int hashCode() {
return Objects.hash(replicaName, coreName, shard, replicaType, replicaState, node);
}
@Override
public String toString() {
return "ReplicaImpl{" +
"replicaName='" + replicaName + '\'' +
", coreName='" + coreName + '\'' +
", shard='" + shard.getShardName() + '\'' +
", replicaType=" + replicaType +
", replicaState=" + replicaState +
", node='" + node + '\'' +
'}';
}
}
}

View File

@ -34,6 +34,36 @@ public class AffinityPlacementConfig implements PlacementPluginConfig {
public static final AffinityPlacementConfig DEFAULT =
new AffinityPlacementConfig(DEFAULT_MINIMAL_FREE_DISK_GB, DEFAULT_PRIORITIZED_FREE_DISK_GB);
/**
* <p>Name of the system property on a node indicating which (public cloud) Availability Zone that node is in. The value
* is any string, different strings denote different availability zones.
*
* <p>Nodes on which this system property is not defined are considered being in the same Availability Zone
* {@link #UNDEFINED_AVAILABILITY_ZONE} (hopefully the value of this constant is not the name of a real Availability Zone :).
*/
public static final String AVAILABILITY_ZONE_SYSPROP = "availability_zone";
/**
* <p>Name of the system property on a node indicating the type of replicas allowed on that node.
* The value of that system property is a comma separated list or a single string of value names of
* {@link org.apache.solr.cluster.Replica.ReplicaType} (case insensitive). If that property is not defined, that node is
* considered accepting all replica types (i.e. undefined is equivalent to {@code "NRT,Pull,tlog"}).
*/
public static final String REPLICA_TYPE_SYSPROP = "replica_type";
/**
* Name of the system property on a node indicating the arbitrary "node type" (for example, a node
* more suitable for the indexing work load could be labeled as <code>node_type: indexing</code>).
* The value of this system property is a comma-separated list or a single label (labels must not
* contain commas), which represent a logical OR for the purpose of placement.
*/
public static final String NODE_TYPE_SYSPROP = "node_type";
/**
* This is the "AZ" name for nodes that do not define an AZ. Should not match a real AZ name (I think we're safe)
*/
public static final String UNDEFINED_AVAILABILITY_ZONE = "uNd3f1NeD";
/**
* If a node has strictly less GB of free disk than this value, the node is excluded from assignment decisions.
* Set to 0 or less to disable.
@ -59,6 +89,16 @@ public class AffinityPlacementConfig implements PlacementPluginConfig {
@JsonProperty
public Map<String, String> withCollection;
/**
* This property defines an additional constraint that the collection must be placed
* only on the nodes of the correct "node type". The nodes can specify what type they are (one or
* several types, using a comma-separated list) by defining the {@link #NODE_TYPE_SYSPROP} system property.
* Similarly, the plugin can be configured to specify that a collection (key in the map) must be placed on one or more node
* type (value in the map, using comma-separated list of acceptable node types).
*/
@JsonProperty
public Map<String, String> collectionNodeType;
/**
* Zero-arguments public constructor required for deserialization - don't use.
*/
@ -72,7 +112,7 @@ public class AffinityPlacementConfig implements PlacementPluginConfig {
* @param prioritizedFreeDiskGB prioritized free disk GB.
*/
public AffinityPlacementConfig(long minimalFreeDiskGB, long prioritizedFreeDiskGB) {
this(minimalFreeDiskGB, prioritizedFreeDiskGB, Map.of());
this(minimalFreeDiskGB, prioritizedFreeDiskGB, Map.of(), Map.of());
}
/**
@ -82,11 +122,18 @@ public class AffinityPlacementConfig implements PlacementPluginConfig {
* @param withCollection configuration of co-located collections: keys are
* primary collection names and values are secondary
* collection names.
* @param collectionNodeType configuration of reequired node types per collection.
* Keys are collection names and values are comma-separated
* lists of required node types.
*/
public AffinityPlacementConfig(long minimalFreeDiskGB, long prioritizedFreeDiskGB, Map<String, String> withCollection) {
public AffinityPlacementConfig(long minimalFreeDiskGB, long prioritizedFreeDiskGB,
Map<String, String> withCollection,
Map<String, String> collectionNodeType) {
this.minimalFreeDiskGB = minimalFreeDiskGB;
this.prioritizedFreeDiskGB = prioritizedFreeDiskGB;
Objects.requireNonNull(withCollection);
Objects.requireNonNull(collectionNodeType);
this.withCollection = withCollection;
this.collectionNodeType = collectionNodeType;
}
}

View File

@ -23,6 +23,7 @@ import org.apache.solr.cluster.*;
import org.apache.solr.cluster.placement.*;
import org.apache.solr.cluster.placement.impl.NodeMetricImpl;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.SuppressForbidden;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -114,28 +115,6 @@ import java.util.stream.Collectors;
public class AffinityPlacementFactory implements PlacementPluginFactory<AffinityPlacementConfig> {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
* <p>Name of the system property on a node indicating which (public cloud) Availability Zone that node is in. The value
* is any string, different strings denote different availability zones.
*
* <p>Nodes on which this system property is not defined are considered being in the same Availability Zone
* {@link #UNDEFINED_AVAILABILITY_ZONE} (hopefully the value of this constant is not the name of a real Availability Zone :).
*/
public static final String AVAILABILITY_ZONE_SYSPROP = "availability_zone";
/**
* <p>Name of the system property on a node indicating the type of replicas allowed on that node.
* The value of that system property is a comma separated list or a single string of value names of
* {@link org.apache.solr.cluster.Replica.ReplicaType} (case insensitive). If that property is not defined, that node is
* considered accepting all replica types (i.e. undefined is equivalent to {@code "NRT,Pull,tlog"}).
*/
public static final String REPLICA_TYPE_SYSPROP = "replica_type";
/**
* This is the "AZ" name for nodes that do not define an AZ. Should not match a real AZ name (I think we're safe)
*/
public static final String UNDEFINED_AVAILABILITY_ZONE = "uNd3f1NeD";
private AffinityPlacementConfig config = AffinityPlacementConfig.DEFAULT;
/**
@ -149,7 +128,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
@Override
public PlacementPlugin createPluginInstance() {
return new AffinityPlacementPlugin(config.minimalFreeDiskGB, config.prioritizedFreeDiskGB, config.withCollection);
return new AffinityPlacementPlugin(config.minimalFreeDiskGB, config.prioritizedFreeDiskGB, config.withCollection, config.collectionNodeType);
}
@Override
@ -178,15 +157,20 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
// secondary to primary (1:N)
private final Map<String, Set<String>> colocatedWith;
private final Map<String, Set<String>> nodeTypes;
private final Random replicaPlacementRandom = new Random(); // ok even if random sequence is predictable.
/**
* The factory has decoded the configuration for the plugin instance and passes it the parameters it needs.
*/
private AffinityPlacementPlugin(long minimalFreeDiskGB, long prioritizedFreeDiskGB, Map<String, String> withCollections) {
private AffinityPlacementPlugin(long minimalFreeDiskGB, long prioritizedFreeDiskGB,
Map<String, String> withCollections,
Map<String, String> collectionNodeTypes) {
this.minimalFreeDiskGB = minimalFreeDiskGB;
this.prioritizedFreeDiskGB = prioritizedFreeDiskGB;
Objects.requireNonNull(withCollections, "withCollections must not be null");
Objects.requireNonNull(collectionNodeTypes, "collectionNodeTypes must not be null");
this.withCollections = withCollections;
if (withCollections.isEmpty()) {
colocatedWith = Map.of();
@ -197,6 +181,18 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
.add(primary));
}
if (collectionNodeTypes.isEmpty()) {
nodeTypes = Map.of();
} else {
nodeTypes = new HashMap<>();
collectionNodeTypes.forEach((coll, typesString) -> {
List<String> types = StrUtils.splitSmart(typesString, ',', true);
if (!types.isEmpty()) {
nodeTypes.put(coll, new HashSet<>(types));
}
});
}
// We make things reproducible in tests by using test seed if any
String seed = System.getProperty("tests.seed");
if (seed != null) {
@ -210,16 +206,22 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
Set<Node> nodes = request.getTargetNodes();
SolrCollection solrCollection = request.getCollection();
nodes = filterNodesWithCollection(placementContext.getCluster(), request, nodes);
// Request all needed attributes
AttributeFetcher attributeFetcher = placementContext.getAttributeFetcher();
attributeFetcher.requestNodeSystemProperty(AVAILABILITY_ZONE_SYSPROP).requestNodeSystemProperty(REPLICA_TYPE_SYSPROP);
attributeFetcher
.requestNodeMetric(NodeMetricImpl.NUM_CORES)
.requestNodeMetric(NodeMetricImpl.FREE_DISK_GB);
.requestNodeSystemProperty(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP)
.requestNodeSystemProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP)
.requestNodeSystemProperty(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP);
attributeFetcher
.requestNodeMetric(NodeMetricImpl.NUM_CORES)
.requestNodeMetric(NodeMetricImpl.FREE_DISK_GB);
attributeFetcher.fetchFrom(nodes);
final AttributeValues attrValues = attributeFetcher.fetchAttributes();
// filter out nodes that don't meet the `withCollection` constraint
nodes = filterNodesWithCollection(placementContext.getCluster(), request, attrValues, nodes);
// filter out nodes that don't match the "node types" specified in the collection props
nodes = filterNodesByNodeType(placementContext.getCluster(), request, attrValues, nodes);
// Split the set of nodes into 3 sets of nodes accepting each replica type (sets can overlap if nodes accept multiple replica types)
// These subsets sets are actually maps, because we capture the number of cores (of any replica type) present on each node.
@ -359,13 +361,13 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
}
/**
* Resolves the AZ of a node and takes care of nodes that have no defined AZ in system property {@link #AVAILABILITY_ZONE_SYSPROP}
* to then return {@link #UNDEFINED_AVAILABILITY_ZONE} as the AZ name.
* Resolves the AZ of a node and takes care of nodes that have no defined AZ in system property {@link AffinityPlacementConfig#AVAILABILITY_ZONE_SYSPROP}
* to then return {@link AffinityPlacementConfig#UNDEFINED_AVAILABILITY_ZONE} as the AZ name.
*/
private String getNodeAZ(Node n, final AttributeValues attrValues) {
Optional<String> nodeAz = attrValues.getSystemProperty(n, AVAILABILITY_ZONE_SYSPROP);
Optional<String> nodeAz = attrValues.getSystemProperty(n, AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP);
// All nodes with undefined AZ will be considered part of the same AZ. This also works for deployments that do not care about AZ's
return nodeAz.orElse(UNDEFINED_AVAILABILITY_ZONE);
return nodeAz.orElse(AffinityPlacementConfig.UNDEFINED_AVAILABILITY_ZONE);
}
/**
@ -399,7 +401,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
* as it would not be possible to make any meaningful placement decisions.
*
* @param nodes all nodes on which this plugin should compute placement
* @param attrValues attributes fetched for the nodes. This method uses system property {@link #REPLICA_TYPE_SYSPROP} as
* @param attrValues attributes fetched for the nodes. This method uses system property {@link AffinityPlacementConfig#REPLICA_TYPE_SYSPROP} as
* well as the number of cores on each node.
*/
private Pair<EnumMap<Replica.ReplicaType, Set<Node>>, Map<Node, Integer>> getNodesPerReplicaType(Set<Node> nodes, final AttributeValues attrValues) {
@ -437,7 +439,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
Integer coresCount = attrValues.getNodeMetric(node, NodeMetricImpl.NUM_CORES).get();
coresOnNodes.put(node, coresCount);
String supportedReplicaTypes = attrValues.getSystemProperty(node, REPLICA_TYPE_SYSPROP).isPresent() ? attrValues.getSystemProperty(node, REPLICA_TYPE_SYSPROP).get() : null;
String supportedReplicaTypes = attrValues.getSystemProperty(node, AffinityPlacementConfig.REPLICA_TYPE_SYSPROP).isPresent() ? attrValues.getSystemProperty(node, AffinityPlacementConfig.REPLICA_TYPE_SYSPROP).get() : null;
// If property not defined or is only whitespace on a node, assuming node can take any replica type
if (supportedReplicaTypes == null || supportedReplicaTypes.isBlank()) {
for (Replica.ReplicaType rt : Replica.ReplicaType.values()) {
@ -632,7 +634,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
}
}
private Set<Node> filterNodesWithCollection(Cluster cluster, PlacementRequest request, Set<Node> initialNodes) throws PlacementException {
private Set<Node> filterNodesWithCollection(Cluster cluster, PlacementRequest request, AttributeValues attributeValues, Set<Node> initialNodes) throws PlacementException {
// if there's a `withCollection` constraint for this collection then remove nodes
// that are not eligible
String withCollectionName = withCollections.get(request.getCollection().getName());
@ -658,6 +660,27 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
return filteredNodes;
}
private Set<Node> filterNodesByNodeType(Cluster cluster, PlacementRequest request, AttributeValues attributeValues, Set<Node> initialNodes) throws PlacementException {
Set<String> collNodeTypes = nodeTypes.get(request.getCollection().getName());
if (collNodeTypes == null) {
// no filtering by node type
return initialNodes;
}
Set<Node> filteredNodes = initialNodes.stream()
.filter(n -> {
Optional<String> nodePropOpt = attributeValues.getSystemProperty(n, AffinityPlacementConfig.NODE_TYPE_SYSPROP);
if (!nodePropOpt.isPresent()) {
return false;
}
Set<String> nodeTypes = new HashSet<>(StrUtils.splitSmart(nodePropOpt.get(), ','));
nodeTypes.retainAll(collNodeTypes);
return !nodeTypes.isEmpty();
}).collect(Collectors.toSet());
if (filteredNodes.isEmpty()) {
throw new PlacementException("There are no nodes with types: " + collNodeTypes + " expected by collection " + request.getCollection().getName());
}
return filteredNodes;
}
/**
* Comparator implementing the placement strategy based on free space and number of cores: we want to place new replicas
* on nodes with the less number of cores, but only if they do have enough disk space (expressed as a threshold value).

View File

@ -55,6 +55,7 @@ public class Builders {
NodeBuilder nodeBuilder = new NodeBuilder().setNodeName("node_" + n); // Default name, can be changed
nodeBuilder.setTotalDiskGB(10000.0);
nodeBuilder.setFreeDiskGB(5000.0);
nodeBuilder.setCoreCount(0);
nodeBuilders.add(nodeBuilder);
}
return this;

View File

@ -312,5 +312,17 @@ class ClusterAbstractionsForTest {
public int hashCode() {
return Objects.hash(replicaName, coreName, shard, replicaType, replicaState, node);
}
@Override
public String toString() {
return "ReplicaImpl{" +
"replicaName='" + replicaName + '\'' +
", coreName='" + coreName + '\'' +
", shard='" + shard + '\'' +
", replicaType=" + replicaType +
", replicaState=" + replicaState +
", node=" + node +
'}';
}
}
}

View File

@ -241,7 +241,7 @@ public class PlacementPluginIntegrationTest extends SolrCloudTestCase {
PluginMeta plugin = new PluginMeta();
plugin.name = PlacementPluginFactory.PLUGIN_NAME;
plugin.klass = AffinityPlacementFactory.class.getName();
plugin.config = new AffinityPlacementConfig(1, 2, Map.of(COLLECTION, SECONDARY_COLLECTION));
plugin.config = new AffinityPlacementConfig(1, 2, Map.of(COLLECTION, SECONDARY_COLLECTION), Map.of());
V2Request req = new V2Request.Builder("/cluster/plugin")
.forceV2(true)
.POST()
@ -302,6 +302,53 @@ public class PlacementPluginIntegrationTest extends SolrCloudTestCase {
}
}
// this functionality relies on System.getProperty which we cannot set on individual
// nodes in a mini cluster. For this reason this test is very basic - see
// AffinityPlacementFactoryTest for a more comprehensive test.
@Test
public void testNodeTypeIntegration() throws Exception {
// this functionality relies on System.getProperty which we cannot set on individual
// nodes in a mini cluster.
System.clearProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP);
String collectionName = "nodeTypeCollection";
PlacementPluginFactory<? extends PlacementPluginConfig> pluginFactory = cc.getPlacementPluginFactory();
assertTrue("wrong type " + pluginFactory.getClass().getName(), pluginFactory instanceof DelegatingPlacementPluginFactory);
DelegatingPlacementPluginFactory wrapper = (DelegatingPlacementPluginFactory) pluginFactory;
Phaser phaser = new Phaser();
wrapper.setDelegationPhaser(phaser);
int version = phaser.getPhase();
PluginMeta plugin = new PluginMeta();
plugin.name = PlacementPluginFactory.PLUGIN_NAME;
plugin.klass = AffinityPlacementFactory.class.getName();
plugin.config = new AffinityPlacementConfig(1, 2, Map.of(), Map.of(collectionName, "type_0"));
V2Request req = new V2Request.Builder("/cluster/plugin")
.forceV2(true)
.POST()
.withPayload(singletonMap("add", plugin))
.build();
req.process(cluster.getSolrClient());
phaser.awaitAdvanceInterruptibly(version, 10, TimeUnit.SECONDS);
try {
CollectionAdminResponse rsp = CollectionAdminRequest.createCollection(collectionName, "conf", 1, 3)
.process(cluster.getSolrClient());
fail("should have failed due to no nodes with the types: " + rsp);
} catch (Exception e) {
assertTrue("should contain 'no nodes with types':" + e.toString(),
e.toString().contains("no nodes with types"));
}
System.setProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_0");
CollectionAdminResponse rsp = CollectionAdminRequest.createCollection(collectionName, "conf", 1, 3)
.process(cluster.getSolrClient());
System.clearProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP);
}
@Test
public void testAttributeFetcherImpl() throws Exception {
CollectionAdminResponse rsp = CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 2)

View File

@ -28,7 +28,8 @@ import org.apache.solr.cluster.placement.Builders;
import org.apache.solr.cluster.placement.impl.ModificationRequestImpl;
import org.apache.solr.cluster.placement.impl.PlacementRequestImpl;
import org.apache.solr.common.util.Pair;
import org.junit.BeforeClass;
import org.apache.solr.common.util.StrUtils;
import org.junit.Before;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -46,19 +47,23 @@ import java.util.stream.StreamSupport;
public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static PlacementPlugin plugin;
private PlacementPlugin plugin;
private final static long MINIMAL_FREE_DISK_GB = 10L;
private final static long PRIORITIZED_FREE_DISK_GB = 50L;
private final static String secondaryCollectionName = "withCollection_secondary";
private final static String primaryCollectionName = "withCollection_primary";
@BeforeClass
public static void setupPlugin() {
AffinityPlacementConfig config = new AffinityPlacementConfig(
MINIMAL_FREE_DISK_GB,
PRIORITIZED_FREE_DISK_GB,
Map.of(primaryCollectionName, secondaryCollectionName));
static AffinityPlacementConfig defaultConfig = new AffinityPlacementConfig(
MINIMAL_FREE_DISK_GB,
PRIORITIZED_FREE_DISK_GB);
@Before
public void setupPlugin() {
configurePlugin(defaultConfig);
}
private void configurePlugin(AffinityPlacementConfig config) {
AffinityPlacementFactory factory = new AffinityPlacementFactory();
factory.configure(config);
plugin = factory.createPluginInstance();
@ -297,8 +302,8 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
acceptedReplicaType = NRT_REPLICA_TYPE;
}
nodeBuilders.get(i).setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, az)
.setSysprop(AffinityPlacementFactory.REPLICA_TYPE_SYSPROP, acceptedReplicaType)
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, az)
.setSysprop(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP, acceptedReplicaType)
.setCoreCount(numcores)
.setFreeDiskGB(freedisk);
}
@ -349,7 +354,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(9);
LinkedList<Builders.NodeBuilder> nodeBuilders = clusterBuilder.getLiveNodeBuilders();
for (int i = 0; i < 9; i++) {
nodeBuilders.get(i).setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, "AZ" + (i / 3))
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, "AZ" + (i / 3))
.setCoreCount(i)
.setFreeDiskGB((double)(PRIORITIZED_FREE_DISK_GB + 10));
}
@ -499,9 +504,9 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
nodeBuilder.setCoreCount(0);
nodeBuilder.setFreeDiskGB(100.0);
if (i < NUM_NODES / 2) {
nodeBuilder.setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, "az1");
nodeBuilder.setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, "az1");
} else {
nodeBuilder.setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, "az2");
nodeBuilder.setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, "az2");
}
}
@ -526,7 +531,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
Map<Replica.ReplicaType, Map<String, Map<String, AtomicInteger>>> replicas = new HashMap<>();
AttributeValues attributeValues = placementContext.getAttributeFetcher().fetchAttributes();
for (ReplicaPlacement rp : pp.getReplicaPlacements()) {
Optional<String> azOptional = attributeValues.getSystemProperty(rp.getNode(), AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP);
Optional<String> azOptional = attributeValues.getSystemProperty(rp.getNode(), AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP);
if (!azOptional.isPresent()) {
fail("missing AZ sysprop for node " + rp.getNode());
}
@ -556,10 +561,10 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
nodeBuilder.setCoreCount(0);
nodeBuilder.setFreeDiskGB(100.0);
if (i < NUM_NODES / 3 * 2) {
nodeBuilder.setSysprop(AffinityPlacementFactory.REPLICA_TYPE_SYSPROP, "Nrt, TlOg");
nodeBuilder.setSysprop(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP, "Nrt, TlOg");
nodeBuilder.setSysprop("group", "one");
} else {
nodeBuilder.setSysprop(AffinityPlacementFactory.REPLICA_TYPE_SYSPROP, "Pull,foobar");
nodeBuilder.setSysprop(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP, "Pull,foobar");
nodeBuilder.setSysprop("group", "two");
}
}
@ -653,6 +658,12 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
@Test
public void testWithCollectionPlacement() throws Exception {
AffinityPlacementConfig config = new AffinityPlacementConfig(
MINIMAL_FREE_DISK_GB,
PRIORITIZED_FREE_DISK_GB,
Map.of(primaryCollectionName, secondaryCollectionName), Map.of());
configurePlugin(config);
int NUM_NODES = 3;
Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(NUM_NODES);
Builders.CollectionBuilder collectionBuilder = Builders.newCollectionBuilder(secondaryCollectionName);
@ -695,6 +706,12 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
@Test
public void testWithCollectionModificationRejected() throws Exception {
AffinityPlacementConfig config = new AffinityPlacementConfig(
MINIMAL_FREE_DISK_GB,
PRIORITIZED_FREE_DISK_GB,
Map.of(primaryCollectionName, secondaryCollectionName), Map.of());
configurePlugin(config);
int NUM_NODES = 2;
Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(NUM_NODES);
Builders.CollectionBuilder collectionBuilder = Builders.newCollectionBuilder(secondaryCollectionName);
@ -742,6 +759,109 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
}
}
@Test
public void testNodeType() throws Exception {
Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(9);
LinkedList<Builders.NodeBuilder> nodeBuilders = clusterBuilder.getLiveNodeBuilders();
for (int i = 0; i < 9; i++) {
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_" + (i % 3));
}
String collectionName = "nodeTypeCollection";
Builders.CollectionBuilder collectionBuilder = Builders.newCollectionBuilder(collectionName);
collectionBuilder.initializeShardsReplicas(1, 0, 0, 0, clusterBuilder.getLiveNodeBuilders());
// test single node type in collection
AffinityPlacementConfig config = new AffinityPlacementConfig(
MINIMAL_FREE_DISK_GB,
PRIORITIZED_FREE_DISK_GB,
Map.of(), Map.of(collectionName, "type_0"));
configurePlugin(config);
clusterBuilder.addCollection(collectionBuilder);
PlacementContext placementContext = clusterBuilder.buildPlacementContext();
Map<String, Set<String>> nodeNamesByType = new HashMap<>();
Cluster cluster = placementContext.getCluster();
AttributeValues attributeValues = placementContext.getAttributeFetcher()
.requestNodeSystemProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP)
.fetchAttributes();
placementContext.getCluster().getLiveNodes().forEach(n ->
nodeNamesByType
.computeIfAbsent(attributeValues.getSystemProperty(n, AffinityPlacementConfig.NODE_TYPE_SYSPROP).get(), type -> new HashSet<>())
.add(n.getName())
);
SolrCollection collection = placementContext.getCluster().getCollection(collectionName);
PlacementRequestImpl placementRequest = new PlacementRequestImpl(collection,
Set.of("shard1"), placementContext.getCluster().getLiveNodes(), 3, 0, 0);
PlacementPlan pp = plugin.computePlacement(placementRequest, placementContext);
assertEquals("expected 3 placements: " + pp, 3, pp.getReplicaPlacements().size());
Set<String> type0nodes = nodeNamesByType.get("type_0");
Set<String> type1nodes = nodeNamesByType.get("type_1");
Set<String> type2nodes = nodeNamesByType.get("type_2");
for (ReplicaPlacement p : pp.getReplicaPlacements()) {
assertTrue(type0nodes.contains(p.getNode().getName()));
}
// test 2 node types in collection
config = new AffinityPlacementConfig(
MINIMAL_FREE_DISK_GB,
PRIORITIZED_FREE_DISK_GB,
Map.of(), Map.of(collectionName, "type_0,type_1"));
configurePlugin(config);
placementContext = clusterBuilder.buildPlacementContext();
collection = placementContext.getCluster().getCollection(collectionName);
placementRequest = new PlacementRequestImpl(collection,
Set.of("shard1"), placementContext.getCluster().getLiveNodes(), 6, 0, 0);
pp = plugin.computePlacement(placementRequest, placementContext);
assertEquals("expected 6 placements: " + pp, 6, pp.getReplicaPlacements().size());
for (ReplicaPlacement p : pp.getReplicaPlacements()) {
assertTrue(type0nodes.contains(p.getNode().getName()) ||
type1nodes.contains(p.getNode().getName()));
}
// test 2 node types in nodes
for (int i = 0; i < 9; i++) {
if (i < 3) {
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_0,type_1");
} else if (i < 6) {
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_1,type_2");
} else {
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_2");
}
}
placementContext = clusterBuilder.buildPlacementContext();
collection = placementContext.getCluster().getCollection(collectionName);
placementRequest = new PlacementRequestImpl(collection,
Set.of("shard1"), placementContext.getCluster().getLiveNodes(), 6, 0, 0);
pp = plugin.computePlacement(placementRequest, placementContext);
assertEquals("expected 6 placements: " + pp, 6, pp.getReplicaPlacements().size());
nodeNamesByType.clear();
AttributeValues attributeValues2 = placementContext.getAttributeFetcher()
.requestNodeSystemProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP)
.fetchAttributes();
placementContext.getCluster().getLiveNodes().forEach(n -> {
String nodeTypesStr = attributeValues2.getSystemProperty(n, AffinityPlacementConfig.NODE_TYPE_SYSPROP).get();
for (String nodeType : StrUtils.splitSmart(nodeTypesStr, ',')) {
nodeNamesByType
.computeIfAbsent(nodeType, type -> new HashSet<>())
.add(n.getName());
}
});
type0nodes = nodeNamesByType.get("type_0");
type1nodes = nodeNamesByType.get("type_1");
for (ReplicaPlacement p : pp.getReplicaPlacements()) {
assertTrue(type0nodes.contains(p.getNode().getName()) ||
type1nodes.contains(p.getNode().getName()));
}
}
@Test @Slow
public void testScalability() throws Exception {
log.info("==== numNodes ====");

View File

@ -76,6 +76,10 @@ curl -X POST -H 'Content-type: application/json' -d '{
"withCollections": {
"A_primary": "A_secondary",
"B_primary": "B_secondary"
},
"nodeType": {
"collection_A": "searchNode,indexNode",
"collection_B": "analyticsNode"
}
}
}}'
@ -119,9 +123,14 @@ The autoscaling specification in the configuration linked above aimed to do the
** minimize cores per node, or
** minimize disk usage.
Additionally, it supports the `withCollection` constraint that enforces the placement of
co-located collections' replicas on the same nodes, and prevents deletions of collections and
replicas that would break this constraint.
It also supports additional per-collection constraints:
* `withCollection` constraint enforces the placement of co-located collections' replicas on the
same nodes, and prevents deletions of collections and replicas that would break this constraint.
* `nodeType` constraint limits the nodes eligible for placement to only those that match one or
more of the specified node types.
See below for more details on these constraints.
Overall strategy of this plugin:
@ -170,6 +179,9 @@ The plugin preserves this co-location by rejecting delete operation of secondary
removed from the co-located nodes, or the configuration must be changed to remove the
co-location mapping for the primary collection.
===== `nodeType` constraint
===== Configuration
This plugin supports the following configuration parameters:
@ -185,11 +197,19 @@ not an option, replicas can still be assigned to nodes with less than this amoun
Default value is 100.
`withCollection`::
(optional, map) this property defines additional constraints that primary collections (keys)
(optional, map) this property defines an additional constraint that primary collections (keys)
must be located on the same nodes as the secondary collections (values). The plugin will
assume that the secondary collection replicas are already in place and ignore candidate
nodes where they are not already present. Default value is none.
`nodeType`::
(optional, map) this property defines an additional constraint that collections (keys)
must be located only on the nodes that are labeled with one or more of the matching
"node type" labels (values in the map are comma-separated labels). Nodes are labeled using the
`node_type` system property with the value being an arbitrary comma-separated list of labels.
Correspondingly, the plugin configuration can specify that a particular collection must be placed
only on the nodes that match at least one of the (comma-separated) labels defined here.
=== Example configurations
This is a simple configuration that uses default values:
@ -237,3 +257,24 @@ curl -X POST -H 'Content-type: application/json' -d '{
}}'
http://localhost:8983/api/cluster/plugin
----
This configuration defines that collection `collection_A` must be placed only on the nodes with
the `node_type` system property containing either `searchNode` or `indexNode` (for example, a node
may be labeled as `-Dnode_type=searchNode,indexNode,uiNode,zkNode`). Similarly, the
collection `collection_B` must be placed only on the nodes that contain the `analyticsNode` label:
[source,bash]
----
curl -X POST -H 'Content-type: application/json' -d '{
"add":{
"name": ".placement-plugin",
"class": "org.apache.solr.cluster.placement.plugins.AffinityPlacementFactory",
"config": {
"nodeType": {
"collection_A": "searchNode,indexNode",
"collection_B": "analyticsNode"
}
}
}}'
http://localhost:8983/api/cluster/plugin
----