mirror of https://github.com/apache/lucene.git
SOLR-15130: Support for per-collection replica placement node sets, a.k.a "node type"
placements.
This commit is contained in:
parent
ef3a23b3d0
commit
2c5b86b673
|
@ -32,6 +32,9 @@ New Features
|
|||
|
||||
* SOLR-14787: Payload check query parser now supports inequalities. (Kevin Watters, Gus Heck)
|
||||
|
||||
* SOLR-15130: Support for per-collection replica placement node sets, a.k.a "node type"
|
||||
placements. (ab, ilan)
|
||||
|
||||
Improvements
|
||||
----------------------
|
||||
* LUCENE-8984: MoreLikeThis MLT is biased for uncommon fields (Andy Hind via Anshum Gupta)
|
||||
|
|
|
@ -133,6 +133,7 @@ public class CreateCollectionCmd implements CollApiCmds.CollectionApiCommand {
|
|||
|
||||
ZkStateReader zkStateReader = ccc.getZkStateReader();
|
||||
|
||||
// this also creates the collection zk node as a side-effect
|
||||
CollectionHandlingUtils.createConfNode(stateManager, configName, collectionName);
|
||||
|
||||
Map<String,String> collectionParams = new HashMap<>();
|
||||
|
|
|
@ -62,7 +62,7 @@ public interface SolrCollection {
|
|||
|
||||
/**
|
||||
* <p>Returns the value of a custom property name set on the {@link SolrCollection} or {@code null} when no such
|
||||
* property was set. Properties are set through the Collection API. See for example {@code COLLECTIONPROP} in the Solr reference guide.
|
||||
* property was set. Properties are set through the Collection API. See for example {@code MODIFYCOLLECTION} in the Solr reference guide.
|
||||
*
|
||||
* <p><b>{@link PlacementPlugin} related note:</b></p>
|
||||
* <p>Using custom properties in conjunction with ad hoc {@link PlacementPlugin} code allows customizing placement
|
||||
|
|
|
@ -90,8 +90,6 @@ public class AttributeFetcherImpl implements AttributeFetcher {
|
|||
return this;
|
||||
}
|
||||
|
||||
private static final double GB = 1024 * 1024 * 1024;
|
||||
|
||||
@Override
|
||||
public AttributeValues fetchAttributes() {
|
||||
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.solr.cluster.*;
|
|||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.DocCollection;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.params.CollectionAdminParams;
|
||||
import org.apache.solr.common.util.Pair;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
@ -179,9 +180,18 @@ class SimpleClusterAbstractionsImpl {
|
|||
return shards.keySet();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "SolrCollectionImpl{" +
|
||||
"collectionName='" + collectionName + '\'' +
|
||||
", shards=" + shards.keySet() +
|
||||
", docCollection=" + docCollection +
|
||||
'}';
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getCustomProperty(String customPropertyName) {
|
||||
return docCollection.getStr(customPropertyName);
|
||||
return docCollection.getStr(CollectionAdminParams.PROPERTY_PREFIX + customPropertyName);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -292,6 +302,17 @@ class SimpleClusterAbstractionsImpl {
|
|||
public int hashCode() {
|
||||
return Objects.hash(shardName, collection, shardState);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ShardImpl{" +
|
||||
"shardName='" + shardName + '\'' +
|
||||
", collection='" + collection.getName() + '\'' +
|
||||
", shardState=" + shardState +
|
||||
", replicas=" + replicas.size() +
|
||||
", leader=" + leader +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -432,5 +453,17 @@ class SimpleClusterAbstractionsImpl {
|
|||
public int hashCode() {
|
||||
return Objects.hash(replicaName, coreName, shard, replicaType, replicaState, node);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ReplicaImpl{" +
|
||||
"replicaName='" + replicaName + '\'' +
|
||||
", coreName='" + coreName + '\'' +
|
||||
", shard='" + shard.getShardName() + '\'' +
|
||||
", replicaType=" + replicaType +
|
||||
", replicaState=" + replicaState +
|
||||
", node='" + node + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,6 +34,36 @@ public class AffinityPlacementConfig implements PlacementPluginConfig {
|
|||
public static final AffinityPlacementConfig DEFAULT =
|
||||
new AffinityPlacementConfig(DEFAULT_MINIMAL_FREE_DISK_GB, DEFAULT_PRIORITIZED_FREE_DISK_GB);
|
||||
|
||||
/**
|
||||
* <p>Name of the system property on a node indicating which (public cloud) Availability Zone that node is in. The value
|
||||
* is any string, different strings denote different availability zones.
|
||||
*
|
||||
* <p>Nodes on which this system property is not defined are considered being in the same Availability Zone
|
||||
* {@link #UNDEFINED_AVAILABILITY_ZONE} (hopefully the value of this constant is not the name of a real Availability Zone :).
|
||||
*/
|
||||
public static final String AVAILABILITY_ZONE_SYSPROP = "availability_zone";
|
||||
|
||||
/**
|
||||
* <p>Name of the system property on a node indicating the type of replicas allowed on that node.
|
||||
* The value of that system property is a comma separated list or a single string of value names of
|
||||
* {@link org.apache.solr.cluster.Replica.ReplicaType} (case insensitive). If that property is not defined, that node is
|
||||
* considered accepting all replica types (i.e. undefined is equivalent to {@code "NRT,Pull,tlog"}).
|
||||
*/
|
||||
public static final String REPLICA_TYPE_SYSPROP = "replica_type";
|
||||
|
||||
/**
|
||||
* Name of the system property on a node indicating the arbitrary "node type" (for example, a node
|
||||
* more suitable for the indexing work load could be labeled as <code>node_type: indexing</code>).
|
||||
* The value of this system property is a comma-separated list or a single label (labels must not
|
||||
* contain commas), which represent a logical OR for the purpose of placement.
|
||||
*/
|
||||
public static final String NODE_TYPE_SYSPROP = "node_type";
|
||||
|
||||
/**
|
||||
* This is the "AZ" name for nodes that do not define an AZ. Should not match a real AZ name (I think we're safe)
|
||||
*/
|
||||
public static final String UNDEFINED_AVAILABILITY_ZONE = "uNd3f1NeD";
|
||||
|
||||
/**
|
||||
* If a node has strictly less GB of free disk than this value, the node is excluded from assignment decisions.
|
||||
* Set to 0 or less to disable.
|
||||
|
@ -59,6 +89,16 @@ public class AffinityPlacementConfig implements PlacementPluginConfig {
|
|||
@JsonProperty
|
||||
public Map<String, String> withCollection;
|
||||
|
||||
/**
|
||||
* This property defines an additional constraint that the collection must be placed
|
||||
* only on the nodes of the correct "node type". The nodes can specify what type they are (one or
|
||||
* several types, using a comma-separated list) by defining the {@link #NODE_TYPE_SYSPROP} system property.
|
||||
* Similarly, the plugin can be configured to specify that a collection (key in the map) must be placed on one or more node
|
||||
* type (value in the map, using comma-separated list of acceptable node types).
|
||||
*/
|
||||
@JsonProperty
|
||||
public Map<String, String> collectionNodeType;
|
||||
|
||||
/**
|
||||
* Zero-arguments public constructor required for deserialization - don't use.
|
||||
*/
|
||||
|
@ -72,7 +112,7 @@ public class AffinityPlacementConfig implements PlacementPluginConfig {
|
|||
* @param prioritizedFreeDiskGB prioritized free disk GB.
|
||||
*/
|
||||
public AffinityPlacementConfig(long minimalFreeDiskGB, long prioritizedFreeDiskGB) {
|
||||
this(minimalFreeDiskGB, prioritizedFreeDiskGB, Map.of());
|
||||
this(minimalFreeDiskGB, prioritizedFreeDiskGB, Map.of(), Map.of());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -82,11 +122,18 @@ public class AffinityPlacementConfig implements PlacementPluginConfig {
|
|||
* @param withCollection configuration of co-located collections: keys are
|
||||
* primary collection names and values are secondary
|
||||
* collection names.
|
||||
* @param collectionNodeType configuration of reequired node types per collection.
|
||||
* Keys are collection names and values are comma-separated
|
||||
* lists of required node types.
|
||||
*/
|
||||
public AffinityPlacementConfig(long minimalFreeDiskGB, long prioritizedFreeDiskGB, Map<String, String> withCollection) {
|
||||
public AffinityPlacementConfig(long minimalFreeDiskGB, long prioritizedFreeDiskGB,
|
||||
Map<String, String> withCollection,
|
||||
Map<String, String> collectionNodeType) {
|
||||
this.minimalFreeDiskGB = minimalFreeDiskGB;
|
||||
this.prioritizedFreeDiskGB = prioritizedFreeDiskGB;
|
||||
Objects.requireNonNull(withCollection);
|
||||
Objects.requireNonNull(collectionNodeType);
|
||||
this.withCollection = withCollection;
|
||||
this.collectionNodeType = collectionNodeType;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.solr.cluster.*;
|
|||
import org.apache.solr.cluster.placement.*;
|
||||
import org.apache.solr.cluster.placement.impl.NodeMetricImpl;
|
||||
import org.apache.solr.common.util.Pair;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -114,28 +115,6 @@ import java.util.stream.Collectors;
|
|||
public class AffinityPlacementFactory implements PlacementPluginFactory<AffinityPlacementConfig> {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
/**
|
||||
* <p>Name of the system property on a node indicating which (public cloud) Availability Zone that node is in. The value
|
||||
* is any string, different strings denote different availability zones.
|
||||
*
|
||||
* <p>Nodes on which this system property is not defined are considered being in the same Availability Zone
|
||||
* {@link #UNDEFINED_AVAILABILITY_ZONE} (hopefully the value of this constant is not the name of a real Availability Zone :).
|
||||
*/
|
||||
public static final String AVAILABILITY_ZONE_SYSPROP = "availability_zone";
|
||||
|
||||
/**
|
||||
* <p>Name of the system property on a node indicating the type of replicas allowed on that node.
|
||||
* The value of that system property is a comma separated list or a single string of value names of
|
||||
* {@link org.apache.solr.cluster.Replica.ReplicaType} (case insensitive). If that property is not defined, that node is
|
||||
* considered accepting all replica types (i.e. undefined is equivalent to {@code "NRT,Pull,tlog"}).
|
||||
*/
|
||||
public static final String REPLICA_TYPE_SYSPROP = "replica_type";
|
||||
|
||||
/**
|
||||
* This is the "AZ" name for nodes that do not define an AZ. Should not match a real AZ name (I think we're safe)
|
||||
*/
|
||||
public static final String UNDEFINED_AVAILABILITY_ZONE = "uNd3f1NeD";
|
||||
|
||||
private AffinityPlacementConfig config = AffinityPlacementConfig.DEFAULT;
|
||||
|
||||
/**
|
||||
|
@ -149,7 +128,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
|
|||
|
||||
@Override
|
||||
public PlacementPlugin createPluginInstance() {
|
||||
return new AffinityPlacementPlugin(config.minimalFreeDiskGB, config.prioritizedFreeDiskGB, config.withCollection);
|
||||
return new AffinityPlacementPlugin(config.minimalFreeDiskGB, config.prioritizedFreeDiskGB, config.withCollection, config.collectionNodeType);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -178,15 +157,20 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
|
|||
// secondary to primary (1:N)
|
||||
private final Map<String, Set<String>> colocatedWith;
|
||||
|
||||
private final Map<String, Set<String>> nodeTypes;
|
||||
|
||||
private final Random replicaPlacementRandom = new Random(); // ok even if random sequence is predictable.
|
||||
|
||||
/**
|
||||
* The factory has decoded the configuration for the plugin instance and passes it the parameters it needs.
|
||||
*/
|
||||
private AffinityPlacementPlugin(long minimalFreeDiskGB, long prioritizedFreeDiskGB, Map<String, String> withCollections) {
|
||||
private AffinityPlacementPlugin(long minimalFreeDiskGB, long prioritizedFreeDiskGB,
|
||||
Map<String, String> withCollections,
|
||||
Map<String, String> collectionNodeTypes) {
|
||||
this.minimalFreeDiskGB = minimalFreeDiskGB;
|
||||
this.prioritizedFreeDiskGB = prioritizedFreeDiskGB;
|
||||
Objects.requireNonNull(withCollections, "withCollections must not be null");
|
||||
Objects.requireNonNull(collectionNodeTypes, "collectionNodeTypes must not be null");
|
||||
this.withCollections = withCollections;
|
||||
if (withCollections.isEmpty()) {
|
||||
colocatedWith = Map.of();
|
||||
|
@ -197,6 +181,18 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
|
|||
.add(primary));
|
||||
}
|
||||
|
||||
if (collectionNodeTypes.isEmpty()) {
|
||||
nodeTypes = Map.of();
|
||||
} else {
|
||||
nodeTypes = new HashMap<>();
|
||||
collectionNodeTypes.forEach((coll, typesString) -> {
|
||||
List<String> types = StrUtils.splitSmart(typesString, ',', true);
|
||||
if (!types.isEmpty()) {
|
||||
nodeTypes.put(coll, new HashSet<>(types));
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// We make things reproducible in tests by using test seed if any
|
||||
String seed = System.getProperty("tests.seed");
|
||||
if (seed != null) {
|
||||
|
@ -210,16 +206,22 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
|
|||
Set<Node> nodes = request.getTargetNodes();
|
||||
SolrCollection solrCollection = request.getCollection();
|
||||
|
||||
nodes = filterNodesWithCollection(placementContext.getCluster(), request, nodes);
|
||||
|
||||
// Request all needed attributes
|
||||
AttributeFetcher attributeFetcher = placementContext.getAttributeFetcher();
|
||||
attributeFetcher.requestNodeSystemProperty(AVAILABILITY_ZONE_SYSPROP).requestNodeSystemProperty(REPLICA_TYPE_SYSPROP);
|
||||
attributeFetcher
|
||||
.requestNodeMetric(NodeMetricImpl.NUM_CORES)
|
||||
.requestNodeMetric(NodeMetricImpl.FREE_DISK_GB);
|
||||
.requestNodeSystemProperty(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP)
|
||||
.requestNodeSystemProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP)
|
||||
.requestNodeSystemProperty(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP);
|
||||
attributeFetcher
|
||||
.requestNodeMetric(NodeMetricImpl.NUM_CORES)
|
||||
.requestNodeMetric(NodeMetricImpl.FREE_DISK_GB);
|
||||
attributeFetcher.fetchFrom(nodes);
|
||||
final AttributeValues attrValues = attributeFetcher.fetchAttributes();
|
||||
// filter out nodes that don't meet the `withCollection` constraint
|
||||
nodes = filterNodesWithCollection(placementContext.getCluster(), request, attrValues, nodes);
|
||||
// filter out nodes that don't match the "node types" specified in the collection props
|
||||
nodes = filterNodesByNodeType(placementContext.getCluster(), request, attrValues, nodes);
|
||||
|
||||
|
||||
// Split the set of nodes into 3 sets of nodes accepting each replica type (sets can overlap if nodes accept multiple replica types)
|
||||
// These subsets sets are actually maps, because we capture the number of cores (of any replica type) present on each node.
|
||||
|
@ -359,13 +361,13 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
|
|||
}
|
||||
|
||||
/**
|
||||
* Resolves the AZ of a node and takes care of nodes that have no defined AZ in system property {@link #AVAILABILITY_ZONE_SYSPROP}
|
||||
* to then return {@link #UNDEFINED_AVAILABILITY_ZONE} as the AZ name.
|
||||
* Resolves the AZ of a node and takes care of nodes that have no defined AZ in system property {@link AffinityPlacementConfig#AVAILABILITY_ZONE_SYSPROP}
|
||||
* to then return {@link AffinityPlacementConfig#UNDEFINED_AVAILABILITY_ZONE} as the AZ name.
|
||||
*/
|
||||
private String getNodeAZ(Node n, final AttributeValues attrValues) {
|
||||
Optional<String> nodeAz = attrValues.getSystemProperty(n, AVAILABILITY_ZONE_SYSPROP);
|
||||
Optional<String> nodeAz = attrValues.getSystemProperty(n, AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP);
|
||||
// All nodes with undefined AZ will be considered part of the same AZ. This also works for deployments that do not care about AZ's
|
||||
return nodeAz.orElse(UNDEFINED_AVAILABILITY_ZONE);
|
||||
return nodeAz.orElse(AffinityPlacementConfig.UNDEFINED_AVAILABILITY_ZONE);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -399,7 +401,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
|
|||
* as it would not be possible to make any meaningful placement decisions.
|
||||
*
|
||||
* @param nodes all nodes on which this plugin should compute placement
|
||||
* @param attrValues attributes fetched for the nodes. This method uses system property {@link #REPLICA_TYPE_SYSPROP} as
|
||||
* @param attrValues attributes fetched for the nodes. This method uses system property {@link AffinityPlacementConfig#REPLICA_TYPE_SYSPROP} as
|
||||
* well as the number of cores on each node.
|
||||
*/
|
||||
private Pair<EnumMap<Replica.ReplicaType, Set<Node>>, Map<Node, Integer>> getNodesPerReplicaType(Set<Node> nodes, final AttributeValues attrValues) {
|
||||
|
@ -437,7 +439,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
|
|||
Integer coresCount = attrValues.getNodeMetric(node, NodeMetricImpl.NUM_CORES).get();
|
||||
coresOnNodes.put(node, coresCount);
|
||||
|
||||
String supportedReplicaTypes = attrValues.getSystemProperty(node, REPLICA_TYPE_SYSPROP).isPresent() ? attrValues.getSystemProperty(node, REPLICA_TYPE_SYSPROP).get() : null;
|
||||
String supportedReplicaTypes = attrValues.getSystemProperty(node, AffinityPlacementConfig.REPLICA_TYPE_SYSPROP).isPresent() ? attrValues.getSystemProperty(node, AffinityPlacementConfig.REPLICA_TYPE_SYSPROP).get() : null;
|
||||
// If property not defined or is only whitespace on a node, assuming node can take any replica type
|
||||
if (supportedReplicaTypes == null || supportedReplicaTypes.isBlank()) {
|
||||
for (Replica.ReplicaType rt : Replica.ReplicaType.values()) {
|
||||
|
@ -632,7 +634,7 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
|
|||
}
|
||||
}
|
||||
|
||||
private Set<Node> filterNodesWithCollection(Cluster cluster, PlacementRequest request, Set<Node> initialNodes) throws PlacementException {
|
||||
private Set<Node> filterNodesWithCollection(Cluster cluster, PlacementRequest request, AttributeValues attributeValues, Set<Node> initialNodes) throws PlacementException {
|
||||
// if there's a `withCollection` constraint for this collection then remove nodes
|
||||
// that are not eligible
|
||||
String withCollectionName = withCollections.get(request.getCollection().getName());
|
||||
|
@ -658,6 +660,27 @@ public class AffinityPlacementFactory implements PlacementPluginFactory<Affinity
|
|||
return filteredNodes;
|
||||
}
|
||||
|
||||
private Set<Node> filterNodesByNodeType(Cluster cluster, PlacementRequest request, AttributeValues attributeValues, Set<Node> initialNodes) throws PlacementException {
|
||||
Set<String> collNodeTypes = nodeTypes.get(request.getCollection().getName());
|
||||
if (collNodeTypes == null) {
|
||||
// no filtering by node type
|
||||
return initialNodes;
|
||||
}
|
||||
Set<Node> filteredNodes = initialNodes.stream()
|
||||
.filter(n -> {
|
||||
Optional<String> nodePropOpt = attributeValues.getSystemProperty(n, AffinityPlacementConfig.NODE_TYPE_SYSPROP);
|
||||
if (!nodePropOpt.isPresent()) {
|
||||
return false;
|
||||
}
|
||||
Set<String> nodeTypes = new HashSet<>(StrUtils.splitSmart(nodePropOpt.get(), ','));
|
||||
nodeTypes.retainAll(collNodeTypes);
|
||||
return !nodeTypes.isEmpty();
|
||||
}).collect(Collectors.toSet());
|
||||
if (filteredNodes.isEmpty()) {
|
||||
throw new PlacementException("There are no nodes with types: " + collNodeTypes + " expected by collection " + request.getCollection().getName());
|
||||
}
|
||||
return filteredNodes;
|
||||
}
|
||||
/**
|
||||
* Comparator implementing the placement strategy based on free space and number of cores: we want to place new replicas
|
||||
* on nodes with the less number of cores, but only if they do have enough disk space (expressed as a threshold value).
|
||||
|
|
|
@ -55,6 +55,7 @@ public class Builders {
|
|||
NodeBuilder nodeBuilder = new NodeBuilder().setNodeName("node_" + n); // Default name, can be changed
|
||||
nodeBuilder.setTotalDiskGB(10000.0);
|
||||
nodeBuilder.setFreeDiskGB(5000.0);
|
||||
nodeBuilder.setCoreCount(0);
|
||||
nodeBuilders.add(nodeBuilder);
|
||||
}
|
||||
return this;
|
||||
|
|
|
@ -312,5 +312,17 @@ class ClusterAbstractionsForTest {
|
|||
public int hashCode() {
|
||||
return Objects.hash(replicaName, coreName, shard, replicaType, replicaState, node);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ReplicaImpl{" +
|
||||
"replicaName='" + replicaName + '\'' +
|
||||
", coreName='" + coreName + '\'' +
|
||||
", shard='" + shard + '\'' +
|
||||
", replicaType=" + replicaType +
|
||||
", replicaState=" + replicaState +
|
||||
", node=" + node +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -241,7 +241,7 @@ public class PlacementPluginIntegrationTest extends SolrCloudTestCase {
|
|||
PluginMeta plugin = new PluginMeta();
|
||||
plugin.name = PlacementPluginFactory.PLUGIN_NAME;
|
||||
plugin.klass = AffinityPlacementFactory.class.getName();
|
||||
plugin.config = new AffinityPlacementConfig(1, 2, Map.of(COLLECTION, SECONDARY_COLLECTION));
|
||||
plugin.config = new AffinityPlacementConfig(1, 2, Map.of(COLLECTION, SECONDARY_COLLECTION), Map.of());
|
||||
V2Request req = new V2Request.Builder("/cluster/plugin")
|
||||
.forceV2(true)
|
||||
.POST()
|
||||
|
@ -302,6 +302,53 @@ public class PlacementPluginIntegrationTest extends SolrCloudTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// this functionality relies on System.getProperty which we cannot set on individual
|
||||
// nodes in a mini cluster. For this reason this test is very basic - see
|
||||
// AffinityPlacementFactoryTest for a more comprehensive test.
|
||||
@Test
|
||||
public void testNodeTypeIntegration() throws Exception {
|
||||
// this functionality relies on System.getProperty which we cannot set on individual
|
||||
// nodes in a mini cluster.
|
||||
System.clearProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP);
|
||||
|
||||
String collectionName = "nodeTypeCollection";
|
||||
|
||||
PlacementPluginFactory<? extends PlacementPluginConfig> pluginFactory = cc.getPlacementPluginFactory();
|
||||
assertTrue("wrong type " + pluginFactory.getClass().getName(), pluginFactory instanceof DelegatingPlacementPluginFactory);
|
||||
DelegatingPlacementPluginFactory wrapper = (DelegatingPlacementPluginFactory) pluginFactory;
|
||||
Phaser phaser = new Phaser();
|
||||
wrapper.setDelegationPhaser(phaser);
|
||||
|
||||
int version = phaser.getPhase();
|
||||
|
||||
PluginMeta plugin = new PluginMeta();
|
||||
plugin.name = PlacementPluginFactory.PLUGIN_NAME;
|
||||
plugin.klass = AffinityPlacementFactory.class.getName();
|
||||
plugin.config = new AffinityPlacementConfig(1, 2, Map.of(), Map.of(collectionName, "type_0"));
|
||||
V2Request req = new V2Request.Builder("/cluster/plugin")
|
||||
.forceV2(true)
|
||||
.POST()
|
||||
.withPayload(singletonMap("add", plugin))
|
||||
.build();
|
||||
req.process(cluster.getSolrClient());
|
||||
|
||||
phaser.awaitAdvanceInterruptibly(version, 10, TimeUnit.SECONDS);
|
||||
|
||||
try {
|
||||
CollectionAdminResponse rsp = CollectionAdminRequest.createCollection(collectionName, "conf", 1, 3)
|
||||
.process(cluster.getSolrClient());
|
||||
fail("should have failed due to no nodes with the types: " + rsp);
|
||||
} catch (Exception e) {
|
||||
assertTrue("should contain 'no nodes with types':" + e.toString(),
|
||||
e.toString().contains("no nodes with types"));
|
||||
}
|
||||
System.setProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_0");
|
||||
CollectionAdminResponse rsp = CollectionAdminRequest.createCollection(collectionName, "conf", 1, 3)
|
||||
.process(cluster.getSolrClient());
|
||||
|
||||
System.clearProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAttributeFetcherImpl() throws Exception {
|
||||
CollectionAdminResponse rsp = CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 2)
|
||||
|
|
|
@ -28,7 +28,8 @@ import org.apache.solr.cluster.placement.Builders;
|
|||
import org.apache.solr.cluster.placement.impl.ModificationRequestImpl;
|
||||
import org.apache.solr.cluster.placement.impl.PlacementRequestImpl;
|
||||
import org.apache.solr.common.util.Pair;
|
||||
import org.junit.BeforeClass;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
@ -46,19 +47,23 @@ import java.util.stream.StreamSupport;
|
|||
public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
||||
private static PlacementPlugin plugin;
|
||||
private PlacementPlugin plugin;
|
||||
|
||||
private final static long MINIMAL_FREE_DISK_GB = 10L;
|
||||
private final static long PRIORITIZED_FREE_DISK_GB = 50L;
|
||||
private final static String secondaryCollectionName = "withCollection_secondary";
|
||||
private final static String primaryCollectionName = "withCollection_primary";
|
||||
|
||||
@BeforeClass
|
||||
public static void setupPlugin() {
|
||||
AffinityPlacementConfig config = new AffinityPlacementConfig(
|
||||
MINIMAL_FREE_DISK_GB,
|
||||
PRIORITIZED_FREE_DISK_GB,
|
||||
Map.of(primaryCollectionName, secondaryCollectionName));
|
||||
static AffinityPlacementConfig defaultConfig = new AffinityPlacementConfig(
|
||||
MINIMAL_FREE_DISK_GB,
|
||||
PRIORITIZED_FREE_DISK_GB);
|
||||
|
||||
@Before
|
||||
public void setupPlugin() {
|
||||
configurePlugin(defaultConfig);
|
||||
}
|
||||
|
||||
private void configurePlugin(AffinityPlacementConfig config) {
|
||||
AffinityPlacementFactory factory = new AffinityPlacementFactory();
|
||||
factory.configure(config);
|
||||
plugin = factory.createPluginInstance();
|
||||
|
@ -297,8 +302,8 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
|
|||
acceptedReplicaType = NRT_REPLICA_TYPE;
|
||||
}
|
||||
|
||||
nodeBuilders.get(i).setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, az)
|
||||
.setSysprop(AffinityPlacementFactory.REPLICA_TYPE_SYSPROP, acceptedReplicaType)
|
||||
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, az)
|
||||
.setSysprop(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP, acceptedReplicaType)
|
||||
.setCoreCount(numcores)
|
||||
.setFreeDiskGB(freedisk);
|
||||
}
|
||||
|
@ -349,7 +354,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
|
|||
Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(9);
|
||||
LinkedList<Builders.NodeBuilder> nodeBuilders = clusterBuilder.getLiveNodeBuilders();
|
||||
for (int i = 0; i < 9; i++) {
|
||||
nodeBuilders.get(i).setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, "AZ" + (i / 3))
|
||||
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, "AZ" + (i / 3))
|
||||
.setCoreCount(i)
|
||||
.setFreeDiskGB((double)(PRIORITIZED_FREE_DISK_GB + 10));
|
||||
}
|
||||
|
@ -499,9 +504,9 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
|
|||
nodeBuilder.setCoreCount(0);
|
||||
nodeBuilder.setFreeDiskGB(100.0);
|
||||
if (i < NUM_NODES / 2) {
|
||||
nodeBuilder.setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, "az1");
|
||||
nodeBuilder.setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, "az1");
|
||||
} else {
|
||||
nodeBuilder.setSysprop(AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP, "az2");
|
||||
nodeBuilder.setSysprop(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP, "az2");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -526,7 +531,7 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
|
|||
Map<Replica.ReplicaType, Map<String, Map<String, AtomicInteger>>> replicas = new HashMap<>();
|
||||
AttributeValues attributeValues = placementContext.getAttributeFetcher().fetchAttributes();
|
||||
for (ReplicaPlacement rp : pp.getReplicaPlacements()) {
|
||||
Optional<String> azOptional = attributeValues.getSystemProperty(rp.getNode(), AffinityPlacementFactory.AVAILABILITY_ZONE_SYSPROP);
|
||||
Optional<String> azOptional = attributeValues.getSystemProperty(rp.getNode(), AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP);
|
||||
if (!azOptional.isPresent()) {
|
||||
fail("missing AZ sysprop for node " + rp.getNode());
|
||||
}
|
||||
|
@ -556,10 +561,10 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
|
|||
nodeBuilder.setCoreCount(0);
|
||||
nodeBuilder.setFreeDiskGB(100.0);
|
||||
if (i < NUM_NODES / 3 * 2) {
|
||||
nodeBuilder.setSysprop(AffinityPlacementFactory.REPLICA_TYPE_SYSPROP, "Nrt, TlOg");
|
||||
nodeBuilder.setSysprop(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP, "Nrt, TlOg");
|
||||
nodeBuilder.setSysprop("group", "one");
|
||||
} else {
|
||||
nodeBuilder.setSysprop(AffinityPlacementFactory.REPLICA_TYPE_SYSPROP, "Pull,foobar");
|
||||
nodeBuilder.setSysprop(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP, "Pull,foobar");
|
||||
nodeBuilder.setSysprop("group", "two");
|
||||
}
|
||||
}
|
||||
|
@ -653,6 +658,12 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
|
|||
|
||||
@Test
|
||||
public void testWithCollectionPlacement() throws Exception {
|
||||
AffinityPlacementConfig config = new AffinityPlacementConfig(
|
||||
MINIMAL_FREE_DISK_GB,
|
||||
PRIORITIZED_FREE_DISK_GB,
|
||||
Map.of(primaryCollectionName, secondaryCollectionName), Map.of());
|
||||
configurePlugin(config);
|
||||
|
||||
int NUM_NODES = 3;
|
||||
Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(NUM_NODES);
|
||||
Builders.CollectionBuilder collectionBuilder = Builders.newCollectionBuilder(secondaryCollectionName);
|
||||
|
@ -695,6 +706,12 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
|
|||
|
||||
@Test
|
||||
public void testWithCollectionModificationRejected() throws Exception {
|
||||
AffinityPlacementConfig config = new AffinityPlacementConfig(
|
||||
MINIMAL_FREE_DISK_GB,
|
||||
PRIORITIZED_FREE_DISK_GB,
|
||||
Map.of(primaryCollectionName, secondaryCollectionName), Map.of());
|
||||
configurePlugin(config);
|
||||
|
||||
int NUM_NODES = 2;
|
||||
Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(NUM_NODES);
|
||||
Builders.CollectionBuilder collectionBuilder = Builders.newCollectionBuilder(secondaryCollectionName);
|
||||
|
@ -742,6 +759,109 @@ public class AffinityPlacementFactoryTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNodeType() throws Exception {
|
||||
Builders.ClusterBuilder clusterBuilder = Builders.newClusterBuilder().initializeLiveNodes(9);
|
||||
LinkedList<Builders.NodeBuilder> nodeBuilders = clusterBuilder.getLiveNodeBuilders();
|
||||
for (int i = 0; i < 9; i++) {
|
||||
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_" + (i % 3));
|
||||
}
|
||||
|
||||
String collectionName = "nodeTypeCollection";
|
||||
Builders.CollectionBuilder collectionBuilder = Builders.newCollectionBuilder(collectionName);
|
||||
collectionBuilder.initializeShardsReplicas(1, 0, 0, 0, clusterBuilder.getLiveNodeBuilders());
|
||||
|
||||
// test single node type in collection
|
||||
AffinityPlacementConfig config = new AffinityPlacementConfig(
|
||||
MINIMAL_FREE_DISK_GB,
|
||||
PRIORITIZED_FREE_DISK_GB,
|
||||
Map.of(), Map.of(collectionName, "type_0"));
|
||||
configurePlugin(config);
|
||||
|
||||
clusterBuilder.addCollection(collectionBuilder);
|
||||
|
||||
PlacementContext placementContext = clusterBuilder.buildPlacementContext();
|
||||
Map<String, Set<String>> nodeNamesByType = new HashMap<>();
|
||||
Cluster cluster = placementContext.getCluster();
|
||||
AttributeValues attributeValues = placementContext.getAttributeFetcher()
|
||||
.requestNodeSystemProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP)
|
||||
.fetchAttributes();
|
||||
placementContext.getCluster().getLiveNodes().forEach(n ->
|
||||
nodeNamesByType
|
||||
.computeIfAbsent(attributeValues.getSystemProperty(n, AffinityPlacementConfig.NODE_TYPE_SYSPROP).get(), type -> new HashSet<>())
|
||||
.add(n.getName())
|
||||
);
|
||||
SolrCollection collection = placementContext.getCluster().getCollection(collectionName);
|
||||
PlacementRequestImpl placementRequest = new PlacementRequestImpl(collection,
|
||||
Set.of("shard1"), placementContext.getCluster().getLiveNodes(), 3, 0, 0);
|
||||
|
||||
PlacementPlan pp = plugin.computePlacement(placementRequest, placementContext);
|
||||
assertEquals("expected 3 placements: " + pp, 3, pp.getReplicaPlacements().size());
|
||||
Set<String> type0nodes = nodeNamesByType.get("type_0");
|
||||
Set<String> type1nodes = nodeNamesByType.get("type_1");
|
||||
Set<String> type2nodes = nodeNamesByType.get("type_2");
|
||||
|
||||
for (ReplicaPlacement p : pp.getReplicaPlacements()) {
|
||||
assertTrue(type0nodes.contains(p.getNode().getName()));
|
||||
}
|
||||
|
||||
// test 2 node types in collection
|
||||
config = new AffinityPlacementConfig(
|
||||
MINIMAL_FREE_DISK_GB,
|
||||
PRIORITIZED_FREE_DISK_GB,
|
||||
Map.of(), Map.of(collectionName, "type_0,type_1"));
|
||||
configurePlugin(config);
|
||||
|
||||
placementContext = clusterBuilder.buildPlacementContext();
|
||||
collection = placementContext.getCluster().getCollection(collectionName);
|
||||
placementRequest = new PlacementRequestImpl(collection,
|
||||
Set.of("shard1"), placementContext.getCluster().getLiveNodes(), 6, 0, 0);
|
||||
|
||||
pp = plugin.computePlacement(placementRequest, placementContext);
|
||||
assertEquals("expected 6 placements: " + pp, 6, pp.getReplicaPlacements().size());
|
||||
for (ReplicaPlacement p : pp.getReplicaPlacements()) {
|
||||
assertTrue(type0nodes.contains(p.getNode().getName()) ||
|
||||
type1nodes.contains(p.getNode().getName()));
|
||||
}
|
||||
|
||||
// test 2 node types in nodes
|
||||
for (int i = 0; i < 9; i++) {
|
||||
if (i < 3) {
|
||||
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_0,type_1");
|
||||
} else if (i < 6) {
|
||||
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_1,type_2");
|
||||
} else {
|
||||
nodeBuilders.get(i).setSysprop(AffinityPlacementConfig.NODE_TYPE_SYSPROP, "type_2");
|
||||
}
|
||||
}
|
||||
|
||||
placementContext = clusterBuilder.buildPlacementContext();
|
||||
collection = placementContext.getCluster().getCollection(collectionName);
|
||||
placementRequest = new PlacementRequestImpl(collection,
|
||||
Set.of("shard1"), placementContext.getCluster().getLiveNodes(), 6, 0, 0);
|
||||
pp = plugin.computePlacement(placementRequest, placementContext);
|
||||
assertEquals("expected 6 placements: " + pp, 6, pp.getReplicaPlacements().size());
|
||||
nodeNamesByType.clear();
|
||||
AttributeValues attributeValues2 = placementContext.getAttributeFetcher()
|
||||
.requestNodeSystemProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP)
|
||||
.fetchAttributes();
|
||||
placementContext.getCluster().getLiveNodes().forEach(n -> {
|
||||
String nodeTypesStr = attributeValues2.getSystemProperty(n, AffinityPlacementConfig.NODE_TYPE_SYSPROP).get();
|
||||
for (String nodeType : StrUtils.splitSmart(nodeTypesStr, ',')) {
|
||||
nodeNamesByType
|
||||
.computeIfAbsent(nodeType, type -> new HashSet<>())
|
||||
.add(n.getName());
|
||||
}
|
||||
});
|
||||
type0nodes = nodeNamesByType.get("type_0");
|
||||
type1nodes = nodeNamesByType.get("type_1");
|
||||
|
||||
for (ReplicaPlacement p : pp.getReplicaPlacements()) {
|
||||
assertTrue(type0nodes.contains(p.getNode().getName()) ||
|
||||
type1nodes.contains(p.getNode().getName()));
|
||||
}
|
||||
|
||||
}
|
||||
@Test @Slow
|
||||
public void testScalability() throws Exception {
|
||||
log.info("==== numNodes ====");
|
||||
|
|
|
@ -76,6 +76,10 @@ curl -X POST -H 'Content-type: application/json' -d '{
|
|||
"withCollections": {
|
||||
"A_primary": "A_secondary",
|
||||
"B_primary": "B_secondary"
|
||||
},
|
||||
"nodeType": {
|
||||
"collection_A": "searchNode,indexNode",
|
||||
"collection_B": "analyticsNode"
|
||||
}
|
||||
}
|
||||
}}'
|
||||
|
@ -119,9 +123,14 @@ The autoscaling specification in the configuration linked above aimed to do the
|
|||
** minimize cores per node, or
|
||||
** minimize disk usage.
|
||||
|
||||
Additionally, it supports the `withCollection` constraint that enforces the placement of
|
||||
co-located collections' replicas on the same nodes, and prevents deletions of collections and
|
||||
replicas that would break this constraint.
|
||||
It also supports additional per-collection constraints:
|
||||
|
||||
* `withCollection` constraint enforces the placement of co-located collections' replicas on the
|
||||
same nodes, and prevents deletions of collections and replicas that would break this constraint.
|
||||
* `nodeType` constraint limits the nodes eligible for placement to only those that match one or
|
||||
more of the specified node types.
|
||||
|
||||
See below for more details on these constraints.
|
||||
|
||||
Overall strategy of this plugin:
|
||||
|
||||
|
@ -170,6 +179,9 @@ The plugin preserves this co-location by rejecting delete operation of secondary
|
|||
removed from the co-located nodes, or the configuration must be changed to remove the
|
||||
co-location mapping for the primary collection.
|
||||
|
||||
===== `nodeType` constraint
|
||||
|
||||
|
||||
===== Configuration
|
||||
This plugin supports the following configuration parameters:
|
||||
|
||||
|
@ -185,11 +197,19 @@ not an option, replicas can still be assigned to nodes with less than this amoun
|
|||
Default value is 100.
|
||||
|
||||
`withCollection`::
|
||||
(optional, map) this property defines additional constraints that primary collections (keys)
|
||||
(optional, map) this property defines an additional constraint that primary collections (keys)
|
||||
must be located on the same nodes as the secondary collections (values). The plugin will
|
||||
assume that the secondary collection replicas are already in place and ignore candidate
|
||||
nodes where they are not already present. Default value is none.
|
||||
|
||||
`nodeType`::
|
||||
(optional, map) this property defines an additional constraint that collections (keys)
|
||||
must be located only on the nodes that are labeled with one or more of the matching
|
||||
"node type" labels (values in the map are comma-separated labels). Nodes are labeled using the
|
||||
`node_type` system property with the value being an arbitrary comma-separated list of labels.
|
||||
Correspondingly, the plugin configuration can specify that a particular collection must be placed
|
||||
only on the nodes that match at least one of the (comma-separated) labels defined here.
|
||||
|
||||
=== Example configurations
|
||||
This is a simple configuration that uses default values:
|
||||
|
||||
|
@ -237,3 +257,24 @@ curl -X POST -H 'Content-type: application/json' -d '{
|
|||
}}'
|
||||
http://localhost:8983/api/cluster/plugin
|
||||
----
|
||||
|
||||
This configuration defines that collection `collection_A` must be placed only on the nodes with
|
||||
the `node_type` system property containing either `searchNode` or `indexNode` (for example, a node
|
||||
may be labeled as `-Dnode_type=searchNode,indexNode,uiNode,zkNode`). Similarly, the
|
||||
collection `collection_B` must be placed only on the nodes that contain the `analyticsNode` label:
|
||||
|
||||
[source,bash]
|
||||
----
|
||||
curl -X POST -H 'Content-type: application/json' -d '{
|
||||
"add":{
|
||||
"name": ".placement-plugin",
|
||||
"class": "org.apache.solr.cluster.placement.plugins.AffinityPlacementFactory",
|
||||
"config": {
|
||||
"nodeType": {
|
||||
"collection_A": "searchNode,indexNode",
|
||||
"collection_B": "analyticsNode"
|
||||
}
|
||||
}
|
||||
}}'
|
||||
http://localhost:8983/api/cluster/plugin
|
||||
----
|
||||
|
|
Loading…
Reference in New Issue