diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 80d0d11b3bf..d182e36984e 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -60,6 +60,9 @@ New Features processor. (Lance Norskog, Grant Ingersoll, Joern Kottmann, Em, Kai Gülzau, Rene Nederhand, Robert Muir, Steven Bower, Steve Rowe) +* SOLR-11201: Implement autoscaling trigger for arbitrary metrics that creates events when + a given metric breaches a threshold (shalin) + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java index 039067c76d5..3ebfbd0d331 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java @@ -155,6 +155,8 @@ public class AutoScaling { return new NodeLostTrigger(name, props, loader, cloudManager); case SEARCHRATE: return new SearchRateTrigger(name, props, loader, cloudManager); + case METRIC: + return new MetricTrigger(name, props, loader, cloudManager); default: throw new IllegalArgumentException("Unknown event type: " + type + " in trigger: " + name); } diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java index 8cce9769b4c..b1e33e1cce9 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ComputePlanAction.java @@ -40,6 +40,8 @@ import org.apache.solr.common.util.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.solr.common.params.AutoScalingParams.PREFERRED_OP; + /** * This class is responsible for using the configured policy and preferences * with the hints provided by the trigger event to compute the required cluster operations. @@ -133,8 +135,30 @@ public class ComputePlanAction extends TriggerActionBase { } } break; + case METRIC: + Map sourceNodes = (Map) event.getProperty(AutoScalingParams.NODE); + String collection = (String) event.getProperty(AutoScalingParams.COLLECTION); + String shard = (String) event.getProperty(AutoScalingParams.SHARD); + String preferredOp = (String) event.getProperty(PREFERRED_OP); + if (sourceNodes.isEmpty()) { + log.warn("No nodes reported in event: " + event); + return NoneSuggester.INSTANCE; + } + CollectionParams.CollectionAction action = CollectionParams.CollectionAction.get(preferredOp == null ? CollectionParams.CollectionAction.MOVEREPLICA.toLower() : preferredOp); + suggester = session.getSuggester(action); + for (String node : sourceNodes.keySet()) { + suggester = suggester.hint(Suggester.Hint.SRC_NODE, node); + } + if (collection != null) { + if (shard == null) { + suggester = suggester.hint(Suggester.Hint.COLL, collection); + } else { + suggester = suggester.hint(Suggester.Hint.COLL_SHARD, new Pair(collection, shard)); + } + } + break; default: - throw new UnsupportedOperationException("No support for events other than nodeAdded, nodeLost and searchRate, received: " + event.getEventType()); + throw new UnsupportedOperationException("No support for events other than nodeAdded, nodeLost, searchRate and metric. Received: " + event.getEventType()); } return suggester; } diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/MetricTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/MetricTrigger.java new file mode 100644 index 00000000000..531e4e6eca7 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/MetricTrigger.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.cloud.autoscaling; + +import java.lang.invoke.MethodHandles; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; + +import org.apache.solr.client.solrj.cloud.autoscaling.Policy; +import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager; +import org.apache.solr.client.solrj.cloud.autoscaling.TriggerEventType; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.cloud.ClusterState; +import org.apache.solr.common.cloud.DocCollection; +import org.apache.solr.common.cloud.Slice; +import org.apache.solr.common.params.AutoScalingParams; +import org.apache.solr.core.SolrResourceLoader; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.solr.common.params.AutoScalingParams.ABOVE; +import static org.apache.solr.common.params.AutoScalingParams.BELOW; +import static org.apache.solr.common.params.AutoScalingParams.METRIC; +import static org.apache.solr.common.params.AutoScalingParams.PREFERRED_OP; + +public class MetricTrigger extends TriggerBase { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private final String metric; + private final Number above, below; + private final String collection, shard, node, preferredOp; + + private final Map lastNodeEvent = new ConcurrentHashMap<>(); + + public MetricTrigger(String name, Map properties, SolrResourceLoader loader, SolrCloudManager cloudManager) { + super(TriggerEventType.METRIC, name, properties, loader, cloudManager); + this.metric = (String) properties.get(METRIC); + this.above = (Number) properties.get(ABOVE); + this.below = (Number) properties.get(BELOW); + this.collection = (String) properties.getOrDefault(AutoScalingParams.COLLECTION, Policy.ANY); + shard = (String) properties.getOrDefault(AutoScalingParams.SHARD, Policy.ANY); + if (collection.equals(Policy.ANY) && !shard.equals(Policy.ANY)) { + throw new IllegalArgumentException("When 'shard' is other than #ANY then collection name must be also other than #ANY"); + } + node = (String) properties.getOrDefault(AutoScalingParams.NODE, Policy.ANY); + preferredOp = (String) properties.getOrDefault(PREFERRED_OP, null); + } + + @Override + protected Map getState() { + return null; + } + + @Override + protected void setState(Map state) { + lastNodeEvent.clear(); + Map nodeTimes = (Map) state.get("lastNodeEvent"); + if (nodeTimes != null) { + lastNodeEvent.putAll(nodeTimes); + } + } + + @Override + public void restoreState(AutoScaling.Trigger old) { + assert old.isClosed(); + if (old instanceof MetricTrigger) { + MetricTrigger that = (MetricTrigger) old; + assert this.name.equals(that.name); + this.lastNodeEvent.clear(); + this.lastNodeEvent.putAll(that.lastNodeEvent); + } else { + throw new SolrException(SolrException.ErrorCode.INVALID_STATE, + "Unable to restore state from an unknown type of trigger"); + } + } + + @Override + public void run() { + AutoScaling.TriggerEventProcessor processor = processorRef.get(); + if (processor == null) { + return; + } + + Set liveNodes = null; + if (node.equals(Policy.ANY)) { + if (collection.equals(Policy.ANY)) { + liveNodes = cloudManager.getClusterStateProvider().getLiveNodes(); + } else { + final Set nodes = new HashSet<>(); + ClusterState.CollectionRef ref = cloudManager.getClusterStateProvider().getState(collection); + DocCollection docCollection; + if (ref == null || (docCollection = ref.get()) == null) { + log.warn("MetricTrigger could not find collection: {}", collection); + return; + } + if (shard.equals(Policy.ANY)) { + docCollection.getReplicas().forEach(replica -> { + nodes.add(replica.getNodeName()); + }); + } else { + Slice slice = docCollection.getSlice(shard); + if (slice == null) { + log.warn("MetricTrigger could not find collection: {} shard: {}", collection, shard); + return; + } + slice.getReplicas().forEach(replica -> nodes.add(replica.getNodeName())); + } + liveNodes = nodes; + } + } else { + liveNodes = Collections.singleton(node); + } + + Map rates = new HashMap<>(liveNodes.size()); + for (String node : liveNodes) { + Map values = cloudManager.getNodeStateProvider().getNodeValues(node, Collections.singletonList(metric)); + values.forEach((tag, rate) -> rates.computeIfAbsent(node, s -> (Number) rate)); + } + + long now = cloudManager.getTimeSource().getTime(); + // check for exceeded rates and filter out those with less than waitFor from previous events + Map hotNodes = rates.entrySet().stream() + .filter(entry -> waitForElapsed(entry.getKey(), now, lastNodeEvent)) + .filter(entry -> (below != null && Double.compare(entry.getValue().doubleValue(), below.doubleValue()) < 0) || (above != null && Double.compare(entry.getValue().doubleValue(), above.doubleValue()) > 0)) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + if (hotNodes.isEmpty()) return; + + final AtomicLong eventTime = new AtomicLong(now); + hotNodes.forEach((n, r) -> { + long time = lastNodeEvent.get(n); + if (eventTime.get() > time) { + eventTime.set(time); + } + }); + + if (processor.process(new MetricBreachedEvent(getName(), collection, shard, preferredOp, eventTime.get(), metric, hotNodes))) { + hotNodes.keySet().forEach(node -> lastNodeEvent.put(node, now)); + } + } + + private boolean waitForElapsed(String name, long now, Map lastEventMap) { + Long lastTime = lastEventMap.computeIfAbsent(name, s -> now); + long elapsed = TimeUnit.SECONDS.convert(now - lastTime, TimeUnit.NANOSECONDS); + log.trace("name={}, lastTime={}, elapsed={}", name, lastTime, elapsed); + if (TimeUnit.SECONDS.convert(now - lastTime, TimeUnit.NANOSECONDS) < getWaitForSecond()) { + return false; + } + return true; + } + + public static class MetricBreachedEvent extends TriggerEvent { + public MetricBreachedEvent(String source, String collection, String shard, String preferredOp, long eventTime, String metric, Map hotNodes) { + super(TriggerEventType.METRIC, source, eventTime, null); + properties.put(METRIC, metric); + properties.put(AutoScalingParams.NODE, hotNodes); + if (!collection.equals(Policy.ANY)) { + properties.put(AutoScalingParams.COLLECTION, collection); + } + if (!shard.equals(Policy.ANY)) { + properties.put(AutoScalingParams.SHARD, shard); + } + if (preferredOp != null) { + properties.put(PREFERRED_OP, preferredOp); + } + } + } +} diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/MetricTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/MetricTriggerTest.java new file mode 100644 index 00000000000..96083f48370 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/MetricTriggerTest.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.cloud.autoscaling; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.solr.client.solrj.cloud.autoscaling.SolrCloudManager; +import org.apache.solr.client.solrj.impl.CloudSolrClient; +import org.apache.solr.client.solrj.impl.SolrClientCloudManager; +import org.apache.solr.client.solrj.request.CollectionAdminRequest; +import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.cloud.ZkDistributedQueueFactory; +import org.apache.solr.common.cloud.SolrZkClient; +import org.apache.solr.common.util.Utils; +import org.apache.solr.core.CoreDescriptor; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.metrics.SolrCoreMetricManager; +import org.junit.BeforeClass; +import org.junit.Test; + +public class MetricTriggerTest extends SolrCloudTestCase { + + private AutoScaling.TriggerEventProcessor noFirstRunProcessor = event -> { + fail("Did not expect the listener to fire on first run!"); + return true; + }; + + @BeforeClass + public static void setupCluster() throws Exception { + configureCluster(1) + .addConfig("conf", configset("cloud-minimal")) + .configure(); + CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(DEFAULT_TEST_COLLECTION_NAME, + "conf", 1, 1); + CloudSolrClient solrClient = cluster.getSolrClient(); + create.setMaxShardsPerNode(1); + create.process(solrClient); + } + + @Test + public void test() throws Exception { + CoreDescriptor coreDescriptor = cluster.getJettySolrRunner(0).getCoreContainer().getCoreDescriptors().iterator().next(); + String shardId = coreDescriptor.getCloudDescriptor().getShardId(); + String coreName = coreDescriptor.getName(); + String replicaName = Utils.parseMetricsReplicaName(DEFAULT_TEST_COLLECTION_NAME, coreName); + long waitForSeconds = 2 + random().nextInt(5); + String registry = SolrCoreMetricManager.createRegistryName(true, DEFAULT_TEST_COLLECTION_NAME, shardId, replicaName, null); + String tag = "metrics:" + registry + ":ADMIN./admin/file.requests"; + + Map props = createTriggerProps(waitForSeconds, tag, 1.0d, null, DEFAULT_TEST_COLLECTION_NAME, null, null); + + final List events = new ArrayList<>(); + SolrZkClient zkClient = cluster.getSolrClient().getZkStateReader().getZkClient(); + SolrResourceLoader loader = cluster.getJettySolrRunner(0).getCoreContainer().getResourceLoader(); + SolrCloudManager cloudManager = new SolrClientCloudManager(new ZkDistributedQueueFactory(zkClient), cluster.getSolrClient()); + + try (MetricTrigger metricTrigger = new MetricTrigger("metricTrigger", props, loader, cloudManager)) { + metricTrigger.setProcessor(noFirstRunProcessor); + metricTrigger.run(); + metricTrigger.setProcessor(event -> events.add(event)); + assertEquals(0, events.size()); + Thread.sleep(waitForSeconds * 1000 + 2000); + metricTrigger.run(); + assertEquals(1, events.size()); + } + + events.clear(); + tag = "metrics:" + registry + ":ADMIN./admin/file.handlerStart"; + props = createTriggerProps(waitForSeconds, tag, null, 100.0d, DEFAULT_TEST_COLLECTION_NAME, null, null); + try (MetricTrigger metricTrigger = new MetricTrigger("metricTrigger", props, loader, cloudManager)) { + metricTrigger.setProcessor(noFirstRunProcessor); + metricTrigger.run(); + metricTrigger.setProcessor(event -> events.add(event)); + assertEquals(0, events.size()); + Thread.sleep(waitForSeconds * 1000 + 2000); + metricTrigger.run(); + assertEquals(1, events.size()); + } + } + + private Map createTriggerProps(long waitForSeconds, String metric, Double below, Double above, String collection, String shard, String node) { + Map props = new HashMap<>(); + props.put("metric", metric); + if (above != null) { + props.put("above", above); + } + if (below != null) { + props.put("below", below); + } + if (collection != null) { + props.put("collection", collection); + } + if (shard != null) { + props.put("shard", shard); + } + if (node != null) { + props.put("node", node); + } + props.put("event", "metric"); + props.put("waitFor", waitForSeconds); + props.put("enabled", true); + + List> actions = new ArrayList<>(3); + Map map = new HashMap<>(2); + map.put("name", "compute_plan"); + map.put("class", "solr.ComputePlanAction"); + actions.add(map); + map = new HashMap<>(2); + map.put("name", "execute_plan"); + map.put("class", "solr.ExecutePlanAction"); + actions.add(map); + props.put("actions", actions); + return props; + } +} diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java index c104a99528c..eb196c16edf 100644 --- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java @@ -45,18 +45,22 @@ import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.client.solrj.response.CollectionAdminResponse; import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.LiveNodesListener; +import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.params.AutoScalingParams; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.TimeSource; import org.apache.solr.common.util.Utils; import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.metrics.SolrCoreMetricManager; import org.apache.solr.util.LogLevel; import org.apache.solr.util.TimeOut; -import org.apache.solr.common.util.TimeSource; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.ZKUtil; import org.apache.zookeeper.data.Stat; @@ -1497,4 +1501,130 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { assertEquals(collectionRate, totalShardRate.get(), 5.0); assertEquals(collectionRate, totalReplicaRate.get(), 5.0); } + + @Test + public void testMetricTrigger() throws Exception { + // at least 3 nodes + for (int i = cluster.getJettySolrRunners().size(); i < 3; i++) { + cluster.startJettySolrRunner(); + } + cluster.waitForAllNodes(5); + + String collectionName = "testMetricTrigger"; + CloudSolrClient solrClient = cluster.getSolrClient(); + CollectionAdminRequest.Create create = CollectionAdminRequest.createCollection(collectionName, + "conf", 2, 1); + create.process(solrClient); + solrClient.setDefaultCollection(collectionName); + + waitForState("Timed out waiting for collection:" + collectionName + " to become active", collectionName, clusterShape(2, 1)); + + DocCollection docCollection = solrClient.getZkStateReader().getClusterState().getCollection(collectionName); + String shardId = "shard1"; + Replica replica = docCollection.getSlice(shardId).getReplicas().iterator().next(); + String coreName = replica.getCoreName(); + String replicaName = Utils.parseMetricsReplicaName(collectionName, coreName); + long waitForSeconds = 2 + random().nextInt(5); + String registry = SolrCoreMetricManager.createRegistryName(true, collectionName, shardId, replicaName, null); + String tag = "metrics:" + registry + ":INDEX.sizeInBytes"; + + String setTriggerCommand = "{" + + "'set-trigger' : {" + + "'name' : 'metric_trigger'," + + "'event' : 'metric'," + + "'waitFor' : '" + waitForSeconds + "s'," + + "'enabled' : true," + + "'metric': '" + tag + "'" + + "'above' : 100.0," + + "'collection': '" + collectionName + "'" + + "'shard':'" + shardId + "'" + + "'actions' : [" + + "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," + + "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," + + "{'name':'test','class':'" + TestSearchRateAction.class.getName() + "'}" + + "]" + + "}}"; + SolrRequest req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand); + NamedList response = solrClient.request(req); + assertEquals(response.get("result").toString(), "success"); + + String setListenerCommand1 = "{" + + "'set-listener' : " + + "{" + + "'name' : 'srt'," + + "'trigger' : 'metric_trigger'," + + "'stage' : ['FAILED','SUCCEEDED']," + + "'afterAction': ['compute', 'execute', 'test']," + + "'class' : '" + TestTriggerListener.class.getName() + "'" + + "}" + + "}"; + req = createAutoScalingRequest(SolrRequest.METHOD.POST, setListenerCommand1); + response = solrClient.request(req); + assertEquals(response.get("result").toString(), "success"); + + for (int i = 0; i < 500; i++) { + solrClient.add(new SolrInputDocument("id", String.valueOf(i), "x_s", "x" + i)); + } + solrClient.commit(); + + boolean await = triggerFiredLatch.await(20, TimeUnit.SECONDS); + assertTrue("The trigger did not fire at all", await); + // wait for listener to capture the SUCCEEDED stage + Thread.sleep(2000); + assertEquals(listenerEvents.toString(), 4, listenerEvents.get("srt").size()); + CapturedEvent ev = listenerEvents.get("srt").get(0); + long now = timeSource.getTime(); + // verify waitFor + assertTrue(TimeUnit.SECONDS.convert(waitForSeconds, TimeUnit.NANOSECONDS) - WAIT_FOR_DELTA_NANOS <= now - ev.event.getEventTime()); + assertEquals(collectionName, ev.event.getProperties().get("collection")); + + String oldReplicaName = replica.getName(); + docCollection = solrClient.getZkStateReader().getClusterState().getCollection(collectionName); + assertEquals(2, docCollection.getReplicas().size()); + assertNull(docCollection.getReplica(oldReplicaName)); + + // todo uncomment the following code once SOLR-11714 is fixed + // find a new replica and create its metric name +// replica = docCollection.getSlice(shardId).getReplicas().iterator().next(); +// coreName = replica.getCoreName(); +// replicaName = Utils.parseMetricsReplicaName(collectionName, coreName); +// registry = SolrCoreMetricManager.createRegistryName(true, collectionName, shardId, replicaName, null); +// tag = "metrics:" + registry + ":INDEX.sizeInBytes"; +// +// setTriggerCommand = "{" + +// "'set-trigger' : {" + +// "'name' : 'metric_trigger'," + +// "'event' : 'metric'," + +// "'waitFor' : '" + waitForSeconds + "s'," + +// "'enabled' : true," + +// "'metric': '" + tag + "'" + +// "'above' : 100.0," + +// "'collection': '" + collectionName + "'" + +// "'shard':'" + shardId + "'" + +// "'preferredOperation':'addreplica'" + +// "'actions' : [" + +// "{'name':'compute','class':'" + ComputePlanAction.class.getName() + "'}," + +// "{'name':'execute','class':'" + ExecutePlanAction.class.getName() + "'}," + +// "{'name':'test','class':'" + TestSearchRateAction.class.getName() + "'}" + +// "]" + +// "}}"; +// req = createAutoScalingRequest(SolrRequest.METHOD.POST, setTriggerCommand); +// response = solrClient.request(req); +// assertEquals(response.get("result").toString(), "success"); +// +// triggerFiredLatch = new CountDownLatch(1); +// listenerEvents.clear(); +// await = triggerFiredLatch.await(20, TimeUnit.SECONDS); +// assertTrue("The trigger did not fire at all", await); +// // wait for listener to capture the SUCCEEDED stage +// Thread.sleep(2000); +// assertEquals(listenerEvents.toString(), 4, listenerEvents.get("srt").size()); +// ev = listenerEvents.get("srt").get(0); +// now = timeSource.getTime(); +// // verify waitFor +// assertTrue(TimeUnit.SECONDS.convert(waitForSeconds, TimeUnit.NANOSECONDS) - WAIT_FOR_DELTA_NANOS <= now - ev.event.getEventTime()); +// assertEquals(collectionName, ev.event.getProperties().get("collection")); +// docCollection = solrClient.getZkStateReader().getClusterState().getCollection(collectionName); +// assertEquals(3, docCollection.getReplicas().size()); + } } diff --git a/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc b/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc index 70cbb5f1e53..5a986576e4a 100644 --- a/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc +++ b/solr/solr-ref-guide/src/solrcloud-autoscaling-triggers.adoc @@ -34,6 +34,7 @@ Currently the following event types (and corresponding trigger implementations) * `nodeAdded` - generated when a new node joins the cluster * `nodeLost` - generated when a node leaves the cluster +* `metric` - generated when the configured metric crosses a configured lower or upper threshold value Events are not necessarily generated immediately after the corresponding state change occurred - the maximum rate of events is controlled by the `waitFor` configuration parameter (see below). @@ -57,6 +58,35 @@ to add replicas on the live nodes to maintain the expected replication factor). You can see the section <> to learn more about how the `.autoAddReplicas` trigger works. +== Metric Trigger + +The metric trigger can be used to monitor any metric exposed by the Metrics API. It supports lower and upper threshold configurations as well as optional filters to limit operation to specific collection, shards and nodes. + +This trigger supports the following configuration: + +* `metric` - (string, required) The metric property name to be watched in the format metrics:group:prefix e.g. `metric:solr.node:CONTAINER.fs.coreRoot.usableSpace` +* `below` - (double, optional) The lower threshold for the metric value. The trigger produces a metric breached event if the metric's value falls below this value +* `above` - (double, optional) The upper threshold for the metric value. The trigger produces a metric breached event if the metric's value crosses above this value +* `collection` - (string, optional) The collection used to limit the nodes on which the given metric is watched. When the metric is breached, trigger actions will limit operations to this collection only. +* `shard` - (string, optional) The shard used to limit the nodes on which the given metric is watched. When the metric is breached, trigger actions will limit operations to this shard only. +* `node` - (string, optional) The node on which the given metric is watched. Trigger actions will operate on this node only. +* `preferredOperation` (string, optional, defaults to `MOVEREPLICA`) - The operation to be performed in response to an event generated by this trigger. By default, replicas will be moved from the hot node to others. The only other supported value is `ADDREPLICA` which adds more replicas if the metric is breached. + +.Example: Metric Trigger that fires when total usable space on a node having replicas of "mycollection" falls below 100GB +[source,json] +---- +{ + "set-trigger": { + "name": "metric_trigger", + "event": "metric", + "waitFor": "5s", + "metric": "metric:solr.node:CONTAINER.fs.coreRoot.usableSpace", + "below": 107374182400, + "collection": "mycollection" + } +} +---- + == Trigger Configuration Trigger configurations are managed using the Autoscaling Write API and the commands `set-trigger`, `remove-trigger`, `suspend-trigger`, and `resume-trigger`. @@ -74,7 +104,7 @@ Action configuration consists of the following properties: * `name` - (string, required) A unique name of the action configuration. * `class` - (string, required) The action implementation class. -* A dditional implementation-specific properties may be provided +* Additional implementation-specific properties may be provided If the Action configuration is omitted, then by default, the `ComputePlanAction` and the `ExecutePlanAction` are automatically added to the trigger configuration. diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/TriggerEventType.java b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/TriggerEventType.java index 238d7e1c611..96bc773c111 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/TriggerEventType.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/cloud/autoscaling/TriggerEventType.java @@ -27,5 +27,6 @@ public enum TriggerEventType { SCHEDULED, SEARCHRATE, INDEXRATE, - INVALID + INVALID, + METRIC } diff --git a/solr/solrj/src/java/org/apache/solr/common/params/AutoScalingParams.java b/solr/solrj/src/java/org/apache/solr/common/params/AutoScalingParams.java index cf259c6a86d..4f00e282eb3 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/AutoScalingParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/AutoScalingParams.java @@ -46,6 +46,10 @@ public interface AutoScalingParams { String RATE = "rate"; String REMOVE_LISTENERS = "removeListeners"; String ZK_VERSION = "zkVersion"; + String METRIC = "metric"; + String ABOVE = "above"; + String BELOW = "below"; + String PREFERRED_OP = "preferredOperation"; // commands String CMD_SET_TRIGGER = "set-trigger";