From dc9ab499678830e090dce931da95add69e42249f Mon Sep 17 00:00:00 2001 From: Shalin Shekhar Mangar Date: Thu, 4 May 2017 16:17:53 +0530 Subject: [PATCH] SOLR-10602: Triggers should be able to restore state from old instances when taking over --- solr/CHANGES.txt | 2 + .../solr/cloud/autoscaling/AutoScaling.java | 2 + .../cloud/autoscaling/NodeAddedTrigger.java | 28 +++- .../cloud/autoscaling/NodeLostTrigger.java | 18 +- .../autoscaling/OverseerTriggerThread.java | 7 + .../cloud/autoscaling/ScheduledTriggers.java | 6 +- .../autoscaling/NodeAddedTriggerTest.java | 101 +++++++++--- .../autoscaling/NodeLostTriggerTest.java | 102 +++++++++--- .../autoscaling/TriggerIntegrationTest.java | 154 +++++++++++++++++- 9 files changed, 364 insertions(+), 56 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 16bb70f7c74..22aa372489f 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -96,6 +96,8 @@ Bug Fixes * SOLR-9837: Fix 55% performance regression of FieldCache uninvert time of numeric fields. (yonik) +* SOLR-10602: Triggers should be able to restore state from old instances when taking over. (shalin) + Optimizations ---------------------- diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java index 1e677b252b8..688aac5686e 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/AutoScaling.java @@ -101,6 +101,8 @@ public class AutoScaling { public TriggerListener getListener(); public boolean isClosed(); + + public void restoreState(Trigger old); } public static class TriggerFactory implements Closeable { diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/NodeAddedTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/NodeAddedTrigger.java index 4654cfb6410..00281c79a1f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/NodeAddedTrigger.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/NodeAddedTrigger.java @@ -32,6 +32,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import org.apache.lucene.util.IOUtils; +import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.core.CoreContainer; import org.slf4j.Logger; @@ -78,6 +79,7 @@ public class NodeAddedTrigger implements AutoScaling.Trigger old) { + assert old.isClosed(); + if (old instanceof NodeAddedTrigger) { + NodeAddedTrigger that = (NodeAddedTrigger) old; + assert this.name.equals(that.name); + this.lastLiveNodes = new HashSet<>(that.lastLiveNodes); + this.nodeNameVsTimeAdded = new HashMap<>(that.nodeNameVsTimeAdded); + } else { + throw new SolrException(SolrException.ErrorCode.INVALID_STATE, + "Unable to restore state from an unknown type of trigger"); + } + } + @Override public void run() { try { @@ -152,7 +168,7 @@ public class NodeAddedTrigger implements AutoScaling.Trigger newLiveNodes = reader.getClusterState().getLiveNodes(); @@ -171,19 +187,21 @@ public class NodeAddedTrigger implements AutoScaling.Trigger copyOfNew = new HashSet<>(newLiveNodes); copyOfNew.removeAll(lastLiveNodes); copyOfNew.forEach(n -> { - log.info("Tracking new node: {}", n); - nodeNameVsTimeAdded.put(n, System.nanoTime()); + long nanoTime = System.nanoTime(); + nodeNameVsTimeAdded.put(n, nanoTime); + log.info("Tracking new node: {} at {} nanotime", n, nanoTime); }); // has enough time expired to trigger events for a node? for (Map.Entry entry : nodeNameVsTimeAdded.entrySet()) { String nodeName = entry.getKey(); Long timeAdded = entry.getValue(); - if (TimeUnit.SECONDS.convert(System.nanoTime() - timeAdded, TimeUnit.NANOSECONDS) >= getWaitForSecond()) { + long now = System.nanoTime(); + if (TimeUnit.SECONDS.convert(now - timeAdded, TimeUnit.NANOSECONDS) >= getWaitForSecond()) { // fire! AutoScaling.TriggerListener listener = listenerRef.get(); if (listener != null) { - log.info("NodeAddedTrigger firing registered listener"); + log.info("NodeAddedTrigger {} firing registered listener for node: {} added at {} nanotime, now: {} nanotime", name, nodeName, timeAdded, now); listener.triggerFired(new NodeAddedEvent(this, timeAdded, nodeName)); } trackingKeySet.remove(nodeName); diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/NodeLostTrigger.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/NodeLostTrigger.java index 1f510c2e14a..df1ea76c4b0 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/NodeLostTrigger.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/NodeLostTrigger.java @@ -32,9 +32,9 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import org.apache.lucene.util.IOUtils; +import org.apache.solr.common.SolrException; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.core.CoreContainer; -import org.apache.zookeeper.KeeperException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -146,6 +146,20 @@ public class NodeLostTrigger implements AutoScaling.Trigger old) { + assert old.isClosed(); + if (old instanceof NodeLostTrigger) { + NodeLostTrigger that = (NodeLostTrigger) old; + assert this.name.equals(that.name); + this.lastLiveNodes = new HashSet<>(that.lastLiveNodes); + this.nodeNameVsTimeRemoved = new HashMap<>(that.nodeNameVsTimeRemoved); + } else { + throw new SolrException(SolrException.ErrorCode.INVALID_STATE, + "Unable to restore state from an unknown type of trigger"); + } + } + @Override public void run() { try { @@ -155,7 +169,7 @@ public class NodeLostTrigger implements AutoScaling.Trigger newLiveNodes = reader.getClusterState().getLiveNodes(); diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/OverseerTriggerThread.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/OverseerTriggerThread.java index 33469653864..eca8c0bc468 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/OverseerTriggerThread.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/OverseerTriggerThread.java @@ -179,12 +179,19 @@ public class OverseerTriggerThread implements Runnable, Closeable { } final Stat stat = new Stat(); final byte[] data = zkClient.getData(ZkStateReader.SOLR_AUTOSCALING_CONF_PATH, this, stat, true); + log.debug("{} watcher fired with znode version {}", ZkStateReader.SOLR_AUTOSCALING_CONF_PATH, stat.getVersion()); if (znodeVersion >= stat.getVersion()) { // protect against reordered watcher fires by ensuring that we only move forward return; } znodeVersion = stat.getVersion(); Map triggerMap = loadTriggers(triggerFactory, data); + + // remove all active triggers that have been removed from ZK + Set trackingKeySet = activeTriggers.keySet(); + trackingKeySet.retainAll(triggerMap.keySet()); + + // now lets add or remove triggers which have been enabled or disabled respectively for (Map.Entry entry : triggerMap.entrySet()) { String triggerName = entry.getKey(); AutoScaling.Trigger trigger = entry.getValue(); diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java index 58292881ed0..1fbfff7f33f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java @@ -45,6 +45,7 @@ import org.slf4j.LoggerFactory; */ public class ScheduledTriggers implements Closeable { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + static final int SCHEDULED_TRIGGER_DELAY_SECONDS = 1; private final Map scheduledTriggers = new HashMap<>(); @@ -67,7 +68,7 @@ public class ScheduledTriggers implements Closeable { // how many triggers we have and secondly, that many threads will always be instantiated and kept around idle // so it is wasteful as well. Hopefully 4 is a good compromise. scheduledThreadPoolExecutor = (ScheduledThreadPoolExecutor) Executors.newScheduledThreadPool(4, - new DefaultSolrThreadFactory("ScheduledTrigger-")); + new DefaultSolrThreadFactory("ScheduledTrigger")); scheduledThreadPoolExecutor.setRemoveOnCancelPolicy(true); scheduledThreadPoolExecutor.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); actionExecutor = ExecutorUtil.newMDCAwareSingleThreadExecutor(new DefaultSolrThreadFactory("AutoscalingActionExecutor")); @@ -93,6 +94,7 @@ public class ScheduledTriggers implements Closeable { return; } IOUtils.closeQuietly(old); + newTrigger.restoreState(old.trigger); scheduledTriggers.replace(newTrigger.getName(), scheduledTrigger); } newTrigger.setListener(event -> { @@ -115,7 +117,7 @@ public class ScheduledTriggers implements Closeable { }); } }); - scheduledTrigger.scheduledFuture = scheduledThreadPoolExecutor.scheduleWithFixedDelay(newTrigger, 0, 1, TimeUnit.SECONDS); + scheduledTrigger.scheduledFuture = scheduledThreadPoolExecutor.scheduleWithFixedDelay(newTrigger, 0, SCHEDULED_TRIGGER_DELAY_SECONDS, TimeUnit.SECONDS); } /** diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/NodeAddedTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/NodeAddedTriggerTest.java index 5ab6bec31e6..d08f839e808 100644 --- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/NodeAddedTriggerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/NodeAddedTriggerTest.java @@ -43,27 +43,10 @@ public class NodeAddedTriggerTest extends SolrCloudTestCase { } @Test - public void test() throws Exception { + public void testTrigger() throws Exception { CoreContainer container = cluster.getJettySolrRunners().get(0).getCoreContainer(); - Map props = new HashMap<>(); - props.put("event", "nodeLost"); long waitForSeconds = 1 + random().nextInt(5); - props.put("waitFor", waitForSeconds); - props.put("enabled", true); - List> actions = new ArrayList<>(3); - Map map = new HashMap<>(2); - map.put("name", "compute_plan"); - map.put("class", "solr.ComputePlanAction"); - actions.add(map); - map = new HashMap<>(2); - map.put("name", "execute_plan"); - map.put("class", "solr.ExecutePlanAction"); - actions.add(map); - map = new HashMap<>(2); - map.put("name", "log_plan"); - map.put("class", "solr.LogPlanAction"); - actions.add(map); - props.put("actions", actions); + Map props = createTriggerProps(waitForSeconds); try (NodeAddedTrigger trigger = new NodeAddedTrigger("node_added_trigger", props, container)) { trigger.setListener(event -> fail("Did not expect the listener to fire on first run!")); @@ -130,4 +113,84 @@ public class NodeAddedTriggerTest extends SolrCloudTestCase { assertFalse(fired.get()); } } + + @Test + public void testRestoreState() throws Exception { + CoreContainer container = cluster.getJettySolrRunners().get(0).getCoreContainer(); + long waitForSeconds = 1 + random().nextInt(5); + Map props = createTriggerProps(waitForSeconds); + + // add a new node but update the trigger before the waitFor period expires + // and assert that the new trigger still fires + NodeAddedTrigger trigger = new NodeAddedTrigger("node_added_trigger", props, container); + final long waitTime = 2; + props.put("waitFor", waitTime); + trigger.setListener(event -> fail("Did not expect the listener to fire on first run!")); + trigger.run(); + + JettySolrRunner newNode = cluster.startJettySolrRunner(); + trigger.run(); // this run should detect the new node + trigger.close(); // close the old trigger + + try (NodeAddedTrigger newTrigger = new NodeAddedTrigger("some_different_name", props, container)) { + try { + newTrigger.restoreState(trigger); + fail("Trigger should only be able to restore state from an old trigger of the same name"); + } catch (AssertionError e) { + // expected + } + } + + try (NodeAddedTrigger newTrigger = new NodeAddedTrigger("node_added_trigger", props, container)) { + AtomicBoolean fired = new AtomicBoolean(false); + AtomicReference eventRef = new AtomicReference<>(); + newTrigger.setListener(event -> { + if (fired.compareAndSet(false, true)) { + eventRef.set(event); + if (System.nanoTime() - event.getEventNanoTime() <= TimeUnit.NANOSECONDS.convert(waitTime, TimeUnit.SECONDS)) { + fail("NodeAddedListener was fired before the configured waitFor period"); + } + } else { + fail("NodeAddedTrigger was fired more than once!"); + } + }); + newTrigger.restoreState(trigger); // restore state from the old trigger + int counter = 0; + do { + newTrigger.run(); + Thread.sleep(1000); + if (counter++ > 10) { + fail("Newly added node was not discovered by trigger even after 10 seconds"); + } + } while (!fired.get()); + + // ensure the event was fired + assertTrue(fired.get()); + NodeAddedTrigger.NodeAddedEvent nodeAddedEvent = eventRef.get(); + assertNotNull(nodeAddedEvent); + assertEquals("", newNode.getNodeName(), nodeAddedEvent.getNodeName()); + } + } + + private Map createTriggerProps(long waitForSeconds) { + Map props = new HashMap<>(); + props.put("event", "nodeLost"); + props.put("waitFor", waitForSeconds); + props.put("enabled", true); + List> actions = new ArrayList<>(3); + Map map = new HashMap<>(2); + map.put("name", "compute_plan"); + map.put("class", "solr.ComputePlanAction"); + actions.add(map); + map = new HashMap<>(2); + map.put("name", "execute_plan"); + map.put("class", "solr.ExecutePlanAction"); + actions.add(map); + map = new HashMap<>(2); + map.put("name", "log_plan"); + map.put("class", "solr.LogPlanAction"); + actions.add(map); + props.put("actions", actions); + return props; + } } diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/NodeLostTriggerTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/NodeLostTriggerTest.java index 56c3b73b80b..53a4458aaa7 100644 --- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/NodeLostTriggerTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/NodeLostTriggerTest.java @@ -44,27 +44,10 @@ public class NodeLostTriggerTest extends SolrCloudTestCase { } @Test - public void test() throws Exception { + public void testTrigger() throws Exception { CoreContainer container = cluster.getJettySolrRunners().get(0).getCoreContainer(); - Map props = new HashMap<>(); - props.put("event", "nodeLost"); long waitForSeconds = 1 + random().nextInt(5); - props.put("waitFor", waitForSeconds); - props.put("enabled", true); - List> actions = new ArrayList<>(3); - Map map = new HashMap<>(2); - map.put("name", "compute_plan"); - map.put("class", "solr.ComputePlanAction"); - actions.add(map); - map = new HashMap<>(2); - map.put("name", "execute_plan"); - map.put("class", "solr.ExecutePlanAction"); - actions.add(map); - map = new HashMap<>(2); - map.put("name", "log_plan"); - map.put("class", "solr.LogPlanAction"); - actions.add(map); - props.put("actions", actions); + Map props = createTriggerProps(waitForSeconds); try (NodeLostTrigger trigger = new NodeLostTrigger("node_lost_trigger", props, container)) { trigger.setListener(event -> fail("Did not expect the listener to fire on first run!")); @@ -119,7 +102,7 @@ public class NodeLostTriggerTest extends SolrCloudTestCase { fail("NodeLostListener was fired more than once!"); } }); - trigger.run(); // first run should detect the new node + trigger.run(); // first run should detect the lost node int counter = 0; do { if (container.getZkController().getZkStateReader().getClusterState().getLiveNodes().size() == 3) { @@ -144,4 +127,83 @@ public class NodeLostTriggerTest extends SolrCloudTestCase { assertFalse(fired.get()); } } + + @Test + public void testRestoreState() throws Exception { + CoreContainer container = cluster.getJettySolrRunners().get(0).getCoreContainer(); + long waitForSeconds = 1 + random().nextInt(5); + Map props = createTriggerProps(waitForSeconds); + + JettySolrRunner newNode = cluster.startJettySolrRunner(); + String lostNodeName = newNode.getNodeName(); + + // remove a node but update the trigger before the waitFor period expires + // and assert that the new trigger still fires + + NodeLostTrigger trigger = new NodeLostTrigger("node_lost_trigger", props, container); + trigger.setListener(event -> fail("Did not expect the listener to fire on first run!")); + trigger.run(); + newNode.stop(); + trigger.run(); // this run should detect the lost node + trigger.close(); // close the old trigger + + try (NodeLostTrigger newTrigger = new NodeLostTrigger("some_different_name", props, container)) { + try { + newTrigger.restoreState(trigger); + fail("Trigger should only be able to restore state from an old trigger of the same name"); + } catch (AssertionError e) { + // expected + } + } + + try (NodeLostTrigger newTrigger = new NodeLostTrigger("node_lost_trigger", props, container)) { + AtomicBoolean fired = new AtomicBoolean(false); + AtomicReference eventRef = new AtomicReference<>(); + newTrigger.setListener(event -> { + if (fired.compareAndSet(false, true)) { + eventRef.set(event); + if (System.nanoTime() - event.getEventNanoTime() <= TimeUnit.NANOSECONDS.convert(waitForSeconds, TimeUnit.SECONDS)) { + fail("NodeLostListener was fired before the configured waitFor period"); + } + } else { + fail("NodeLostListener was fired more than once!"); + } + }); + newTrigger.restoreState(trigger); // restore state from the old trigger + int counter = 0; + do { + newTrigger.run(); + Thread.sleep(1000); + if (counter++ > 10) { + fail("Lost node was not discovered by trigger even after 10 seconds"); + } + } while (!fired.get()); + + NodeLostTrigger.NodeLostEvent nodeLostEvent = eventRef.get(); + assertNotNull(nodeLostEvent); + assertEquals("", lostNodeName, nodeLostEvent.getNodeName()); + } + } + + private Map createTriggerProps(long waitForSeconds) { + Map props = new HashMap<>(); + props.put("event", "nodeLost"); + props.put("waitFor", waitForSeconds); + props.put("enabled", true); + List> actions = new ArrayList<>(3); + Map map = new HashMap<>(2); + map.put("name", "compute_plan"); + map.put("class", "solr.ComputePlanAction"); + actions.add(map); + map = new HashMap<>(2); + map.put("name", "execute_plan"); + map.put("class", "solr.ExecutePlanAction"); + actions.add(map); + map = new HashMap<>(2); + map.put("name", "log_plan"); + map.put("class", "solr.LogPlanAction"); + actions.add(map); + props.put("actions", actions); + return props; + } } diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java index abd6499e33d..a5ff24ef761 100644 --- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java @@ -19,6 +19,7 @@ package org.apache.solr.cloud.autoscaling; import java.io.IOException; import java.lang.invoke.MethodHandles; +import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; @@ -26,18 +27,27 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import org.apache.solr.client.solrj.SolrRequest; +import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.embedded.JettySolrRunner; import org.apache.solr.client.solrj.impl.CloudSolrClient; import org.apache.solr.client.solrj.request.CollectionAdminRequest; import org.apache.solr.cloud.SolrCloudTestCase; +import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.Utils; import org.apache.solr.util.LogLevel; +import org.apache.solr.util.TimeOut; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.data.Stat; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.solr.cloud.autoscaling.ScheduledTriggers.SCHEDULED_TRIGGER_DELAY_SECONDS; +import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_PATH; + /** * An end-to-end integration test for triggers */ @@ -51,28 +61,159 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { private static AtomicBoolean triggerFired; private static AtomicReference eventRef; + private String path; + @BeforeClass public static void setupCluster() throws Exception { configureCluster(2) .addConfig("conf", configset("cloud-minimal")) .configure(); - waitForSeconds = 1 + random().nextInt(3); } @Before - public void setupTest() { + public void setupTest() throws KeeperException, InterruptedException, IOException, SolrServerException { + waitForSeconds = 1 + random().nextInt(3); actionCreated = new CountDownLatch(1); triggerFiredLatch = new CountDownLatch(1); triggerFired = new AtomicBoolean(false); eventRef = new AtomicReference<>(); + // clear any persisted auto scaling configuration + Stat stat = zkClient().setData(SOLR_AUTOSCALING_CONF_PATH, Utils.toJSON(new ZkNodeProps()), true); + log.info(SOLR_AUTOSCALING_CONF_PATH + " reset, new znode version {}", stat.getVersion()); + // todo nocommit -- add testing for the v2 path + // String path = random().nextBoolean() ? "/admin/autoscaling" : "/v2/cluster/autoscaling"; + this.path = "/admin/autoscaling"; + } + + @Test + public void testNodeLostTriggerRestoreState() throws Exception { + // for this test we want to update the trigger so we must assert that the actions were created twice + TriggerIntegrationTest.actionCreated = new CountDownLatch(2); + + // start a new node + JettySolrRunner newNode = cluster.startJettySolrRunner(); + String nodeName = newNode.getNodeName(); + + CloudSolrClient solrClient = cluster.getSolrClient(); + waitForSeconds = 5; + String setTriggerCommand = "{" + + "'set-trigger' : {" + + "'name' : 'node_lost_restore_trigger'," + + "'event' : 'nodeLost'," + + "'waitFor' : '5s'," + // should be enough for us to update the trigger + "'enabled' : true," + + "'actions' : [{'name':'test','class':'" + TestTriggerAction.class.getName() + "'}]" + + "}}"; + SolrRequest req = new AutoScalingHandlerTest.AutoScalingRequest(SolrRequest.METHOD.POST, path, setTriggerCommand); + NamedList response = solrClient.request(req); + assertEquals(response.get("result").toString(), "success"); + + TimeOut timeOut = new TimeOut(2, TimeUnit.SECONDS); + while (actionCreated.getCount() == 0 && !timeOut.hasTimedOut()) { + Thread.sleep(200); + } + assertTrue("The action specified in node_lost_restore_trigger was not instantiated even after 2 seconds",actionCreated.getCount() > 0); + + List jettySolrRunners = cluster.getJettySolrRunners(); + int index = -1; + for (int i = 0; i < jettySolrRunners.size(); i++) { + JettySolrRunner runner = jettySolrRunners.get(i); + if (runner == newNode) index = i; + } + assertFalse(index == -1); + cluster.stopJettySolrRunner(index); + + // ensure that the old trigger sees the stopped node, todo find a better way to do this + Thread.sleep(500 + TimeUnit.MILLISECONDS.convert(SCHEDULED_TRIGGER_DELAY_SECONDS, TimeUnit.SECONDS)); + + waitForSeconds = 0; + setTriggerCommand = "{" + + "'set-trigger' : {" + + "'name' : 'node_lost_restore_trigger'," + + "'event' : 'nodeLost'," + + "'waitFor' : '0s'," + // update a property so that it replaces the old trigger, also we want it to fire immediately + "'enabled' : true," + + "'actions' : [{'name':'test','class':'" + TestTriggerAction.class.getName() + "'}]" + + "}}"; + req = new AutoScalingHandlerTest.AutoScalingRequest(SolrRequest.METHOD.POST, path, setTriggerCommand); + response = solrClient.request(req); + assertEquals(response.get("result").toString(), "success"); + + // wait until the second instance of action is created + if (!actionCreated.await(3, TimeUnit.SECONDS)) { + fail("Two TriggerAction instances should have been created by now"); + } + + boolean await = triggerFiredLatch.await(5, TimeUnit.SECONDS); + assertTrue("The trigger did not fire at all", await); + assertTrue(triggerFired.get()); + NodeLostTrigger.NodeLostEvent nodeLostEvent = (NodeLostTrigger.NodeLostEvent) eventRef.get(); + assertNotNull(nodeLostEvent); + assertEquals("The node added trigger was fired but for a different node", + nodeName, nodeLostEvent.getNodeName()); + } + + @Test + public void testNodeAddedTriggerRestoreState() throws Exception { + // for this test we want to update the trigger so we must assert that the actions were created twice + TriggerIntegrationTest.actionCreated = new CountDownLatch(2); + + CloudSolrClient solrClient = cluster.getSolrClient(); + waitForSeconds = 5; + String setTriggerCommand = "{" + + "'set-trigger' : {" + + "'name' : 'node_added_restore_trigger'," + + "'event' : 'nodeAdded'," + + "'waitFor' : '5s'," + // should be enough for us to update the trigger + "'enabled' : true," + + "'actions' : [{'name':'test','class':'" + TestTriggerAction.class.getName() + "'}]" + + "}}"; + SolrRequest req = new AutoScalingHandlerTest.AutoScalingRequest(SolrRequest.METHOD.POST, path, setTriggerCommand); + NamedList response = solrClient.request(req); + assertEquals(response.get("result").toString(), "success"); + + TimeOut timeOut = new TimeOut(2, TimeUnit.SECONDS); + while (actionCreated.getCount() == 0 && !timeOut.hasTimedOut()) { + Thread.sleep(200); + } + assertTrue("The action specified in node_added_restore_trigger was not instantiated even after 2 seconds",actionCreated.getCount() > 0); + + // start a new node + JettySolrRunner newNode = cluster.startJettySolrRunner(); + + // ensure that the old trigger sees the new node, todo find a better way to do this + Thread.sleep(500 + TimeUnit.MILLISECONDS.convert(SCHEDULED_TRIGGER_DELAY_SECONDS, TimeUnit.SECONDS)); + + waitForSeconds = 0; + setTriggerCommand = "{" + + "'set-trigger' : {" + + "'name' : 'node_added_restore_trigger'," + + "'event' : 'nodeAdded'," + + "'waitFor' : '0s'," + // update a property so that it replaces the old trigger, also we want it to fire immediately + "'enabled' : true," + + "'actions' : [{'name':'test','class':'" + TestTriggerAction.class.getName() + "'}]" + + "}}"; + req = new AutoScalingHandlerTest.AutoScalingRequest(SolrRequest.METHOD.POST, path, setTriggerCommand); + response = solrClient.request(req); + assertEquals(response.get("result").toString(), "success"); + + // wait until the second instance of action is created + if (!actionCreated.await(3, TimeUnit.SECONDS)) { + fail("Two TriggerAction instances should have been created by now"); + } + + boolean await = triggerFiredLatch.await(5, TimeUnit.SECONDS); + assertTrue("The trigger did not fire at all", await); + assertTrue(triggerFired.get()); + NodeAddedTrigger.NodeAddedEvent nodeAddedEvent = (NodeAddedTrigger.NodeAddedEvent) eventRef.get(); + assertNotNull(nodeAddedEvent); + assertEquals("The node added trigger was fired but for a different node", + newNode.getNodeName(), nodeAddedEvent.getNodeName()); } @Test public void testNodeAddedTrigger() throws Exception { CloudSolrClient solrClient = cluster.getSolrClient(); - // todo nocommit -- add testing for the v2 path - // String path = random().nextBoolean() ? "/admin/autoscaling" : "/v2/cluster/autoscaling"; - String path = "/admin/autoscaling"; String setTriggerCommand = "{" + "'set-trigger' : {" + "'name' : 'node_added_trigger'," + @@ -102,9 +243,6 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { @Test public void testNodeLostTrigger() throws Exception { CloudSolrClient solrClient = cluster.getSolrClient(); - // todo nocommit -- add testing for the v2 path - // String path = random().nextBoolean() ? "/admin/autoscaling" : "/v2/cluster/autoscaling"; - String path = "/admin/autoscaling"; String setTriggerCommand = "{" + "'set-trigger' : {" + "'name' : 'node_lost_trigger'," +