Karl Wright 2018-12-11 13:48:44 -05:00
commit 6587f640d2
7 changed files with 183 additions and 101 deletions

FrozenBufferedUpdates.java

@@ -468,7 +468,7 @@ final class FrozenBufferedUpdates {
                                       long delGen,
                                       boolean segmentPrivateDeletes) throws IOException {
 
-    TermsEnum termsEnum;
+    TermsEnum termsEnum = null;
     PostingsEnum postingsEnum = null;
 
     // TODO: we can process the updates per DV field, from last to first so that
@@ -492,11 +492,14 @@ final class FrozenBufferedUpdates {
       boolean isNumeric = value.isNumeric();
       FieldUpdatesBuffer.BufferedUpdateIterator iterator = value.iterator();
       FieldUpdatesBuffer.BufferedUpdate bufferedUpdate;
+      String previousField = null;
       while ((bufferedUpdate = iterator.next()) != null) {
-        Terms terms = segState.reader.terms(bufferedUpdate.termField);
-        if (terms != null) {
-          termsEnum = terms.iterator();
-        } else {
+        if (previousField == null || previousField.equals(bufferedUpdate.termField) == false) {
+          previousField = bufferedUpdate.termField;
+          Terms terms = segState.reader.terms(previousField);
+          termsEnum = terms == null ? null : terms.iterator();
+        }
+        if (termsEnum == null) {
           // no terms in this segment for this field
           continue;
         }
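The change above reuses a single TermsEnum across consecutive buffered updates that hit the same field, instead of resolving Terms and creating a fresh iterator for every update; the change is premised on updates arriving in runs that share a field. Below is a minimal sketch of the same reuse pattern against the public Lucene API; the class, method, and inputs are hypothetical stand-ins, not code from FrozenBufferedUpdates.

    import java.io.IOException;
    import java.util.List;

    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.BytesRef;

    final class TermsEnumReuseSketch {
      /** Counts how many (field, term) pairs exist in the segment, creating a new
       *  TermsEnum only when the field changes from one pair to the next. */
      static long countExistingTerms(LeafReader reader, List<String> fields, List<BytesRef> terms) throws IOException {
        long found = 0;
        String previousField = null;
        TermsEnum termsEnum = null;
        for (int i = 0; i < fields.size(); i++) {
          String field = fields.get(i);
          if (previousField == null || previousField.equals(field) == false) {
            previousField = field;
            Terms t = reader.terms(field);               // null if the segment has no postings for this field
            termsEnum = t == null ? null : t.iterator(); // fresh iterator only on a field change
          }
          if (termsEnum == null) {
            continue; // nothing indexed for this field in this segment
          }
          if (termsEnum.seekExact(terms.get(i))) {
            found++;
          }
        }
        return found;
      }
    }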

DistributedQueueTest.java

@@ -29,6 +29,8 @@ import org.apache.solr.client.solrj.cloud.DistributedQueue;
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.util.ExecutorUtil;
 import org.apache.solr.common.util.SolrjNamedThreadFactory;
+import org.apache.solr.common.util.TimeSource;
+import org.apache.solr.util.TimeOut;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
@@ -143,6 +145,16 @@ public class DistributedQueueTest extends SolrTestCaseJ4 {
     // After draining the queue, a watcher should be set.
     assertNull(dq.peek(100));
+
+    TimeOut timeout = new TimeOut(30, TimeUnit.SECONDS, TimeSource.NANO_TIME);
+    timeout.waitFor("Timeout waiting to see dirty=false", () -> {
+      try {
+        return !dq.isDirty();
+      } catch (InterruptedException e) {
+        throw new RuntimeException(e);
+      }
+    });
+
     assertFalse(dq.isDirty());
     assertEquals(1, dq.watcherCount());
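The test change above stops asserting dq.isDirty() immediately after draining the queue and instead polls it under a 30-second TimeOut, which removes the race against the background watcher that clears the dirty flag. For readers unfamiliar with Solr's TimeOut helper, the sketch below shows the same wait-for-condition idiom in plain JDK code; the class name, 100 ms poll interval, and use of TimeoutException are illustrative assumptions, not part of the Solr API.

    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;
    import java.util.function.BooleanSupplier;

    final class WaitFor {
      /** Polls the condition until it returns true, or throws TimeoutException once the budget elapses. */
      static void waitFor(String messageOnTimeout, long budget, TimeUnit unit, BooleanSupplier condition)
          throws InterruptedException, TimeoutException {
        final long deadlineNs = System.nanoTime() + unit.toNanos(budget);
        while (!condition.getAsBoolean()) {
          if (System.nanoTime() >= deadlineNs) {
            throw new TimeoutException(messageOnTimeout);
          }
          Thread.sleep(100); // poll interval: small enough for tests, large enough to avoid busy-waiting
        }
      }
    }

A test would then call something like WaitFor.waitFor("Timeout waiting to see dirty=false", 30, TimeUnit.SECONDS, () -> !queueIsDirty()) and only assert on the settled state afterwards.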

TriggerSetPropertiesIntegrationTest.java

@@ -20,13 +20,17 @@ package org.apache.solr.cloud.autoscaling;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashSet;
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.CyclicBarrier;
+import java.util.concurrent.Semaphore;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
 
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrRequest;
@@ -50,8 +54,6 @@ import static org.apache.solr.cloud.autoscaling.TriggerIntegrationTest.timeSource;
 public class TriggerSetPropertiesIntegrationTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
-  private static CountDownLatch triggerFiredLatch = new CountDownLatch(1);
-
   @BeforeClass
   public static void setupCluster() throws Exception {
     configureCluster(2)
@@ -67,134 +69,199 @@ public class TriggerSetPropertiesIntegrationTest extends SolrCloudTestCase {
     assertEquals(response.get("result").toString(), "success");
   }
 
-  private static CountDownLatch getTriggerFiredLatch() {
-    return triggerFiredLatch;
-  }
-
+  /**
+   * Test that we can add/remove triggers to a scheduler, and change the config on the fly, and still get
+   * expected behavior
+   */
   public void testSetProperties() throws Exception {
-    JettySolrRunner runner = cluster.getJettySolrRunner(0);
-    SolrResourceLoader resourceLoader = runner.getCoreContainer().getResourceLoader();
-    SolrCloudManager solrCloudManager = runner.getCoreContainer().getZkController().getSolrCloudManager();
-    AtomicLong diff = new AtomicLong(0);
-    triggerFiredLatch = new CountDownLatch(2); // have the trigger run twice to capture time difference
+    final JettySolrRunner runner = cluster.getJettySolrRunner(0);
+    final SolrResourceLoader resourceLoader = runner.getCoreContainer().getResourceLoader();
+    final SolrCloudManager solrCloudManager = runner.getCoreContainer().getZkController().getSolrCloudManager();
+
     try (ScheduledTriggers scheduledTriggers = new ScheduledTriggers(resourceLoader, solrCloudManager)) {
       AutoScalingConfig config = new AutoScalingConfig(Collections.emptyMap());
       scheduledTriggers.setAutoScalingConfig(config);
-      AutoScaling.Trigger t = new TriggerBase(TriggerEventType.NODELOST, "x") {
-        @Override
-        protected Map<String, Object> getState() {
-          return Collections.singletonMap("x", "y");
-        }
-
-        @Override
-        protected void setState(Map<String, Object> state) {
-        }
-
-        @Override
-        public void restoreState(AutoScaling.Trigger old) {
-        }
-
+
+      // Setup a trigger that records the timestamp of each time it was run
+      // we only need 2 timestamps for the test, so limit the queue and make the trigger a No-Op if full
+      final BlockingQueue<Long> timestamps = new ArrayBlockingQueue<Long>(2);
+      final AutoScaling.Trigger t1 = new MockTrigger(TriggerEventType.NODELOST, "mock-timestamper") {
         @Override
         public void run() {
-          if (getTriggerFiredLatch().getCount() == 0) return;
-          long l = diff.get();
-          diff.set(timeSource.getTimeNs() - l);
-          getTriggerFiredLatch().countDown();
+          log.info("Running {} in {}", this.getName(), Thread.currentThread().getName());
+          timestamps.offer(timeSource.getTimeNs());
         }
       };
-      t.configure(runner.getCoreContainer().getResourceLoader(), runner.getCoreContainer().getZkController().getSolrCloudManager(), Collections.emptyMap());
-      scheduledTriggers.add(t);
-      assertTrue(getTriggerFiredLatch().await(4, TimeUnit.SECONDS));
-      assertTrue(diff.get() - TimeUnit.SECONDS.toNanos(ScheduledTriggers.DEFAULT_SCHEDULED_TRIGGER_DELAY_SECONDS) >= 0);
-
-      // change schedule delay
-      config = config.withProperties(Collections.singletonMap(AutoScalingParams.TRIGGER_SCHEDULE_DELAY_SECONDS, 4));
+
+      log.info("Configuring simple scheduler and adding trigger: {}", t1.getName());
+      t1.configure(resourceLoader, solrCloudManager, Collections.emptyMap());
+      scheduledTriggers.add(t1);
+
+      waitForAndDiffTimestamps("conf(default delay)",
+                               ScheduledTriggers.DEFAULT_SCHEDULED_TRIGGER_DELAY_SECONDS, TimeUnit.SECONDS,
+                               timestamps);
+
+      log.info("Reconfiguring scheduler to use 4s delay and clearing queue for trigger: {}", t1.getName());
+      config = config.withProperties(Collections.singletonMap
+                                     (AutoScalingParams.TRIGGER_SCHEDULE_DELAY_SECONDS, 4));
       scheduledTriggers.setAutoScalingConfig(config);
-      triggerFiredLatch = new CountDownLatch(2);
-      assertTrue("Timed out waiting for latch to fire", getTriggerFiredLatch().await(10, TimeUnit.SECONDS));
-      assertTrue(diff.get() - TimeUnit.SECONDS.toNanos(4) >= 0);
-
-      // reset with default properties
-      scheduledTriggers.remove("x"); // remove the old trigger
+      timestamps.clear();
+
+      waitForAndDiffTimestamps("conf(four sec delay)",
+                               4, TimeUnit.SECONDS,
+                               timestamps);
+
+      log.info("Removing trigger: {}", t1.getName());
+      scheduledTriggers.remove(t1.getName());
+
+      log.info("Reconfiguring scheduler to use default props");
       config = config.withProperties(ScheduledTriggers.DEFAULT_PROPERTIES);
       scheduledTriggers.setAutoScalingConfig(config);
-
-      // test core thread count
-      List<AutoScaling.Trigger> triggerList = new ArrayList<>();
-      final Set<String> threadNames = Collections.synchronizedSet(new HashSet<>());
-      final Set<String> triggerNames = Collections.synchronizedSet(new HashSet<>());
-      triggerFiredLatch = new CountDownLatch(8);
-      for (int i = 0; i < 8; i++) {
-        AutoScaling.Trigger trigger = new MockTrigger(TriggerEventType.NODELOST, "x" + i) {
+
+      assertTrue("Test sanity check, need default thread pool to be at least 3 so we can " +
+                 "test lowering it by 2", ScheduledTriggers.DEFAULT_TRIGGER_CORE_POOL_SIZE >= 3);
+      final int numTriggers = ScheduledTriggers.DEFAULT_TRIGGER_CORE_POOL_SIZE;
+      final int reducedThreadPoolSize = numTriggers - 2;
+
+      // Setup X instances of a trigger that:
+      //  - records its name as being run
+      //  - skips all remaining execution if its name has already been recorded
+      //  - records the name of the thread that ran it
+      //  - blocks on a cyclic barrier until at least Y instances have run (to hog a thread)
+      // ...to test that the scheduler will add new threads as needed, up to the configured limit
+      //
+      // NOTE: the reason we need X unique instances is because the scheduler won't "re-run" a single
+      // trigger while a previous "run" is still in process
+      final List<AutoScaling.Trigger> triggerList = new ArrayList<>(numTriggers);
+
+      // Use a cyclic barrier gated by an atomic ref so we can swap it out later
+      final AtomicReference<CyclicBarrier> latch = new AtomicReference<>(new CyclicBarrier(numTriggers));
+
+      // variables for tracking state as we go
+      // NOTE: all read/write must be gated by synchronizing on the barrier (ref),
+      // so we can ensure we are reading a consistent view
+      final Set<String> threadNames = Collections.synchronizedSet(new LinkedHashSet<>());
+      final Set<String> triggerNames = Collections.synchronizedSet(new LinkedHashSet<>());
+      final AtomicLong fails = new AtomicLong(0);
+
+      // Use a semaphore to track when each trigger *finishes* so our test thread
+      // can know when to check & clear the tracking state
+      final Semaphore completionSemaphore = new Semaphore(numTriggers);
+
+      for (int i = 0; i < numTriggers; i++) {
+        AutoScaling.Trigger trigger = new MockTrigger(TriggerEventType.NODELOST,
+                                                      "mock-blocking-trigger-" + i) {
          @Override
          public void run() {
-            try {
-              // If core pool size is increased then new threads won't be started if existing threads
-              // aren't busy with tasks. So we make this thread wait longer than necessary
-              // so that the pool is forced to start threads for other triggers
-              Thread.sleep(5000);
-            } catch (InterruptedException e) {
-            }
-            if (triggerNames.add(getName())) {
-              getTriggerFiredLatch().countDown();
-              threadNames.add(Thread.currentThread().getName());
-            }
+            log.info("Running {} in {}", this.getName(), Thread.currentThread().getName());
+            CyclicBarrier barrier = null;
+            synchronized (latch) {
+              if (triggerNames.add(this.getName())) {
+                log.info("{}: No-Op since we've already recorded a run", this.getName());
+                return;
+              }
+              threadNames.add(Thread.currentThread().getName());
+              barrier = latch.get();
+            }
+            try {
+              log.info("{}: waiting on barrier to hog a thread", this.getName());
+              barrier.await(30, TimeUnit.SECONDS);
+              completionSemaphore.release();
+            } catch (Exception e) {
+              fails.incrementAndGet();
+              log.error(this.getName() + ": failure waiting on cyclic barrier: " + e.toString(), e);
+            }
          }
        };
        trigger.configure(resourceLoader, solrCloudManager, Collections.emptyMap());
        triggerList.add(trigger);
+        completionSemaphore.acquire();
+        log.info("Adding trigger {} to scheduler", trigger.getName());
        scheduledTriggers.add(trigger);
      }
-      assertTrue("Timed out waiting for latch to fire", getTriggerFiredLatch().await(20, TimeUnit.SECONDS));
-      assertEquals("Expected 8 triggers but found: " + triggerNames, 8, triggerNames.size());
-      assertEquals("Expected " + ScheduledTriggers.DEFAULT_TRIGGER_CORE_POOL_SIZE
-              + " threads but found: " + threadNames,
-          ScheduledTriggers.DEFAULT_TRIGGER_CORE_POOL_SIZE, threadNames.size());
-
-      // change core pool size
-      config = config.withProperties(Collections.singletonMap(AutoScalingParams.TRIGGER_CORE_POOL_SIZE, 6));
-      scheduledTriggers.setAutoScalingConfig(config);
-      triggerFiredLatch = new CountDownLatch(8);
-      threadNames.clear();
-      triggerNames.clear();
-      assertTrue(getTriggerFiredLatch().await(20, TimeUnit.SECONDS));
-      assertEquals("Expected 8 triggers but found: " + triggerNames, 8, triggerNames.size());
-      assertEquals("Expected 6 threads but found: " + threadNames, 6, threadNames.size());
-
-      // reset
-      for (int i = 0; i < 8; i++) {
-        scheduledTriggers.remove(triggerList.get(i).getName());
-      }
+
+      log.info("Waiting on semaphore for all triggers to signal completion...");
+      assertTrue("Timed out waiting for semaphore count to be released",
+                 completionSemaphore.tryAcquire(numTriggers, 60, TimeUnit.SECONDS));
+
+      synchronized (latch) {
+        assertEquals("Unexpected number of trigger names found: " + triggerNames.toString(),
+                     numTriggers, triggerNames.size());
+        assertEquals("Unexpected number of thread names found: " + threadNames.toString(),
+                     numTriggers, threadNames.size());
+        assertEquals("Unexpected number of trigger fails recorded, check logs?",
+                     0, fails.get());
+
+        // before releasing the latch, clear the state and update our config to use a lower number of threads
+        log.info("Updating scheduler config to use {} threads", reducedThreadPoolSize);
+        config = config.withProperties(Collections.singletonMap(AutoScalingParams.TRIGGER_CORE_POOL_SIZE,
+                                                                reducedThreadPoolSize));
+        scheduledTriggers.setAutoScalingConfig(config);
+
+        log.info("Updating cyclic barrier and clearing test state so triggers will 'run' again");
+        latch.set(new CyclicBarrier(reducedThreadPoolSize));
+        threadNames.clear();
+        triggerNames.clear();
+      }
+
+      log.info("Waiting on semaphore for all triggers to signal completion...");
+      assertTrue("Timed out waiting for semaphore count to be released",
+                 completionSemaphore.tryAcquire(numTriggers, 60, TimeUnit.SECONDS));
+
+      synchronized (latch) {
+        assertEquals("Unexpected number of trigger names found: " + triggerNames.toString(),
+                     numTriggers, triggerNames.size());
+        assertEquals("Unexpected number of thread names found: " + threadNames.toString(),
+                     reducedThreadPoolSize, threadNames.size());
+        assertEquals("Unexpected number of trigger fails recorded, check logs?",
+                     0, fails.get());
+      }
    }
  }
 
-  public static class MockTrigger extends TriggerBase {
+  private static final void waitForAndDiffTimestamps(final String label,
+                                                     final long minExpectedDelta,
+                                                     final TimeUnit minExpectedDeltaUnit,
+                                                     final BlockingQueue<Long> timestamps) {
+    try {
+      log.info(label + ": Waiting for 2 timestamps to be recorded");
+      Long firstTs = timestamps.poll(minExpectedDelta * 3, minExpectedDeltaUnit);
+      assertNotNull(label + ": Couldn't get first timestamp after max allowed polling", firstTs);
+      Long secondTs = timestamps.poll(minExpectedDelta * 3, minExpectedDeltaUnit);
+      assertNotNull(label + ": Couldn't get second timestamp after max allowed polling", secondTs);
+
+      final long deltaInNanos = secondTs - firstTs;
+      final long minExpectedDeltaInNanos = minExpectedDeltaUnit.toNanos(minExpectedDelta);
+      assertTrue(label + ": Delta between timestamps (" + secondTs + "ns - " + firstTs + "ns = " + deltaInNanos + "ns) is not " +
+                 "at least as much as min expected delay: " + minExpectedDeltaInNanos + "ns",
+                 deltaInNanos >= minExpectedDeltaInNanos);
+    } catch (InterruptedException e) {
+      log.error(label + ": interrupted", e);
+      fail(label + ": interrupted: " + e.toString());
+    }
+  }
+
+  private static abstract class MockTrigger extends TriggerBase {
 
    public MockTrigger(TriggerEventType eventType, String name) {
      super(eventType, name);
    }
 
    @Override
    protected Map<String, Object> getState() {
      return Collections.emptyMap();
    }
 
    @Override
-    protected void setState(Map<String, Object> state) {
-    }
+    protected void setState(Map<String, Object> state) { /* No-Op */ }
 
    @Override
-    public void restoreState(AutoScaling.Trigger old) {
-    }
-
-    @Override
-    public void run() {
-    }
+    public void restoreState(AutoScaling.Trigger old) { /* No-Op */ }
  }
 }
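The rewritten test above drops the fixed Thread.sleep(5000) and CountDownLatch bookkeeping in favor of a CyclicBarrier held in an AtomicReference plus a completion Semaphore: each trigger parks on the barrier, so every concurrently scheduled trigger has to occupy its own pool thread before any of them can finish. The self-contained sketch below demonstrates that "hog a thread per task" coordination with a plain ScheduledThreadPoolExecutor; the pool size, delays, and class name are made-up illustration values, and none of this is taken from ScheduledTriggers.

    import java.util.Collections;
    import java.util.LinkedHashSet;
    import java.util.Set;
    import java.util.concurrent.CyclicBarrier;
    import java.util.concurrent.ScheduledThreadPoolExecutor;
    import java.util.concurrent.Semaphore;
    import java.util.concurrent.TimeUnit;

    public class BarrierHoggingDemo {
      public static void main(String[] args) throws Exception {
        final int numTasks = 4;                                    // illustrative; the test uses DEFAULT_TRIGGER_CORE_POOL_SIZE
        final CyclicBarrier barrier = new CyclicBarrier(numTasks); // trips only once every task is holding a thread
        final Semaphore done = new Semaphore(0);                   // released by each task after the barrier trips
        final Set<String> threadNames = Collections.synchronizedSet(new LinkedHashSet<>());

        ScheduledThreadPoolExecutor pool = new ScheduledThreadPoolExecutor(numTasks);
        for (int i = 0; i < numTasks; i++) {
          pool.scheduleWithFixedDelay(() -> {
            try {
              threadNames.add(Thread.currentThread().getName());
              barrier.await(30, TimeUnit.SECONDS); // hog this thread until all tasks are running at once
              done.release();
            } catch (Exception e) {
              throw new RuntimeException(e);
            }
          }, 0, 1, TimeUnit.SECONDS);
        }

        // The permits can only all be released if the pool really ran the tasks on numTasks distinct threads.
        if (!done.tryAcquire(numTasks, 60, TimeUnit.SECONDS)) {
          throw new AssertionError("tasks never all reached the barrier");
        }
        System.out.println("distinct threads used: " + threadNames.size()); // expect numTasks
        pool.shutdownNow();
      }
    }

Because the barrier only trips when all parties are waiting simultaneously, counting the distinct thread names afterwards directly verifies that the pool grew to the intended size, which is what the test asserts against DEFAULT_TRIGGER_CORE_POOL_SIZE and the reduced pool size.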

TestSimLargeCluster.java

@@ -697,7 +697,7 @@ public class TestSimLargeCluster extends SimSolrCloudTestCase {
     boolean await = triggerFinishedLatch.await(waitForSeconds * 45000 / SPEED, TimeUnit.MILLISECONDS);
     assertTrue("The trigger did not fire at all", await);
     // wait for listener to capture the SUCCEEDED stage
-    cluster.getTimeSource().sleep(15000);
+    cluster.getTimeSource().sleep(25000);
 
     assertNotNull(listenerEvents.entrySet().toString(), listenerEvents.get("srt"));

TestSimTriggerIntegration.java

@@ -210,7 +210,7 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     String newNode = cluster.simAddNode();
 
-    if (!triggerFiredLatch.await(320000 / SPEED, TimeUnit.MILLISECONDS)) {
+    if (!triggerFiredLatch.await(420000 / SPEED, TimeUnit.MILLISECONDS)) {
       fail("Both triggers should have fired by now");
     }
@@ -436,7 +436,7 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     }
 
     String newNode = cluster.simAddNode();
-    boolean await = triggerFiredLatch.await(90000 / SPEED, TimeUnit.MILLISECONDS);
+    boolean await = triggerFiredLatch.await(240000 / SPEED, TimeUnit.MILLISECONDS);
     assertTrue("The trigger did not fire at all", await);
     assertTrue(triggerFired.get());
     TriggerEvent nodeAddedEvent = events.iterator().next();
@@ -922,7 +922,7 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     }
 
-    if (!triggerFiredLatch.await(60000 / SPEED, TimeUnit.MILLISECONDS)) {
+    if (!triggerFiredLatch.await(120000 / SPEED, TimeUnit.MILLISECONDS)) {
       fail("Trigger should have fired by now");
     }
     assertEquals(1, events.size());
@@ -1184,7 +1184,7 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
     await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
     assertTrue("The trigger did not fire at all", await);
     // wait for listener to capture the SUCCEEDED stage
-    cluster.getTimeSource().sleep(6000);
+    cluster.getTimeSource().sleep(8000);
 
     // there must be exactly one SUCCEEDED event
     capturedEvents = listenerEvents.get("bar");

CdcrBootstrapTest.java

@@ -114,7 +114,7 @@ public class CdcrBootstrapTest extends SolrTestCaseJ4 {
       target.waitForActiveCollection("cdcr-target", 1, 2);
       CloudSolrClient targetSolrClient = target.getSolrClient();
       targetSolrClient.setDefaultCollection("cdcr-target");
-      Thread.sleep(3000);
+      Thread.sleep(6000);
       CdcrTestsUtil.cdcrStart(targetSolrClient);
       CdcrTestsUtil.cdcrStart(sourceSolrClient);

InfixSuggestersTest.java

@@ -122,7 +122,7 @@ public class InfixSuggestersTest extends SolrTestCaseJ4 {
     try {
       LinkedHashMap<Class<? extends Throwable>, List<Class<? extends Throwable>>> expected = new LinkedHashMap<>();
       expected.put(RuntimeException.class, Arrays.asList
-          (SolrCoreState.CoreIsClosedException.class, SolrException.class, IllegalStateException.class));
+          (SolrCoreState.CoreIsClosedException.class, SolrException.class, IllegalStateException.class, NullPointerException.class));
       final Throwable[] outerException = new Throwable[1];
       // Build the suggester in the background with a long dictionary
       Future job = executor.submit(() -> outerException[0] = expectThrowsAnyOf(expected,