SOLR-14588: Implement Circuit Breakers (#1626)

* SOLR-14588: Implement Circuit Breakers

This commit consists of two parts: add circuit breakers infrastructure and a "real" JVM heap memory based
circuit breaker which monitors incoming search requests and rejects them with SERVICE_UNAVAILABLE (503) error
if the defined threshold is breached, thus giving headroom to existing indexing and search requests
to complete.
This commit is contained in:
Atri Sharma 2020-07-02 12:43:48 +05:30 committed by GitHub
parent ccdfee2cf0
commit 3f9cc227f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 925 additions and 2 deletions

View File

@ -12,6 +12,8 @@ New Features
---------------------
* SOLR-14440: Introduce new Certificate Authentication Plugin to load Principal from certificate subject. (Mike Drob)
* SOLR-14588: Introduce Circuit Breaker Infrastructure and a JVM heap usage memory tracking circuit breaker implementation (Atri Sharma)
Improvements
----------------------
* LUCENE-8984: MoreLikeThis MLT is biased for uncommon fields (Andy Hind via Anshum Gupta)

View File

@ -224,6 +224,11 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
queryResultWindowSize = Math.max(1, getInt("query/queryResultWindowSize", 1));
queryResultMaxDocsCached = getInt("query/queryResultMaxDocsCached", Integer.MAX_VALUE);
enableLazyFieldLoading = getBool("query/enableLazyFieldLoading", false);
useCircuitBreakers = getBool("circuitBreaker/useCircuitBreakers", false);
memoryCircuitBreakerThresholdPct = getInt("circuitBreaker/memoryCircuitBreakerThresholdPct", 95);
validateMemoryBreakerThreshold();
useRangeVersionsForPeerSync = getBool("peerSync/useRangeVersions", true);
@ -522,6 +527,10 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
public final int queryResultWindowSize;
public final int queryResultMaxDocsCached;
public final boolean enableLazyFieldLoading;
// Circuit Breaker Configuration
public final boolean useCircuitBreakers;
public final int memoryCircuitBreakerThresholdPct;
public final boolean useRangeVersionsForPeerSync;
@ -804,6 +813,14 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
loader.reloadLuceneSPI();
}
/**
 * Rejects an out-of-range {@code memoryCircuitBreakerThresholdPct}. The check is skipped
 * entirely when {@code useCircuitBreakers} is false.
 *
 * @throws IllegalArgumentException if circuit breakers are enabled and the threshold lies
 *         outside the inclusive range 50 - 95
 */
private void validateMemoryBreakerThreshold() {
  if (!useCircuitBreakers) {
    return;
  }
  final boolean withinRange = memoryCircuitBreakerThresholdPct >= 50 && memoryCircuitBreakerThresholdPct <= 95;
  if (!withinRange) {
    throw new IllegalArgumentException("Valid value range of memoryCircuitBreakerThresholdPct is 50 - 95");
  }
}
public int getMultipartUploadLimitKB() {
return multipartUploadLimitKB;
}
@ -873,6 +890,8 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
m.put("queryResultMaxDocsCached", queryResultMaxDocsCached);
m.put("enableLazyFieldLoading", enableLazyFieldLoading);
m.put("maxBooleanClauses", booleanQueryMaxClauseCount);
m.put("useCircuitBreakers", useCircuitBreakers);
m.put("memoryCircuitBreakerThresholdPct", memoryCircuitBreakerThresholdPct);
for (SolrPluginInfo plugin : plugins) {
List<PluginInfo> infos = getPluginInfos(plugin.clazz.getName());
if (infos == null || infos.isEmpty()) continue;

View File

@ -94,6 +94,7 @@ import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.ObjectReleaseTracker;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.SolrNamedThreadFactory;
import org.apache.solr.common.util.Utils;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.snapshots.SolrSnapshotManager;
@ -157,13 +158,13 @@ import org.apache.solr.update.processor.RunUpdateProcessorFactory;
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
import org.apache.solr.update.processor.UpdateRequestProcessorChain.ProcessorInfo;
import org.apache.solr.update.processor.UpdateRequestProcessorFactory;
import org.apache.solr.common.util.SolrNamedThreadFactory;
import org.apache.solr.util.IOFunction;
import org.apache.solr.util.NumberUtils;
import org.apache.solr.util.PropertiesInputStream;
import org.apache.solr.util.PropertiesOutputStream;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.TestInjection;
import org.apache.solr.util.circuitbreaker.CircuitBreakerManager;
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware;
@ -219,6 +220,8 @@ public final class SolrCore implements SolrInfoBean, Closeable {
private final Codec codec;
private final MemClassLoader memClassLoader;
private final CircuitBreakerManager circuitBreakerManager;
private final List<Runnable> confListeners = new CopyOnWriteArrayList<>();
private final ReentrantLock ruleExpiryLock;
@ -938,6 +941,7 @@ public final class SolrCore implements SolrInfoBean, Closeable {
this.configSetProperties = configSet.getProperties();
// Initialize the metrics manager
this.coreMetricManager = initCoreMetricManager(solrConfig);
this.circuitBreakerManager = initCircuitBreakerManager();
solrMetricsContext = coreMetricManager.getSolrMetricsContext();
this.coreMetricManager.loadReporters();
@ -1164,6 +1168,12 @@ public final class SolrCore implements SolrInfoBean, Closeable {
return coreMetricManager;
}
/**
 * Creates the circuit breaker manager for this core by handing the core's {@code solrConfig}
 * to {@code CircuitBreakerManager.build}.
 */
private CircuitBreakerManager initCircuitBreakerManager() {
  return CircuitBreakerManager.build(solrConfig);
}
@Override
public void initializeMetrics(SolrMetricsContext parentContext, String scope) {
newSearcherCounter = parentContext.counter("new", Category.SEARCHER.toString());
@ -1499,6 +1509,10 @@ public final class SolrCore implements SolrInfoBean, Closeable {
return updateProcessors;
}
/**
 * Returns the {@link CircuitBreakerManager} held by this core.
 */
public CircuitBreakerManager getCircuitBreakerManager() {
return circuitBreakerManager;
}
// this core current usage count
private final AtomicInteger refCount = new AtomicInteger(1);

View File

@ -51,13 +51,17 @@ import org.apache.solr.security.AuthorizationContext;
import org.apache.solr.security.PermissionNameProvider;
import org.apache.solr.util.RTimerTree;
import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.circuitbreaker.CircuitBreaker;
import org.apache.solr.util.circuitbreaker.CircuitBreakerManager;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.common.params.CommonParams.DISTRIB;
import static org.apache.solr.common.params.CommonParams.FAILURE;
import static org.apache.solr.common.params.CommonParams.PATH;
import static org.apache.solr.common.params.CommonParams.STATUS;
/**
@ -297,6 +301,30 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware,
final RTimerTree timer = rb.isDebug() ? req.getRequestTimer() : null;
if (req.getCore().getSolrConfig().useCircuitBreakers) {
List<CircuitBreaker> trippedCircuitBreakers;
if (timer != null) {
RTimerTree subt = timer.sub("circuitbreaker");
rb.setTimer(subt);
CircuitBreakerManager circuitBreakerManager = req.getCore().getCircuitBreakerManager();
trippedCircuitBreakers = circuitBreakerManager.checkTripped();
rb.getTimer().stop();
} else {
CircuitBreakerManager circuitBreakerManager = req.getCore().getCircuitBreakerManager();
trippedCircuitBreakers = circuitBreakerManager.checkTripped();
}
if (trippedCircuitBreakers != null) {
String errorMessage = CircuitBreakerManager.toErrorMessage(trippedCircuitBreakers);
rsp.add(STATUS, FAILURE);
rsp.setException(new SolrException(SolrException.ErrorCode.SERVICE_UNAVAILABLE, "Circuit Breakers tripped " + errorMessage));
return;
}
}
final ShardHandler shardHandler1 = getAndPrepShardHandler(req, rb); // creates a ShardHandler object only if it's needed
if (timer == null) {
@ -308,7 +336,7 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware,
// debugging prepare phase
RTimerTree subt = timer.sub( "prepare" );
for( SearchComponent c : components ) {
rb.setTimer( subt.sub( c.getName() ) );
rb.setTimer(subt.sub( c.getName() ) );
c.prepare(rb);
rb.getTimer().stop();
}

View File

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.circuitbreaker;
import org.apache.solr.core.SolrConfig;
/**
* Default class to define circuit breakers for Solr.
* <p>
* There are two (typical) ways to use circuit breakers:
* 1. Have them checked at admission control by default (use CircuitBreakerManager for the same).
* 2. Use the circuit breaker in a specific code path(s).
*
* TODO: This class should be grown as the scope of circuit breakers grow.
* </p>
*/
public abstract class CircuitBreaker {
  public static final String NAME = "circuitbreaker";

  /** Configuration this circuit breaker was created with. */
  protected final SolrConfig solrConfig;

  /**
   * @param solrConfig configuration consulted by {@link #isEnabled()}
   */
  public CircuitBreaker(SolrConfig solrConfig) {
    this.solrConfig = solrConfig;
  }

  /**
   * Check if circuit breaker is tripped.
   */
  public abstract boolean isTripped();

  /**
   * Get debug useful info.
   */
  public abstract String getDebugInfo();

  /**
   * Global switch shared by all circuit breakers: mirrors {@code useCircuitBreakers} of the
   * configuration. Circuit breakers needing their own settings should define a separate config.
   */
  protected boolean isEnabled() {
    return solrConfig.useCircuitBreakers;
  }
}

View File

@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.circuitbreaker;
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.core.SolrConfig;
/**
* Manages all registered circuit breaker instances. Responsible for a holistic view
* of whether a circuit breaker has tripped or not.
*
* There are two typical ways of using this class's instance:
* 1. Check if any circuit breaker has triggered -- and know which circuit breaker has triggered.
* 2. Get an instance of a specific circuit breaker and perform checks.
*
* It is a good practice to register new circuit breakers here if you want them checked for every
* request.
*
* NOTE: The current way of registering new default circuit breakers is minimal and not a long term
* solution. There will be a follow up with a SIP for a schema API design.
*/
public class CircuitBreakerManager {
  // Kept on the instance (rather than globally) to potentially allow a "family" of circuit
  // breakers to be enabled or disabled together
  private final boolean enableCircuitBreakerManager;
  private final List<CircuitBreaker> circuitBreakerList = new ArrayList<>();

  /**
   * @param enableCircuitBreakerManager when false, {@link #checkTripped()} and
   *                                    {@link #checkAnyTripped()} never report a tripped breaker
   */
  public CircuitBreakerManager(final boolean enableCircuitBreakerManager) {
    this.enableCircuitBreakerManager = enableCircuitBreakerManager;
  }

  /**
   * Adds a circuit breaker to the set consulted by this manager.
   */
  public void register(CircuitBreaker circuitBreaker) {
    circuitBreakerList.add(circuitBreaker);
  }

  /**
   * Removes every registered circuit breaker.
   */
  public void deregisterAll() {
    circuitBreakerList.clear();
  }

  /**
   * Evaluate every registered circuit breaker and collect the ones that are enabled and tripped.
   *
   * @return the tripped circuit breakers in registration order, or null if none tripped
   *         (which is always the case when this manager is disabled)
   */
  public List<CircuitBreaker> checkTripped() {
    if (!enableCircuitBreakerManager) {
      return null;
    }

    List<CircuitBreaker> tripped = null;
    for (CircuitBreaker candidate : circuitBreakerList) {
      if (!candidate.isEnabled() || !candidate.isTripped()) {
        continue;
      }
      // The result list is only allocated once the first tripped breaker is seen
      if (tripped == null) {
        tripped = new ArrayList<>();
      }
      tripped.add(candidate);
    }
    return tripped;
  }

  /**
   * Tells whether at least one circuit breaker has tripped.
   *
   * <p>
   * NOTE: Evaluation stops at the first circuit breaker that is enabled and tripped, so later
   * circuit breakers are not consulted in that case.
   * </p>
   *
   * @return true if any enabled circuit breaker has tripped, false otherwise
   */
  public boolean checkAnyTripped() {
    return enableCircuitBreakerManager
        && circuitBreakerList.stream().anyMatch(cb -> cb.isEnabled() && cb.isTripped());
  }

  /**
   * Build the error message reported when circuit breakers trip: one line per circuit breaker,
   * made of its class name followed by its debug info.
   *
   * @param circuitBreakerList the circuit breakers to describe
   * @return Constructed error message.
   */
  public static String toErrorMessage(List<CircuitBreaker> circuitBreakerList) {
    StringBuilder message = new StringBuilder();
    for (CircuitBreaker tripped : circuitBreakerList) {
      message.append(tripped.getClass().getName())
          .append(" ")
          .append(tripped.getDebugInfo())
          .append("\n");
    }
    return message.toString();
  }

  /**
   * Create a CircuitBreakerManager for the given configuration with the default circuit
   * breakers already registered.
   *
   * Any default circuit breakers should be registered here.
   */
  public static CircuitBreakerManager build(SolrConfig solrConfig) {
    CircuitBreakerManager manager = new CircuitBreakerManager(solrConfig.useCircuitBreakers);

    // Default circuit breakers
    manager.register(new MemoryCircuitBreaker(solrConfig));

    return manager;
  }
}

View File

@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.circuitbreaker;
import java.lang.invoke.MethodHandles;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import org.apache.solr.core.SolrConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* Tracks the current JVM heap usage and triggers if it exceeds the defined percentage of the maximum
* heap size allocated to the JVM. This circuit breaker is a part of the default CircuitBreakerManager
* so is checked for every request -- hence it is realtime. Once the memory usage goes below the threshold,
* it will start allowing queries again.
* </p>
*
* <p>
* The memory threshold is defined as a percentage of the maximum memory allocated -- see memoryCircuitBreakerThresholdPct
* in solrconfig.xml.
* </p>
*/
public class MemoryCircuitBreaker extends CircuitBreaker {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
  private static final MemoryMXBean MEMORY_MX_BEAN = ManagementFactory.getMemoryMXBean();

  // Heap usage (in bytes) at or above which this circuit breaker trips; computed once at construction
  private final long heapMemoryThreshold;

  // Per-thread values recorded by the last isTripped() call on the calling thread. They stay
  // unset (null) on a thread until isTripped() has run there with circuit breakers enabled.
  private final ThreadLocal<Long> seenMemory = new ThreadLocal<>();
  private final ThreadLocal<Long> allowedMemory = new ThreadLocal<>();

  /**
   * Computes the heap threshold from the JVM's maximum heap size and
   * {@code memoryCircuitBreakerThresholdPct} of the given configuration.
   *
   * @param solrConfig configuration providing the threshold percentage
   * @throws IllegalArgumentException if the JVM reports a non-positive maximum heap size
   * @throws IllegalStateException if the computed threshold is not positive
   */
  public MemoryCircuitBreaker(SolrConfig solrConfig) {
    super(solrConfig);

    long currentMaxHeap = MEMORY_MX_BEAN.getHeapMemoryUsage().getMax();

    if (currentMaxHeap <= 0) {
      throw new IllegalArgumentException("Invalid JVM state for the max heap usage");
    }

    int thresholdValueInPercentage = solrConfig.memoryCircuitBreakerThresholdPct;
    double thresholdInFraction = thresholdValueInPercentage / (double) 100;
    heapMemoryThreshold = (long) (currentMaxHeap * thresholdInFraction);

    if (heapMemoryThreshold <= 0) {
      throw new IllegalStateException("Memory limit cannot be less than or equal to zero");
    }
  }

  // TODO: An optimization can be to trip the circuit breaker for a duration of time
  // after the circuit breaker condition is matched. This will optimize for per call
  // overhead of calculating the condition parameters but can result in false positives.
  /**
   * Compares the live heap usage with the threshold and records both values for the calling
   * thread so that {@link #getDebugInfo()} can report them.
   *
   * @return true if circuit breakers are enabled and the heap usage has reached the threshold
   */
  @Override
  public boolean isTripped() {
    if (!isEnabled()) {
      return false;
    }

    long localAllowedMemory = getCurrentMemoryThreshold();
    long localSeenMemory = calculateLiveMemoryUsage();

    allowedMemory.set(localAllowedMemory);
    seenMemory.set(localSeenMemory);

    return (localSeenMemory >= localAllowedMemory);
  }

  /**
   * Reports the values recorded by the last {@link #isTripped()} call on the calling thread.
   * A warning is logged if either value is unset or zero; unset values are rendered as "null".
   */
  @Override
  public String getDebugInfo() {
    // Read each thread-local once. An unset ThreadLocal yields null, which must be checked
    // before any comparison against a primitive to avoid a NullPointerException on unboxing.
    final Long localSeenMemory = seenMemory.get();
    final Long localAllowedMemory = allowedMemory.get();

    if (localSeenMemory == null || localAllowedMemory == null
        || localSeenMemory == 0L || localAllowedMemory == 0L) {
      log.warn("MemoryCircuitBreaker's monitored values (seenMemory, allowedMemory) not set");
    }

    return "seenMemory=" + localSeenMemory + " allowedMemory=" + localAllowedMemory;
  }

  private long getCurrentMemoryThreshold() {
    return heapMemoryThreshold;
  }

  /**
   * Calculate the live memory usage for the system. This method is protected so that
   * subclasses (e.g. in tests) can override it.
   * @return Memory usage in bytes.
   */
  protected long calculateLiveMemoryUsage() {
    // NOTE: MemoryUsageGaugeSet provides memory usage statistics but we do not use them
    // here since it will require extra allocations and incur cost, hence it is cheaper to use
    // MemoryMXBean directly. Ideally, this call should not add noticeable
    // latency to a query -- but if it does, please signify on SOLR-14588
    return MEMORY_MX_BEAN.getHeapMemoryUsage().getUsed();
  }
}

View File

@ -55,6 +55,8 @@
"queryResultMaxDocsCached":1,
"enableLazyFieldLoading":1,
"boolTofilterOptimizer":1,
"useCircuitBreakers":10,
"memoryCircuitBreakerThresholdPct":20,
"maxBooleanClauses":1},
"jmx":{
"agentId":0,

View File

@ -0,0 +1,95 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<config>
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
<dataDir>${solr.data.dir:}</dataDir>
<xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
<schemaFactory class="ClassicIndexSchemaFactory"/>
<requestHandler name="/select" class="solr.SearchHandler" />
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or wildcard queries that expand to big boolean
queries. An exception is thrown if exceeded.
-->
<maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses>
<!-- Cache specification for Filters or DocSets - unordered set of *all* documents
that match a particular query.
-->
<filterCache
enabled="${filterCache.enabled}"
size="512"
initialSize="512"
autowarmCount="2"/>
<queryResultCache
enabled="${queryResultCache.enabled}"
size="512"
initialSize="512"
autowarmCount="2"/>
<documentCache
enabled="${documentCache.enabled}"
size="512"
initialSize="512"
autowarmCount="0"/>
<cache
name="user_definied_cache_XXX"
enabled="${user_definied_cache_XXX.enabled:false}"
/>
<cache
name="user_definied_cache_ZZZ"
enabled="${user_definied_cache_ZZZ.enabled:false}"
/>
<!-- If true, stored fields that are not requested will be loaded lazily.
-->
<enableLazyFieldLoading>true</enableLazyFieldLoading>
<queryResultWindowSize>10</queryResultWindowSize>
<!-- boolToFilterOptimizer converts boolean clauses with zero boost
into cached filters if the number of docs selected by the clause exceeds
the threshold (represented as a fraction of the total index)
-->
<boolTofilterOptimizer enabled="false" cacheSize="32" threshold=".05"/>
</query>
<circuitBreaker>
<useCircuitBreakers>true</useCircuitBreakers>
<memoryCircuitBreakerThresholdPct>75</memoryCircuitBreakerThresholdPct>
</circuitBreaker>
<initParams path="/select">
<lst name="defaults">
<str name="df">text</str>
</lst>
</initParams>
</config>

View File

@ -266,6 +266,8 @@ public class SolrCoreTest extends SolrTestCaseJ4 {
assertEquals("wrong config for maxBooleanClauses", 1024, solrConfig.booleanQueryMaxClauseCount);
assertEquals("wrong config for enableLazyFieldLoading", true, solrConfig.enableLazyFieldLoading);
assertEquals("wrong config for queryResultWindowSize", 10, solrConfig.queryResultWindowSize);
assertEquals("wrong config for useCircuitBreakers", false, solrConfig.useCircuitBreakers);
assertEquals("wrong config for memoryCircuitBreakerThresholdPct", 95, solrConfig.memoryCircuitBreakerThresholdPct);
}
/**

View File

@ -46,6 +46,8 @@ public class TestConfigOverlay extends SolrTestCase {
assertTrue(isEditableProp("query.queryResultMaxDocsCached", false, null));
assertTrue(isEditableProp("query.enableLazyFieldLoading", false, null));
assertTrue(isEditableProp("query.boolTofilterOptimizer", false, null));
assertTrue(isEditableProp("query.useCircuitBreakers", false, null));
assertTrue(isEditableProp("query.memoryCircuitBreakerThresholdPct", false, null));
assertTrue(isEditableProp("jmx.agentId", false, null));
assertTrue(isEditableProp("jmx.serviceUrl", false, null));
assertTrue(isEditableProp("jmx.rootName", false, null));

View File

@ -0,0 +1,218 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import java.util.HashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.SolrNamedThreadFactory;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.util.circuitbreaker.CircuitBreaker;
import org.apache.solr.util.circuitbreaker.MemoryCircuitBreaker;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.rules.RuleChain;
import org.junit.rules.TestRule;
/**
 * Tests for circuit breakers checked by the search handler, using the
 * solrconfig-memory-circuitbreaker.xml test configuration.
 */
public class TestCircuitBreaker extends SolrTestCaseJ4 {
  private final static int NUM_DOCS = 20;

  @Rule
  public TestRule solrTestRules = RuleChain.outerRule(new SystemPropertiesRestoreRule());

  @BeforeClass
  public static void setUpClass() throws Exception {
    System.setProperty("filterCache.enabled", "false");
    System.setProperty("queryResultCache.enabled", "false");
    System.setProperty("documentCache.enabled", "true");

    initCore("solrconfig-memory-circuitbreaker.xml", "schema.xml");
    for (int i = 0 ; i < NUM_DOCS ; i ++) {
      assertU(adoc("name", "john smith", "id", "1"));
      assertU(adoc("name", "johathon smith", "id", "2"));
      assertU(adoc("name", "john percival smith", "id", "3"));
      assertU(adoc("id", "1", "title", "this is a title.", "inStock_b1", "true"));
      assertU(adoc("id", "2", "title", "this is another title.", "inStock_b1", "true"));
      assertU(adoc("id", "3", "title", "Mary had a little lamb.", "inStock_b1", "false"));

      //commit inside the loop to get multiple segments to make search as realistic as possible
      assertU(commit());
    }
  }

  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }

  /** Removes all circuit breakers so that a breaker registered by one test cannot affect the next. */
  @After
  public void after() {
    h.getCore().getCircuitBreakerManager().deregisterAll();
  }

  /** A circuit breaker that always reports itself as tripped must make the query fail. */
  public void testCBAlwaysTrips() {
    HashMap<String, String> args = new HashMap<String, String>();

    args.put(QueryParsing.DEFTYPE, CircuitBreaker.NAME);
    args.put(CommonParams.FL, "id");

    CircuitBreaker circuitBreaker = new MockCircuitBreaker(h.getCore().getSolrConfig());

    h.getCore().getCircuitBreakerManager().register(circuitBreaker);

    expectThrows(SolrException.class, () -> {
      h.query(req("name:\"john smith\""));
    });
  }

  /** A memory circuit breaker reporting an enormous heap usage must make the query fail. */
  public void testCBFakeMemoryPressure() {
    HashMap<String, String> args = new HashMap<String, String>();

    args.put(QueryParsing.DEFTYPE, CircuitBreaker.NAME);
    args.put(CommonParams.FL, "id");

    CircuitBreaker circuitBreaker = new FakeMemoryPressureCircuitBreaker(h.getCore().getSolrConfig());

    h.getCore().getCircuitBreakerManager().register(circuitBreaker);

    expectThrows(SolrException.class, () -> {
      h.query(req("name:\"john smith\""));
    });
  }

  /**
   * Submits five queries against a circuit breaker whose reported heap usage only exceeds the
   * threshold from its fifth check onwards, and expects exactly one query to be rejected.
   */
  public void testBuildingMemoryPressure() {
    ExecutorService executor = ExecutorUtil.newMDCAwareCachedThreadPool(
        new SolrNamedThreadFactory("TestCircuitBreaker"));
    HashMap<String, String> args = new HashMap<String, String>();

    args.put(QueryParsing.DEFTYPE, CircuitBreaker.NAME);
    args.put(CommonParams.FL, "id");

    AtomicInteger failureCount = new AtomicInteger();

    try {
      CircuitBreaker circuitBreaker = new BuildingUpMemoryPressureCircuitBreaker(h.getCore().getSolrConfig());

      h.getCore().getCircuitBreakerManager().register(circuitBreaker);

      for (int i = 0; i < 5; i++) {
        executor.submit(() -> {
          try {
            h.query(req("name:\"john smith\""));
          } catch (SolrException e) {
            failureCount.incrementAndGet();
          } catch (Exception e) {
            // Keep the original exception as the cause so its stack trace is not lost.
            // NOTE(review): this exception ends up in the Future returned by submit(), which
            // this test does not inspect -- consider checking the futures.
            throw new RuntimeException(e.getMessage(), e);
          }
        });
      }

      executor.shutdown();
      try {
        executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
      } catch (InterruptedException e) {
        // Restore the interrupt flag before propagating so callers can still observe it
        Thread.currentThread().interrupt();
        throw new RuntimeException(e.getMessage(), e);
      }

      assertEquals("Number of failed queries is not correct", 1, failureCount.get());
    } finally {
      if (!executor.isShutdown()) {
        executor.shutdown();
      }
    }
  }

  /** With debug enabled, the timing section must contain a "circuitbreaker" entry. */
  public void testResponseWithCBTiming() {
    assertQ(req("q", "*:*", CommonParams.DEBUG_QUERY, "true"),
        "//str[@name='rawquerystring']='*:*'",
        "//str[@name='querystring']='*:*'",
        "//str[@name='parsedquery']='MatchAllDocsQuery(*:*)'",
        "//str[@name='parsedquery_toString']='*:*'",
        "count(//lst[@name='explain']/*)=3",
        "//lst[@name='explain']/str[@name='1']",
        "//lst[@name='explain']/str[@name='2']",
        "//lst[@name='explain']/str[@name='3']",
        "//str[@name='QParser']",
        "count(//lst[@name='timing']/*)=4",
        "//lst[@name='timing']/double[@name='time']",
        "count(//lst[@name='circuitbreaker']/*)>0",
        "//lst[@name='circuitbreaker']/double[@name='time']",
        "count(//lst[@name='prepare']/*)>0",
        "//lst[@name='prepare']/double[@name='time']",
        "count(//lst[@name='process']/*)>0",
        "//lst[@name='process']/double[@name='time']"
    );
  }

  /** Circuit breaker that is always tripped. */
  private static class MockCircuitBreaker extends CircuitBreaker {

    public MockCircuitBreaker(SolrConfig solrConfig) {
      super(solrConfig);
    }

    @Override
    public boolean isTripped() {
      // Always return true
      return true;
    }

    @Override
    public String getDebugInfo() {
      return "MockCircuitBreaker";
    }
  }

  /** Memory circuit breaker that always reports the largest possible heap usage. */
  private static class FakeMemoryPressureCircuitBreaker extends MemoryCircuitBreaker {

    public FakeMemoryPressureCircuitBreaker(SolrConfig solrConfig) {
      super(solrConfig);
    }

    @Override
    protected long calculateLiveMemoryUsage() {
      // Return a number large enough to trigger a pushback from the circuit breaker
      return Long.MAX_VALUE;
    }
  }

  /**
   * Memory circuit breaker that reports a tiny heap usage for its first four checks and the
   * largest possible heap usage for every check after that.
   */
  private static class BuildingUpMemoryPressureCircuitBreaker extends MemoryCircuitBreaker {
    private final AtomicInteger count = new AtomicInteger();

    public BuildingUpMemoryPressureCircuitBreaker(SolrConfig solrConfig) {
      super(solrConfig);
    }

    @Override
    protected long calculateLiveMemoryUsage() {
      if (count.getAndIncrement() >= 4) {
        return Long.MAX_VALUE;
      }

      return 5; // Random number guaranteed to not trip the circuit breaker
    }
  }
}

View File

@ -546,6 +546,44 @@
</query>
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Circuit Breaker Section - This section consists of configurations for
circuit breakers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<circuitBreaker>
  <!-- Enable Circuit Breakers

       Circuit breakers are designed to allow stability and predictable query
       execution. They prevent operations that can take down the node and cause
       noisy neighbour issues.

       This flag is the uber control switch which controls the activation/deactivation of all circuit
       breakers. At the moment, the only circuit breaker (max JVM circuit breaker) does not have its
       own specific configuration. However, if a circuit breaker wishes to be independently configurable,
       they are free to add their specific configuration but need to ensure that this flag is always
       respected - this should have veto over all independent configuration flags.
    -->
  <useCircuitBreakers>false</useCircuitBreakers>

  <!-- Memory Circuit Breaker Threshold In Percentage

       Specific configuration for max JVM heap usage circuit breaker. This configuration defines the
       threshold percentage of maximum heap allocated beyond which queries will be rejected until the
       current JVM usage goes below the threshold. The valid value range for this parameter is 50 - 95.
       A value outside this range is rejected when the configuration is loaded with useCircuitBreakers
       set to true.

       Consider a scenario where the max heap allocated is 4 GB and memoryCircuitBreakerThresholdPct is
       defined as 75. Threshold JVM usage will be 4 * 0.75 = 3 GB. It's generally a good idea to keep this value between 75 - 80% of maximum heap
       allocated.

       If, at any point, the current JVM heap usage goes above 3 GB, queries will be rejected until the heap usage goes below 3 GB again.
       If you see queries getting rejected with 503 error code, check for "Circuit Breakers tripped"
       in logs and the corresponding error message should tell you what transpired (if the failure
       was caused by tripped circuit breakers).
    -->
  <memoryCircuitBreakerThresholdPct>75</memoryCircuitBreakerThresholdPct>
</circuitBreaker>
<!-- Request Dispatcher

View File

@ -513,6 +513,23 @@
-->
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
<!-- Use Filter For Sorted Query
A possible optimization that attempts to use a filter to
satisfy a search. If the requested sort does not include
score, then the filterCache will be checked for a filter
matching the query. If found, the filter will be used as the
source of document ids, and then the sort will be applied to
that.
For most situations, this will not be useful unless you
frequently get the same search repeatedly with different sort
options, and none of them ever use "score"
-->
<!--
<useFilterForSortedQuery>true</useFilterForSortedQuery>
-->
<!-- Query Related Event Listeners
Various IndexSearcher related events can trigger Listeners to
@ -561,6 +578,48 @@
</query>
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Circuit Breaker Section - This section consists of configurations for
circuit breakers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<circuitBreaker>
<!-- Enable Circuit Breakers
Circuit breakers are designed to allow stability and predictable query
execution. They prevent operations that can take down the node and cause
noisy neighbour issues.
This flag is the uber control switch which controls the activation/deactivation of all circuit
breakers. At the moment, the only circuit breaker (max JVM circuit breaker) does not have its
own specific configuration. However, if a circuit breaker wishes to be independently configurable,
it is free to add its specific configuration but needs to ensure that this flag is always
respected - this should have veto over all independent configuration flags.
-->
<!--
<useCircuitBreakers>true</useCircuitBreakers>
-->
<!-- Memory Circuit Breaker Threshold In Percentage
Specific configuration for max JVM heap usage circuit breaker. This configuration defines the
threshold percentage of maximum heap allocated beyond which queries will be rejected until the
current JVM usage goes below the threshold. The valid value range for this parameter is 50-95.
Consider a scenario where the max heap allocated is 4 GB and memoryCircuitBreakerThresholdPct is
defined as 75. Threshold JVM usage will be 4 * 0.75 = 3 GB. It's generally a good idea to keep this value between 75 - 80% of maximum heap
allocated.
If, at any point, the current JVM heap usage goes above 3 GB, queries will be rejected until the heap usage goes below 3 GB again.
If you see queries getting rejected with 503 error code, check for "Circuit Breakers tripped"
in logs and the corresponding error message should tell you what transpired (if the failure
was caused by tripped circuit breakers).
-->
<!--
<memoryCircuitBreakerThresholdPct>75</memoryCircuitBreakerThresholdPct>
-->
</circuitBreaker>
<!-- Request Dispatcher

View File

@ -600,6 +600,48 @@
</query>
<!-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Circuit Breaker Section - This section consists of configurations for
circuit breakers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<circuitBreaker>
<!-- Enable Circuit Breakers
Circuit breakers are designed to allow stability and predictable query
execution. They prevent operations that can take down the node and cause
noisy neighbour issues.
This flag is the uber control switch which controls the activation/deactivation of all circuit
breakers. At the moment, the only circuit breaker (max JVM circuit breaker) does not have its
own specific configuration. However, if a circuit breaker wishes to be independently configurable,
it is free to add its specific configuration but needs to ensure that this flag is always
respected - this should have veto over all independent configuration flags.
-->
<!--
<useCircuitBreakers>true</useCircuitBreakers>
-->
<!-- Memory Circuit Breaker Threshold In Percentage
Specific configuration for max JVM heap usage circuit breaker. This configuration defines the
threshold percentage of maximum heap allocated beyond which queries will be rejected until the
current JVM usage goes below the threshold. The valid value range for this parameter is 50-95.
Consider a scenario where the max heap allocated is 4 GB and memoryCircuitBreakerThresholdPct is
defined as 75. Threshold JVM usage will be 4 * 0.75 = 3 GB. It's generally a good idea to keep this value between 75 - 80% of maximum heap
allocated.
If, at any point, the current JVM heap usage goes above 3 GB, queries will be rejected until the heap usage goes below 3 GB again.
If you see queries getting rejected with 503 error code, check for "Circuit Breakers tripped"
in logs and the corresponding error message should tell you what transpired (if the failure
was caused by tripped circuit breakers).
-->
<!--
<memoryCircuitBreakerThresholdPct>75</memoryCircuitBreakerThresholdPct>
-->
</circuitBreaker>
<!-- Request Dispatcher

View File

@ -0,0 +1,68 @@
= Circuit Breakers
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
Solr's circuit breaker infrastructure allows prevention of actions that can cause a node to go beyond its capacity or to go down. The
premise of circuit breakers is to ensure a higher quality of service and only accept request loads that are serviceable in the current
resource configuration.
== When To Use Circuit Breakers
Circuit breakers should be used when the user wishes to trade request throughput for higher Solr stability. If circuit breakers
are enabled, requests may be rejected under the condition of high node duress with an appropriate HTTP error code (typically 503).
It is up to the client to handle this error and potentially build retry logic, as this should ideally be a transient situation.
== Circuit Breaker Configurations
The following flag controls the global activation/deactivation of circuit breakers. If this flag is disabled, all circuit breakers
will be disabled globally. Per circuit breaker configurations are specified in their respective sections later.
[source,xml]
----
<useCircuitBreakers>false</useCircuitBreakers>
----
== Currently Supported Circuit Breakers
=== JVM Heap Usage Based Circuit Breaker
This circuit breaker tracks JVM heap memory usage and rejects incoming search requests with a 503 error code if the heap usage
exceeds a configured percentage of maximum heap allocated to the JVM (-Xmx). The main configuration for this circuit breaker is
controlling the threshold percentage at which the breaker will trip.
It does not logically make sense to have a threshold below 50% or above 95% of the max heap allocated to the JVM. Hence, the range
of valid values for this parameter is [50, 95], both inclusive.
[source,xml]
----
<memoryCircuitBreakerThresholdPct>75</memoryCircuitBreakerThresholdPct>
----
Consider the following example:
JVM has been allocated a maximum heap of 5GB (-Xmx) and memoryCircuitBreakerThresholdPct is set to 75. In this scenario, the heap usage
at which the circuit breaker will trip is 3.75GB.
Note that this circuit breaker is checked for each incoming search request and considers the current heap usage of the node, i.e., every search
request will get the live heap usage and compare it against the set memory threshold. The check does not impact performance,
but any performance regressions that are suspected to be caused by this feature should be reported to the dev list.
== Performance Considerations
It is worth noting that while the JVM circuit breaker does not add any noticeable overhead per query, having too many
circuit breakers checked for a single request can cause a performance overhead.
In addition, it is a good practice to exponentially back off while retrying requests on a busy node.

View File

@ -202,6 +202,13 @@ _Query Sizing and Warming_
* `query.queryResultWindowSize`
* `query.queryResultMaxDocCached`
_Query Circuit Breakers_
See <<circuit-breakers.adoc#circuit-breakers,Circuit Breakers in Solr>> for more details
* `query.useCircuitBreakers`
* `query.memoryCircuitBreakerThresholdPct`
*RequestDispatcher Settings*
See <<requestdispatcher-in-solrconfig.adoc#requestdispatcher-in-solrconfig,RequestDispatcher in SolrConfig>> for defaults and acceptable values for these settings.

View File

@ -10,6 +10,7 @@
streaming-expressions, \
solrcloud, \
legacy-scaling-and-distribution, \
circuit-breakers, \
solr-plugins, \
the-well-configured-solr-instance, \
monitoring-solr, \
@ -121,6 +122,8 @@ The *<<getting-started.adoc#getting-started,Getting Started>>* section guides yo
*<<solrcloud.adoc#solrcloud,SolrCloud>>*: This section describes SolrCloud, which provides comprehensive distributed capabilities.
*<<legacy-scaling-and-distribution.adoc#legacy-scaling-and-distribution,Legacy Scaling and Distribution>>*: This section tells you how to grow a Solr distribution by dividing a large index into sections called shards, which are then distributed across multiple servers, or by replicating a single index across multiple services.
*<<circuit-breakers.adoc#circuit-breakers,Circuit Breakers>>*: This section talks about circuit breakers, a way of improving the stability of Solr nodes and strengthening the service level guarantees of requests that are accepted by Solr.
****
.Advanced Configuration

View File

@ -172,6 +172,26 @@ This parameter sets the maximum number of documents to cache for any entry in th
<queryResultMaxDocsCached>200</queryResultMaxDocsCached>
----
=== useCircuitBreakers
Global control flag for enabling circuit breakers.
[source,xml]
----
<useCircuitBreakers>true</useCircuitBreakers>
----
=== memoryCircuitBreakerThresholdPct
Memory threshold for JVM heap usage, defined as a percentage of the maximum heap allocated
to the JVM (-Xmx). Ideally, this value should be in the range of 75-80% of maximum heap allocated
to the JVM.
[source,xml]
----
<memoryCircuitBreakerThresholdPct>75</memoryCircuitBreakerThresholdPct>
----
=== useColdSearcher
This setting controls whether search requests for which there is not a currently registered searcher should wait for a new searcher to warm up (false) or proceed immediately (true). When set to "false", requests will block until the searcher has warmed its caches.