SOLR-14615: Implement CPU Utilization Based Circuit Breaker (#1737)

This commit introduces CPU based circuit breaker. This circuit breaker
tracks the average CPU load per minute and triggers if the value exceeds
a configurable value.

This commit also adds a specific control flag for Memory Circuit Breaker
to allow enabling/disabling the same.
This commit is contained in:
Atri Sharma 2020-08-20 13:21:26 +05:30 committed by GitHub
parent fa878eb5b8
commit 2f37f40171
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 348 additions and 55 deletions

View File

@ -42,6 +42,8 @@ Improvements
* SOLR-13528 Rate Limiting in Solr (Atri Sharma, Mike Drob) * SOLR-13528 Rate Limiting in Solr (Atri Sharma, Mike Drob)
* SOLR-14615: CPU Utilization Based Circuit Breaker (Atri Sharma)
Other Changes Other Changes
---------------------- ----------------------
* SOLR-14656: Autoscaling framework removed (Ishan Chattopadhyaya, noble, Ilan Ginzburg) * SOLR-14656: Autoscaling framework removed (Ishan Chattopadhyaya, noble, Ilan Ginzburg)

View File

@ -228,10 +228,13 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
queryResultMaxDocsCached = getInt("query/queryResultMaxDocsCached", Integer.MAX_VALUE); queryResultMaxDocsCached = getInt("query/queryResultMaxDocsCached", Integer.MAX_VALUE);
enableLazyFieldLoading = getBool("query/enableLazyFieldLoading", false); enableLazyFieldLoading = getBool("query/enableLazyFieldLoading", false);
useCircuitBreakers = getBool("circuitBreaker/useCircuitBreakers", false); useCircuitBreakers = getBool("circuitBreakers/@enabled", false);
memoryCircuitBreakerThresholdPct = getInt("circuitBreaker/memoryCircuitBreakerThresholdPct", 95); cpuCBEnabled = getBool("circuitBreakers/cpuBreaker/@enabled", false);
memCBEnabled = getBool("circuitBreakers/memBreaker/@enabled", false);
memCBThreshold = getInt("circuitBreakers/memBreaker/@threshold", 95);
cpuCBThreshold = getInt("circuitBreakers/cpuBreaker/@threshold", 95);
validateMemoryBreakerThreshold(); validateCircuitBreakerThresholds();
filterCacheConfig = CacheConfig.getConfig(this, "query/filterCache"); filterCacheConfig = CacheConfig.getConfig(this, "query/filterCache");
queryResultCacheConfig = CacheConfig.getConfig(this, "query/queryResultCache"); queryResultCacheConfig = CacheConfig.getConfig(this, "query/queryResultCache");
@ -530,7 +533,10 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
// Circuit Breaker Configuration // Circuit Breaker Configuration
public final boolean useCircuitBreakers; public final boolean useCircuitBreakers;
public final int memoryCircuitBreakerThresholdPct; public final int memCBThreshold;
public final boolean memCBEnabled;
public final boolean cpuCBEnabled;
public final int cpuCBThreshold;
// IndexConfig settings // IndexConfig settings
public final SolrIndexConfig indexConfig; public final SolrIndexConfig indexConfig;
@ -811,13 +817,15 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
loader.reloadLuceneSPI(); loader.reloadLuceneSPI();
} }
private void validateMemoryBreakerThreshold() { private void validateCircuitBreakerThresholds() {
if (useCircuitBreakers) { if (useCircuitBreakers) {
if (memoryCircuitBreakerThresholdPct > 95 || memoryCircuitBreakerThresholdPct < 50) { if (memCBEnabled) {
if (memCBThreshold > 95 || memCBThreshold < 50) {
throw new IllegalArgumentException("Valid value range of memoryCircuitBreakerThresholdPct is 50 - 95"); throw new IllegalArgumentException("Valid value range of memoryCircuitBreakerThresholdPct is 50 - 95");
} }
} }
} }
}
public int getMultipartUploadLimitKB() { public int getMultipartUploadLimitKB() {
return multipartUploadLimitKB; return multipartUploadLimitKB;
@ -889,7 +897,10 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
m.put("enableLazyFieldLoading", enableLazyFieldLoading); m.put("enableLazyFieldLoading", enableLazyFieldLoading);
m.put("maxBooleanClauses", booleanQueryMaxClauseCount); m.put("maxBooleanClauses", booleanQueryMaxClauseCount);
m.put("useCircuitBreakers", useCircuitBreakers); m.put("useCircuitBreakers", useCircuitBreakers);
m.put("memoryCircuitBreakerThresholdPct", memoryCircuitBreakerThresholdPct); m.put("cpuCircuitBreakerEnabled", cpuCBEnabled);
m.put("memoryCircuitBreakerEnabled", memCBEnabled);
m.put("memoryCircuitBreakerThresholdPct", memCBThreshold);
m.put("cpuCircuitBreakerThreshold", cpuCBThreshold);
for (SolrPluginInfo plugin : plugins) { for (SolrPluginInfo plugin : plugins) {
List<PluginInfo> infos = getPluginInfos(plugin.clazz.getName()); List<PluginInfo> infos = getPluginInfos(plugin.clazz.getName());
if (infos == null || infos.isEmpty()) continue; if (infos == null || infos.isEmpty()) continue;

View File

@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util.circuitbreaker;
import java.lang.invoke.MethodHandles;
import java.lang.management.ManagementFactory;
import java.lang.management.OperatingSystemMXBean;
import org.apache.solr.core.SolrConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * <p>
 * Tracks current CPU usage and triggers if the specified threshold is breached.
 *
 * This circuit breaker reads the system load average over the last minute, as reported by
 * {@link OperatingSystemMXBean#getSystemLoadAverage()}, and uses that data to take a decision.
 * OperatingSystemMXBean does not allow a configurable interval of collection of data, and it
 * reports a negative value when the load average is not available; in that case the breaker
 * never trips.
 * //TODO: Use Codahale Meter to calculate the value locally.
 * </p>
 *
 * <p>
 * Whether this breaker is enabled and its trigger threshold are defined in solrconfig.xml
 * (read here from {@code SolrConfig.cpuCBEnabled} and {@code SolrConfig.cpuCBThreshold}).
 * </p>
 */
public class CPUCircuitBreaker extends CircuitBreaker {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
  private static final OperatingSystemMXBean operatingSystemMXBean = ManagementFactory.getOperatingSystemMXBean();

  // Breaker-specific switch; checked in addition to the inherited isEnabled().
  private final boolean enabled;
  private final double cpuUsageThreshold;

  // Assumption -- the value of these parameters will be set correctly before invoking getDebugInfo().
  // They are recorded by isTripped() on the calling thread, so getDebugInfo()/getErrorMessage()
  // report the values observed by the same thread's most recent check.
  private static final ThreadLocal<Double> seenCPUUsage = ThreadLocal.withInitial(() -> 0.0);
  private static final ThreadLocal<Double> allowedCPUUsage = ThreadLocal.withInitial(() -> 0.0);

  /**
   * Creates the breaker from the given configuration.
   *
   * @param solrConfig configuration supplying the enabled flag and the threshold
   */
  public CPUCircuitBreaker(SolrConfig solrConfig) {
    super(solrConfig);

    this.enabled = solrConfig.cpuCBEnabled;
    this.cpuUsageThreshold = solrConfig.cpuCBThreshold;
  }

  /**
   * Checks the live CPU usage against the configured threshold.
   *
   * @return {@code true} if the observed usage is greater than or equal to the threshold;
   *         {@code false} if the breaker is disabled, the usage cannot be determined, or the
   *         usage is below the threshold
   */
  @Override
  public boolean isTripped() {
    // isEnabled() is inherited from CircuitBreaker (presumably the global switch -- its body is
    // not visible here); 'enabled' is this breaker's own flag. Both must allow the check.
    if (!isEnabled() || !enabled) {
      return false;
    }

    double localAllowedCPUUsage = getCpuUsageThreshold();
    double localSeenCPUUsage = calculateLiveCPUUsage();

    // A negative reading means the value could not be obtained; do not trip on unknown data.
    if (localSeenCPUUsage < 0) {
      log.warn("Unable to get CPU usage");
      return false;
    }

    allowedCPUUsage.set(localAllowedCPUUsage);
    seenCPUUsage.set(localSeenCPUUsage);

    return (localSeenCPUUsage >= localAllowedCPUUsage);
  }

  /**
   * Returns the values recorded by the last {@link #isTripped()} call on this thread.
   * Logs a warning if either value still holds its initial value of 0.0.
   */
  @Override
  public String getDebugInfo() {
    double seen = seenCPUUsage.get();
    double allowed = allowedCPUUsage.get();

    // Check BOTH monitored values; the earlier code tested seenCPUUsage twice.
    if (seen == 0.0 || allowed == 0.0) {
      log.warn("CPUCircuitBreaker's monitored values (seenCPUUSage, allowedCPUUsage) not set");
    }

    return "seenCPUUSage=" + seen + " allowedCPUUsage=" + allowed;
  }

  /**
   * Returns the message reported when this breaker has tripped, including the observed usage
   * and the threshold recorded by the last {@link #isTripped()} call on this thread.
   */
  @Override
  public String getErrorMessage() {
    return "CPU Circuit Breaker triggered as seen CPU usage is above allowed threshold. " +
        "Seen CPU usage " + seenCPUUsage.get() + " and allocated threshold " +
        allowedCPUUsage.get();
  }

  /**
   * @return the configured threshold at or above which the breaker trips
   */
  public double getCpuUsageThreshold() {
    return cpuUsageThreshold;
  }

  /**
   * Reads the current load value. Overridable so that tests can inject a value.
   *
   * @return the system load average for the last minute, or a negative value if unavailable
   */
  protected double calculateLiveCPUUsage() {
    return operatingSystemMXBean.getSystemLoadAverage();
  }
}

View File

@ -27,6 +27,9 @@ import org.apache.solr.core.SolrConfig;
* 2. Use the circuit breaker in a specific code path(s). * 2. Use the circuit breaker in a specific code path(s).
* *
* TODO: This class should be grown as the scope of circuit breakers grow. * TODO: This class should be grown as the scope of circuit breakers grow.
*
* The class and its derivatives raise a standard exception when a circuit breaker is triggered.
* We should make it into a dedicated exception (https://issues.apache.org/jira/browse/SOLR-14755)
* </p> * </p>
*/ */
public abstract class CircuitBreaker { public abstract class CircuitBreaker {
@ -53,4 +56,9 @@ public abstract class CircuitBreaker {
* Get debug useful info. * Get debug useful info.
*/ */
public abstract String getDebugInfo(); public abstract String getDebugInfo();
/**
* Get error message when the circuit breaker triggers
*/
public abstract String getErrorMessage();
} }

View File

@ -20,6 +20,7 @@ package org.apache.solr.util.circuitbreaker;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import com.google.common.annotations.VisibleForTesting;
import org.apache.solr.core.SolrConfig; import org.apache.solr.core.SolrConfig;
/** /**
@ -107,9 +108,7 @@ public class CircuitBreakerManager {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for (CircuitBreaker circuitBreaker : circuitBreakerList) { for (CircuitBreaker circuitBreaker : circuitBreakerList) {
sb.append(circuitBreaker.getClass().getName()); sb.append(circuitBreaker.getErrorMessage());
sb.append(" ");
sb.append(circuitBreaker.getDebugInfo());
sb.append("\n"); sb.append("\n");
} }
@ -127,8 +126,16 @@ public class CircuitBreakerManager {
// Install the default circuit breakers // Install the default circuit breakers
CircuitBreaker memoryCircuitBreaker = new MemoryCircuitBreaker(solrConfig); CircuitBreaker memoryCircuitBreaker = new MemoryCircuitBreaker(solrConfig);
CircuitBreaker cpuCircuitBreaker = new CPUCircuitBreaker(solrConfig);
circuitBreakerManager.register(memoryCircuitBreaker); circuitBreakerManager.register(memoryCircuitBreaker);
circuitBreakerManager.register(cpuCircuitBreaker);
return circuitBreakerManager; return circuitBreakerManager;
} }
@VisibleForTesting
public List<CircuitBreaker> getRegisteredCircuitBreakers() {
return circuitBreakerList;
}
} }

View File

@ -43,22 +43,25 @@ public class MemoryCircuitBreaker extends CircuitBreaker {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private static final MemoryMXBean MEMORY_MX_BEAN = ManagementFactory.getMemoryMXBean(); private static final MemoryMXBean MEMORY_MX_BEAN = ManagementFactory.getMemoryMXBean();
private boolean enabled;
private final long heapMemoryThreshold; private final long heapMemoryThreshold;
// Assumption -- the value of these parameters will be set correctly before invoking getDebugInfo() // Assumption -- the value of these parameters will be set correctly before invoking getDebugInfo()
private final ThreadLocal<Long> seenMemory = new ThreadLocal<>(); private static final ThreadLocal<Long> seenMemory = ThreadLocal.withInitial(() -> 0L);
private final ThreadLocal<Long> allowedMemory = new ThreadLocal<>(); private static final ThreadLocal<Long> allowedMemory = ThreadLocal.withInitial(() -> 0L);
public MemoryCircuitBreaker(SolrConfig solrConfig) { public MemoryCircuitBreaker(SolrConfig solrConfig) {
super(solrConfig); super(solrConfig);
this.enabled = solrConfig.memCBEnabled;
long currentMaxHeap = MEMORY_MX_BEAN.getHeapMemoryUsage().getMax(); long currentMaxHeap = MEMORY_MX_BEAN.getHeapMemoryUsage().getMax();
if (currentMaxHeap <= 0) { if (currentMaxHeap <= 0) {
throw new IllegalArgumentException("Invalid JVM state for the max heap usage"); throw new IllegalArgumentException("Invalid JVM state for the max heap usage");
} }
int thresholdValueInPercentage = solrConfig.memoryCircuitBreakerThresholdPct; int thresholdValueInPercentage = solrConfig.memCBThreshold;
double thresholdInFraction = thresholdValueInPercentage / (double) 100; double thresholdInFraction = thresholdValueInPercentage / (double) 100;
heapMemoryThreshold = (long) (currentMaxHeap * thresholdInFraction); heapMemoryThreshold = (long) (currentMaxHeap * thresholdInFraction);
@ -76,6 +79,10 @@ public class MemoryCircuitBreaker extends CircuitBreaker {
return false; return false;
} }
if (!enabled) {
return false;
}
long localAllowedMemory = getCurrentMemoryThreshold(); long localAllowedMemory = getCurrentMemoryThreshold();
long localSeenMemory = calculateLiveMemoryUsage(); long localSeenMemory = calculateLiveMemoryUsage();
@ -95,6 +102,13 @@ public class MemoryCircuitBreaker extends CircuitBreaker {
return "seenMemory=" + seenMemory.get() + " allowedMemory=" + allowedMemory.get(); return "seenMemory=" + seenMemory.get() + " allowedMemory=" + allowedMemory.get();
} }
@Override
public String getErrorMessage() {
return "Memory Circuit Breaker triggered as JVM heap usage values are greater than allocated threshold." +
"Seen JVM heap memory usage " + seenMemory.get() + " and allocated threshold " +
allowedMemory.get();
}
private long getCurrentMemoryThreshold() { private long getCurrentMemoryThreshold() {
return heapMemoryThreshold; return heapMemoryThreshold;
} }

View File

@ -55,8 +55,17 @@
"queryResultMaxDocsCached":1, "queryResultMaxDocsCached":1,
"enableLazyFieldLoading":1, "enableLazyFieldLoading":1,
"boolTofilterOptimizer":1, "boolTofilterOptimizer":1,
"useCircuitBreakers":10, "circuitBreakers":{
"memoryCircuitBreakerThresholdPct":20, "enabled":10,
"memBreaker":{
"enabled":10,
"threshold":20
},
"cpuBreaker":{
"enabled":10,
"threshold":20
}
},
"maxBooleanClauses":1}, "maxBooleanClauses":1},
"jmx":{ "jmx":{
"agentId":0, "agentId":0,

View File

@ -78,13 +78,10 @@
</query> </query>
<circuitBreaker> <circuitBreakers enabled="true">
<cpuBreaker enabled="true" threshold="75"/>
<useCircuitBreakers>true</useCircuitBreakers> <memBreaker enabled="true" threshold="75"/>
</circuitBreakers>
<memoryCircuitBreakerThresholdPct>75</memoryCircuitBreakerThresholdPct>
</circuitBreaker>
<initParams path="/select"> <initParams path="/select">
<lst name="defaults"> <lst name="defaults">

View File

@ -267,7 +267,8 @@ public class SolrCoreTest extends SolrTestCaseJ4 {
assertEquals("wrong config for enableLazyFieldLoading", true, solrConfig.enableLazyFieldLoading); assertEquals("wrong config for enableLazyFieldLoading", true, solrConfig.enableLazyFieldLoading);
assertEquals("wrong config for queryResultWindowSize", 10, solrConfig.queryResultWindowSize); assertEquals("wrong config for queryResultWindowSize", 10, solrConfig.queryResultWindowSize);
assertEquals("wrong config for useCircuitBreakers", false, solrConfig.useCircuitBreakers); assertEquals("wrong config for useCircuitBreakers", false, solrConfig.useCircuitBreakers);
assertEquals("wrong config for memoryCircuitBreakerThresholdPct", 95, solrConfig.memoryCircuitBreakerThresholdPct); assertEquals("wrong config for memoryCircuitBreakerThresholdPct", 95, solrConfig.memCBThreshold);
assertEquals("wrong config for cpuCircuitBreakerThreshold", 95, solrConfig.cpuCBThreshold);
} }
/** /**

View File

@ -18,8 +18,11 @@
package org.apache.solr.util; package org.apache.solr.util;
import java.lang.invoke.MethodHandles; import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
@ -31,6 +34,7 @@ import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.SolrNamedThreadFactory; import org.apache.solr.common.util.SolrNamedThreadFactory;
import org.apache.solr.core.SolrConfig; import org.apache.solr.core.SolrConfig;
import org.apache.solr.search.QueryParsing; import org.apache.solr.search.QueryParsing;
import org.apache.solr.util.circuitbreaker.CPUCircuitBreaker;
import org.apache.solr.util.circuitbreaker.CircuitBreaker; import org.apache.solr.util.circuitbreaker.CircuitBreaker;
import org.apache.solr.util.circuitbreaker.MemoryCircuitBreaker; import org.apache.solr.util.circuitbreaker.MemoryCircuitBreaker;
import org.junit.After; import org.junit.After;
@ -41,6 +45,9 @@ import org.junit.rules.TestRule;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.hamcrest.CoreMatchers.containsString;
@SuppressWarnings({"rawtypes"})
public class TestCircuitBreaker extends SolrTestCaseJ4 { public class TestCircuitBreaker extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private final static int NUM_DOCS = 20; private final static int NUM_DOCS = 20;
@ -84,6 +91,8 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
args.put(QueryParsing.DEFTYPE, CircuitBreaker.NAME); args.put(QueryParsing.DEFTYPE, CircuitBreaker.NAME);
args.put(CommonParams.FL, "id"); args.put(CommonParams.FL, "id");
removeAllExistingCircuitBreakers();
CircuitBreaker circuitBreaker = new MockCircuitBreaker(h.getCore().getSolrConfig()); CircuitBreaker circuitBreaker = new MockCircuitBreaker(h.getCore().getSolrConfig());
h.getCore().getCircuitBreakerManager().register(circuitBreaker); h.getCore().getCircuitBreakerManager().register(circuitBreaker);
@ -99,6 +108,8 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
args.put(QueryParsing.DEFTYPE, CircuitBreaker.NAME); args.put(QueryParsing.DEFTYPE, CircuitBreaker.NAME);
args.put(CommonParams.FL, "id"); args.put(CommonParams.FL, "id");
removeAllExistingCircuitBreakers();
CircuitBreaker circuitBreaker = new FakeMemoryPressureCircuitBreaker(h.getCore().getSolrConfig()); CircuitBreaker circuitBreaker = new FakeMemoryPressureCircuitBreaker(h.getCore().getSolrConfig());
h.getCore().getCircuitBreakerManager().register(circuitBreaker); h.getCore().getCircuitBreakerManager().register(circuitBreaker);
@ -119,33 +130,42 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
AtomicInteger failureCount = new AtomicInteger(); AtomicInteger failureCount = new AtomicInteger();
try { try {
removeAllExistingCircuitBreakers();
CircuitBreaker circuitBreaker = new BuildingUpMemoryPressureCircuitBreaker(h.getCore().getSolrConfig()); CircuitBreaker circuitBreaker = new BuildingUpMemoryPressureCircuitBreaker(h.getCore().getSolrConfig());
h.getCore().getCircuitBreakerManager().register(circuitBreaker); h.getCore().getCircuitBreakerManager().register(circuitBreaker);
List<Future<?>> futures = new ArrayList<>();
for (int i = 0; i < 5; i++) { for (int i = 0; i < 5; i++) {
executor.submit(() -> { Future<?> future = executor.submit(() -> {
try { try {
h.query(req("name:\"john smith\"")); h.query(req("name:\"john smith\""));
} catch (SolrException e) { } catch (SolrException e) {
if (!e.getMessage().startsWith("Circuit Breakers tripped")) { assertThat(e.getMessage(), containsString("Circuit Breakers tripped"));
if (log.isInfoEnabled()) {
String logMessage = "Expected error message for testBuildingMemoryPressure was not received. Error message " + e.getMessage();
log.info(logMessage);
}
throw new RuntimeException("Expected error message was not received. Error message " + e.getMessage());
}
failureCount.incrementAndGet(); failureCount.incrementAndGet();
} catch (Exception e) { } catch (Exception e) {
throw new RuntimeException(e.getMessage()); throw new RuntimeException(e.getMessage());
} }
}); });
futures.add(future);
}
for (Future<?> future : futures) {
try {
future.get();
} catch (Exception e) {
throw new RuntimeException(e.getMessage());
}
} }
executor.shutdown(); executor.shutdown();
try { try {
executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS); executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
} catch (InterruptedException e) { } catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw new RuntimeException(e.getMessage()); throw new RuntimeException(e.getMessage());
} }
@ -157,6 +177,59 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
} }
} }
/**
 * Registers a CPU circuit breaker that always reports a high load and verifies that
 * every one of five concurrently submitted queries is rejected by the circuit breaker.
 */
public void testFakeCPUCircuitBreaker() {
  AtomicInteger failureCount = new AtomicInteger();

  ExecutorService executor = ExecutorUtil.newMDCAwareCachedThreadPool(
      new SolrNamedThreadFactory("TestCircuitBreaker"));
  try {
    // Start from a clean slate so only the fake breaker decides the outcome.
    removeAllExistingCircuitBreakers();

    CircuitBreaker circuitBreaker = new FakeCPUCircuitBreaker(h.getCore().getSolrConfig());

    h.getCore().getCircuitBreakerManager().register(circuitBreaker);

    List<Future<?>> futures = new ArrayList<>();

    for (int i = 0; i < 5; i++) {
      Future<?> future = executor.submit(() -> {
        try {
          h.query(req("name:\"john smith\""));
        } catch (SolrException e) {
          assertThat(e.getMessage(), containsString("Circuit Breakers tripped"));
          failureCount.incrementAndGet();
        } catch (Exception e) {
          // Keep the original exception as the cause so the failure can be diagnosed.
          throw new RuntimeException(e.getMessage(), e);
        }
      });

      futures.add(future);
    }

    // Wait for every task; a failed assertion inside a task surfaces here via Future.get().
    for (Future<?> future : futures) {
      try {
        future.get();
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new RuntimeException(e.getMessage(), e);
      } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
      }
    }

    executor.shutdown();
    try {
      executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException(e.getMessage(), e);
    }

    assertEquals("Number of failed queries is not correct",5, failureCount.get());
  } finally {
    if (!executor.isShutdown()) {
      executor.shutdown();
    }
  }
}
public void testResponseWithCBTiming() { public void testResponseWithCBTiming() {
assertQ(req("q", "*:*", CommonParams.DEBUG_QUERY, "true"), assertQ(req("q", "*:*", CommonParams.DEBUG_QUERY, "true"),
"//str[@name='rawquerystring']='*:*'", "//str[@name='rawquerystring']='*:*'",
@ -179,7 +252,13 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
); );
} }
private class MockCircuitBreaker extends CircuitBreaker { private void removeAllExistingCircuitBreakers() {
List<CircuitBreaker> registeredCircuitBreakers = h.getCore().getCircuitBreakerManager().getRegisteredCircuitBreakers();
registeredCircuitBreakers.clear();
}
private static class MockCircuitBreaker extends MemoryCircuitBreaker {
public MockCircuitBreaker(SolrConfig solrConfig) { public MockCircuitBreaker(SolrConfig solrConfig) {
super(solrConfig); super(solrConfig);
@ -197,7 +276,7 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
} }
} }
private class FakeMemoryPressureCircuitBreaker extends MemoryCircuitBreaker { private static class FakeMemoryPressureCircuitBreaker extends MemoryCircuitBreaker {
public FakeMemoryPressureCircuitBreaker(SolrConfig solrConfig) { public FakeMemoryPressureCircuitBreaker(SolrConfig solrConfig) {
super(solrConfig); super(solrConfig);
@ -210,7 +289,7 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
} }
} }
private class BuildingUpMemoryPressureCircuitBreaker extends MemoryCircuitBreaker { private static class BuildingUpMemoryPressureCircuitBreaker extends MemoryCircuitBreaker {
private AtomicInteger count; private AtomicInteger count;
public BuildingUpMemoryPressureCircuitBreaker(SolrConfig solrConfig) { public BuildingUpMemoryPressureCircuitBreaker(SolrConfig solrConfig) {
@ -240,4 +319,15 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
return Long.MIN_VALUE; // Random number guaranteed to not trip the circuit breaker return Long.MIN_VALUE; // Random number guaranteed to not trip the circuit breaker
} }
} }
/**
 * CPU circuit breaker test double that reports a fixed, high CPU usage instead of
 * querying the operating system.
 */
private static class FakeCPUCircuitBreaker extends CPUCircuitBreaker {
  // A value large enough to trigger the circuit breaker
  private static final double SIMULATED_CPU_USAGE = 92;

  public FakeCPUCircuitBreaker(SolrConfig solrConfig) {
    super(solrConfig);
  }

  @Override
  protected double calculateLiveCPUUsage() {
    return SIMULATED_CPU_USAGE;
  }
}
} }

View File

@ -582,27 +582,24 @@
Circuit Breaker Section - This section consists of configurations for Circuit Breaker Section - This section consists of configurations for
circuit breakers circuit breakers
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --> ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
<circuitBreaker>
<!-- Enable Circuit Breakers <!-- Circuit Breakers
Circuit breakers are designed to allow stability and predictable query Circuit breakers are designed to allow stability and predictable query
execution. They prevent operations that can take down the node and cause execution. They prevent operations that can take down the node and cause
noisy neighbour issues. noisy neighbour issues.
This flag is the uber control switch which controls the activation/deactivation of all circuit This flag is the uber control switch which controls the activation/deactivation of all circuit
breakers. At the moment, the only circuit breaker (max JVM circuit breaker) does not have its breakers. If a circuit breaker wishes to be independently configurable,
own specific configuration. However, if a circuit breaker wishes to be independently configurable,
they are free to add their specific configuration but need to ensure that this flag is always they are free to add their specific configuration but need to ensure that this flag is always
respected - this should have veto over all independent configuration flags. respected - this should have veto over all independent configuration flags.
--> -->
<!-- <circuitBreakers enabled="true">
<useCircuitBreakers>true</useCircuitBreakers>
-->
<!-- Memory Circuit Breaker Threshold In Percentage <!-- Memory Circuit Breaker Configuration
Specific configuration for max JVM heap usage circuit breaker. This configuration defines the Specific configuration for max JVM heap usage circuit breaker. This configuration defines whether
threshold percentage of maximum heap allocated beyond which queries will be rejected until the the circuit breaker is enabled and the threshold percentage of maximum heap allocated beyond which queries will be rejected until the
current JVM usage goes below the threshold. The valid value range for this value is 50-95. current JVM usage goes below the threshold. The valid value range for this value is 50-95.
Consider a scenario where the max heap allocated is 4 GB and memoryCircuitBreakerThreshold is Consider a scenario where the max heap allocated is 4 GB and memoryCircuitBreakerThreshold is
@ -613,12 +610,31 @@
If you see queries getting rejected with 503 error code, check for "Circuit Breakers tripped" If you see queries getting rejected with 503 error code, check for "Circuit Breakers tripped"
in logs and the corresponding error message should tell you what transpired (if the failure in logs and the corresponding error message should tell you what transpired (if the failure
was caused by tripped circuit breakers). was caused by tripped circuit breakers).
If, at any point, the current JVM heap usage goes above 3 GB, queries will be rejected until the heap usage goes below 3 GB again.
If you see queries getting rejected with 503 error code, check for "Circuit Breakers tripped"
in logs and the corresponding error message should tell you what transpired (if the failure
was caused by tripped circuit breakers).
--> -->
<!-- <!--
<memoryCircuitBreakerThresholdPct>100</memoryCircuitBreakerThresholdPct> <memBreaker enabled="true" threshold="75"/>
--> -->
</circuitBreaker> <!-- CPU Circuit Breaker Configuration
Specific configuration for CPU utilization based circuit breaker. This configuration defines whether the circuit breaker is enabled
and the average load over the last minute at which the circuit breaker should start rejecting queries.
Consider a scenario where the threshold is defined as 75. Queries will be rejected whenever the system load average over
the last minute is at or above 75, and will be accepted again once the load average drops below that value.
-->
<!--
<cpuBreaker enabled="true" threshold="75"/>
-->
</circuitBreakers>
<!-- Request Dispatcher <!-- Request Dispatcher

View File

@ -32,9 +32,14 @@ will be disabled globally. Per circuit breaker configurations are specified in t
[source,xml] [source,xml]
---- ----
<useCircuitBreakers>false</useCircuitBreakers> <circuitBreakers enabled="true">
<!-- All specific configs in this section -->
</circuitBreakers>
---- ----
This flag acts as the highest authority and global controller of circuit breakers. For using specific circuit breakers, each one
needs to be individually enabled in addition to this flag being enabled.
== Currently Supported Circuit Breakers == Currently Supported Circuit Breakers
=== JVM Heap Usage Based Circuit Breaker === JVM Heap Usage Based Circuit Breaker
@ -42,26 +47,43 @@ This circuit breaker tracks JVM heap memory usage and rejects incoming search re
exceeds a configured percentage of maximum heap allocated to the JVM (-Xmx). The main configuration for this circuit breaker is exceeds a configured percentage of maximum heap allocated to the JVM (-Xmx). The main configuration for this circuit breaker is
controlling the threshold percentage at which the breaker will trip. controlling the threshold percentage at which the breaker will trip.
It does not logically make sense to have a threshold below 50% and above 95% of the max heap allocated to the JVM. Hence, the range Configuration for JVM heap usage based circuit breaker:
of valid values for this parameter is [50, 95], both inclusive.
[source,xml] [source,xml]
---- ----
<memoryCircuitBreakerThresholdPct>75</memoryCircuitBreakerThresholdPct> <memBreaker enabled="true" threshold="75"/>
---- ----
Note that this configuration will be overridden by the global circuit breaker flag -- if circuit breakers are disabled, this flag
will not help you. Also, the triggering threshold is defined as a percentage of the max heap allocated to the JVM.
It does not logically make sense to have a threshold below 50% and above 95% of the max heap allocated to the JVM. Hence, the range
of valid values for this parameter is [50, 95], both inclusive.
Consider the following example: Consider the following example:
JVM has been allocated a maximum heap of 5GB (-Xmx) and memoryCircuitBreakerThresholdPct is set to 75. In this scenario, the heap usage JVM has been allocated a maximum heap of 5GB (-Xmx) and memoryCircuitBreakerThresholdPct is set to 75. In this scenario, the heap usage
at which the circuit breaker will trip is 3.75GB. at which the circuit breaker will trip is 3.75GB.
Note that this circuit breaker is checked for each incoming search request and considers the current heap usage of the node, i.e., every search
request will get the live heap usage and compare it against the set memory threshold. The check does not impact performance,
but any performance regressions that are suspected to be caused by this feature should be reported to the dev list.
=== CPU Utilization Based Circuit Breaker
This circuit breaker tracks CPU utilization and triggers if the average CPU utilization over the last one minute
exceeds a configurable threshold. Note that the value used in computation is over the last one minute -- so a sudden
spike in traffic that goes down might still cause the circuit breaker to trigger for a short while before it resolves
and updates the value. For more details of the calculation, please see https://en.wikipedia.org/wiki/Load_(computing)
Configuration for CPU utilization based circuit breaker:
[source,xml]
----
<cpuBreaker enabled="true" threshold="20"/>
----
Note that this configuration will be overridden by the global circuit breaker flag -- if circuit breakers are disabled, this flag
will not help you. The triggering threshold is defined in units of CPU utilization.
== Performance Considerations == Performance Considerations
It is worth noting that while JVM circuit breaker does not add any noticeable overhead per query, having too many It is worth noting that while JVM or CPU circuit breakers do not add any noticeable overhead per query, having too many
circuit breakers checked for a single request can cause a performance overhead. circuit breakers checked for a single request can cause a performance overhead.
In addition, it is a good practice to exponentially back off while retrying requests on a busy node. In addition, it is a good practice to exponentially back off while retrying requests on a busy node.