mirror of https://github.com/apache/lucene.git
SOLR-14615: Implement CPU Utilization Based Circuit Breaker (#1737)
This commit introduces CPU based circuit breaker. This circuit breaker tracks the average CPU load per minute and triggers if the value exceeds a configurable value. This commit also adds a specific control flag for Memory Circuit Breaker to allow enabling/disabling the same.
This commit is contained in:
parent
fa878eb5b8
commit
2f37f40171
|
@ -42,6 +42,8 @@ Improvements
|
||||||
|
|
||||||
* SOLR-13528 Rate Limiting in Solr (Atri Sharma, Mike Drob)
|
* SOLR-13528 Rate Limiting in Solr (Atri Sharma, Mike Drob)
|
||||||
|
|
||||||
|
* SOLR-14615: CPU Utilization Based Circuit Breaker (Atri Sharma)
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
----------------------
|
----------------------
|
||||||
* SOLR-14656: Autoscaling framework removed (Ishan Chattopadhyaya, noble, Ilan Ginzburg)
|
* SOLR-14656: Autoscaling framework removed (Ishan Chattopadhyaya, noble, Ilan Ginzburg)
|
||||||
|
|
|
@ -228,10 +228,13 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
|
||||||
queryResultMaxDocsCached = getInt("query/queryResultMaxDocsCached", Integer.MAX_VALUE);
|
queryResultMaxDocsCached = getInt("query/queryResultMaxDocsCached", Integer.MAX_VALUE);
|
||||||
enableLazyFieldLoading = getBool("query/enableLazyFieldLoading", false);
|
enableLazyFieldLoading = getBool("query/enableLazyFieldLoading", false);
|
||||||
|
|
||||||
useCircuitBreakers = getBool("circuitBreaker/useCircuitBreakers", false);
|
useCircuitBreakers = getBool("circuitBreakers/@enabled", false);
|
||||||
memoryCircuitBreakerThresholdPct = getInt("circuitBreaker/memoryCircuitBreakerThresholdPct", 95);
|
cpuCBEnabled = getBool("circuitBreakers/cpuBreaker/@enabled", false);
|
||||||
|
memCBEnabled = getBool("circuitBreakers/memBreaker/@enabled", false);
|
||||||
|
memCBThreshold = getInt("circuitBreakers/memBreaker/@threshold", 95);
|
||||||
|
cpuCBThreshold = getInt("circuitBreakers/cpuBreaker/@threshold", 95);
|
||||||
|
|
||||||
validateMemoryBreakerThreshold();
|
validateCircuitBreakerThresholds();
|
||||||
|
|
||||||
filterCacheConfig = CacheConfig.getConfig(this, "query/filterCache");
|
filterCacheConfig = CacheConfig.getConfig(this, "query/filterCache");
|
||||||
queryResultCacheConfig = CacheConfig.getConfig(this, "query/queryResultCache");
|
queryResultCacheConfig = CacheConfig.getConfig(this, "query/queryResultCache");
|
||||||
|
@ -530,7 +533,10 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
|
||||||
|
|
||||||
// Circuit Breaker Configuration
|
// Circuit Breaker Configuration
|
||||||
public final boolean useCircuitBreakers;
|
public final boolean useCircuitBreakers;
|
||||||
public final int memoryCircuitBreakerThresholdPct;
|
public final int memCBThreshold;
|
||||||
|
public final boolean memCBEnabled;
|
||||||
|
public final boolean cpuCBEnabled;
|
||||||
|
public final int cpuCBThreshold;
|
||||||
|
|
||||||
// IndexConfig settings
|
// IndexConfig settings
|
||||||
public final SolrIndexConfig indexConfig;
|
public final SolrIndexConfig indexConfig;
|
||||||
|
@ -811,13 +817,15 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
|
||||||
loader.reloadLuceneSPI();
|
loader.reloadLuceneSPI();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void validateMemoryBreakerThreshold() {
|
private void validateCircuitBreakerThresholds() {
|
||||||
if (useCircuitBreakers) {
|
if (useCircuitBreakers) {
|
||||||
if (memoryCircuitBreakerThresholdPct > 95 || memoryCircuitBreakerThresholdPct < 50) {
|
if (memCBEnabled) {
|
||||||
|
if (memCBThreshold > 95 || memCBThreshold < 50) {
|
||||||
throw new IllegalArgumentException("Valid value range of memoryCircuitBreakerThresholdPct is 50 - 95");
|
throw new IllegalArgumentException("Valid value range of memoryCircuitBreakerThresholdPct is 50 - 95");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public int getMultipartUploadLimitKB() {
|
public int getMultipartUploadLimitKB() {
|
||||||
return multipartUploadLimitKB;
|
return multipartUploadLimitKB;
|
||||||
|
@ -889,7 +897,10 @@ public class SolrConfig extends XmlConfigFile implements MapSerializable {
|
||||||
m.put("enableLazyFieldLoading", enableLazyFieldLoading);
|
m.put("enableLazyFieldLoading", enableLazyFieldLoading);
|
||||||
m.put("maxBooleanClauses", booleanQueryMaxClauseCount);
|
m.put("maxBooleanClauses", booleanQueryMaxClauseCount);
|
||||||
m.put("useCircuitBreakers", useCircuitBreakers);
|
m.put("useCircuitBreakers", useCircuitBreakers);
|
||||||
m.put("memoryCircuitBreakerThresholdPct", memoryCircuitBreakerThresholdPct);
|
m.put("cpuCircuitBreakerEnabled", cpuCBEnabled);
|
||||||
|
m.put("memoryCircuitBreakerEnabled", memCBEnabled);
|
||||||
|
m.put("memoryCircuitBreakerThresholdPct", memCBThreshold);
|
||||||
|
m.put("cpuCircuitBreakerThreshold", cpuCBThreshold);
|
||||||
for (SolrPluginInfo plugin : plugins) {
|
for (SolrPluginInfo plugin : plugins) {
|
||||||
List<PluginInfo> infos = getPluginInfos(plugin.clazz.getName());
|
List<PluginInfo> infos = getPluginInfos(plugin.clazz.getName());
|
||||||
if (infos == null || infos.isEmpty()) continue;
|
if (infos == null || infos.isEmpty()) continue;
|
||||||
|
|
|
@ -0,0 +1,116 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.util.circuitbreaker;
|
||||||
|
|
||||||
|
import java.lang.invoke.MethodHandles;
|
||||||
|
import java.lang.management.ManagementFactory;
|
||||||
|
import java.lang.management.OperatingSystemMXBean;
|
||||||
|
|
||||||
|
import org.apache.solr.core.SolrConfig;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>
|
||||||
|
* Tracks current CPU usage and triggers if the specified threshold is breached.
|
||||||
|
*
|
||||||
|
* This circuit breaker gets the average CPU load over the last minute and uses
|
||||||
|
* that data to take a decision. We depend on OperatingSystemMXBean which does
|
||||||
|
* not allow a configurable interval of collection of data.
|
||||||
|
* //TODO: Use Codahale Meter to calculate the value locally.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* The configuration to define which mode to use and the trigger threshold are defined in
|
||||||
|
* solrconfig.xml
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
public class CPUCircuitBreaker extends CircuitBreaker {
|
||||||
|
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||||
|
private static final OperatingSystemMXBean operatingSystemMXBean = ManagementFactory.getOperatingSystemMXBean();
|
||||||
|
|
||||||
|
private final boolean enabled;
|
||||||
|
private final double cpuUsageThreshold;
|
||||||
|
|
||||||
|
// Assumption -- the value of these parameters will be set correctly before invoking getDebugInfo()
|
||||||
|
private static final ThreadLocal<Double> seenCPUUsage = ThreadLocal.withInitial(() -> 0.0);
|
||||||
|
|
||||||
|
private static final ThreadLocal<Double> allowedCPUUsage = ThreadLocal.withInitial(() -> 0.0);
|
||||||
|
|
||||||
|
public CPUCircuitBreaker(SolrConfig solrConfig) {
|
||||||
|
super(solrConfig);
|
||||||
|
|
||||||
|
this.enabled = solrConfig.cpuCBEnabled;
|
||||||
|
this.cpuUsageThreshold = solrConfig.cpuCBThreshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isTripped() {
|
||||||
|
if (!isEnabled()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!enabled) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
double localAllowedCPUUsage = getCpuUsageThreshold();
|
||||||
|
double localSeenCPUUsage = calculateLiveCPUUsage();
|
||||||
|
|
||||||
|
if (localSeenCPUUsage < 0) {
|
||||||
|
if (log.isWarnEnabled()) {
|
||||||
|
String msg = "Unable to get CPU usage";
|
||||||
|
|
||||||
|
log.warn(msg);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
allowedCPUUsage.set(localAllowedCPUUsage);
|
||||||
|
|
||||||
|
seenCPUUsage.set(localSeenCPUUsage);
|
||||||
|
|
||||||
|
return (localSeenCPUUsage >= localAllowedCPUUsage);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getDebugInfo() {
|
||||||
|
|
||||||
|
if (seenCPUUsage.get() == 0.0 || seenCPUUsage.get() == 0.0) {
|
||||||
|
log.warn("CPUCircuitBreaker's monitored values (seenCPUUSage, allowedCPUUsage) not set");
|
||||||
|
}
|
||||||
|
|
||||||
|
return "seenCPUUSage=" + seenCPUUsage.get() + " allowedCPUUsage=" + allowedCPUUsage.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getErrorMessage() {
|
||||||
|
return "CPU Circuit Breaker triggered as seen CPU usage is above allowed threshold." +
|
||||||
|
"Seen CPU usage " + seenCPUUsage.get() + " and allocated threshold " +
|
||||||
|
allowedCPUUsage.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
public double getCpuUsageThreshold() {
|
||||||
|
return cpuUsageThreshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected double calculateLiveCPUUsage() {
|
||||||
|
return operatingSystemMXBean.getSystemLoadAverage();
|
||||||
|
}
|
||||||
|
}
|
|
@ -27,6 +27,9 @@ import org.apache.solr.core.SolrConfig;
|
||||||
* 2. Use the circuit breaker in a specific code path(s).
|
* 2. Use the circuit breaker in a specific code path(s).
|
||||||
*
|
*
|
||||||
* TODO: This class should be grown as the scope of circuit breakers grow.
|
* TODO: This class should be grown as the scope of circuit breakers grow.
|
||||||
|
*
|
||||||
|
* The class and its derivatives raise a standard exception when a circuit breaker is triggered.
|
||||||
|
* We should make it into a dedicated exception (https://issues.apache.org/jira/browse/SOLR-14755)
|
||||||
* </p>
|
* </p>
|
||||||
*/
|
*/
|
||||||
public abstract class CircuitBreaker {
|
public abstract class CircuitBreaker {
|
||||||
|
@ -53,4 +56,9 @@ public abstract class CircuitBreaker {
|
||||||
* Get debug useful info.
|
* Get debug useful info.
|
||||||
*/
|
*/
|
||||||
public abstract String getDebugInfo();
|
public abstract String getDebugInfo();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get error message when the circuit breaker triggers
|
||||||
|
*/
|
||||||
|
public abstract String getErrorMessage();
|
||||||
}
|
}
|
|
@ -20,6 +20,7 @@ package org.apache.solr.util.circuitbreaker;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import org.apache.solr.core.SolrConfig;
|
import org.apache.solr.core.SolrConfig;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -107,9 +108,7 @@ public class CircuitBreakerManager {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
|
|
||||||
for (CircuitBreaker circuitBreaker : circuitBreakerList) {
|
for (CircuitBreaker circuitBreaker : circuitBreakerList) {
|
||||||
sb.append(circuitBreaker.getClass().getName());
|
sb.append(circuitBreaker.getErrorMessage());
|
||||||
sb.append(" ");
|
|
||||||
sb.append(circuitBreaker.getDebugInfo());
|
|
||||||
sb.append("\n");
|
sb.append("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -127,8 +126,16 @@ public class CircuitBreakerManager {
|
||||||
|
|
||||||
// Install the default circuit breakers
|
// Install the default circuit breakers
|
||||||
CircuitBreaker memoryCircuitBreaker = new MemoryCircuitBreaker(solrConfig);
|
CircuitBreaker memoryCircuitBreaker = new MemoryCircuitBreaker(solrConfig);
|
||||||
|
CircuitBreaker cpuCircuitBreaker = new CPUCircuitBreaker(solrConfig);
|
||||||
|
|
||||||
circuitBreakerManager.register(memoryCircuitBreaker);
|
circuitBreakerManager.register(memoryCircuitBreaker);
|
||||||
|
circuitBreakerManager.register(cpuCircuitBreaker);
|
||||||
|
|
||||||
return circuitBreakerManager;
|
return circuitBreakerManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public List<CircuitBreaker> getRegisteredCircuitBreakers() {
|
||||||
|
return circuitBreakerList;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,22 +43,25 @@ public class MemoryCircuitBreaker extends CircuitBreaker {
|
||||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||||
private static final MemoryMXBean MEMORY_MX_BEAN = ManagementFactory.getMemoryMXBean();
|
private static final MemoryMXBean MEMORY_MX_BEAN = ManagementFactory.getMemoryMXBean();
|
||||||
|
|
||||||
|
private boolean enabled;
|
||||||
private final long heapMemoryThreshold;
|
private final long heapMemoryThreshold;
|
||||||
|
|
||||||
// Assumption -- the value of these parameters will be set correctly before invoking getDebugInfo()
|
// Assumption -- the value of these parameters will be set correctly before invoking getDebugInfo()
|
||||||
private final ThreadLocal<Long> seenMemory = new ThreadLocal<>();
|
private static final ThreadLocal<Long> seenMemory = ThreadLocal.withInitial(() -> 0L);
|
||||||
private final ThreadLocal<Long> allowedMemory = new ThreadLocal<>();
|
private static final ThreadLocal<Long> allowedMemory = ThreadLocal.withInitial(() -> 0L);
|
||||||
|
|
||||||
public MemoryCircuitBreaker(SolrConfig solrConfig) {
|
public MemoryCircuitBreaker(SolrConfig solrConfig) {
|
||||||
super(solrConfig);
|
super(solrConfig);
|
||||||
|
|
||||||
|
this.enabled = solrConfig.memCBEnabled;
|
||||||
|
|
||||||
long currentMaxHeap = MEMORY_MX_BEAN.getHeapMemoryUsage().getMax();
|
long currentMaxHeap = MEMORY_MX_BEAN.getHeapMemoryUsage().getMax();
|
||||||
|
|
||||||
if (currentMaxHeap <= 0) {
|
if (currentMaxHeap <= 0) {
|
||||||
throw new IllegalArgumentException("Invalid JVM state for the max heap usage");
|
throw new IllegalArgumentException("Invalid JVM state for the max heap usage");
|
||||||
}
|
}
|
||||||
|
|
||||||
int thresholdValueInPercentage = solrConfig.memoryCircuitBreakerThresholdPct;
|
int thresholdValueInPercentage = solrConfig.memCBThreshold;
|
||||||
double thresholdInFraction = thresholdValueInPercentage / (double) 100;
|
double thresholdInFraction = thresholdValueInPercentage / (double) 100;
|
||||||
heapMemoryThreshold = (long) (currentMaxHeap * thresholdInFraction);
|
heapMemoryThreshold = (long) (currentMaxHeap * thresholdInFraction);
|
||||||
|
|
||||||
|
@ -76,6 +79,10 @@ public class MemoryCircuitBreaker extends CircuitBreaker {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!enabled) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
long localAllowedMemory = getCurrentMemoryThreshold();
|
long localAllowedMemory = getCurrentMemoryThreshold();
|
||||||
long localSeenMemory = calculateLiveMemoryUsage();
|
long localSeenMemory = calculateLiveMemoryUsage();
|
||||||
|
|
||||||
|
@ -95,6 +102,13 @@ public class MemoryCircuitBreaker extends CircuitBreaker {
|
||||||
return "seenMemory=" + seenMemory.get() + " allowedMemory=" + allowedMemory.get();
|
return "seenMemory=" + seenMemory.get() + " allowedMemory=" + allowedMemory.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getErrorMessage() {
|
||||||
|
return "Memory Circuit Breaker triggered as JVM heap usage values are greater than allocated threshold." +
|
||||||
|
"Seen JVM heap memory usage " + seenMemory.get() + " and allocated threshold " +
|
||||||
|
allowedMemory.get();
|
||||||
|
}
|
||||||
|
|
||||||
private long getCurrentMemoryThreshold() {
|
private long getCurrentMemoryThreshold() {
|
||||||
return heapMemoryThreshold;
|
return heapMemoryThreshold;
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,8 +55,17 @@
|
||||||
"queryResultMaxDocsCached":1,
|
"queryResultMaxDocsCached":1,
|
||||||
"enableLazyFieldLoading":1,
|
"enableLazyFieldLoading":1,
|
||||||
"boolTofilterOptimizer":1,
|
"boolTofilterOptimizer":1,
|
||||||
"useCircuitBreakers":10,
|
"circuitBreakers":{
|
||||||
"memoryCircuitBreakerThresholdPct":20,
|
"enabled":10,
|
||||||
|
"memBreaker":{
|
||||||
|
"enabled":10,
|
||||||
|
"threshold":20
|
||||||
|
},
|
||||||
|
"cpuBreaker":{
|
||||||
|
"enabled":10,
|
||||||
|
"threshold":20
|
||||||
|
}
|
||||||
|
},
|
||||||
"maxBooleanClauses":1},
|
"maxBooleanClauses":1},
|
||||||
"jmx":{
|
"jmx":{
|
||||||
"agentId":0,
|
"agentId":0,
|
||||||
|
|
|
@ -78,13 +78,10 @@
|
||||||
|
|
||||||
</query>
|
</query>
|
||||||
|
|
||||||
<circuitBreaker>
|
<circuitBreakers enabled="true">
|
||||||
|
<cpuBreaker enabled="true" threshold="75"/>
|
||||||
<useCircuitBreakers>true</useCircuitBreakers>
|
<memBreaker enabled="true" threshold="75"/>
|
||||||
|
</circuitBreakers>
|
||||||
<memoryCircuitBreakerThresholdPct>75</memoryCircuitBreakerThresholdPct>
|
|
||||||
|
|
||||||
</circuitBreaker>
|
|
||||||
|
|
||||||
<initParams path="/select">
|
<initParams path="/select">
|
||||||
<lst name="defaults">
|
<lst name="defaults">
|
||||||
|
|
|
@ -267,7 +267,8 @@ public class SolrCoreTest extends SolrTestCaseJ4 {
|
||||||
assertEquals("wrong config for enableLazyFieldLoading", true, solrConfig.enableLazyFieldLoading);
|
assertEquals("wrong config for enableLazyFieldLoading", true, solrConfig.enableLazyFieldLoading);
|
||||||
assertEquals("wrong config for queryResultWindowSize", 10, solrConfig.queryResultWindowSize);
|
assertEquals("wrong config for queryResultWindowSize", 10, solrConfig.queryResultWindowSize);
|
||||||
assertEquals("wrong config for useCircuitBreakers", false, solrConfig.useCircuitBreakers);
|
assertEquals("wrong config for useCircuitBreakers", false, solrConfig.useCircuitBreakers);
|
||||||
assertEquals("wrong config for memoryCircuitBreakerThresholdPct", 95, solrConfig.memoryCircuitBreakerThresholdPct);
|
assertEquals("wrong config for memoryCircuitBreakerThresholdPct", 95, solrConfig.memCBThreshold);
|
||||||
|
assertEquals("wrong config for cpuCircuitBreakerThreshold", 95, solrConfig.cpuCBThreshold);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -18,8 +18,11 @@
|
||||||
package org.apache.solr.util;
|
package org.apache.solr.util;
|
||||||
|
|
||||||
import java.lang.invoke.MethodHandles;
|
import java.lang.invoke.MethodHandles;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
|
@ -31,6 +34,7 @@ import org.apache.solr.common.util.ExecutorUtil;
|
||||||
import org.apache.solr.common.util.SolrNamedThreadFactory;
|
import org.apache.solr.common.util.SolrNamedThreadFactory;
|
||||||
import org.apache.solr.core.SolrConfig;
|
import org.apache.solr.core.SolrConfig;
|
||||||
import org.apache.solr.search.QueryParsing;
|
import org.apache.solr.search.QueryParsing;
|
||||||
|
import org.apache.solr.util.circuitbreaker.CPUCircuitBreaker;
|
||||||
import org.apache.solr.util.circuitbreaker.CircuitBreaker;
|
import org.apache.solr.util.circuitbreaker.CircuitBreaker;
|
||||||
import org.apache.solr.util.circuitbreaker.MemoryCircuitBreaker;
|
import org.apache.solr.util.circuitbreaker.MemoryCircuitBreaker;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
|
@ -41,6 +45,9 @@ import org.junit.rules.TestRule;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import static org.hamcrest.CoreMatchers.containsString;
|
||||||
|
|
||||||
|
@SuppressWarnings({"rawtypes"})
|
||||||
public class TestCircuitBreaker extends SolrTestCaseJ4 {
|
public class TestCircuitBreaker extends SolrTestCaseJ4 {
|
||||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||||
private final static int NUM_DOCS = 20;
|
private final static int NUM_DOCS = 20;
|
||||||
|
@ -84,6 +91,8 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
|
||||||
args.put(QueryParsing.DEFTYPE, CircuitBreaker.NAME);
|
args.put(QueryParsing.DEFTYPE, CircuitBreaker.NAME);
|
||||||
args.put(CommonParams.FL, "id");
|
args.put(CommonParams.FL, "id");
|
||||||
|
|
||||||
|
removeAllExistingCircuitBreakers();
|
||||||
|
|
||||||
CircuitBreaker circuitBreaker = new MockCircuitBreaker(h.getCore().getSolrConfig());
|
CircuitBreaker circuitBreaker = new MockCircuitBreaker(h.getCore().getSolrConfig());
|
||||||
|
|
||||||
h.getCore().getCircuitBreakerManager().register(circuitBreaker);
|
h.getCore().getCircuitBreakerManager().register(circuitBreaker);
|
||||||
|
@ -99,6 +108,8 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
|
||||||
args.put(QueryParsing.DEFTYPE, CircuitBreaker.NAME);
|
args.put(QueryParsing.DEFTYPE, CircuitBreaker.NAME);
|
||||||
args.put(CommonParams.FL, "id");
|
args.put(CommonParams.FL, "id");
|
||||||
|
|
||||||
|
removeAllExistingCircuitBreakers();
|
||||||
|
|
||||||
CircuitBreaker circuitBreaker = new FakeMemoryPressureCircuitBreaker(h.getCore().getSolrConfig());
|
CircuitBreaker circuitBreaker = new FakeMemoryPressureCircuitBreaker(h.getCore().getSolrConfig());
|
||||||
|
|
||||||
h.getCore().getCircuitBreakerManager().register(circuitBreaker);
|
h.getCore().getCircuitBreakerManager().register(circuitBreaker);
|
||||||
|
@ -119,33 +130,42 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
|
||||||
AtomicInteger failureCount = new AtomicInteger();
|
AtomicInteger failureCount = new AtomicInteger();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
removeAllExistingCircuitBreakers();
|
||||||
|
|
||||||
CircuitBreaker circuitBreaker = new BuildingUpMemoryPressureCircuitBreaker(h.getCore().getSolrConfig());
|
CircuitBreaker circuitBreaker = new BuildingUpMemoryPressureCircuitBreaker(h.getCore().getSolrConfig());
|
||||||
|
|
||||||
h.getCore().getCircuitBreakerManager().register(circuitBreaker);
|
h.getCore().getCircuitBreakerManager().register(circuitBreaker);
|
||||||
|
|
||||||
|
List<Future<?>> futures = new ArrayList<>();
|
||||||
|
|
||||||
for (int i = 0; i < 5; i++) {
|
for (int i = 0; i < 5; i++) {
|
||||||
executor.submit(() -> {
|
Future<?> future = executor.submit(() -> {
|
||||||
try {
|
try {
|
||||||
h.query(req("name:\"john smith\""));
|
h.query(req("name:\"john smith\""));
|
||||||
} catch (SolrException e) {
|
} catch (SolrException e) {
|
||||||
if (!e.getMessage().startsWith("Circuit Breakers tripped")) {
|
assertThat(e.getMessage(), containsString("Circuit Breakers tripped"));
|
||||||
if (log.isInfoEnabled()) {
|
|
||||||
String logMessage = "Expected error message for testBuildingMemoryPressure was not received. Error message " + e.getMessage();
|
|
||||||
log.info(logMessage);
|
|
||||||
}
|
|
||||||
throw new RuntimeException("Expected error message was not received. Error message " + e.getMessage());
|
|
||||||
}
|
|
||||||
failureCount.incrementAndGet();
|
failureCount.incrementAndGet();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new RuntimeException(e.getMessage());
|
throw new RuntimeException(e.getMessage());
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
futures.add(future);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Future<?> future : futures) {
|
||||||
|
try {
|
||||||
|
future.get();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e.getMessage());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
executor.shutdown();
|
executor.shutdown();
|
||||||
try {
|
try {
|
||||||
executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
|
executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
throw new RuntimeException(e.getMessage());
|
throw new RuntimeException(e.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -157,6 +177,59 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testFakeCPUCircuitBreaker() {
|
||||||
|
AtomicInteger failureCount = new AtomicInteger();
|
||||||
|
|
||||||
|
ExecutorService executor = ExecutorUtil.newMDCAwareCachedThreadPool(
|
||||||
|
new SolrNamedThreadFactory("TestCircuitBreaker"));
|
||||||
|
try {
|
||||||
|
removeAllExistingCircuitBreakers();
|
||||||
|
|
||||||
|
CircuitBreaker circuitBreaker = new FakeCPUCircuitBreaker(h.getCore().getSolrConfig());
|
||||||
|
|
||||||
|
h.getCore().getCircuitBreakerManager().register(circuitBreaker);
|
||||||
|
|
||||||
|
List<Future<?>> futures = new ArrayList<>();
|
||||||
|
|
||||||
|
for (int i = 0; i < 5; i++) {
|
||||||
|
Future<?> future = executor.submit(() -> {
|
||||||
|
try {
|
||||||
|
h.query(req("name:\"john smith\""));
|
||||||
|
} catch (SolrException e) {
|
||||||
|
assertThat(e.getMessage(), containsString("Circuit Breakers tripped"));
|
||||||
|
failureCount.incrementAndGet();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e.getMessage());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
futures.add(future);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (Future<?> future : futures) {
|
||||||
|
try {
|
||||||
|
future.get();
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new RuntimeException(e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
executor.shutdown();
|
||||||
|
try {
|
||||||
|
executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
throw new RuntimeException(e.getMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
assertEquals("Number of failed queries is not correct",5, failureCount.get());
|
||||||
|
} finally {
|
||||||
|
if (!executor.isShutdown()) {
|
||||||
|
executor.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testResponseWithCBTiming() {
|
public void testResponseWithCBTiming() {
|
||||||
assertQ(req("q", "*:*", CommonParams.DEBUG_QUERY, "true"),
|
assertQ(req("q", "*:*", CommonParams.DEBUG_QUERY, "true"),
|
||||||
"//str[@name='rawquerystring']='*:*'",
|
"//str[@name='rawquerystring']='*:*'",
|
||||||
|
@ -179,7 +252,13 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private class MockCircuitBreaker extends CircuitBreaker {
|
private void removeAllExistingCircuitBreakers() {
|
||||||
|
List<CircuitBreaker> registeredCircuitBreakers = h.getCore().getCircuitBreakerManager().getRegisteredCircuitBreakers();
|
||||||
|
|
||||||
|
registeredCircuitBreakers.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class MockCircuitBreaker extends MemoryCircuitBreaker {
|
||||||
|
|
||||||
public MockCircuitBreaker(SolrConfig solrConfig) {
|
public MockCircuitBreaker(SolrConfig solrConfig) {
|
||||||
super(solrConfig);
|
super(solrConfig);
|
||||||
|
@ -197,7 +276,7 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class FakeMemoryPressureCircuitBreaker extends MemoryCircuitBreaker {
|
private static class FakeMemoryPressureCircuitBreaker extends MemoryCircuitBreaker {
|
||||||
|
|
||||||
public FakeMemoryPressureCircuitBreaker(SolrConfig solrConfig) {
|
public FakeMemoryPressureCircuitBreaker(SolrConfig solrConfig) {
|
||||||
super(solrConfig);
|
super(solrConfig);
|
||||||
|
@ -210,7 +289,7 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class BuildingUpMemoryPressureCircuitBreaker extends MemoryCircuitBreaker {
|
private static class BuildingUpMemoryPressureCircuitBreaker extends MemoryCircuitBreaker {
|
||||||
private AtomicInteger count;
|
private AtomicInteger count;
|
||||||
|
|
||||||
public BuildingUpMemoryPressureCircuitBreaker(SolrConfig solrConfig) {
|
public BuildingUpMemoryPressureCircuitBreaker(SolrConfig solrConfig) {
|
||||||
|
@ -240,4 +319,15 @@ public class TestCircuitBreaker extends SolrTestCaseJ4 {
|
||||||
return Long.MIN_VALUE; // Random number guaranteed to not trip the circuit breaker
|
return Long.MIN_VALUE; // Random number guaranteed to not trip the circuit breaker
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class FakeCPUCircuitBreaker extends CPUCircuitBreaker {
|
||||||
|
public FakeCPUCircuitBreaker(SolrConfig solrConfig) {
|
||||||
|
super(solrConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected double calculateLiveCPUUsage() {
|
||||||
|
return 92; // Return a value large enough to trigger the circuit breaker
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -582,27 +582,24 @@
|
||||||
Circuit Breaker Section - This section consists of configurations for
|
Circuit Breaker Section - This section consists of configurations for
|
||||||
circuit breakers
|
circuit breakers
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -->
|
||||||
<circuitBreaker>
|
|
||||||
<!-- Enable Circuit Breakers
|
<!-- Circuit Breakers
|
||||||
|
|
||||||
Circuit breakers are designed to allow stability and predictable query
|
Circuit breakers are designed to allow stability and predictable query
|
||||||
execution. They prevent operations that can take down the node and cause
|
execution. They prevent operations that can take down the node and cause
|
||||||
noisy neighbour issues.
|
noisy neighbour issues.
|
||||||
|
|
||||||
This flag is the uber control switch which controls the activation/deactivation of all circuit
|
This flag is the uber control switch which controls the activation/deactivation of all circuit
|
||||||
breakers. At the moment, the only circuit breaker (max JVM circuit breaker) does not have its
|
breakers. If a circuit breaker wishes to be independently configurable,
|
||||||
own specific configuration. However, if a circuit breaker wishes to be independently configurable,
|
|
||||||
they are free to add their specific configuration but need to ensure that this flag is always
|
they are free to add their specific configuration but need to ensure that this flag is always
|
||||||
respected - this should have veto over all independent configuration flags.
|
respected - this should have veto over all independent configuration flags.
|
||||||
-->
|
-->
|
||||||
<!--
|
<circuitBreakers enabled="true">
|
||||||
<useCircuitBreakers>true</useCircuitBreakers>
|
|
||||||
-->
|
|
||||||
|
|
||||||
<!-- Memory Circuit Breaker Threshold In Percentage
|
<!-- Memory Circuit Breaker Configuration
|
||||||
|
|
||||||
Specific configuration for max JVM heap usage circuit breaker. This configuration defines the
|
Specific configuration for max JVM heap usage circuit breaker. This configuration defines whether
|
||||||
threshold percentage of maximum heap allocated beyond which queries will be rejected until the
|
the circuit breaker is enabled and the threshold percentage of maximum heap allocated beyond which queries will be rejected until the
|
||||||
current JVM usage goes below the threshold. The valid value range for this value is 50-95.
|
current JVM usage goes below the threshold. The valid value range for this value is 50-95.
|
||||||
|
|
||||||
Consider a scenario where the max heap allocated is 4 GB and memoryCircuitBreakerThreshold is
|
Consider a scenario where the max heap allocated is 4 GB and memoryCircuitBreakerThreshold is
|
||||||
|
@ -613,12 +610,31 @@
|
||||||
If you see queries getting rejected with 503 error code, check for "Circuit Breakers tripped"
|
If you see queries getting rejected with 503 error code, check for "Circuit Breakers tripped"
|
||||||
in logs and the corresponding error message should tell you what transpired (if the failure
|
in logs and the corresponding error message should tell you what transpired (if the failure
|
||||||
was caused by tripped circuit breakers).
|
was caused by tripped circuit breakers).
|
||||||
|
|
||||||
|
If, at any point, the current JVM heap usage goes above 3 GB, queries will be rejected until the heap usage goes below 3 GB again.
|
||||||
|
If you see queries getting rejected with 503 error code, check for "Circuit Breakers tripped"
|
||||||
|
in logs and the corresponding error message should tell you what transpired (if the failure
|
||||||
|
was caused by tripped circuit breakers).
|
||||||
-->
|
-->
|
||||||
<!--
|
<!--
|
||||||
<memoryCircuitBreakerThresholdPct>100</memoryCircuitBreakerThresholdPct>
|
<memBreaker enabled="true" threshold="75"/>
|
||||||
-->
|
-->
|
||||||
|
|
||||||
</circuitBreaker>
|
<!-- CPU Circuit Breaker Configuration
|
||||||
|
|
||||||
|
Specific configuration for CPU utilization based circuit breaker. This configuration defines whether the circuit breaker is enabled
|
||||||
|
and the average load over the last minute at which the circuit breaker should start rejecting queries.
|
||||||
|
|
||||||
|
Consider a scenario where the max heap allocated is 4 GB and memoryCircuitBreakerThreshold is
|
||||||
|
defined as 75. Threshold JVM usage will be 4 * 0.75 = 3 GB. Its generally a good idea to keep this value between 75 - 80% of maximum heap
|
||||||
|
allocated.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<!--
|
||||||
|
<cpuBreaker enabled="true" threshold="75"/>
|
||||||
|
-->
|
||||||
|
|
||||||
|
</circuitBreakers>
|
||||||
|
|
||||||
|
|
||||||
<!-- Request Dispatcher
|
<!-- Request Dispatcher
|
||||||
|
|
|
@ -32,9 +32,14 @@ will be disabled globally. Per circuit breaker configurations are specified in t
|
||||||
|
|
||||||
[source,xml]
|
[source,xml]
|
||||||
----
|
----
|
||||||
<useCircuitBreakers>false</useCircuitBreakers>
|
<circuitBreakers enabled="true">
|
||||||
|
<!-- All specific configs in this section -->
|
||||||
|
</circuitBreakers>
|
||||||
----
|
----
|
||||||
|
|
||||||
|
This flag acts as the highest authority and global controller of circuit breakers. For using specific circuit breakers, each one
|
||||||
|
needs to be individually enabled in addition to this flag being enabled.
|
||||||
|
|
||||||
== Currently Supported Circuit Breakers
|
== Currently Supported Circuit Breakers
|
||||||
|
|
||||||
=== JVM Heap Usage Based Circuit Breaker
|
=== JVM Heap Usage Based Circuit Breaker
|
||||||
|
@ -42,26 +47,43 @@ This circuit breaker tracks JVM heap memory usage and rejects incoming search re
|
||||||
exceeds a configured percentage of maximum heap allocated to the JVM (-Xmx). The main configuration for this circuit breaker is
|
exceeds a configured percentage of maximum heap allocated to the JVM (-Xmx). The main configuration for this circuit breaker is
|
||||||
controlling the threshold percentage at which the breaker will trip.
|
controlling the threshold percentage at which the breaker will trip.
|
||||||
|
|
||||||
It does not logically make sense to have a threshold below 50% and above 95% of the max heap allocated to the JVM. Hence, the range
|
Configuration for JVM heap usage based circuit breaker:
|
||||||
of valid values for this parameter is [50, 95], both inclusive.
|
|
||||||
|
|
||||||
[source,xml]
|
[source,xml]
|
||||||
----
|
----
|
||||||
<memoryCircuitBreakerThresholdPct>75</memoryCircuitBreakerThresholdPct>
|
<memBreaker enabled="true" threshold="75"/>
|
||||||
----
|
----
|
||||||
|
|
||||||
|
Note that this configuration will be overridden by the global circuit breaker flag -- if circuit breakers are disabled, this flag
|
||||||
|
will not help you. Also, the triggering threshold is defined as a percentage of the max heap allocated to the JVM.
|
||||||
|
|
||||||
|
It does not logically make sense to have a threshold below 50% and above 95% of the max heap allocated to the JVM. Hence, the range
|
||||||
|
of valid values for this parameter is [50, 95], both inclusive.
|
||||||
|
|
||||||
Consider the following example:
|
Consider the following example:
|
||||||
|
|
||||||
JVM has been allocated a maximum heap of 5GB (-Xmx) and memoryCircuitBreakerThresholdPct is set to 75. In this scenario, the heap usage
|
JVM has been allocated a maximum heap of 5GB (-Xmx) and memoryCircuitBreakerThresholdPct is set to 75. In this scenario, the heap usage
|
||||||
at which the circuit breaker will trip is 3.75GB.
|
at which the circuit breaker will trip is 3.75GB.
|
||||||
|
|
||||||
Note that this circuit breaker is checked for each incoming search request and considers the current heap usage of the node, i.e every search
|
|
||||||
request will get the live heap usage and compare it against the set memory threshold. The check does not impact performance,
|
|
||||||
but any performance regressions that are suspected to be caused by this feature should be reported to the dev list.
|
|
||||||
|
|
||||||
|
=== CPU Utilization Based Circuit Breaker
|
||||||
|
This circuit breaker tracks CPU utilization and triggers if the average CPU utilization over the last one minute
|
||||||
|
exceeds a configurable threshold. Note that the value used in computation is over the last one minute -- so a sudden
|
||||||
|
spike in traffic that goes down might still cause the circuit breaker to trigger for a short while before it resolves
|
||||||
|
and updates the value. For more details of the calculation, please see https://en.wikipedia.org/wiki/Load_(computing)
|
||||||
|
|
||||||
|
Configuration for CPU utilization based circuit breaker:
|
||||||
|
|
||||||
|
[source,xml]
|
||||||
|
----
|
||||||
|
<cpuBreaker enabled="true" threshold="20"/>
|
||||||
|
----
|
||||||
|
|
||||||
|
Note that this configuration will be overridden by the global circuit breaker flag -- if circuit breakers are disabled, this flag
|
||||||
|
will not help you. The triggering threshold is defined in units of CPU utilization.
|
||||||
|
|
||||||
== Performance Considerations
|
== Performance Considerations
|
||||||
It is worth noting that while JVM circuit breaker does not add any noticeable overhead per query, having too many
|
It is worth noting that while JVM or CPU circuit breakers do not add any noticeable overhead per query, having too many
|
||||||
circuit breakers checked for a single request can cause a performance overhead.
|
circuit breakers checked for a single request can cause a performance overhead.
|
||||||
|
|
||||||
In addition, it is a good practice to exponentially back off while retrying requests on a busy node.
|
In addition, it is a good practice to exponentially back off while retrying requests on a busy node.
|
||||||
|
|
Loading…
Reference in New Issue