SOLR-13790: LRUStatsCache size explosion and ineffective caching.

Author: Andrzej Bialecki
Date: 2019-10-07 19:52:22 +02:00
parent 4d0afd4aff
commit c0a446b179
27 changed files with 1122 additions and 328 deletions

View File

@ -261,6 +261,8 @@ Bug Fixes
* SOLR-13539: Fix for class-cast issues during atomic-update 'removeregex' operations. This also incorporated some
tests Tim wrote as a part of SOLR-9505. (Tim Owen via Jason Gerlowski)
* SOLR-13790: LRUStatsCache size explosion and ineffective caching. (ab)
Other Changes
----------------------

View File

@ -193,8 +193,6 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
private boolean isReloaded = false;
private StatsCache statsCache;
private final SolrConfig solrConfig;
private final SolrResourceLoader resourceLoader;
private volatile IndexSchema schema;
@ -982,8 +980,6 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
reqHandlers = new RequestHandlers(this);
reqHandlers.initHandlersFromConfig(solrConfig);
statsCache = initStatsCache();
// cause the executor to stall so firstSearcher events won't fire
// until after inform() has been called for all components.
// searchExecutor must be single-threaded for this to work
@ -1417,7 +1413,10 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
return factory.getCodec();
}
private StatsCache initStatsCache() {
/**
* Create an instance of {@link StatsCache} using configured parameters.
*/
public StatsCache createStatsCache() {
final StatsCache cache;
PluginInfo pluginInfo = solrConfig.getPluginInfo(StatsCache.class.getName());
if (pluginInfo != null && pluginInfo.className != null && pluginInfo.className.length() > 0) {
@ -1431,13 +1430,6 @@ public final class SolrCore implements SolrInfoBean, SolrMetricProducer, Closeab
return cache;
}
/**
* Get the StatsCache.
*/
public StatsCache getStatsCache() {
return statsCache;
}
/**
* Load the request processors
*/
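
For orientation: with this change the stats cache is no longer a core-level singleton exposed by getStatsCache(); each SolrIndexSearcher asks the core to build a fresh instance (see the SolrIndexSearcher hunks below), so cached global stats share the searcher's life-cycle. A minimal, illustrative sketch of the new lookup path inside a search component, assuming a SolrQueryRequest named req (this helper is not part of the commit):

// Before this change (removed above): the cache was a per-core singleton.
// StatsCache statsCache = req.getCore().getStatsCache();

// After this change: the searcher owns a StatsCache built by SolrCore.createStatsCache().
SolrIndexSearcher searcher = req.getSearcher();
StatsCache statsCache = searcher.getStatsCache();
StatsSource statsSource = statsCache.get(req);   // also counted in the cache's "lookups" metric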

View File

@ -37,7 +37,9 @@ import org.apache.solr.common.util.SuppressForbidden;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.DocList;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.facet.FacetDebugInfo;
import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.util.SolrPluginUtils;
import static org.apache.solr.common.params.CommonParams.FQ;
@ -74,7 +76,7 @@ public class DebugComponent extends SearchComponent
map.put(ResponseBuilder.STAGE_DONE, "DONE");
stages = Collections.unmodifiableMap(map);
}
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
@ -89,6 +91,9 @@ public class DebugComponent extends SearchComponent
public void process(ResponseBuilder rb) throws IOException
{
if( rb.isDebug() ) {
SolrQueryRequest req = rb.req;
StatsCache statsCache = req.getSearcher().getStatsCache();
req.getContext().put(SolrIndexSearcher.STATS_SOURCE, statsCache.get(req));
DocList results = null;
//some internal grouping requests won't have results value set
if(rb.getResults() != null) {
@ -173,6 +178,11 @@ public class DebugComponent extends SearchComponent
// Turn on debug to get explain only when retrieving fields
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0) {
sreq.purpose |= ShardRequest.PURPOSE_GET_DEBUG;
// always distribute the latest version of global stats
sreq.purpose |= ShardRequest.PURPOSE_SET_TERM_STATS;
StatsCache statsCache = rb.req.getSearcher().getStatsCache();
statsCache.sendGlobalStats(rb, sreq);
if (rb.isDebugAll()) {
sreq.params.set(CommonParams.DEBUG_QUERY, "true");
} else {

View File

@ -330,11 +330,11 @@ public class QueryComponent extends SearchComponent
return;
}
StatsCache statsCache = req.getCore().getStatsCache();
SolrIndexSearcher searcher = req.getSearcher();
StatsCache statsCache = searcher.getStatsCache();
int purpose = params.getInt(ShardParams.SHARDS_PURPOSE, ShardRequest.PURPOSE_GET_TOP_IDS);
if ((purpose & ShardRequest.PURPOSE_GET_TERM_STATS) != 0) {
SolrIndexSearcher searcher = req.getSearcher();
statsCache.returnLocalStats(rb, searcher);
return;
}
@ -686,7 +686,7 @@ public class QueryComponent extends SearchComponent
}
protected void createDistributedStats(ResponseBuilder rb) {
StatsCache cache = rb.req.getCore().getStatsCache();
StatsCache cache = rb.req.getSearcher().getStatsCache();
if ( (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES)!=0 || rb.getSortSpec().includesScore()) {
ShardRequest sreq = cache.retrieveStatsRequest(rb);
if (sreq != null) {
@ -696,7 +696,7 @@ public class QueryComponent extends SearchComponent
}
protected void updateStats(ResponseBuilder rb, ShardRequest sreq) {
StatsCache cache = rb.req.getCore().getStatsCache();
StatsCache cache = rb.req.getSearcher().getStatsCache();
cache.mergeToGlobalStats(rb.req, sreq.responses);
}
@ -776,8 +776,9 @@ public class QueryComponent extends SearchComponent
// TODO: should this really sendGlobalDfs if just includeScore?
if (shardQueryIncludeScore) {
StatsCache statsCache = rb.req.getCore().getStatsCache();
if (shardQueryIncludeScore || rb.isDebug()) {
StatsCache statsCache = rb.req.getSearcher().getStatsCache();
sreq.purpose |= ShardRequest.PURPOSE_SET_TERM_STATS;
statsCache.sendGlobalStats(rb, sreq);
}
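
Taken together, the QueryComponent changes drive the following aggregator-side exchange through the per-searcher StatsCache API. An illustrative pseudo-flow, not a literal excerpt; rb is the ResponseBuilder and sreq is the outgoing main-query ShardRequest:

StatsCache cache = rb.req.getSearcher().getStatsCache();
ShardRequest statsRequest = cache.retrieveStatsRequest(rb);  // may be null if no extra round-trip is needed
if (statsRequest != null) {
  // each shard answers the stats request via returnLocalStats(rb, searcher) ...
  cache.mergeToGlobalStats(rb.req, statsRequest.responses);  // ... and the results are merged here
}
// before sending the main query (or a debug request), attach the merged global stats:
sreq.purpose |= ShardRequest.PURPOSE_SET_TERM_STATS;
cache.sendGlobalStats(rb, sreq);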

View File

@ -166,8 +166,6 @@ public class ResponseBuilder
}
}
public GlobalCollectionStat globalCollectionStat;
public Map<Object, ShardDoc> resultIds;
// Maps uniqueKeyValue to ShardDoc, which may be used to
// determine order of the doc or uniqueKey in the final
@ -417,18 +415,6 @@ public class ResponseBuilder
this.timer = timer;
}
public static class GlobalCollectionStat {
public final long numDocs;
public final Map<String, Long> dfMap;
public GlobalCollectionStat(int numDocs, Map<String, Long> dfMap) {
this.numDocs = numDocs;
this.dfMap = dfMap;
}
}
/**
* Creates a SolrIndexSearcher.QueryCommand from this
* ResponseBuilder. TimeAllowed is left unset.

View File

@ -17,7 +17,7 @@
package org.apache.solr.metrics;
/**
* Used by objects that expose metrics through {@link SolrCoreMetricManager}.
* Used by objects that expose metrics through {@link SolrMetricManager}.
*/
public interface SolrMetricProducer {

View File

@ -140,6 +140,63 @@ public class FastLRUCache<K, V> extends SolrCacheBase implements SolrCache<K,V>,
statsList.add(new ConcurrentLRUCache.Stats());
}
statsList.add(cache.getStats());
cacheMap = new MetricsMap((detailed, map) -> {
if (cache != null) {
ConcurrentLRUCache.Stats stats = cache.getStats();
long lookups = stats.getCumulativeLookups();
long hits = stats.getCumulativeHits();
long inserts = stats.getCumulativePuts();
long evictions = stats.getCumulativeEvictions();
long idleEvictions = stats.getCumulativeIdleEvictions();
long size = stats.getCurrentSize();
long clookups = 0;
long chits = 0;
long cinserts = 0;
long cevictions = 0;
long cIdleEvictions = 0;
// NOTE: It is safe to iterate on a CopyOnWriteArrayList
for (ConcurrentLRUCache.Stats statistiscs : statsList) {
clookups += statistiscs.getCumulativeLookups();
chits += statistiscs.getCumulativeHits();
cinserts += statistiscs.getCumulativePuts();
cevictions += statistiscs.getCumulativeEvictions();
cIdleEvictions += statistiscs.getCumulativeIdleEvictions();
}
map.put(LOOKUPS_PARAM, lookups);
map.put(HITS_PARAM, hits);
map.put(HIT_RATIO_PARAM, calcHitRatio(lookups, hits));
map.put(INSERTS_PARAM, inserts);
map.put(EVICTIONS_PARAM, evictions);
map.put(SIZE_PARAM, size);
map.put("cleanupThread", cleanupThread);
map.put("idleEvictions", idleEvictions);
map.put(RAM_BYTES_USED_PARAM, ramBytesUsed());
map.put(MAX_RAM_MB_PARAM, getMaxRamMB());
map.put("warmupTime", warmupTime);
map.put("cumulative_lookups", clookups);
map.put("cumulative_hits", chits);
map.put("cumulative_hitratio", calcHitRatio(clookups, chits));
map.put("cumulative_inserts", cinserts);
map.put("cumulative_evictions", cevictions);
map.put("cumulative_idleEvictions", cIdleEvictions);
if (detailed && showItems != 0) {
Map items = cache.getLatestAccessedItems(showItems == -1 ? Integer.MAX_VALUE : showItems);
for (Map.Entry e : (Set<Map.Entry>) items.entrySet()) {
Object k = e.getKey();
Object v = e.getValue();
String ks = "item_" + k;
String vs = v.toString();
map.put(ks, vs);
}
}
}
});
return statsList;
}
@ -256,67 +313,9 @@ public class FastLRUCache<K, V> extends SolrCacheBase implements SolrCache<K,V>,
@Override
public void initializeMetrics(SolrMetricManager manager, String registryName, String tag, String scope) {
registry = manager.registry(registryName);
cacheMap = new MetricsMap((detailed, map) -> {
if (cache != null) {
ConcurrentLRUCache.Stats stats = cache.getStats();
long lookups = stats.getCumulativeLookups();
long hits = stats.getCumulativeHits();
long inserts = stats.getCumulativePuts();
long evictions = stats.getCumulativeEvictions();
long idleEvictions = stats.getCumulativeIdleEvictions();
long size = stats.getCurrentSize();
long clookups = 0;
long chits = 0;
long cinserts = 0;
long cevictions = 0;
long cIdleEvictions = 0;
// NOTE: It is safe to iterate on a CopyOnWriteArrayList
for (ConcurrentLRUCache.Stats statistiscs : statsList) {
clookups += statistiscs.getCumulativeLookups();
chits += statistiscs.getCumulativeHits();
cinserts += statistiscs.getCumulativePuts();
cevictions += statistiscs.getCumulativeEvictions();
cIdleEvictions += statistiscs.getCumulativeIdleEvictions();
}
map.put(LOOKUPS_PARAM, lookups);
map.put(HITS_PARAM, hits);
map.put(HIT_RATIO_PARAM, calcHitRatio(lookups, hits));
map.put(INSERTS_PARAM, inserts);
map.put(EVICTIONS_PARAM, evictions);
map.put(SIZE_PARAM, size);
map.put("cleanupThread", cleanupThread);
map.put("idleEvictions", idleEvictions);
map.put(RAM_BYTES_USED_PARAM, ramBytesUsed());
map.put(MAX_RAM_MB_PARAM, getMaxRamMB());
map.put("warmupTime", warmupTime);
map.put("cumulative_lookups", clookups);
map.put("cumulative_hits", chits);
map.put("cumulative_hitratio", calcHitRatio(clookups, chits));
map.put("cumulative_inserts", cinserts);
map.put("cumulative_evictions", cevictions);
map.put("cumulative_idleEvictions", cIdleEvictions);
if (detailed && showItems != 0) {
Map items = cache.getLatestAccessedItems( showItems == -1 ? Integer.MAX_VALUE : showItems );
for (Map.Entry e : (Set <Map.Entry>)items.entrySet()) {
Object k = e.getKey();
Object v = e.getValue();
String ks = "item_" + k;
String vs = v.toString();
map.put(ks,vs);
}
}
}
});
manager.registerGauge(this, registryName, cacheMap, tag, true, scope, getCategory().toString());
}
// for unit tests only
MetricsMap getMetricsMap() {
return cacheMap;
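
The cacheMap gauge is now built together with the stats list (above) instead of inside initializeMetrics(); tests can still reach it through the package-private getMetricsMap(). A test-style sketch, assuming an initialized FastLRUCache named cache on which initializeMetrics(...) has been called (key names mirror the constants used above):

MetricsMap metricsMap = cache.getMetricsMap();
Map<String, Object> snapshot = metricsMap.getValue();
Object lookups = snapshot.get("lookups");                 // LOOKUPS_PARAM
Object cumulativeHits = snapshot.get("cumulative_hits");  // aggregated across searcher generations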

View File

@ -66,6 +66,7 @@ import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.index.SlowCompositeReaderWrapper;
import org.apache.solr.metrics.MetricsMap;
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.metrics.SolrMetricProducer;
import org.apache.solr.request.LocalSolrQueryRequest;
@ -75,6 +76,7 @@ import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.facet.UnInvertedField;
import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.search.stats.StatsSource;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.update.IndexFingerprint;
@ -135,6 +137,8 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
private final String path;
private boolean releaseDirectory;
private final StatsCache statsCache;
private Set<String> metricNames = ConcurrentHashMap.newKeySet();
private SolrMetricManager metricManager;
private String registryName;
@ -236,6 +240,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
this.rawReader = r;
this.leafReader = SlowCompositeReaderWrapper.wrap(this.reader);
this.core = core;
this.statsCache = core.createStatsCache();
this.schema = schema;
this.name = "Searcher@" + Integer.toHexString(hashCode()) + "[" + core.getName() + "]"
+ (name != null ? " " + name : "");
@ -315,6 +320,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
return super.leafContexts;
}
public StatsCache getStatsCache() {
return statsCache;
}
public FieldInfos getFieldInfos() {
return leafReader.getFieldInfos();
}
@ -2294,7 +2303,13 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
return -1;
}
}, tag, true, "indexCommitSize", Category.SEARCHER.toString(), scope);
// statsCache metrics
manager.registerGauge(this, registry,
new MetricsMap((detailed, map) -> {
statsCache.getCacheMetrics().getSnapshot(map::put);
map.put("statsCacheImpl", statsCache.getClass().getSimpleName());
}),
tag, true, "statsCache", Category.CACHE.toString(), scope);
}
@Override

View File

@ -21,13 +21,19 @@ import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SolrQueryRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class implements exact caching of statistics. It requires an additional
* round-trip to parse query at shard servers, and return term statistics for
* query terms (and collection statistics for term fields).
* <p>Global statistics are accumulated in the instance of this component (with the same life-cycle as
* SolrSearcher), in unbounded maps. NOTE: This may lead to excessive memory usage, in which case
* a {@link LRUStatsCache} should be considered.</p>
*/
public class ExactSharedStatsCache extends ExactStatsCache {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@ -39,13 +45,19 @@ public class ExactSharedStatsCache extends ExactStatsCache {
private final Map<String,CollectionStats> currentGlobalColStats = new ConcurrentHashMap<>();
@Override
public StatsSource get(SolrQueryRequest req) {
protected StatsSource doGet(SolrQueryRequest req) {
log.debug("total={}, cache {}", currentGlobalColStats, currentGlobalTermStats.size());
return new ExactStatsSource(currentGlobalTermStats, currentGlobalColStats);
return new ExactStatsSource(statsCacheMetrics, currentGlobalTermStats, currentGlobalColStats);
}
@Override
public void init(PluginInfo info) {}
public void clear() {
super.clear();
perShardTermStats.clear();
perShardColStats.clear();
currentGlobalTermStats.clear();
currentGlobalColStats.clear();
}
@Override
protected void addToPerShardColStats(SolrQueryRequest req, String shard,

View File

@ -18,6 +18,7 @@ package org.apache.solr.search.stats;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
@ -25,21 +26,23 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.stream.Collectors;
import com.google.common.collect.Lists;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
@ -52,36 +55,30 @@ import org.slf4j.LoggerFactory;
* This class implements exact caching of statistics. It requires an additional
* round-trip to parse query at shard servers, and return term statistics for
* query terms (and collection statistics for term fields).
* <p>Global statistics are cached in the current request's context and discarded
* once the processing of the current request is complete. There's no support for
* longer-term caching, and each request needs to build the global statistics from scratch,
* even for repeating queries.</p>
*/
public class ExactStatsCache extends StatsCache {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
// experimenting with strategy that takes more RAM, but also doesn't share memory
// across threads
private static final String CURRENT_GLOBAL_COL_STATS = "org.apache.solr.stats.currentGlobalColStats";
private static final String CURRENT_GLOBAL_TERM_STATS = "org.apache.solr.stats.currentGlobalTermStats";
private static final String PER_SHARD_TERM_STATS = "org.apache.solr.stats.perShardTermStats";
private static final String PER_SHARD_COL_STATS = "org.apache.solr.stats.perShardColStats";
private static final String CURRENT_GLOBAL_COL_STATS = "solr.stats.globalCol";
private static final String CURRENT_GLOBAL_TERM_STATS = "solr.stats.globalTerm";
private static final String PER_SHARD_TERM_STATS = "solr.stats.shardTerm";
private static final String PER_SHARD_COL_STATS = "solr.stats.shardCol";
@Override
public StatsSource get(SolrQueryRequest req) {
Map<String,CollectionStats> currentGlobalColStats = (Map<String,CollectionStats>) req.getContext().get(CURRENT_GLOBAL_COL_STATS);
Map<String,TermStats> currentGlobalTermStats = (Map<String,TermStats>) req.getContext().get(CURRENT_GLOBAL_TERM_STATS);
if (currentGlobalColStats == null) {
currentGlobalColStats = Collections.emptyMap();
}
if (currentGlobalTermStats == null) {
currentGlobalTermStats = Collections.emptyMap();
}
protected StatsSource doGet(SolrQueryRequest req) {
Map<String,CollectionStats> currentGlobalColStats = (Map<String,CollectionStats>) req.getContext().getOrDefault(CURRENT_GLOBAL_COL_STATS, Collections.emptyMap());
Map<String,TermStats> currentGlobalTermStats = (Map<String,TermStats>) req.getContext().getOrDefault(CURRENT_GLOBAL_TERM_STATS, Collections.emptyMap());
log.debug("Returning StatsSource. Collection stats={}, Term stats size= {}", currentGlobalColStats, currentGlobalTermStats.size());
return new ExactStatsSource(currentGlobalTermStats, currentGlobalColStats);
return new ExactStatsSource(statsCacheMetrics, currentGlobalTermStats, currentGlobalColStats);
}
@Override
public void init(PluginInfo info) {}
@Override
public ShardRequest retrieveStatsRequest(ResponseBuilder rb) {
protected ShardRequest doRetrieveStatsRequest(ResponseBuilder rb) {
// always request shard statistics
ShardRequest sreq = new ShardRequest();
sreq.purpose = ShardRequest.PURPOSE_GET_TERM_STATS;
sreq.params = new ModifiableSolrParams(rb.req.getParams());
@ -91,20 +88,27 @@ public class ExactStatsCache extends StatsCache {
}
@Override
public void mergeToGlobalStats(SolrQueryRequest req, List<ShardResponse> responses) {
Set<Object> allTerms = new HashSet<>();
protected void doMergeToGlobalStats(SolrQueryRequest req, List<ShardResponse> responses) {
Set<Term> allTerms = new HashSet<>();
for (ShardResponse r : responses) {
log.debug("Merging to global stats, shard={}, response={}", r.getShard(), r.getSolrResponse().getResponse());
// response's "shard" is really a shardURL, or even a list of URLs
String shard = r.getShard();
SolrResponse res = r.getSolrResponse();
if (res.getException() != null) {
log.debug("Exception response={}", res);
continue;
}
if (res.getResponse().get(ShardParams.SHARD_NAME) != null) {
shard = (String) res.getResponse().get(ShardParams.SHARD_NAME);
}
NamedList<Object> nl = res.getResponse();
// TODO: nl == null if not all shards respond (no server hosting shard)
String termStatsString = (String) nl.get(TERM_STATS_KEY);
if (termStatsString != null) {
addToPerShardTermStats(req, shard, termStatsString);
}
List<Object> terms = nl.getAll(TERMS_KEY);
Set<Term> terms = StatsUtil.termsFromEncodedString((String) nl.get(TERMS_KEY));
allTerms.addAll(terms);
String colStatsString = (String) nl.get(COL_STATS_KEY);
Map<String,CollectionStats> colStats = StatsUtil.colStatsMapFromString(colStatsString);
@ -113,48 +117,36 @@ public class ExactStatsCache extends StatsCache {
}
}
if (allTerms.size() > 0) {
req.getContext().put(TERMS_KEY, Lists.newArrayList(allTerms));
req.getContext().put(TERMS_KEY, StatsUtil.termsToEncodedString(allTerms));
}
if (log.isDebugEnabled()) printStats(req);
}
protected void addToPerShardColStats(SolrQueryRequest req, String shard, Map<String,CollectionStats> colStats) {
Map<String,Map<String,CollectionStats>> perShardColStats = (Map<String,Map<String,CollectionStats>>) req.getContext().get(PER_SHARD_COL_STATS);
if (perShardColStats == null) {
perShardColStats = new HashMap<>();
req.getContext().put(PER_SHARD_COL_STATS, perShardColStats);
}
Map<String,Map<String,CollectionStats>> perShardColStats = (Map<String,Map<String,CollectionStats>>) req.getContext().computeIfAbsent(PER_SHARD_COL_STATS, Utils.NEW_HASHMAP_FUN);
perShardColStats.put(shard, colStats);
}
protected void printStats(SolrQueryRequest req) {
Map<String,Map<String,TermStats>> perShardTermStats = (Map<String,Map<String,TermStats>>) req.getContext().get(PER_SHARD_TERM_STATS);
if (perShardTermStats == null) {
perShardTermStats = Collections.emptyMap();
}
Map<String,Map<String,CollectionStats>> perShardColStats = (Map<String,Map<String,CollectionStats>>) req.getContext().get(PER_SHARD_COL_STATS);
if (perShardColStats == null) {
perShardColStats = Collections.emptyMap();
}
Map<String,Map<String,TermStats>> perShardTermStats = (Map<String,Map<String,TermStats>>) req.getContext().getOrDefault(PER_SHARD_TERM_STATS, Collections.emptyMap());
Map<String,Map<String,CollectionStats>> perShardColStats = (Map<String,Map<String,CollectionStats>>) req.getContext().getOrDefault(PER_SHARD_COL_STATS, Collections.emptyMap());
log.debug("perShardColStats={}, perShardTermStats={}", perShardColStats, perShardTermStats);
}
protected void addToPerShardTermStats(SolrQueryRequest req, String shard, String termStatsString) {
Map<String,TermStats> termStats = StatsUtil.termStatsMapFromString(termStatsString);
if (termStats != null) {
Map<String,Map<String,TermStats>> perShardTermStats = (Map<String,Map<String,TermStats>>) req.getContext().get(PER_SHARD_TERM_STATS);
if (perShardTermStats == null) {
perShardTermStats = new HashMap<>();
req.getContext().put(PER_SHARD_TERM_STATS, perShardTermStats);
}
Map<String,Map<String,TermStats>> perShardTermStats = (Map<String,Map<String,TermStats>>) req.getContext().computeIfAbsent(PER_SHARD_TERM_STATS, Utils.NEW_HASHMAP_FUN);
perShardTermStats.put(shard, termStats);
}
}
@Override
public void returnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) {
protected void doReturnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) {
Query q = rb.getQuery();
try {
Set<Term> additionalTerms = StatsUtil.termsFromEncodedString(rb.req.getParams().get(TERMS_KEY));
Set<String> additionalFields = StatsUtil.fieldsFromString(rb.req.getParams().get(FIELDS_KEY));
HashSet<Term> terms = new HashSet<>();
HashMap<String,TermStats> statsMap = new HashMap<>();
HashMap<String,CollectionStats> colMap = new HashMap<>();
@ -177,18 +169,31 @@ public class ExactStatsCache extends StatsCache {
}
};
statsCollectingSearcher.createWeight(searcher.rewrite(q), ScoreMode.COMPLETE, 1);
for (Term t : terms) {
rb.rsp.add(TERMS_KEY, t.toString());
for (String field : additionalFields) {
if (colMap.containsKey(field)) {
continue;
}
statsCollectingSearcher.collectionStatistics(field);
}
if (statsMap.size() != 0) { //Don't add empty keys
for (Term term : additionalTerms) {
statsCollectingSearcher.createWeight(searcher.rewrite(new TermQuery(term)), ScoreMode.COMPLETE, 1);
}
CloudDescriptor cloudDescriptor = searcher.getCore().getCoreDescriptor().getCloudDescriptor();
if (cloudDescriptor != null) {
rb.rsp.add(ShardParams.SHARD_NAME, cloudDescriptor.getShardId());
}
if (!terms.isEmpty()) {
rb.rsp.add(TERMS_KEY, StatsUtil.termsToEncodedString(terms));
}
if (!statsMap.isEmpty()) { //Don't add empty keys
String termStatsString = StatsUtil.termStatsMapToString(statsMap);
rb.rsp.add(TERM_STATS_KEY, termStatsString);
if (log.isDebugEnabled()) {
log.debug("termStats={}, terms={}, numDocs={}", termStatsString, terms, searcher.maxDoc());
}
}
if (colMap.size() != 0){
if (!colMap.isEmpty()) {
String colStatsString = StatsUtil.colStatsMapToString(colMap);
rb.rsp.add(COL_STATS_KEY, colStatsString);
if (log.isDebugEnabled()) {
@ -202,21 +207,29 @@ public class ExactStatsCache extends StatsCache {
}
@Override
public void sendGlobalStats(ResponseBuilder rb, ShardRequest outgoing) {
outgoing.purpose |= ShardRequest.PURPOSE_SET_TERM_STATS;
protected void doSendGlobalStats(ResponseBuilder rb, ShardRequest outgoing) {
ModifiableSolrParams params = outgoing.params;
List<String> terms = (List<String>) rb.req.getContext().get(TERMS_KEY);
if (terms != null) {
Set<String> fields = new HashSet<>();
for (String t : terms) {
String[] fv = t.split(":");
fields.add(fv[0]);
}
Set<Term> terms = StatsUtil.termsFromEncodedString((String) rb.req.getContext().get(TERMS_KEY));
if (!terms.isEmpty()) {
Set<String> fields = terms.stream().map(t -> t.field()).collect(Collectors.toSet());
Map<String,TermStats> globalTermStats = new HashMap<>();
Map<String,CollectionStats> globalColStats = new HashMap<>();
// aggregate collection stats, only for the fields present in terms
for (String shard : rb.shards) {
String collectionName = rb.req.getCore().getCoreDescriptor().getCollectionName();
if (collectionName == null) {
collectionName = rb.req.getCore().getCoreDescriptor().getName();
}
List<String> shards = new ArrayList<>();
for (String shardUrl : rb.shards) {
String shard = StatsUtil.shardUrlToShard(collectionName, shardUrl);
if (shard == null) {
log.warn("Can't determine shard from collectionName=" + collectionName + " and shardUrl=" + shardUrl + ", skipping...");
continue;
} else {
shards.add(shard);
}
}
for (String shard : shards) {
Map<String,CollectionStats> s = getPerShardColStats(rb, shard);
if (s == null) {
continue;
@ -235,17 +248,18 @@ public class ExactStatsCache extends StatsCache {
}
params.add(COL_STATS_KEY, StatsUtil.colStatsMapToString(globalColStats));
// sum up only from relevant shards
for (String t : terms) {
params.add(TERMS_KEY, t);
for (String shard : rb.shards) {
TermStats termStats = getPerShardTermStats(rb.req, t, shard);
params.add(TERMS_KEY, StatsUtil.termsToEncodedString(terms));
for (Term t : terms) {
String term = t.toString();
for (String shard : shards) {
TermStats termStats = getPerShardTermStats(rb.req, term, shard);
if (termStats == null || termStats.docFreq == 0) {
continue;
}
TermStats g = globalTermStats.get(t);
TermStats g = globalTermStats.get(term);
if (g == null) {
g = new TermStats(t);
globalTermStats.put(t, g);
g = new TermStats(term);
globalTermStats.put(term, g);
}
g.add(termStats);
}
@ -257,24 +271,18 @@ public class ExactStatsCache extends StatsCache {
}
protected Map<String,CollectionStats> getPerShardColStats(ResponseBuilder rb, String shard) {
Map<String,Map<String,CollectionStats>> perShardColStats = (Map<String,Map<String,CollectionStats>>) rb.req.getContext().get(PER_SHARD_COL_STATS);
if (perShardColStats == null) {
perShardColStats = Collections.emptyMap();
}
Map<String,Map<String,CollectionStats>> perShardColStats = (Map<String,Map<String,CollectionStats>>) rb.req.getContext().getOrDefault(PER_SHARD_COL_STATS, Collections.emptyMap());
return perShardColStats.get(shard);
}
protected TermStats getPerShardTermStats(SolrQueryRequest req, String t, String shard) {
Map<String,Map<String,TermStats>> perShardTermStats = (Map<String,Map<String,TermStats>>) req.getContext().get(PER_SHARD_TERM_STATS);
if (perShardTermStats == null) {
perShardTermStats = Collections.emptyMap();
}
Map<String,Map<String,TermStats>> perShardTermStats = (Map<String,Map<String,TermStats>>) req.getContext().getOrDefault(PER_SHARD_TERM_STATS, Collections.emptyMap());
Map<String,TermStats> cache = perShardTermStats.get(shard);
return (cache != null) ? cache.get(t) : null; //Term doesn't exist in shard
}
@Override
public void receiveGlobalStats(SolrQueryRequest req) {
protected void doReceiveGlobalStats(SolrQueryRequest req) {
String globalTermStats = req.getParams().get(TERM_STATS_KEY);
String globalColStats = req.getParams().get(COL_STATS_KEY);
if (globalColStats != null) {
@ -297,29 +305,23 @@ public class ExactStatsCache extends StatsCache {
protected void addToGlobalColStats(SolrQueryRequest req,
Entry<String,CollectionStats> e) {
Map<String,CollectionStats> currentGlobalColStats = (Map<String,CollectionStats>) req.getContext().get(CURRENT_GLOBAL_COL_STATS);
if (currentGlobalColStats == null) {
currentGlobalColStats = new HashMap<>();
req.getContext().put(CURRENT_GLOBAL_COL_STATS, currentGlobalColStats);
}
Map<String,CollectionStats> currentGlobalColStats = (Map<String,CollectionStats>) req.getContext().computeIfAbsent(CURRENT_GLOBAL_COL_STATS, Utils.NEW_HASHMAP_FUN);
currentGlobalColStats.put(e.getKey(), e.getValue());
}
protected void addToGlobalTermStats(SolrQueryRequest req, Entry<String,TermStats> e) {
Map<String,TermStats> currentGlobalTermStats = (Map<String,TermStats>) req.getContext().get(CURRENT_GLOBAL_TERM_STATS);
if (currentGlobalTermStats == null) {
currentGlobalTermStats = new HashMap<>();
req.getContext().put(CURRENT_GLOBAL_TERM_STATS, currentGlobalTermStats);
}
Map<String,TermStats> currentGlobalTermStats = (Map<String,TermStats>) req.getContext().computeIfAbsent(CURRENT_GLOBAL_TERM_STATS, Utils.NEW_HASHMAP_FUN);
currentGlobalTermStats.put(e.getKey(), e.getValue());
}
protected static class ExactStatsSource extends StatsSource {
private final Map<String,TermStats> termStatsCache;
private final Map<String,CollectionStats> colStatsCache;
private final StatsCacheMetrics metrics;
public ExactStatsSource(Map<String,TermStats> termStatsCache,
public ExactStatsSource(StatsCacheMetrics metrics, Map<String,TermStats> termStatsCache,
Map<String,CollectionStats> colStatsCache) {
this.metrics = metrics;
this.termStatsCache = termStatsCache;
this.colStatsCache = colStatsCache;
}
@ -332,7 +334,8 @@ public class ExactStatsCache extends StatsCache {
// Not sure we need a warning here
if (termStats == null) {
log.debug("Missing global termStats info for term={}, using local stats", term);
return localSearcher.localTermStatistics(term, docFreq, totalTermFreq);
metrics.missingGlobalTermStats.increment();
return localSearcher != null ? localSearcher.localTermStatistics(term, docFreq, totalTermFreq) : null;
} else {
return termStats.toTermStatistics();
}
@ -344,7 +347,8 @@ public class ExactStatsCache extends StatsCache {
CollectionStats colStats = colStatsCache.get(field);
if (colStats == null) {
log.debug("Missing global colStats info for field={}, using local", field);
return localSearcher.localCollectionStatistics(field);
metrics.missingGlobalFieldStats.increment();
return localSearcher != null ? localSearcher.localCollectionStatistics(field) : null;
} else {
return colStats.toCollectionStatistics();
}

View File

@ -21,13 +21,17 @@ import java.lang.invoke.MethodHandles;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.LongAdder;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.FastLRUCache;
import org.apache.solr.search.SolrCache;
@ -37,44 +41,129 @@ import org.slf4j.LoggerFactory;
/**
* Unlike {@link ExactStatsCache} this implementation preserves term stats
* across queries in a set of LRU caches, and based on surface features of a
* query it determines the need to send additional RPC-s. As a result the
* additional RPC-s are needed much less frequently.
*
* across queries in a set of LRU caches (with the same life-cycle as SolrIndexSearcher),
* and based on surface features of a
* query it determines the need to send additional requests to retrieve local term
* and collection statistics from shards. As a result the
* additional requests may be needed much less frequently.
* <p>
* Query terms and their stats are maintained in a set of maps. At the query
* front-end there will be as many maps as there are shards, each maintaining
* the respective shard statistics. At each shard server there is a single map
* that is updated with the global statistics on every request.
Query terms, their stats and field stats are maintained in LRU caches (with a default size of
{@link #DEFAULT_MAX_SIZE}), one cache per shard. These caches
are updated as needed (when term or field statistics are missing). Each instance of the component
also keeps a global stats cache, which is aggregated from the per-shard caches.
<p>Cache entries expire after a maximum idle time, by default {@link #DEFAULT_MAX_IDLE_TIME}.
*/
public class LRUStatsCache extends ExactStatsCache {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final int DEFAULT_MAX_SIZE = 200;
public static final int DEFAULT_MAX_IDLE_TIME = 60;
// local stats obtained from shard servers
// map of <shardName, <term, termStats>>
private final Map<String,SolrCache<String,TermStats>> perShardTermStats = new ConcurrentHashMap<>();
// map of <shardName, <field, collStats>>
private final Map<String,Map<String,CollectionStats>> perShardColStats = new ConcurrentHashMap<>();
// global stats synchronized from the master
private final FastLRUCache<String,TermStats> currentGlobalTermStats = new FastLRUCache<>();
private final Map<String,CollectionStats> currentGlobalColStats = new ConcurrentHashMap<>();
// local term context (caching term lookups)
private final Map lruCacheInitArgs = new HashMap();
// cache of <term, termStats>
private final FastLRUCache<String,TermStats> currentGlobalTermStats = new FastLRUCache<>();
// cache of <field, colStats>
private final FastLRUCache<String,CollectionStats> currentGlobalColStats = new FastLRUCache<>();
// missing stats to be fetched with the next request
private Set<String> missingColStats = ConcurrentHashMap.newKeySet();
private Set<Term> missingTermStats = ConcurrentHashMap.newKeySet();
private final Map<String, String> lruCacheInitArgs = new HashMap<>();
private final StatsCacheMetrics ignorableMetrics = new StatsCacheMetrics();
@Override
public StatsSource get(SolrQueryRequest req) {
protected StatsSource doGet(SolrQueryRequest req) {
log.debug("## GET total={}, cache {}", currentGlobalColStats , currentGlobalTermStats.size());
return new LRUStatsSource(currentGlobalTermStats, currentGlobalColStats);
return new LRUStatsSource(statsCacheMetrics);
}
@Override
public void clear() {
super.clear();
perShardTermStats.clear();
perShardColStats.clear();
currentGlobalTermStats.clear();
currentGlobalColStats.clear();
ignorableMetrics.clear();
}
@Override
public void init(PluginInfo info) {
// TODO: make this configurable via PluginInfo
lruCacheInitArgs.put("size", "100");
super.init(info);
if (info != null && info.attributes != null) {
lruCacheInitArgs.putAll(info.attributes);
}
lruCacheInitArgs.computeIfAbsent(SolrCache.SIZE_PARAM, s -> String.valueOf(DEFAULT_MAX_SIZE));
lruCacheInitArgs.computeIfAbsent(SolrCache.MAX_IDLE_TIME_PARAM, t -> String.valueOf(DEFAULT_MAX_IDLE_TIME));
Map<String, Object> map = new HashMap<>(lruCacheInitArgs);
map.put(CommonParams.NAME, "globalTermStats");
currentGlobalTermStats.init(lruCacheInitArgs, null, null);
currentGlobalTermStats.setState(SolrCache.State.LIVE);
map = new HashMap<>(lruCacheInitArgs);
map.put(CommonParams.NAME, "globalColStats");
currentGlobalColStats.init(lruCacheInitArgs, null, null);
currentGlobalColStats.setState(SolrCache.State.LIVE);
}
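
The LRU parameters come from the plugin's attributes, falling back to DEFAULT_MAX_SIZE and DEFAULT_MAX_IDLE_TIME; in solrconfig.xml they would be attributes on the statsCache element. An illustrative programmatic initialization (the attribute values are examples only, not recommendations):

Map<String, String> attrs = new HashMap<>();
attrs.put("class", LRUStatsCache.class.getName());
attrs.put(SolrCache.SIZE_PARAM, "500");           // per-cache LRU size (default: DEFAULT_MAX_SIZE)
attrs.put(SolrCache.MAX_IDLE_TIME_PARAM, "120");  // idle expiration in seconds (default: DEFAULT_MAX_IDLE_TIME)
LRUStatsCache statsCache = new LRUStatsCache();
statsCache.init(new PluginInfo("statsCache", attrs, null, null));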
@Override
protected ShardRequest doRetrieveStatsRequest(ResponseBuilder rb) {
// check approximately what terms are needed.
// NOTE: query rewrite only expands to terms that are present in the local index
// so it's possible that the result will contain less terms than present in all shards.
// HOWEVER: the absence of these terms is recorded by LRUStatsSource, and they will be
// force-fetched on next request and cached.
// check for missing stats from previous requests
if (!missingColStats.isEmpty() || !missingTermStats.isEmpty()) {
// needs to fetch anyway, so get the full query stats + the missing stats for caching
ShardRequest sreq = super.doRetrieveStatsRequest(rb);
if (!missingColStats.isEmpty()) {
Set<String> requestColStats = missingColStats;
// there's a small window when new items may be added before
// creating the request and clearing, so don't clear - instead replace the instance
missingColStats = ConcurrentHashMap.newKeySet();
sreq.params.add(FIELDS_KEY, StatsUtil.fieldsToString(requestColStats));
}
if (!missingTermStats.isEmpty()) {
Set<Term> requestTermStats = missingTermStats;
missingTermStats = ConcurrentHashMap.newKeySet();
sreq.params.add(TERMS_KEY, StatsUtil.termsToEncodedString(requestTermStats));
}
return sreq;
}
// rewrite locally to see if there are any missing terms. See the note above for caveats.
LongAdder missing = new LongAdder();
try {
// use ignorableMetrics to avoid counting this checking as real misses
approxCheckMissingStats(rb, new LRUStatsSource(ignorableMetrics), t -> missing.increment(), f -> missing.increment());
if (missing.sum() == 0) {
// it should be (approximately) ok to skip the fetching
// since we already incremented the stats decrement it here
statsCacheMetrics.retrieveStats.decrement();
statsCacheMetrics.useCachedGlobalStats.increment();
return null;
} else {
return super.doRetrieveStatsRequest(rb);
}
} catch (IOException e) {
log.warn("Exception checking missing stats for query " + rb.getQuery() + ", forcing retrieving stats", e);
// retrieve anyway
return super.doRetrieveStatsRequest(rb);
}
}
@Override
protected void addToGlobalTermStats(SolrQueryRequest req, Entry<String,TermStats> e) {
currentGlobalTermStats.put(e.getKey(), e.getValue());
@ -94,12 +183,14 @@ public class LRUStatsCache extends ExactStatsCache {
protected void addToPerShardTermStats(SolrQueryRequest req, String shard, String termStatsString) {
Map<String,TermStats> termStats = StatsUtil.termStatsMapFromString(termStatsString);
if (termStats != null) {
SolrCache<String,TermStats> cache = perShardTermStats.get(shard);
if (cache == null) { // initialize
cache = new FastLRUCache<>();
cache.init(lruCacheInitArgs, null, null);
perShardTermStats.put(shard, cache);
}
SolrCache<String,TermStats> cache = perShardTermStats.computeIfAbsent(shard, s -> {
FastLRUCache c = new FastLRUCache<>();
Map<String, String> map = new HashMap<>(lruCacheInitArgs);
map.put(CommonParams.NAME, s);
c.init(map, null, null);
c.setState(SolrCache.State.LIVE);
return c;
});
for (Entry<String,TermStats> e : termStats.entrySet()) {
cache.put(e.getKey(), e.getValue());
}
@ -122,21 +213,22 @@ public class LRUStatsCache extends ExactStatsCache {
log.debug("## MERGED: perShardColStats={}, perShardTermStats={}", perShardColStats, perShardTermStats);
}
static class LRUStatsSource extends StatsSource {
private final SolrCache<String,TermStats> termStatsCache;
private final Map<String,CollectionStats> colStatsCache;
public LRUStatsSource(SolrCache<String,TermStats> termStatsCache, Map<String,CollectionStats> colStatsCache) {
this.termStatsCache = termStatsCache;
this.colStatsCache = colStatsCache;
class LRUStatsSource extends StatsSource {
private final StatsCacheMetrics metrics;
LRUStatsSource(StatsCacheMetrics metrics) {
this.metrics = metrics;
}
@Override
public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq)
throws IOException {
TermStats termStats = termStatsCache.get(term.toString());
TermStats termStats = currentGlobalTermStats.get(term.toString());
if (termStats == null) {
log.debug("## Missing global termStats info: {}, using local", term);
return localSearcher.localTermStatistics(term, docFreq, totalTermFreq);
missingTermStats.add(term);
metrics.missingGlobalTermStats.increment();
return localSearcher != null ? localSearcher.localTermStatistics(term, docFreq, totalTermFreq) : null;
} else {
return termStats.toTermStatistics();
}
@ -145,10 +237,12 @@ public class LRUStatsCache extends ExactStatsCache {
@Override
public CollectionStatistics collectionStatistics(SolrIndexSearcher localSearcher, String field)
throws IOException {
CollectionStats colStats = colStatsCache.get(field);
CollectionStats colStats = currentGlobalColStats.get(field);
if (colStats == null) {
log.debug("## Missing global colStats info: {}, using local", field);
return localSearcher.localCollectionStatistics(field);
missingColStats.add(field);
metrics.missingGlobalFieldStats.increment();
return localSearcher != null ? localSearcher.localCollectionStatistics(field) : null;
} else {
return colStats.toCollectionStatistics();
}

View File

@ -20,7 +20,6 @@ import java.lang.invoke.MethodHandles;
import java.util.List;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
@ -37,27 +36,25 @@ public class LocalStatsCache extends StatsCache {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@Override
public StatsSource get(SolrQueryRequest req) {
protected StatsSource doGet(SolrQueryRequest req) {
log.debug("## GET {}", req);
return new LocalStatsSource();
}
@Override
public void init(PluginInfo info) {
return new LocalStatsSource(statsCacheMetrics);
}
// by returning null we don't create additional round-trip request.
@Override
public ShardRequest retrieveStatsRequest(ResponseBuilder rb) {
log.debug("## RDR {}", rb.req);
protected ShardRequest doRetrieveStatsRequest(ResponseBuilder rb) {
log.debug("## RSR {}", rb.req);
// already incremented the stats - decrement it now
statsCacheMetrics.retrieveStats.decrement();
return null;
}
@Override
public void mergeToGlobalStats(SolrQueryRequest req,
protected void doMergeToGlobalStats(SolrQueryRequest req,
List<ShardResponse> responses) {
if (log.isDebugEnabled()) {
log.debug("## MTGD {}", req);
log.debug("## MTGS {}", req);
for (ShardResponse r : responses) {
log.debug(" - {}", r);
}
@ -65,17 +62,17 @@ public class LocalStatsCache extends StatsCache {
}
@Override
public void returnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) {
log.debug("## RLD {}", rb.req);
protected void doReturnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) {
log.debug("## RLS {}", rb.req);
}
@Override
public void receiveGlobalStats(SolrQueryRequest req) {
log.debug("## RGD {}", req);
protected void doReceiveGlobalStats(SolrQueryRequest req) {
log.debug("## RGS {}", req);
}
@Override
public void sendGlobalStats(ResponseBuilder rb, ShardRequest outgoing) {
log.debug("## SGD {}", outgoing);
protected void doSendGlobalStats(ResponseBuilder rb, ShardRequest outgoing) {
log.debug("## SGS {}", outgoing);
}
}

View File

@ -28,19 +28,23 @@ import org.apache.solr.search.SolrIndexSearcher;
* local statistics.
*/
public final class LocalStatsSource extends StatsSource {
private final StatsCache.StatsCacheMetrics metrics;
public LocalStatsSource() {
public LocalStatsSource(StatsCache.StatsCacheMetrics metrics) {
this.metrics = metrics;
}
@Override
public TermStatistics termStatistics(SolrIndexSearcher localSearcher, Term term, int docFreq, long totalTermFreq)
throws IOException {
metrics.missingGlobalTermStats.increment();
return localSearcher.localTermStatistics(term, docFreq, totalTermFreq);
}
@Override
public CollectionStatistics collectionStatistics(SolrIndexSearcher localSearcher, String field)
throws IOException {
metrics.missingGlobalFieldStats.increment();
return localSearcher.localCollectionStatistics(field);
}
}

View File

@ -16,14 +16,29 @@
*/
package org.apache.solr.search.stats;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.LongAdder;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.Weight;
import org.apache.solr.core.PluginInfo;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QueryCommand;
import org.apache.solr.search.SolrCache;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.plugin.PluginInfoInitialized;
@ -36,7 +51,7 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
* <p>
* There are instances of this class at the aggregator node (where the partial
* data from shards is aggregated), and on each core involved in a shard request
* (where this data is maintained and updated from the central cache).
* (where this data is maintained and updated from the aggregator's cache).
* </p>
*/
public abstract class StatsCache implements PluginInfoInitialized {
@ -44,75 +59,228 @@ public abstract class StatsCache implements PluginInfoInitialized {
/**
* Map of terms and {@link TermStats}.
*/
public static final String TERM_STATS_KEY = "org.apache.solr.stats.termStats";
public static final String TERM_STATS_KEY = "solr.stats.term";
/**
* Value of {@link CollectionStats}.
*/
public static final String COL_STATS_KEY = "org.apache.solr.stats.colStats";
public static final String COL_STATS_KEY = "solr.stats.col";
/**
* List of terms in the query.
*/
public static final String TERMS_KEY = "org.apache.solr.stats.terms";
public static final String TERMS_KEY = "solr.stats.terms";
/**
* List of fields in the query.
*/
public static final String FIELDS_KEY = "solr.stats.fields";
public static final class StatsCacheMetrics {
public final LongAdder lookups = new LongAdder();
public final LongAdder retrieveStats = new LongAdder();
public final LongAdder receiveGlobalStats = new LongAdder();
public final LongAdder returnLocalStats = new LongAdder();
public final LongAdder mergeToGlobalStats = new LongAdder();
public final LongAdder sendGlobalStats = new LongAdder();
public final LongAdder useCachedGlobalStats = new LongAdder();
public final LongAdder missingGlobalTermStats = new LongAdder();
public final LongAdder missingGlobalFieldStats = new LongAdder();
public void clear() {
lookups.reset();
retrieveStats.reset();
receiveGlobalStats.reset();
returnLocalStats.reset();
mergeToGlobalStats.reset();
sendGlobalStats.reset();
useCachedGlobalStats.reset();
missingGlobalTermStats.reset();
missingGlobalFieldStats.reset();
}
public void getSnapshot(BiConsumer<String, Object> consumer) {
consumer.accept(SolrCache.LOOKUPS_PARAM, lookups.longValue());
consumer.accept("retrieveStats", retrieveStats.longValue());
consumer.accept("receiveGlobalStats", receiveGlobalStats.longValue());
consumer.accept("returnLocalStats", returnLocalStats.longValue());
consumer.accept("mergeToGlobalStats", mergeToGlobalStats.longValue());
consumer.accept("sendGlobalStats", sendGlobalStats.longValue());
consumer.accept("useCachedGlobalStats", useCachedGlobalStats.longValue());
consumer.accept("missingGlobalTermStats", missingGlobalTermStats.longValue());
consumer.accept("missingGlobalFieldStats", missingGlobalFieldStats.longValue());
}
public String toString() {
Map<String, Object> map = new HashMap<>();
getSnapshot(map::put);
return map.toString();
}
}
protected StatsCacheMetrics statsCacheMetrics = new StatsCacheMetrics();
protected PluginInfo pluginInfo;
public StatsCacheMetrics getCacheMetrics() {
return statsCacheMetrics;
}
@Override
public void init(PluginInfo info) {
this.pluginInfo = info;
}
/**
* Creates a {@link ShardRequest} to retrieve per-shard stats related to the
* current query and the current state of the requester's {@link StatsCache}.
* <p>This method updates the cache metrics and calls {@link #doRetrieveStatsRequest(ResponseBuilder)}.</p>
*
* @param rb contains current request
* @return shard request to retrieve stats for terms in the current request,
* or null if no additional request is needed (e.g. if the information
* in global cache is already sufficient to satisfy this request).
*/
public abstract ShardRequest retrieveStatsRequest(ResponseBuilder rb);
public ShardRequest retrieveStatsRequest(ResponseBuilder rb) {
statsCacheMetrics.retrieveStats.increment();
return doRetrieveStatsRequest(rb);
}
protected abstract ShardRequest doRetrieveStatsRequest(ResponseBuilder rb);
/**
* Prepare a local (from the local shard) response to a "retrieve stats" shard
* request.
* <p>This method updates the cache metrics and calls {@link #doReturnLocalStats(ResponseBuilder, SolrIndexSearcher)}.</p>
*
* @param rb response builder
* @param searcher current local searcher
*/
public abstract void returnLocalStats(ResponseBuilder rb,
SolrIndexSearcher searcher);
public void returnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher) {
statsCacheMetrics.returnLocalStats.increment();
doReturnLocalStats(rb, searcher);
}
protected abstract void doReturnLocalStats(ResponseBuilder rb, SolrIndexSearcher searcher);
/**
* Process shard responses that contain partial local stats. Usually this
* entails combining per-shard stats for each term.
* <p>This method updates the cache metrics and calls {@link #doMergeToGlobalStats(SolrQueryRequest, List)}.</p>
*
* @param req query request
* @param responses responses from shards containing local stats for each shard
*/
public abstract void mergeToGlobalStats(SolrQueryRequest req,
List<ShardResponse> responses);
public void mergeToGlobalStats(SolrQueryRequest req,
List<ShardResponse> responses) {
statsCacheMetrics.mergeToGlobalStats.increment();
doMergeToGlobalStats(req, responses);
}
protected abstract void doMergeToGlobalStats(SolrQueryRequest req, List<ShardResponse> responses);
/**
* Receive global stats data from the master and update a local cache of stats
* Receive global stats data from the master and update a local cache of global stats
* with this global data. This event occurs either as a separate request, or
* together with the regular query request, in which case this method is
* called first, before preparing a {@link QueryCommand} to be submitted to
* the local {@link SolrIndexSearcher}.
* <p>This method updates the cache metrics and calls {@link #doReceiveGlobalStats(SolrQueryRequest)}.</p>
*
* @param req query request with global stats data
*/
public abstract void receiveGlobalStats(SolrQueryRequest req);
public void receiveGlobalStats(SolrQueryRequest req) {
statsCacheMetrics.receiveGlobalStats.increment();
doReceiveGlobalStats(req);
}
protected abstract void doReceiveGlobalStats(SolrQueryRequest req);
/**
* Prepare global stats data to be sent out to shards in this request.
* <p>This method updates the cache metrics and calls {@link #doSendGlobalStats(ResponseBuilder, ShardRequest)}.</p>
*
* @param rb response builder
* @param outgoing shard request to be sent
*/
public abstract void sendGlobalStats(ResponseBuilder rb, ShardRequest outgoing);
public void sendGlobalStats(ResponseBuilder rb, ShardRequest outgoing) {
statsCacheMetrics.sendGlobalStats.increment();
doSendGlobalStats(rb, outgoing);
}
protected abstract void doSendGlobalStats(ResponseBuilder rb, ShardRequest outgoing);
/**
* Prepare local {@link StatsSource} to provide stats information to perform
* Prepare a {@link StatsSource} that provides stats information to perform
* local scoring (to be precise, to build a local {@link Weight} from the
* query).
* <p>This method updates the cache metrics and calls {@link #doGet(SolrQueryRequest)}.</p>
*
* @param req query request
* @return an instance of {@link StatsSource} to use in creating a query
* {@link Weight}
*/
public abstract StatsSource get(SolrQueryRequest req);
public StatsSource get(SolrQueryRequest req) {
statsCacheMetrics.lookups.increment();
return doGet(req);
}
protected abstract StatsSource doGet(SolrQueryRequest req);
/**
* Clear cached statistics.
*/
public void clear() {
statsCacheMetrics.clear();
};
/**
* Check if the <code>statsSource</code> is missing some term or field statistics info,
* which then needs to be retrieved.
<p>NOTE: this uses the local IndexReader for query rewriting, which may expand the query to fewer (or different)
terms than rewriting it on other shards' readers would. This in turn may falsely fail to inform the consumers
about possibly missing stats, which may lead consumers to skip the fetching of full stats. Consequently
* this would lead to incorrect global IDF data for the missing terms (because for these terms only local stats
* would be used).</p>
* @param rb request to evaluate against the statsSource
* @param statsSource stats source to check
* @param missingTermStats consumer of missing term stats
* @param missingFieldStats consumer of missing field stats
* @return approximate number of missing term stats and field stats combined
*/
public int approxCheckMissingStats(ResponseBuilder rb, StatsSource statsSource, Consumer<Term> missingTermStats, Consumer<String> missingFieldStats) throws IOException {
CheckingIndexSearcher checkingSearcher = new CheckingIndexSearcher(statsSource, rb.req.getSearcher().getIndexReader(), missingTermStats, missingFieldStats);
Query q = rb.getQuery();
q = checkingSearcher.rewrite(q);
checkingSearcher.createWeight(q, ScoreMode.COMPLETE, 1);
return checkingSearcher.missingFieldsCount + checkingSearcher.missingTermsCount;
}
static final class CheckingIndexSearcher extends IndexSearcher {
final StatsSource statsSource;
final Consumer<Term> missingTermStats;
final Consumer<String> missingFieldStats;
int missingTermsCount, missingFieldsCount;
CheckingIndexSearcher(StatsSource statsSource, IndexReader reader, Consumer<Term> missingTermStats, Consumer<String> missingFieldStats) {
super(reader);
this.statsSource = statsSource;
this.missingTermStats = missingTermStats;
this.missingFieldStats = missingFieldStats;
}
@Override
public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
if (statsSource.termStatistics(null, term, docFreq, totalTermFreq) == null) {
missingTermStats.accept(term);
missingTermsCount++;
}
return super.termStatistics(term, docFreq, totalTermFreq);
}
@Override
public CollectionStatistics collectionStatistics(String field) throws IOException {
if (statsSource.collectionStatistics(null, field) == null) {
missingFieldStats.accept(field);
missingFieldsCount++;
}
return super.collectionStatistics(field);
}
}
}
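
Each public entry point above increments a StatsCacheMetrics counter before delegating to its protected do* counterpart, and SolrIndexSearcher (earlier in this commit) registers a snapshot of these counters as a "statsCache" gauge. A short sketch of reading the counters directly from a component, assuming a SolrQueryRequest named req:

StatsCache statsCache = req.getSearcher().getStatsCache();
Map<String, Object> counters = new HashMap<>();
statsCache.getCacheMetrics().getSnapshot(counters::put);
long missingTerms = ((Number) counters.get("missingGlobalTermStats")).longValue();
long missingFields = ((Number) counters.get("missingGlobalFieldStats")).longValue();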

View File

@ -16,25 +16,126 @@
*/
package org.apache.solr.search.stats;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Various utilities for de/serialization of term stats and collection stats.
* <p>TODO: serialization format is very simple and does nothing to compress the data.</p>
*/
public class StatsUtil {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String ENTRY_SEPARATOR = "!";
public static final char ENTRY_SEPARATOR_CHAR = '!';
/**
* Parse a list of urls separated by "|" in order to retrieve a shard name.
* @param collectionName collection name
* @param shardUrls list of urls
* @return shard name, or shardUrl if no shard info is present,
* or null if impossible to determine (eg. empty string)
*/
public static String shardUrlToShard(String collectionName, String shardUrls) {
// we may get multiple replica urls
String[] urls = shardUrls.split("\\|");
if (urls.length == 0) {
return null;
}
String[] urlParts = urls[0].split("/");
String coreName = urlParts[urlParts.length - 1];
String replicaName = Utils.parseMetricsReplicaName(collectionName, coreName);
String shard;
if (replicaName != null) {
shard = coreName.substring(collectionName.length() + 1);
shard = shard.substring(0, shard.length() - replicaName.length() - 1);
} else {
if (coreName.length() > collectionName.length() && coreName.startsWith(collectionName)) {
shard = coreName.substring(collectionName.length() + 1);
if (shard.isEmpty()) {
shard = urls[0];
}
} else {
shard = urls[0];
}
}
return shard;
}
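// Worked example (illustrative, assuming the usual "<collection>_<shard>_replica_nN" core naming):
//   shardUrlToShard("collection1", "http://node1:8983/solr/collection1_shard2_replica_n4") returns "shard2"
//   shardUrlToShard("collection1", "http://node1:8983/solr/someOtherCore") returns the URL unchanged,
//   because no shard information can be extracted from the core name.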
public static String termsToEncodedString(Collection<?> terms) {
StringBuilder sb = new StringBuilder();
for (Object o : terms) {
if (sb.length() > 0) {
sb.append(ENTRY_SEPARATOR);
}
if (o instanceof Term) {
sb.append(termToEncodedString((Term) o));
} else {
sb.append(termToEncodedString(String.valueOf(o)));
}
}
return sb.toString();
}
public static Set<Term> termsFromEncodedString(String data) {
Set<Term> terms = new HashSet<>();
if (data == null || data.isBlank()) {
return terms;
}
String[] items = data.split(ENTRY_SEPARATOR);
for (String item : items) {
Term t = termFromEncodedString(item);
if (t != null) {
terms.add(t);
}
}
return terms;
}
public static Set<String> fieldsFromString(String data) {
Set<String> fields = new HashSet<>();
if (data == null || data.isBlank()) {
return fields;
}
String[] items = data.split(ENTRY_SEPARATOR);
for (String item : items) {
if (!item.isBlank()) {
fields.add(item);
}
}
return fields;
}
public static String fieldsToString(Collection<String> fields) {
StringBuilder sb = new StringBuilder();
for (String field : fields) {
if (field.isBlank()) {
continue;
}
if (sb.length() > 0) {
sb.append(ENTRY_SEPARATOR);
}
sb.append(field);
}
return sb.toString();
}
/**
* Make a String representation of {@link CollectionStats}
*/
@ -42,13 +143,13 @@ public class StatsUtil {
StringBuilder sb = new StringBuilder();
sb.append(colStats.field);
sb.append(',');
sb.append(String.valueOf(colStats.maxDoc));
sb.append(colStats.maxDoc);
sb.append(',');
sb.append(String.valueOf(colStats.docCount));
sb.append(colStats.docCount);
sb.append(',');
sb.append(String.valueOf(colStats.sumTotalTermFreq));
sb.append(colStats.sumTotalTermFreq);
sb.append(',');
sb.append(String.valueOf(colStats.sumDocFreq));
sb.append(colStats.sumDocFreq);
return sb.toString();
}
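// For example (illustrative): a CollectionStats for field "foo_t" with maxDoc=100, docCount=80,
// sumTotalTermFreq=1000 and sumDocFreq=500 serializes to "foo_t,100,80,1000,500".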
@ -78,15 +179,69 @@ public class StatsUtil {
}
}
public static String termToString(Term t) {
public static String termToEncodedString(Term t) {
StringBuilder sb = new StringBuilder();
sb.append(t.field()).append(':');
BytesRef bytes = t.bytes();
sb.append(Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.offset));
sb.append(encode(t.text()));
return sb.toString();
}
public static final char ESCAPE = '_';
public static final char ESCAPE_ENTRY_SEPARATOR = '0';
public static String encode(String value) {
StringBuilder output = new StringBuilder(value.length() + 2);
for (int i = 0; i < value.length(); i++) {
char c = value.charAt(i);
switch (c) {
case ESCAPE :
output.append(ESCAPE).append(ESCAPE);
break;
case ENTRY_SEPARATOR_CHAR :
output.append(ESCAPE).append(ESCAPE_ENTRY_SEPARATOR);
break;
default :
output.append(c);
}
}
return URLEncoder.encode(output.toString(), Charset.forName("UTF-8"));
}
public static String decode(String value) throws IOException {
value = URLDecoder.decode(value, Charset.forName("UTF-8"));
StringBuilder output = new StringBuilder(value.length());
for (int i = 0; i < value.length(); i++) {
char c = value.charAt(i);
// escaped char follows
if (c == ESCAPE && i < value.length() - 1) {
i++;
char next = value.charAt(i);
if (next == ESCAPE) {
output.append(ESCAPE);
} else if (next == ESCAPE_ENTRY_SEPARATOR) {
output.append(ENTRY_SEPARATOR_CHAR);
} else {
throw new IOException("invalid escape sequence in " + value);
}
} else {
output.append(c);
}
}
return output.toString();
}
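// Worked example (illustrative): the escape step turns '_' into "__" and '!' into "_0" before URL-encoding,
// so encode("bar_baz!qux") yields "bar__baz_0qux" and decode("bar__baz_0qux") restores "bar_baz!qux".
// This keeps ENTRY_SEPARATOR ('!') safe to use as a delimiter between encoded entries.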
public static String termToEncodedString(String term) {
int idx = term.indexOf(':');
if (idx == -1) {
log.warn("Invalid term data without ':': '" + term + "'");
return null;
}
String prefix = term.substring(0, idx + 1);
String value = term.substring(idx + 1);
return prefix + encode(value);
}
private static Term termFromString(String data) {
public static Term termFromEncodedString(String data) {
if (data == null || data.trim().length() == 0) {
log.warn("Invalid empty term value");
return null;
@ -99,76 +254,50 @@ public class StatsUtil {
String field = data.substring(0, idx);
String value = data.substring(idx + 1);
try {
return new Term(field, value);
// XXX this would be more correct
// byte[] bytes = Base64.base64ToByteArray(value);
// return new Term(field, new BytesRef(bytes));
return new Term(field, decode(value));
} catch (Exception e) {
log.warn("Invalid term value '" + value + "'");
return null;
}
}
public static String termStatsToString(TermStats termStats,
boolean includeTerm) {
public static String termStatsToString(TermStats termStats, boolean encode) {
StringBuilder sb = new StringBuilder();
if (includeTerm) {
sb.append(termStats.term).append(',');
}
sb.append(String.valueOf(termStats.docFreq));
sb.append(encode ? termToEncodedString(termStats.term) : termStats.term).append(',');
sb.append(termStats.docFreq);
sb.append(',');
sb.append(String.valueOf(termStats.totalTermFreq));
sb.append(termStats.totalTermFreq);
return sb.toString();
}
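// For example (illustrative): a TermStats for term "foo_t:bar" with docFreq=10 and totalTermFreq=15
// serializes (with encode=true) to "foo_t:bar,10,15"; values that need escaping go through termToEncodedString.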
private static TermStats termStatsFromString(String data, Term t) {
private static TermStats termStatsFromString(String data) {
if (data == null || data.trim().length() == 0) {
log.warn("Invalid empty term stats string");
return null;
}
String[] vals = data.split(",");
if (vals.length < 2) {
if (vals.length < 3) {
log.warn("Invalid term stats string, num fields " + vals.length
+ " < 2, '" + data + "'");
return null;
}
Term termToUse;
int idx = 0;
if (vals.length == 3) {
idx++;
// with term
Term term = termFromString(vals[0]);
if (term != null) {
termToUse = term;
if (t != null) {
assert term.equals(t);
}
} else { // failed term decoding
termToUse = t;
}
} else {
termToUse = t;
}
if (termToUse == null) {
log.warn("Missing term in termStats '" + data + "'");
+ " < 3, '" + data + "'");
return null;
}
Term term = termFromEncodedString(vals[0]);
try {
long docFreq = Long.parseLong(vals[idx++]);
long totalTermFreq = Long.parseLong(vals[idx]);
return new TermStats(termToUse.toString(), docFreq, totalTermFreq);
long docFreq = Long.parseLong(vals[1]);
long totalTermFreq = Long.parseLong(vals[2]);
return new TermStats(term.toString(), docFreq, totalTermFreq);
} catch (Exception e) {
log.warn("Invalid termStats string '" + data + "'");
return null;
}
}
public static Map<String,CollectionStats> colStatsMapFromString(String data) {
if (data == null || data.trim().length() == 0) {
return null;
}
Map<String,CollectionStats> map = new HashMap<String,CollectionStats>();
String[] entries = data.split("!");
String[] entries = data.split(ENTRY_SEPARATOR);
for (String es : entries) {
CollectionStats stats = colStatsFromString(es);
if (stats != null) {
@ -185,7 +314,7 @@ public class StatsUtil {
StringBuilder sb = new StringBuilder();
for (Entry<String,CollectionStats> e : stats.entrySet()) {
if (sb.length() > 0) {
sb.append('!');
sb.append(ENTRY_SEPARATOR);
}
sb.append(colStatsToString(e.getValue()));
}
@ -197,9 +326,9 @@ public class StatsUtil {
return null;
}
Map<String,TermStats> map = new HashMap<>();
String[] entries = data.split("!");
String[] entries = data.split(ENTRY_SEPARATOR);
for (String es : entries) {
TermStats termStats = termStatsFromString(es, null);
TermStats termStats = termStatsFromString(es);
if (termStats != null) {
map.put(termStats.term, termStats);
}
@ -214,7 +343,7 @@ public class StatsUtil {
StringBuilder sb = new StringBuilder();
for (Entry<String,TermStats> e : stats.entrySet()) {
if (sb.length() > 0) {
sb.append('!');
sb.append(ENTRY_SEPARATOR);
}
sb.append(termStatsToString(e.getValue(), true));
}

View File

@ -33,7 +33,7 @@ public class TermStats {
this.term = term;
t = makeTerm(term);
}
private Term makeTerm(String s) {
int idx = s.indexOf(':');
if (idx == -1) {
@ -68,6 +68,6 @@ public class TermStats {
}
public String toString() {
return StatsUtil.termStatsToString(this, true);
return StatsUtil.termStatsToString(this, false);
}
}

View File

@ -32,4 +32,6 @@
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<similarity class="${solr.similarity:solr.SchemaSimilarityFactory}"/>
</schema>

View File

@ -29,6 +29,8 @@
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
<statsCache class="${solr.statsCache:}"/>
<updateHandler class="solr.DirectUpdateHandler2">
<commitWithin>
<softCommit>${solr.commitwithin.softcommit:true}</softCommit>

View File

@ -0,0 +1,221 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import java.lang.invoke.MethodHandles;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.GenericSolrRequest;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.similarities.CustomSimilarityFactory;
import org.apache.solr.search.stats.StatsCache;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Base class for testing {@link StatsCache} implementations in SolrCloud mode,
 * comparing distributed results and cache metrics against a single-node control core.
 */
@Ignore("Abstract classes should not be executed as tests")
public abstract class TestBaseStatsCacheCloud extends SolrCloudTestCase {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
protected int numNodes = 2;
protected String configset = "cloud-dynamic";
protected String collectionName = "collection_" + getClass().getSimpleName();
protected Function<Integer, SolrInputDocument> generator = i -> {
SolrInputDocument doc = new SolrInputDocument("id", "id-" + i);
if (i % 3 == 0) {
doc.addField("foo_t", "bar baz");
} else if (i % 3 == 1) {
doc.addField("foo_t", "bar");
} else {
// skip the field
}
return doc;
};
protected CloudSolrClient solrClient;
protected SolrClient control;
protected int NUM_DOCS = 100;
// implementation name
protected abstract String getImplementationName();
// does this implementation produce the same distrib scores as local ones?
protected abstract boolean assertSameScores();
@Before
public void setupCluster() throws Exception {
// create control core & client
System.setProperty("solr.statsCache", getImplementationName());
System.setProperty("solr.similarity", CustomSimilarityFactory.class.getName());
initCore("solrconfig-minimal.xml", "schema-tiny.xml");
control = new EmbeddedSolrServer(h.getCore());
// create cluster
configureCluster(numNodes) // 2 + random().nextInt(3)
.addConfig("conf", configset(configset))
.configure();
solrClient = cluster.getSolrClient();
createTestCollection();
}
protected void createTestCollection() throws Exception {
CollectionAdminRequest.createCollection(collectionName, "conf", 2, numNodes)
.setMaxShardsPerNode(2)
.process(solrClient);
indexDocs(solrClient, collectionName, NUM_DOCS, 0, generator);
indexDocs(control, "collection1", NUM_DOCS, 0, generator);
}
@After
public void tearDownCluster() {
System.clearProperty("solr.statsCache");
System.clearProperty("solr.similarity");
}
@Test
public void testBasicStats() throws Exception {
QueryResponse cloudRsp = solrClient.query(collectionName,
params("q", "foo_t:\"bar baz\"", "fl", "*,score", "rows", "" + NUM_DOCS, "debug", "true"));
QueryResponse controlRsp = control.query("collection1",
params("q", "foo_t:\"bar baz\"", "fl", "*,score", "rows", "" + NUM_DOCS, "debug", "true"));
assertResponses(controlRsp, cloudRsp, assertSameScores());
// test after updates
indexDocs(solrClient, collectionName, NUM_DOCS, NUM_DOCS, generator);
indexDocs(control, "collection1", NUM_DOCS, NUM_DOCS, generator);
cloudRsp = solrClient.query(collectionName,
params("q", "foo_t:\"bar baz\"", "fl", "*,score", "rows", "" + (NUM_DOCS * 2)));
controlRsp = control.query("collection1",
params("q", "foo_t:\"bar baz\"", "fl", "*,score", "rows", "" + (NUM_DOCS * 2)));
assertResponses(controlRsp, cloudRsp, assertSameScores());
// check cache metrics
StatsCache.StatsCacheMetrics statsCacheMetrics = new StatsCache.StatsCacheMetrics();
for (JettySolrRunner jettySolrRunner : cluster.getJettySolrRunners()) {
try (SolrClient client = getHttpSolrClient(jettySolrRunner.getBaseUrl().toString())) {
NamedList<Object> metricsRsp = client.request(
new GenericSolrRequest(SolrRequest.METHOD.GET, "/admin/metrics", params("group", "solr.core", "prefix", "CACHE.searcher.statsCache")));
assertNotNull(metricsRsp);
NamedList<Object> metricsPerReplica = (NamedList<Object>)metricsRsp.get("metrics");
assertNotNull("no metrics perReplica", metricsPerReplica);
//log.info("======= Node: " + jettySolrRunner.getBaseUrl());
//log.info("======= Metrics:\n" + Utils.toJSONString(metricsPerReplica));
metricsPerReplica.forEach((replica, metrics) -> {
Map<String, Object> values = (Map<String, Object>)((NamedList<Object>)metrics).get("CACHE.searcher.statsCache");
values.forEach((name, value) -> {
long val = value instanceof Number ? ((Number) value).longValue() : 0;
switch (name) {
case "lookups" :
statsCacheMetrics.lookups.add(val);
break;
case "returnLocalStats" :
statsCacheMetrics.returnLocalStats.add(val);
break;
case "mergeToGlobalStats" :
statsCacheMetrics.mergeToGlobalStats.add(val);
break;
case "missingGlobalFieldStats" :
statsCacheMetrics.missingGlobalFieldStats.add(val);
break;
case "missingGlobalTermStats" :
statsCacheMetrics.missingGlobalTermStats.add(val);
break;
case "receiveGlobalStats" :
statsCacheMetrics.receiveGlobalStats.add(val);
break;
case "retrieveStats" :
statsCacheMetrics.retrieveStats.add(val);
break;
case "sendGlobalStats" :
statsCacheMetrics.sendGlobalStats.add(val);
break;
case "useCachedGlobalStats" :
statsCacheMetrics.useCachedGlobalStats.add(val);
break;
case "statsCacheImpl" :
assertTrue("incorreect cache impl, expected" + getImplementationName() + " but was " + value,
getImplementationName().endsWith((String)value));
break;
default:
fail("Unexpected cache metrics: key=" + name + ", value=" + value);
}
});
});
}
}
checkStatsCacheMetrics(statsCacheMetrics);
}
protected void checkStatsCacheMetrics(StatsCache.StatsCacheMetrics statsCacheMetrics) {
assertEquals(statsCacheMetrics.toString(), 0, statsCacheMetrics.missingGlobalFieldStats.intValue());
assertEquals(statsCacheMetrics.toString(), 0, statsCacheMetrics.missingGlobalTermStats.intValue());
}
protected void assertResponses(QueryResponse controlRsp, QueryResponse cloudRsp, boolean sameScores) throws Exception {
Map<String, SolrDocument> cloudDocs = new HashMap<>();
Map<String, SolrDocument> controlDocs = new HashMap<>();
cloudRsp.getResults().forEach(doc -> cloudDocs.put((String) doc.getFieldValue("id"), doc));
controlRsp.getResults().forEach(doc -> controlDocs.put((String) doc.getFieldValue("id"), doc));
assertEquals("number of docs", controlDocs.size(), cloudDocs.size());
for (Map.Entry<String, SolrDocument> entry : controlDocs.entrySet()) {
SolrDocument controlDoc = entry.getValue();
SolrDocument cloudDoc = cloudDocs.get(entry.getKey());
assertNotNull("missing cloud doc " + controlDoc, cloudDoc);
Float controlScore = (Float) controlDoc.getFieldValue("score");
Float cloudScore = (Float) cloudDoc.getFieldValue("score");
if (sameScores) {
assertEquals("cloud score differs from control", controlScore, cloudScore, controlScore * 0.01f);
} else {
assertFalse("cloud score the same as control", controlScore == cloudScore);
}
}
}
protected void indexDocs(SolrClient client, String collectionName, int num, int start, Function<Integer, SolrInputDocument> generator) throws Exception {
UpdateRequest ureq = new UpdateRequest();
for (int i = 0; i < num; i++) {
SolrInputDocument doc = generator.apply(i + start);
ureq.add(doc);
}
ureq.process(client, collectionName);
client.commit(collectionName);
}
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import org.apache.solr.search.stats.ExactSharedStatsCache;
/**
 * Distributed test of {@link ExactSharedStatsCache}.
 */
public class TestExactSharedStatsCacheCloud extends TestBaseStatsCacheCloud {
@Override
protected boolean assertSameScores() {
return true;
}
@Override
protected String getImplementationName() {
return ExactSharedStatsCache.class.getName();
}
}

View File

@ -0,0 +1,36 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import org.apache.solr.search.stats.ExactStatsCache;
import org.apache.solr.util.LogLevel;
/**
 * Distributed test of {@link ExactStatsCache}.
 */
@LogLevel("org.apache.solr.search=DEBUG")
public class TestExactStatsCacheCloud extends TestBaseStatsCacheCloud {
@Override
protected boolean assertSameScores() {
return true;
}
@Override
protected String getImplementationName() {
return ExactStatsCache.class.getName();
}
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import org.apache.solr.search.stats.LRUStatsCache;
/**
 * Distributed test of {@link LRUStatsCache}.
 */
public class TestLRUStatsCacheCloud extends TestBaseStatsCacheCloud {
@Override
protected boolean assertSameScores() {
return true;
}
@Override
protected String getImplementationName() {
return LRUStatsCache.class.getName();
}
}

View File

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cloud;
import org.apache.solr.search.stats.LocalStatsCache;
import org.apache.solr.search.stats.StatsCache;
import org.apache.solr.util.LogLevel;
/**
 * Distributed test of {@link LocalStatsCache}.
 */
@LogLevel("org.apache.solr.search=DEBUG")
public class TestLocalStatsCacheCloud extends TestBaseStatsCacheCloud {
@Override
protected boolean assertSameScores() {
return false;
}
@Override
protected String getImplementationName() {
return LocalStatsCache.class.getName();
}
@Override
protected void checkStatsCacheMetrics(StatsCache.StatsCacheMetrics statsCacheMetrics) {
assertTrue("LocalStatsCache should produce missing stats: " + statsCacheMetrics,
statsCacheMetrics.missingGlobalFieldStats.intValue() > 0);
assertTrue("LocalStatsCache should produce missing stats: " + statsCacheMetrics,
statsCacheMetrics.missingGlobalTermStats.intValue() > 0);
}
}

View File

@ -185,9 +185,11 @@ public class DebugComponentTest extends SolrTestCaseJ4 {
//if the request has debugQuery=true or debug=track, the sreq should get debug=track always
assertTrue(Arrays.asList(sreq.params.getParams(CommonParams.DEBUG)).contains(CommonParams.TRACK));
//the purpose must be added as readable param to be included in the shard logs
assertEquals("GET_FIELDS,GET_DEBUG", sreq.params.get(CommonParams.REQUEST_PURPOSE));
assertEquals("GET_FIELDS,GET_DEBUG,SET_TERM_STATS", sreq.params.get(CommonParams.REQUEST_PURPOSE));
//the rid must be added to be included in the shard logs
assertEquals("123456-my_rid", sreq.params.get(CommonParams.REQUEST_ID));
// close requests - this method obtains a searcher in order to access its StatsCache
req.close();
}
}

View File

@ -54,14 +54,14 @@ public class TestFastLRUCache extends SolrTestCase {
public void testPercentageAutowarm() throws IOException {
FastLRUCache<Object, Object> fastCache = new FastLRUCache<>();
fastCache.initializeMetrics(metricManager, registry, "foo", scope);
MetricsMap metrics = fastCache.getMetricsMap();
Map<String, String> params = new HashMap<>();
params.put("size", "100");
params.put("initialSize", "10");
params.put("autowarmCount", "100%");
CacheRegenerator cr = new NoOpRegenerator();
Object o = fastCache.init(params, null, cr);
fastCache.initializeMetrics(metricManager, registry, "foo", scope);
MetricsMap metrics = fastCache.getMetricsMap();
fastCache.setState(SolrCache.State.LIVE);
for (int i = 0; i < 101; i++) {
fastCache.put(i + 1, "" + (i + 1));
@ -74,9 +74,9 @@ public class TestFastLRUCache extends SolrTestCase {
assertEquals(101L, nl.get("inserts"));
assertEquals(null, fastCache.get(1)); // first item put in should be the first out
FastLRUCache<Object, Object> fastCacheNew = new FastLRUCache<>();
fastCacheNew.init(params, o, cr);
fastCacheNew.initializeMetrics(metricManager, registry, "foo", scope);
metrics = fastCacheNew.getMetricsMap();
fastCacheNew.init(params, o, cr);
fastCacheNew.warm(null, fastCache);
fastCacheNew.setState(SolrCache.State.LIVE);
fastCache.close();
@ -104,21 +104,21 @@ public class TestFastLRUCache extends SolrTestCase {
private void doTestPercentageAutowarm(int limit, int percentage, int[] hits, int[]misses) {
FastLRUCache<Object, Object> fastCache = new FastLRUCache<>();
fastCache.initializeMetrics(metricManager, registry, "foo", scope);
Map<String, String> params = new HashMap<>();
params.put("size", String.valueOf(limit));
params.put("initialSize", "10");
params.put("autowarmCount", percentage + "%");
CacheRegenerator cr = new NoOpRegenerator();
Object o = fastCache.init(params, null, cr);
fastCache.initializeMetrics(metricManager, registry, "foo", scope);
fastCache.setState(SolrCache.State.LIVE);
for (int i = 1; i <= limit; i++) {
fastCache.put(i, "" + i);//adds numbers from 1 to 100
}
FastLRUCache<Object, Object> fastCacheNew = new FastLRUCache<>();
fastCacheNew.initializeMetrics(metricManager, registry, "foo", scope);
fastCacheNew.init(params, o, cr);
fastCacheNew.initializeMetrics(metricManager, registry, "foo", scope);
fastCacheNew.warm(null, fastCache);
fastCacheNew.setState(SolrCache.State.LIVE);
fastCache.close();
@ -138,12 +138,12 @@ public class TestFastLRUCache extends SolrTestCase {
public void testNoAutowarm() throws IOException {
FastLRUCache<Object, Object> fastCache = new FastLRUCache<>();
fastCache.initializeMetrics(metricManager, registry, "foo", scope);
Map<String, String> params = new HashMap<>();
params.put("size", "100");
params.put("initialSize", "10");
CacheRegenerator cr = new NoOpRegenerator();
Object o = fastCache.init(params, null, cr);
fastCache.initializeMetrics(metricManager, registry, "foo", scope);
fastCache.setState(SolrCache.State.LIVE);
for (int i = 0; i < 101; i++) {
fastCache.put(i + 1, "" + (i + 1));
@ -198,13 +198,13 @@ public class TestFastLRUCache extends SolrTestCase {
public void testSimple() throws IOException {
FastLRUCache sc = new FastLRUCache();
sc.initializeMetrics(metricManager, registry, "foo", scope);
Map l = new HashMap();
l.put("size", "100");
l.put("initialSize", "10");
l.put("autowarmCount", "25");
CacheRegenerator cr = new NoOpRegenerator();
Object o = sc.init(l, null, cr);
sc.initializeMetrics(metricManager, registry, "foo", scope);
sc.setState(SolrCache.State.LIVE);
for (int i = 0; i < 101; i++) {
sc.put(i + 1, "" + (i + 1));
@ -221,8 +221,8 @@ public class TestFastLRUCache extends SolrTestCase {
FastLRUCache scNew = new FastLRUCache();
scNew.initializeMetrics(metricManager, registry, "foo", scope);
scNew.init(l, o, cr);
scNew.initializeMetrics(metricManager, registry, "foo", scope);
scNew.warm(null, sc);
scNew.setState(SolrCache.State.LIVE);
sc.close();
@ -307,13 +307,13 @@ public class TestFastLRUCache extends SolrTestCase {
public void testAccountable() {
FastLRUCache<Query, DocSet> sc = new FastLRUCache<>();
try {
sc.initializeMetrics(metricManager, registry, "foo", scope);
Map l = new HashMap();
l.put("size", "100");
l.put("initialSize", "10");
l.put("autowarmCount", "25");
CacheRegenerator cr = new NoOpRegenerator();
Object o = sc.init(l, null, cr);
sc.initializeMetrics(metricManager, registry, "foo", scope);
sc.setState(SolrCache.State.LIVE);
long initialBytes = sc.ramBytesUsed();
WildcardQuery q = new WildcardQuery(new Term("foo", "bar"));
@ -334,12 +334,12 @@ public class TestFastLRUCache extends SolrTestCase {
public void testSetLimits() throws Exception {
FastLRUCache<String, Accountable> cache = new FastLRUCache<>();
cache.initializeMetrics(metricManager, registry, "foo", scope);
Map<String, String> params = new HashMap<>();
params.put("size", "6");
params.put("maxRamMB", "8");
CacheRegenerator cr = new NoOpRegenerator();
Object o = cache.init(params, null, cr);
cache.initializeMetrics(metricManager, registry, "foo", scope);
for (int i = 0; i < 6; i++) {
cache.put("" + i, new Accountable() {
@Override

View File

@ -41,6 +41,7 @@ public class TestDefaultStatsCache extends BaseDistributedSearchTestCase {
@Test
public void test() throws Exception {
del("*:*");
commit();
String aDocId=null;
for (int i = 0; i < clients.size(); i++) {
int shard = i + 1;

View File

@ -41,7 +41,10 @@ public interface ShardParams {
/** The requested URL for this shard */
String SHARD_URL = "shard.url";
/** The requested shard name */
String SHARD_NAME = "shard.name";
/** The Request Handler for shard requests */
String SHARDS_QT = "shards.qt";