SOLR-13979: Expose separate metrics for distributed and non-distributed requests.

This commit is contained in:
Andrzej Bialecki 2019-12-09 13:46:33 +01:00
parent 0d78535dcf
commit 86cab79730
3 changed files with 35 additions and 2 deletions

View File

@ -165,6 +165,8 @@ Improvements
* SOLR-13987: Admin UI should not rely on javascript eval() (rmuir, Kevin Risden)
* SOLR-13979: Expose separate metrics for distributed and non-distributed requests. (ab)
Optimizations
---------------------
(No changes)

View File

@ -19,7 +19,6 @@ package org.apache.solr.handler;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import com.codahale.metrics.Counter;
@ -30,6 +29,7 @@ import org.apache.solr.api.Api;
import org.apache.solr.api.ApiBag;
import org.apache.solr.api.ApiSupport;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
@ -68,7 +68,11 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
private Counter requests = new Counter();
private final Map<String, Counter> shardPurposes = new ConcurrentHashMap<>();
private Timer requestTimes = new Timer();
private Timer distribRequestTimes = new Timer();
private Timer localRequestTimes = new Timer();
private Counter totalTime = new Counter();
private Counter distribTotalTime = new Counter();
private Counter localTotalTime = new Counter();
private final long handlerStart;
@ -76,7 +80,6 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
private PluginInfo pluginInfo;
private Set<String> metricNames = ConcurrentHashMap.newKeySet();
protected SolrMetricsContext solrMetricsContext;
@ -156,7 +159,11 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
shardPurposes.forEach((k, v) -> map.put(k, v.getCount())));
solrMetricsContext.gauge(metricsMap, true, "shardRequests", getCategory().toString(), scope);
requestTimes = solrMetricsContext.timer("requestTimes", getCategory().toString(), scope);
distribRequestTimes = solrMetricsContext.timer("requestTimes", getCategory().toString(), scope, "distrib");
localRequestTimes = solrMetricsContext.timer("requestTimes", getCategory().toString(), scope, "local");
totalTime = solrMetricsContext.counter("totalTime", getCategory().toString(), scope);
distribTotalTime = solrMetricsContext.counter("totalTime", getCategory().toString(), scope, "distrib");
localTotalTime = solrMetricsContext.counter("totalTime", getCategory().toString(), scope, "local");
solrMetricsContext.gauge(() -> handlerStart, true, "handlerStart", getCategory().toString(), scope);
}
@ -177,6 +184,9 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
@Override
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
requests.inc();
// requests are distributed by default when ZK is in use, unless indicated otherwise
boolean distrib = req.getParams().getBool(CommonParams.DISTRIB,
req.getCore() != null ? req.getCore().getCoreContainer().isZooKeeperAware() : false);
if (req.getParams().getBool(ShardParams.IS_SHARD, false)) {
shardPurposes.computeIfAbsent("total", name -> new Counter()).inc();
int purpose = req.getParams().getInt(ShardParams.SHARDS_PURPOSE, 0);
@ -188,6 +198,7 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
}
}
Timer.Context timer = requestTimes.time();
Timer.Context dTimer = distrib ? distribRequestTimes.time() : localRequestTimes.time();
try {
if (pluginInfo != null && pluginInfo.attributes.containsKey(USEPARAM))
req.getContext().put(USEPARAM, pluginInfo.attributes.get(USEPARAM));
@ -246,8 +257,14 @@ public abstract class RequestHandlerBase implements SolrRequestHandler, SolrInfo
}
}
} finally {
dTimer.stop();
long elapsed = timer.stop();
totalTime.inc(elapsed);
if (distrib) {
distribTotalTime.inc(elapsed);
} else {
localTotalTime.inc(elapsed);
}
}
}

View File

@ -94,6 +94,20 @@ The table below shows the metric names and attributes to request:
`UPDATE./update.handlerStart` |Epoch time when the handler was registered.
|===
*Distributed vs. Local Request Times*
Processing of a single distributed request in SolrCloud usually requires making several requests to
other nodes and other replicas. The common statistics listed above lump these timings together, even though
they are very different in nature, thus making it difficult to measure the latency of distributed and
local requests separately. Solr 8.4 introduced additional statistics that help to do this.
These metrics are structured the same as the `requestTimes` and `totalTime` metrics above, but they use
different full names, e.g., `QUERY./select.distrib.requestTimes` and `QUERY./select.local.requestTimes`.
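The metrics under the `distrib` path correspond to the time it takes for a (potentially) distributed
request to complete all remote calls plus any local processing and to return the result to the caller.
A request is counted under `distrib` when its `distrib` parameter is `true`; if the parameter is absent,
it defaults to `true` when Solr runs in SolrCloud (ZooKeeper-aware) mode and to `false` otherwise.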
The metrics under the `local` path correspond to the time it takes for a local call (non-distributed,
i.e. being processed only by the Solr core where the handler operates) to complete.
== Update Handler
This section has information on the total number of adds and how many commits have been fired against a Solr core.