SOLR-12438: Improve status reporting of metrics history API.

This commit is contained in:
Andrzej Bialecki 2018-06-07 21:23:55 +02:00
parent d7abebd7af
commit 417940cdd7
10 changed files with 581 additions and 316 deletions

View File

@ -162,7 +162,7 @@ New Features
* SOLR-12401: Add getValue() and setValue() Stream Evaluators (Joel Bernstein, janhoy)
* SOLR-11779: Basic long-term collection of aggregated metrics. Historical data is
* SOLR-11779, SOLR-12438: Basic long-term collection of aggregated metrics. Historical data is
maintained as multi-resolution time series using round-robin databases in the '.system'
collection. New /admin/metrics/history API allows retrieval of this data in numeric
or graph formats. (ab)

View File

@ -26,6 +26,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@ -43,6 +44,9 @@ import org.apache.http.client.CredentialsProvider;
import org.apache.http.config.Lookup;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.Directory;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.client.solrj.impl.SolrHttpClientBuilder;
@ -59,6 +63,7 @@ import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Replica.State;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.Utils;
@ -570,21 +575,7 @@ public class CoreContainer {
containerHandlers.put(METRICS_PATH, metricsHandler);
metricsHandler.initializeMetrics(metricManager, SolrInfoBean.Group.node.toString(), metricTag, METRICS_PATH);
if (isZooKeeperAware()) {
PluginInfo plugin = cfg.getMetricsConfig().getHistoryHandler();
Map<String, Object> initArgs;
if (plugin != null && plugin.initArgs != null) {
initArgs = plugin.initArgs.asMap(5);
initArgs.put(MetricsHistoryHandler.ENABLE_PROP, plugin.isEnabled());
} else {
initArgs = Collections.emptyMap();
}
metricsHistoryHandler = new MetricsHistoryHandler(getZkController().getNodeName(), metricsHandler,
new CloudSolrClient.Builder(Collections.singletonList(getZkController().getZkServerAddress()), Optional.empty())
.withHttpClient(updateShardHandler.getDefaultHttpClient()).build(), getZkController().getSolrCloudManager(), initArgs);
containerHandlers.put(METRICS_HISTORY_PATH, metricsHistoryHandler);
metricsHistoryHandler.initializeMetrics(metricManager, SolrInfoBean.Group.node.toString(), metricTag, METRICS_HISTORY_PATH);
}
createMetricsHistoryHandler();
autoscalingHistoryHandler = createHandler(AUTOSCALING_HISTORY_PATH, AutoscalingHistoryHandler.class.getName(), AutoscalingHistoryHandler.class);
metricsCollectorHandler = createHandler(MetricsCollectorHandler.HANDLER_PATH, MetricsCollectorHandler.class.getName(), MetricsCollectorHandler.class);
@ -748,6 +739,49 @@ public class CoreContainer {
status |= LOAD_COMPLETE | INITIAL_CORE_LOAD_COMPLETE;
}
/**
 * Creates the {@link MetricsHistoryHandler}, registers it under {@code METRICS_HISTORY_PATH}
 * and initializes its metrics. The handler is created both in ZooKeeper-aware (cloud) mode
 * and in standalone mode.
 * <p>
 * In cloud mode the handler gets the ZK node name, the cluster's {@link SolrCloudManager} and a
 * {@link CloudSolrClient}. In standalone mode it gets the configured node name (or
 * {@code "localhost"} when none is set), no cloud manager, and an {@link EmbeddedSolrServer}
 * bound to the system collection name.
 */
private void createMetricsHistoryHandler() {
  final PluginInfo historyPlugin = cfg.getMetricsConfig().getHistoryHandler();
  final Map<String, Object> handlerArgs;
  if (historyPlugin == null || historyPlugin.initArgs == null) {
    // must be mutable: standalone mode may add defaults below
    handlerArgs = new HashMap<>();
  } else {
    handlerArgs = historyPlugin.initArgs.asMap(5);
    handlerArgs.put(MetricsHistoryHandler.ENABLE_PROP, historyPlugin.isEnabled());
  }

  final String handlerNodeName;
  final SolrCloudManager solrCloudManager;
  final SolrClient historyClient;
  if (!isZooKeeperAware()) {
    final String configuredName = getNodeConfig().getNodeName();
    handlerNodeName = (configuredName == null || configuredName.isEmpty()) ? "localhost" : configuredName;
    solrCloudManager = null;
    historyClient = new EmbeddedSolrServer(this, CollectionAdminParams.SYSTEM_COLL) {
      @Override
      public void close() throws IOException {
        // do nothing - we close the container ourselves
      }
    };
    // enable local metrics unless specifically set otherwise
    final String[] localDefaults = {
        MetricsHistoryHandler.ENABLE_NODES_PROP,
        MetricsHistoryHandler.ENABLE_REPLICAS_PROP
    };
    for (String prop : localDefaults) {
      if (!handlerArgs.containsKey(prop)) {
        handlerArgs.put(prop, true);
      }
    }
  } else {
    handlerNodeName = getZkController().getNodeName();
    solrCloudManager = getZkController().getSolrCloudManager();
    historyClient = new CloudSolrClient.Builder(
            Collections.singletonList(getZkController().getZkServerAddress()), Optional.empty())
        .withHttpClient(updateShardHandler.getDefaultHttpClient())
        .build();
  }

  metricsHistoryHandler = new MetricsHistoryHandler(
      handlerNodeName, metricsHandler, historyClient, solrCloudManager, handlerArgs);
  containerHandlers.put(METRICS_HISTORY_PATH, metricsHistoryHandler);
  metricsHistoryHandler.initializeMetrics(
      metricManager, SolrInfoBean.Group.node.toString(), metricTag, METRICS_HISTORY_PATH);
}
public void securityNodeChanged() {
log.info("Security node changed, reloading security.json");
@ -792,6 +826,12 @@ public class CoreContainer {
ExecutorUtil.shutdownAndAwaitTermination(coreContainerWorkExecutor);
replayUpdatesExecutor.shutdownAndAwaitTermination();
if (metricsHistoryHandler != null) {
IOUtils.closeQuietly(metricsHistoryHandler.getSolrClient());
metricsHistoryHandler.close();
}
if (metricManager != null) {
metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.node));
metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.jvm));
@ -810,10 +850,6 @@ public class CoreContainer {
} catch (Exception e) {
log.warn("Error removing live node. Continuing to close CoreContainer", e);
}
if (metricsHistoryHandler != null) {
IOUtils.closeQuietly(metricsHistoryHandler.getSolrClient());
metricsHistoryHandler.close();
}
if (metricManager != null) {
metricManager.closeReporters(SolrMetricManager.getRegistryName(SolrInfoBean.Group.cluster));
}

View File

@ -19,10 +19,13 @@ package org.apache.solr.handler.admin;
import javax.imageio.ImageIO;
import java.awt.Color;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@ -45,17 +48,21 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.atomic.DoubleAdder;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.google.common.annotations.VisibleForTesting;
import org.apache.solr.api.Api;
import org.apache.solr.api.ApiBag;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.cloud.NodeStateProvider;
import org.apache.solr.client.solrj.cloud.SolrCloudManager;
import org.apache.solr.client.solrj.cloud.autoscaling.ReplicaInfo;
import org.apache.solr.client.solrj.cloud.autoscaling.Suggestion;
import org.apache.solr.client.solrj.cloud.autoscaling.VersionedData;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
import org.apache.solr.cloud.Overseer;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState;
@ -68,9 +75,12 @@ import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.JavaBinCodec;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.metrics.SolrMetricManager;
import org.apache.solr.metrics.rrd.SolrRrdBackendFactory;
@ -105,30 +115,32 @@ import static org.apache.solr.common.params.CommonParams.ID;
public class MetricsHistoryHandler extends RequestHandlerBase implements PermissionNameProvider, Closeable {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
// Default metric names collected per group. Each list is declared exactly once as a plain
// ArrayList and populated in the static initializer below; "double-brace" initialization is
// avoided because it creates an anonymous ArrayList subclass per constant.
public static final List<String> DEFAULT_CORE_COUNTERS = new ArrayList<>();
public static final List<String> DEFAULT_CORE_GAUGES = new ArrayList<>();
public static final List<String> DEFAULT_NODE_GAUGES = new ArrayList<>();
public static final List<String> DEFAULT_JVM_GAUGES = new ArrayList<>();

// Keys of the collection-level gauges.
public static final String NUM_SHARDS_KEY = "numShards";
public static final String NUM_REPLICAS_KEY = "numReplicas";
public static final String NUM_NODES_KEY = "numNodes";

public static final List<String> DEFAULT_COLLECTION_GAUGES = new ArrayList<>();

static {
  DEFAULT_JVM_GAUGES.add("memory.heap.used");
  DEFAULT_JVM_GAUGES.add("os.processCpuLoad");
  DEFAULT_JVM_GAUGES.add("os.systemLoadAverage");

  DEFAULT_NODE_GAUGES.add("CONTAINER.fs.coreRoot.usableSpace");

  DEFAULT_CORE_GAUGES.add("INDEX.sizeInBytes");

  DEFAULT_CORE_COUNTERS.add("QUERY./select.requests");
  DEFAULT_CORE_COUNTERS.add("UPDATE./update.requests");

  DEFAULT_COLLECTION_GAUGES.add(NUM_SHARDS_KEY);
  DEFAULT_COLLECTION_GAUGES.add(NUM_REPLICAS_KEY);
}
public static final String COLLECT_PERIOD_PROP = "collectPeriod";
public static final String SYNC_PERIOD_PROP = "syncPeriod";
@ -148,6 +160,7 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
private final int collectPeriod;
private final Map<String, List<String>> counters = new HashMap<>();
private final Map<String, List<String>> gauges = new HashMap<>();
private final String overseerUrlScheme;
private final Map<String, RrdDb> knownDbs = new ConcurrentHashMap<>();
@ -166,11 +179,17 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
if (pluginArgs != null) {
args.putAll(pluginArgs);
}
// override from ZK
Map<String, Object> props = (Map<String, Object>)cloudManager.getClusterStateProvider()
.getClusterProperty("metrics", Collections.emptyMap())
.getOrDefault("history", Collections.emptyMap());
args.putAll(props);
// override from ZK if available
if (cloudManager != null) {
Map<String, Object> props = (Map<String, Object>)cloudManager.getClusterStateProvider()
.getClusterProperty("metrics", Collections.emptyMap())
.getOrDefault("history", Collections.emptyMap());
args.putAll(props);
overseerUrlScheme = cloudManager.getClusterStateProvider().getClusterProperty("urlScheme", "http");
} else {
overseerUrlScheme = "http";
}
this.nodeName = nodeName;
this.enable = Boolean.parseBoolean(String.valueOf(args.getOrDefault(ENABLE_PROP, "true")));
@ -180,12 +199,12 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
this.collectPeriod = Integer.parseInt(String.valueOf(args.getOrDefault(COLLECT_PERIOD_PROP, DEFAULT_COLLECT_PERIOD)));
int syncPeriod = Integer.parseInt(String.valueOf(args.getOrDefault(SYNC_PERIOD_PROP, SolrRrdBackendFactory.DEFAULT_SYNC_PERIOD)));
factory = new SolrRrdBackendFactory(solrClient, CollectionAdminParams.SYSTEM_COLL,
syncPeriod, cloudManager.getTimeSource());
this.solrClient = solrClient;
this.metricsHandler = metricsHandler;
this.cloudManager = cloudManager;
this.timeSource = cloudManager.getTimeSource();
this.timeSource = cloudManager != null ? cloudManager.getTimeSource() : TimeSource.NANO_TIME;
factory = new SolrRrdBackendFactory(solrClient, CollectionAdminParams.SYSTEM_COLL,
syncPeriod, this.timeSource);
counters.put(Group.core.toString(), DEFAULT_CORE_COUNTERS);
counters.put(Group.node.toString(), Collections.emptyList());
@ -217,43 +236,60 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
}
}
// check that .system exists
public void checkSystemCollection() {
// check that .system exists
try {
if (cloudManager.isClosed() || Thread.interrupted()) {
factory.setPersistent(false);
return;
}
ClusterState clusterState = cloudManager.getClusterStateProvider().getClusterState();
DocCollection systemColl = clusterState.getCollectionOrNull(CollectionAdminParams.SYSTEM_COLL);
if (systemColl == null) {
if (logMissingCollection) {
log.warn("Missing " + CollectionAdminParams.SYSTEM_COLL + ", keeping metrics history in memory");
logMissingCollection = false;
}
factory.setPersistent(false);
return;
} else {
boolean ready = false;
for (Replica r : systemColl.getReplicas()) {
if (r.isActive(clusterState.getLiveNodes())) {
ready = true;
break;
}
}
if (!ready) {
log.debug(CollectionAdminParams.SYSTEM_COLL + " not ready yet, keeping metrics history in memory");
if (cloudManager != null) {
try {
if (cloudManager.isClosed() || Thread.interrupted()) {
factory.setPersistent(false);
return;
}
ClusterState clusterState = cloudManager.getClusterStateProvider().getClusterState();
DocCollection systemColl = clusterState.getCollectionOrNull(CollectionAdminParams.SYSTEM_COLL);
if (systemColl == null) {
if (logMissingCollection) {
log.warn("Missing " + CollectionAdminParams.SYSTEM_COLL + ", keeping metrics history in memory");
logMissingCollection = false;
}
factory.setPersistent(false);
return;
} else {
boolean ready = false;
for (Replica r : systemColl.getReplicas()) {
if (r.isActive(clusterState.getLiveNodes())) {
ready = true;
break;
}
}
if (!ready) {
log.debug(CollectionAdminParams.SYSTEM_COLL + " not ready yet, keeping metrics history in memory");
factory.setPersistent(false);
return;
}
}
} catch (Exception e) {
if (logMissingCollection) {
log.warn("Error getting cluster state, keeping metrics history in memory", e);
}
logMissingCollection = false;
factory.setPersistent(false);
return;
}
logMissingCollection = true;
factory.setPersistent(true);
} else {
try {
solrClient.query(CollectionAdminParams.SYSTEM_COLL, new SolrQuery(CommonParams.Q, "*:*", CommonParams.ROWS, "0"));
factory.setPersistent(true);
logMissingCollection = true;
} catch (Exception e) {
if (logMissingCollection) {
log.warn("Error querying .system collection, keeping metrics history in memory", e);
}
logMissingCollection = false;
factory.setPersistent(false);
}
} catch (Exception e) {
log.warn("Error getting cluster state, keeping metrics history in memory", e);
factory.setPersistent(false);
return;
}
logMissingCollection = true;
factory.setPersistent(true);
}
public SolrClient getSolrClient() {
@ -271,7 +307,11 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
return factory;
}
private boolean isOverseerLeader() {
private String getOverseerLeader() {
// non-ZK node has no Overseer
if (cloudManager == null) {
return null;
}
ZkNodeProps props = null;
try {
VersionedData data = cloudManager.getDistribStateManager().getData(
@ -281,24 +321,39 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
}
} catch (KeeperException | IOException | NoSuchElementException e) {
log.warn("Could not obtain overseer's address, skipping.", e);
return false;
return null;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return false;
return null;
}
if (props == null) {
return false;
return null;
}
String oid = props.getStr(ID);
if (oid == null) {
return false;
return null;
}
String[] ids = oid.split("-");
if (ids.length != 3) { // unknown format
log.warn("Unknown format of leader id, skipping: " + oid);
return false;
return null;
}
return ids[1];
}
/**
 * Checks whether this node is the Overseer leader. Passes {@code null} as the leader name
 * to {@link #amIOverseerLeader(String)}.
 *
 * @return the result of {@code amIOverseerLeader(null)}
 */
private boolean amIOverseerLeader() {
return amIOverseerLeader(null);
}
/**
 * Checks whether this node is the Overseer leader.
 *
 * @param leader node name of the Overseer leader if the caller already knows it, or
 *               {@code null} to obtain it from {@code getOverseerLeader()}
 * @return {@code true} only when a leader name is available and it equals this handler's
 *         {@code nodeName}; {@code false} otherwise
 */
private boolean amIOverseerLeader(String leader) {
  if (leader == null) {
    leader = getOverseerLeader();
  }
  // a still-unknown leader means this node cannot claim leadership
  return leader != null && nodeName.equals(leader);
}
private void collectMetrics() {
@ -383,7 +438,7 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
}
private void collectGlobalMetrics() {
if (!isOverseerLeader()) {
if (!amIOverseerLeader()) {
return;
}
Set<String> nodes = new HashSet<>(cloudManager.getClusterStateProvider().getLiveNodes());
@ -640,11 +695,19 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
if (cmd == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "unknown 'action' param '" + actionStr + "', supported actions: " + Cmd.actions);
}
Object res = null;
final SimpleOrderedMap<Object> res = new SimpleOrderedMap<>();
rsp.add("metrics", res);
switch (cmd) {
case LIST:
int rows = req.getParams().getInt(CommonParams.ROWS, SolrRrdBackendFactory.DEFAULT_MAX_DBS);
res = factory.list(rows);
List<Pair<String, Long>> lst = factory.list(rows);
lst.forEach(p -> {
SimpleOrderedMap<Object> data = new SimpleOrderedMap<>();
// RrdDb always uses seconds - convert here for compatibility
data.add("lastModified", TimeUnit.SECONDS.convert(p.second(), TimeUnit.MILLISECONDS));
data.add("node", nodeName);
res.add(p.first(), data);
});
break;
case GET:
String name = req.getParams().get(CommonParams.NAME);
@ -657,15 +720,14 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
if (format == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "unknown 'format' param '" + formatStr + "', supported formats: " + Format.formats);
}
if (!factory.exists(name)) {
rsp.add("error", "'" + name + "' doesn't exist");
} else {
if (factory.exists(name)) {
// get a throwaway copy (safe to close and discard)
RrdDb db = new RrdDb(URI_PREFIX + name, true, factory);
res = new NamedList<>();
NamedList<Object> data = new NamedList<>();
SimpleOrderedMap<Object> data = new SimpleOrderedMap<>();
data.add("data", getDbData(db, dsNames, format, req.getParams()));
((NamedList)res).add(name, data);
data.add("lastModified", db.getLastUpdateTime());
data.add("node", nodeName);
res.add(name, data);
db.close();
}
break;
@ -674,17 +736,14 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
if (name == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'name' is a required param");
}
if (!factory.exists(name)) {
rsp.add("error", "'" + name + "' doesn't exist");
} else {
if (factory.exists(name)) {
// get a throwaway copy (safe to close and discard)
RrdDb db = new RrdDb(URI_PREFIX + name, true, factory);
NamedList<Object> map = new NamedList<>();
NamedList<Object> status = new NamedList<>();
SimpleOrderedMap<Object> status = new SimpleOrderedMap<>();
status.add("status", getDbStatus(db));
map.add(name, status);
status.add("node", nodeName);
res.add(name, status);
db.close();
res = map;
}
break;
case DELETE:
@ -700,9 +759,61 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
rsp.add("success", "ok");
break;
}
if (res != null) {
rsp.add("metrics", res);
// when using in-memory DBs non-overseer node has no access to overseer DBs - in this case
// forward the request to Overseer leader if available
if (!factory.isPersistent()) {
String leader = getOverseerLeader();
if (leader != null && !amIOverseerLeader(leader)) {
// get & merge remote response
NamedList<Object> remoteRes = handleRemoteRequest(leader, req);
mergeRemoteRes(rsp, remoteRes);
}
}
SimpleOrderedMap<Object> apiState = new SimpleOrderedMap<>();
apiState.add("enableReplicas", enableReplicas);
apiState.add("enableNodes", enableNodes);
apiState.add("mode", enable ? (factory.isPersistent() ? "index" : "memory") : "inactive");
if (!factory.isPersistent()) {
apiState.add("message", "WARNING: metrics history is not being persisted. Create .system collection to start persisting history.");
}
rsp.add("state", apiState);
rsp.getResponseHeader().add("zkConnected", cloudManager != null);
}
/**
 * Sends the parameters of the given request to the metrics history endpoint
 * ({@code /api/cluster/metrics/history}) of another node and returns the decoded response.
 * The {@code wt} parameter is always overridden with {@code javabin}.
 *
 * @param nodeName name of the target node; converted to a base URL using the URL scheme
 *                 stored in {@code overseerUrlScheme}
 * @param req the local request whose parameters are forwarded
 * @return the unmarshalled remote response, or {@code null} when the target URL is
 *         malformed or an {@link IOException} occurs while sending or decoding
 */
private NamedList<Object> handleRemoteRequest(String nodeName, SolrQueryRequest req) {
  final String baseUrl = Utils.getBaseUrlForNodeName(nodeName, overseerUrlScheme);
  String url;
  try {
    final URL base = new URL(baseUrl);
    // keep protocol/host/port of the node, replace the path with the history endpoint
    final URL endpoint = new URL(base.getProtocol(), base.getHost(), base.getPort(), "/api/cluster/metrics/history");
    url = endpoint.toString();
  } catch (MalformedURLException e) {
    log.warn("Invalid Overseer url '" + baseUrl + "', unable to fetch remote metrics history", e);
    return null;
  }

  // always use javabin
  final ModifiableSolrParams forwarded = new ModifiableSolrParams(req.getParams());
  forwarded.set(CommonParams.WT, "javabin");
  url = url + "?" + forwarded.toString();

  try {
    final byte[] payload = cloudManager.httpRequest(
        url, SolrRequest.METHOD.GET, null, null, HttpClientUtil.DEFAULT_CONNECT_TIMEOUT, true);
    // response is always a NamedList
    try (JavaBinCodec codec = new JavaBinCodec()) {
      final Object decoded = codec.unmarshal(new ByteArrayInputStream(payload));
      return (NamedList<Object>) decoded;
    }
  } catch (IOException e) {
    log.warn("Exception forwarding request to Overseer at " + url, e);
    return null;
  }
}
/**
 * Appends every entry of the remote response's {@code "metrics"} section to the
 * {@code "metrics"} section of the local response. Does nothing when the remote response
 * is {@code null} or has no {@code "metrics"} entry.
 *
 * @param rsp local response; its {@code "metrics"} value is the merge target
 * @param remoteRes response obtained from the remote node, may be {@code null}
 */
private void mergeRemoteRes(SolrQueryResponse rsp, NamedList<Object> remoteRes) {
  final Object remoteSection = (remoteRes == null) ? null : remoteRes.get("metrics");
  if (remoteSection == null) {
    return;
  }
  final NamedList<Object> remoteMetrics = (NamedList<Object>) remoteSection;
  final SimpleOrderedMap<Object> localMetrics = (SimpleOrderedMap<Object>) rsp.getValues().get("metrics");
  // entries are appended in remote order; existing local entries are left untouched
  for (int i = 0, count = remoteMetrics.size(); i < count; i++) {
    localMetrics.add(remoteMetrics.getName(i), remoteMetrics.getVal(i));
  }
}
private NamedList<Object> getDbStatus(RrdDb db) throws IOException {
@ -750,7 +861,7 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
RrdDef def = db.getRrdDef();
ArcDef[] arcDefs = def.getArcDefs();
for (ArcDef arcDef : arcDefs) {
SimpleOrderedMap map = new SimpleOrderedMap();
SimpleOrderedMap<Object> map = new SimpleOrderedMap<>();
res.add(arcDef.dump(), map);
Archive a = db.getArchive(arcDef.getConsolFun(), arcDef.getSteps());
// startTime / endTime, arcStep are in seconds
@ -761,22 +872,21 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
if (format != Format.GRAPH) {
// add timestamps separately from values
long[] timestamps = fd.getTimestamps();
str.setLength(0);
for (int i = 0; i < timestamps.length; i++) {
if (format == Format.LIST) {
map.add("timestamps", timestamps[i]);
} else {
if (format == Format.LIST) {
// Arrays.asList works only on arrays of Objects
map.add("timestamps", Arrays.stream(timestamps).boxed().collect(Collectors.toList()));
} else {
str.setLength(0);
for (int i = 0; i < timestamps.length; i++) {
if (i > 0) {
str.append('\n');
}
str.append(String.valueOf(timestamps[i]));
}
}
if (format == Format.STRING) {
map.add("timestamps", str.toString());
}
}
SimpleOrderedMap values = new SimpleOrderedMap();
SimpleOrderedMap<Object> values = new SimpleOrderedMap<>();
map.add("values", values);
for (String name : dsNames) {
double[] vals = fd.getValues(name);
@ -825,9 +935,7 @@ public class MetricsHistoryHandler extends RequestHandlerBase implements Permiss
values.add(name, str.toString());
break;
case LIST:
for (int i = 0; i < vals.length; i++) {
values.add(name, vals[i]);
}
values.add(name, Arrays.stream(vals).boxed().collect(Collectors.toList()));
break;
}
}

View File

@ -19,6 +19,7 @@ package org.apache.solr.metrics.rrd;
import java.io.Closeable;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.ReentrantLock;
import org.rrd4j.core.RrdByteArrayBackend;
@ -36,14 +37,27 @@ public class SolrRrdBackend extends RrdByteArrayBackend implements Closeable {
private final ReentrantLock lock = new ReentrantLock();
private volatile boolean dirty = false;
private volatile boolean closed = false;
private volatile long lastModifiedTime;
/**
 * Simple holder that pairs a database buffer with the modification timestamp
 * that belongs to it.
 */
public static final class SyncData {
  /** Timestamp associated with {@link #data}. */
  public long timestamp;
  /** Raw buffer content; the array passed to the constructor is stored as-is, not copied. */
  public byte[] data;

  /**
   * @param data buffer content to hold
   * @param timestamp timestamp to associate with the buffer
   */
  public SyncData(byte[] data, long timestamp) {
    this.timestamp = timestamp;
    this.data = data;
  }
}
public SolrRrdBackend(String path, boolean readOnly, SolrRrdBackendFactory factory) {
super(path);
this.factory = factory;
this.lastModifiedTime = TimeUnit.MILLISECONDS.convert(factory.getTimeSource().getEpochTimeNs(), TimeUnit.NANOSECONDS);
try {
byte[] data = factory.getData(path);
if (data != null) {
this.buffer = data;
SyncData syncData = factory.getData(path);
if (syncData != null) {
this.buffer = syncData.data;
this.lastModifiedTime = syncData.timestamp;
}
} catch (IOException e) {
log.warn("Exception retrieving data from " + path + ", store will be readOnly", e);
@ -60,6 +74,7 @@ public class SolrRrdBackend extends RrdByteArrayBackend implements Closeable {
super(other.getPath());
readOnly = true;
factory = null;
this.lastModifiedTime = other.lastModifiedTime;
byte[] otherBuffer = other.buffer;
buffer = new byte[otherBuffer.length];
System.arraycopy(otherBuffer, 0, buffer, 0, otherBuffer.length);
@ -69,6 +84,10 @@ public class SolrRrdBackend extends RrdByteArrayBackend implements Closeable {
return readOnly;
}
/**
 * Returns the current value of this backend's {@code lastModifiedTime} field.
 *
 * @return last modification time as tracked by this backend
 */
public long getLastModifiedTime() {
return lastModifiedTime;
}
@Override
protected void write(long offset, byte[] bytes) throws IOException {
if (readOnly || closed) {
@ -77,13 +96,14 @@ public class SolrRrdBackend extends RrdByteArrayBackend implements Closeable {
lock.lock();
try {
super.write(offset, bytes);
lastModifiedTime = TimeUnit.MILLISECONDS.convert(factory.getTimeSource().getEpochTimeNs(), TimeUnit.NANOSECONDS);
dirty = true;
} finally {
lock.unlock();
}
}
public byte[] getSyncData() {
public SyncData getSyncData() {
if (readOnly || closed) {
return null;
}
@ -95,7 +115,7 @@ public class SolrRrdBackend extends RrdByteArrayBackend implements Closeable {
try {
byte[] bufferCopy = new byte[buffer.length];
System.arraycopy(buffer, 0, bufferCopy, 0, buffer.length);
return bufferCopy;
return new SyncData(bufferCopy, lastModifiedTime);
} finally {
lock.unlock();
}

View File

@ -22,14 +22,12 @@ import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledThreadPoolExecutor;
@ -47,6 +45,7 @@ import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.IOUtils;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.util.DefaultSolrThreadFactory;
import org.rrd4j.core.RrdBackend;
@ -114,6 +113,10 @@ public class SolrRrdBackendFactory extends RrdBackendFactory implements SolrClos
TimeUnit.MILLISECONDS);
}
/**
 * Returns the {@link TimeSource} held by this factory.
 *
 * @return this factory's {@code timeSource}
 */
public TimeSource getTimeSource() {
return timeSource;
}
private void ensureOpen() throws IOException {
if (closed) {
throw new IOException("Factory already closed");
@ -181,7 +184,7 @@ public class SolrRrdBackendFactory extends RrdBackendFactory implements SolrClos
}
}
byte[] getData(String path) throws IOException {
SolrRrdBackend.SyncData getData(String path) throws IOException {
if (!persistent) {
return null;
}
@ -203,7 +206,8 @@ public class SolrRrdBackendFactory extends RrdBackendFactory implements SolrClos
return null;
}
if (o instanceof byte[]) {
return (byte[])o;
Long time = (Long)doc.getFieldValue("timestamp_l");
return new SolrRrdBackend.SyncData((byte[])o, time);
} else {
throw new SolrServerException("Unexpected value of '" + DATA_FIELD + "' field: " + o.getClass().getName() + ": " + o);
}
@ -216,34 +220,58 @@ public class SolrRrdBackendFactory extends RrdBackendFactory implements SolrClos
backends.remove(path);
}
/**
 * Orders database descriptors by the natural order of their first element (the name);
 * the second element is ignored.
 */
private static final class DbComparator implements Comparator<Pair<String, Long>> {
  /** Shared instance; the comparator has no state. */
  static final DbComparator INSTANCE = new DbComparator();

  @Override
  public int compare(Pair<String, Long> o1, Pair<String, Long> o2) {
    final String leftName = o1.first();
    final String rightName = o2.first();
    return leftName.compareTo(rightName);
  }
}
/**
* List all available databases created by this node name
* @param maxLength maximum number of results to return
* @return list of database names, or empty
* @return list of database names and their last update times, or empty
* @throws IOException on server errors
*/
public List<String> list(int maxLength) throws IOException {
Set<String> names = new HashSet<>();
public List<Pair<String, Long>> list(int maxLength) throws IOException {
Map<String, Pair<String, Long>> byName = new HashMap<>();
if (persistent) {
try {
ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CommonParams.Q, "*:*");
params.add(CommonParams.FQ, CommonParams.TYPE + ":" + DOC_TYPE);
params.add(CommonParams.FL, "id");
params.add(CommonParams.FL, "id,timestamp_l");
params.add(CommonParams.ROWS, String.valueOf(maxLength));
QueryResponse rsp = solrClient.query(collection, params);
SolrDocumentList docs = rsp.getResults();
if (docs != null) {
docs.forEach(d -> names.add(((String)d.getFieldValue("id")).substring(idPrefixLength)));
docs.forEach(d -> {
Long time = (Long)d.getFieldValue("timestamp_l");
Pair<String, Long> p = new Pair<>(((String)d.getFieldValue("id")).substring(idPrefixLength), time);
byName.put(p.first(), p);
});
}
} catch (SolrServerException e) {
log.warn("Error retrieving RRD list", e);
}
}
// add in-memory backends not yet stored
names.addAll(backends.keySet());
ArrayList<String> list = new ArrayList<>(names);
Collections.sort(list);
// add in-memory backends not yet stored, or replace with more recent versions
backends.forEach((name, db) -> {
long lastModifiedTime = db.getLastModifiedTime();
Pair<String, Long> stored = byName.get(name);
Pair<String, Long> inMemory = new Pair(name, lastModifiedTime);
if (stored != null) {
if (stored.second() < lastModifiedTime) {
byName.put(name, inMemory);
}
} else {
byName.put(name, inMemory);
}
});
ArrayList<Pair<String, Long>> list = new ArrayList<>(byName.values());
Collections.sort(list, DbComparator.INSTANCE);
return list;
}
@ -301,25 +329,25 @@ public class SolrRrdBackendFactory extends RrdBackendFactory implements SolrClos
return;
}
log.debug("-- maybe sync backends: " + backends.keySet());
Map<String, byte[]> syncData = new HashMap<>();
Map<String, SolrRrdBackend.SyncData> syncDatas = new HashMap<>();
backends.forEach((path, backend) -> {
byte[] data = backend.getSyncData();
if (data != null) {
syncData.put(backend.getPath(), data);
SolrRrdBackend.SyncData syncData = backend.getSyncData();
if (syncData != null) {
syncDatas.put(backend.getPath(), syncData);
}
});
if (syncData.isEmpty()) {
if (syncDatas.isEmpty()) {
return;
}
log.debug("-- syncing " + syncData.keySet());
log.debug("-- syncing " + syncDatas.keySet());
// write updates
try {
syncData.forEach((path, data) -> {
syncDatas.forEach((path, syncData) -> {
SolrInputDocument doc = new SolrInputDocument();
doc.setField("id", ID_PREFIX + ID_SEP + path);
doc.addField(CommonParams.TYPE, DOC_TYPE);
doc.addField(DATA_FIELD, data);
doc.setField("timestamp", new Date(TimeUnit.MILLISECONDS.convert(timeSource.getEpochTimeNs(), TimeUnit.NANOSECONDS)));
doc.addField(DATA_FIELD, syncData.data);
doc.setField("timestamp_l", syncData.timestamp);
try {
solrClient.add(collection, doc);
} catch (SolrServerException | IOException e) {
@ -334,7 +362,7 @@ public class SolrRrdBackendFactory extends RrdBackendFactory implements SolrClos
} catch (SolrServerException e) {
log.warn("Error committing RRD data updates", e);
}
syncData.forEach((path, data) -> {
syncDatas.forEach((path, data) -> {
SolrRrdBackend backend = backends.get(path);
if (backend != null) {
backend.markClean();

View File

@ -35,6 +35,7 @@ import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.util.LogLevel;
import org.junit.AfterClass;
@ -94,12 +95,12 @@ public class MetricsHistoryIntegrationTest extends SolrCloudTestCase {
NamedList<Object> rsp = solrClient.request(createHistoryRequest(params(CommonParams.ACTION, "list")));
assertNotNull(rsp);
// expected solr.jvm, solr.node and solr.collection..system
List<String> lst = (List<String>)rsp.get("metrics");
SimpleOrderedMap<Object> lst = (SimpleOrderedMap<Object>) rsp.get("metrics");
assertNotNull(lst);
assertEquals(lst.toString(), 3, lst.size());
assertTrue(lst.toString(), lst.contains("solr.jvm"));
assertTrue(lst.toString(), lst.contains("solr.node"));
assertTrue(lst.toString(), lst.contains("solr.collection..system"));
assertNotNull(lst.toString(), lst.get("solr.jvm"));
assertNotNull(lst.toString(), lst.get("solr.node"));
assertNotNull(lst.toString(), lst.get("solr.collection..system"));
}
@Test

View File

@ -665,6 +665,7 @@ public class SimCloudManager implements SolrCloudManager {
}
queryRequest.getContext().put("httpMethod", req.getMethod().toString());
SolrQueryResponse queryResponse = new SolrQueryResponse();
queryResponse.addResponseHeader(new SimpleOrderedMap<>());
if (autoscaling) {
autoScalingHandler.handleRequest(queryRequest, queryResponse);
} else {

View File

@ -30,6 +30,7 @@ import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.cloud.autoscaling.sim.SimCloudManager;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.metrics.SolrMetricManager;
@ -57,7 +58,7 @@ public class MetricsHistoryHandlerTest extends SolrCloudTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
simulated = random().nextBoolean();
simulated = random().nextBoolean() || true;
Map<String, Object> args = new HashMap<>();
args.put(MetricsHistoryHandler.SYNC_PERIOD_PROP, 1);
args.put(MetricsHistoryHandler.COLLECT_PERIOD_PROP, 1);
@ -111,11 +112,11 @@ public class MetricsHistoryHandlerTest extends SolrCloudTestCase {
@Test
public void testBasic() throws Exception {
timeSource.sleep(10000);
List<String> list = handler.getFactory().list(100);
List<Pair<String, Long>> list = handler.getFactory().list(100);
// solr.jvm, solr.node, solr.collection..system
assertEquals(list.toString(), 3, list.size());
for (String path : list) {
RrdDb db = new RrdDb(MetricsHistoryHandler.URI_PREFIX + path, true, handler.getFactory());
for (Pair<String, Long> p : list) {
RrdDb db = new RrdDb(MetricsHistoryHandler.URI_PREFIX + p.first(), true, handler.getFactory());
int dsCount = db.getDsCount();
int arcCount = db.getArcCount();
assertTrue("dsCount should be > 0, was " + dsCount, dsCount > 0);

View File

@ -17,13 +17,13 @@
package org.apache.solr.metrics.rrd;
import java.util.Date;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.util.Pair;
import org.apache.solr.common.util.TimeSource;
import org.apache.solr.util.MockSearchableSolrClient;
import org.junit.After;
@ -78,15 +78,15 @@ public class SolrRrdBackendFactoryTest extends SolrTestCaseJ4 {
@Test
public void testBasic() throws Exception {
RrdDb db = new RrdDb(createDef(), factory);
List<String> list = factory.list(100);
List<Pair<String, Long>> list = factory.list(100);
assertEquals(list.toString(), 1, list.size());
assertEquals(list.toString(), "foo", list.get(0));
assertEquals(list.toString(), "foo", list.get(0).first());
timeSource.sleep(2000);
// there should be one sync data
assertEquals(solrClient.docs.toString(), 1, solrClient.docs.size());
String id = SolrRrdBackendFactory.ID_PREFIX + SolrRrdBackendFactory.ID_SEP + "foo";
SolrInputDocument doc = solrClient.docs.get(CollectionAdminParams.SYSTEM_COLL).get(id);
long timestamp = ((Date)doc.getFieldValue("timestamp")).getTime();
long timestamp = (Long)doc.getFieldValue("timestamp_l");
timeSource.sleep(2000);
SolrInputDocument newDoc = solrClient.docs.get(CollectionAdminParams.SYSTEM_COLL).get(id);
assertEquals(newDoc.toString(), newDoc, doc);
@ -104,7 +104,7 @@ public class SolrRrdBackendFactoryTest extends SolrTestCaseJ4 {
timeSource.sleep(3000);
newDoc = solrClient.docs.get(CollectionAdminParams.SYSTEM_COLL).get(id);
assertFalse(newDoc.toString(), newDoc.equals(doc));
long newTimestamp = ((Date)newDoc.getFieldValue("timestamp")).getTime();
long newTimestamp = (Long)newDoc.getFieldValue("timestamp_l");
assertNotSame(newTimestamp, timestamp);
FetchRequest fr = db.createFetchRequest(ConsolFun.AVERAGE, firstTimestamp + 60, lastTimestamp - 60, 60);
FetchData fd = fr.fetchData();
@ -126,7 +126,7 @@ public class SolrRrdBackendFactoryTest extends SolrTestCaseJ4 {
// should still be listed
list = factory.list(100);
assertEquals(list.toString(), 1, list.size());
assertEquals(list.toString(), "foo", list.get(0));
assertEquals(list.toString(), "foo", list.get(0).first());
// re-open read-write
db = new RrdDb("solr:foo", factory);
@ -141,7 +141,7 @@ public class SolrRrdBackendFactoryTest extends SolrTestCaseJ4 {
doc = newDoc;
newDoc = solrClient.docs.get(CollectionAdminParams.SYSTEM_COLL).get(id);
assertFalse(newDoc.toString(), newDoc.equals(doc));
newTimestamp = ((Date)newDoc.getFieldValue("timestamp")).getTime();
newTimestamp = (Long)newDoc.getFieldValue("timestamp_l");
assertNotSame(newTimestamp, timestamp);
fr = db.createFetchRequest(ConsolFun.AVERAGE, firstTimestamp + 60, lastTimestamp, 60);
fd = fr.fetchData();
@ -174,7 +174,7 @@ public class SolrRrdBackendFactoryTest extends SolrTestCaseJ4 {
timestamp = newTimestamp;
newDoc = solrClient.docs.get(CollectionAdminParams.SYSTEM_COLL).get(id);
assertTrue(newDoc.toString(), newDoc.equals(doc));
newTimestamp = ((Date)newDoc.getFieldValue("timestamp")).getTime();
newTimestamp = (Long)newDoc.getFieldValue("timestamp_l");
assertEquals(newTimestamp, timestamp);
readOnly.close();
}

View File

@ -18,15 +18,16 @@
== Design
=== Round-robin databases
When Solr runs in "cloud" mode it collects long-term history of certain key metrics. This information
can be used for very simple monitoring and troubleshooting, but also some Solr Cloud components
(eg. autoscaling) can use this data for making informed decisions based on long-term
trends of selected metrics.
Solr collects long-term history of certain key metrics both in SolrCloud and in standalone mode.
This information can be used for very simple monitoring and troubleshooting, but also some
Solr Cloud components (eg. autoscaling) can use this data for making informed decisions based on
long-term trends of selected metrics.
[IMPORTANT]
====
Metrics history is available ONLY in SolrCloud mode, it's not supported in standalone Solr. Also,
the `.system` collection must exist if metrics history should be persisted.
The `.system` collection must exist if metrics history should be persisted. If this collection
is absent then metrics history will still be collected and kept in memory but it will be lost
on node restart.
====
This data is maintained as multi-resolution time series, with a fixed total number of data points
@ -61,14 +62,16 @@ update operations than storing each data point in a separate Solr document. Metr
detailed data from each database, including retrieval of all individual datapoints.
Databases are identified primarily by their corresponding metric registry name, so for databases that
keep track of aggregated metrics this will be eg. `solr.jvm`, `solr.node`, `solr.collection.gettingstarted`,
and for databases with non-aggregated metrics this will be eg. `solr.jvm.localhost:8983_solr`,
`solr.node.localhost:7574_solr`, `solr.core.gettingstarted.shard1.replica_n1`.
keep track of aggregated metrics this will be eg. `solr.jvm`, `solr.node`, `solr.collection.gettingstarted`.
For databases with non-aggregated metrics the name consists of the registry name, optionally with a node name
to identify databases with the same name coming from different nodes. For example, per-node databases are
named like this: `solr.jvm.localhost:8983_solr`, `solr.node.localhost:7574_solr`, but per-replica names are
already unique across the cluster so they are named like this: `solr.core.gettingstarted.shard1.replica_n1`.
=== Collected metrics
Currently the following selected metrics are tracked:
* `solr.core` and `solr.collection` metrics:
* Non-aggregated `solr.core` and aggregated `solr.collection` metrics:
** `QUERY./select.requests`
** `UPDATE./update.requests`
** `INDEX.sizeInBytes`
@ -78,6 +81,7 @@ Currently the following selected metrics are tracked:
* `solr.node` metrics:
** `CONTAINER.fs.coreRoot.usableSpace`
** `numNodes` (aggregated, number of live nodes)
* `solr.jvm` metrics:
** `memory.heap.used`
** `os.processCpuLoad`
@ -86,6 +90,10 @@ Currently the following selected metrics are tracked:
Separate databases are created for each of these groups, and each database keeps data for
all metrics listed in that group.
NOTE: Currently this list is not configurable. Also, if you change this list in the code then
all existing databases must be first removed from the `.system` collection because RRD4j doesn't allow
adding new datasources once the database is created.
=== SolrRrdBackendFactory
This component is responsible for managing in-memory databases and periodically saving them
to the `.system` collection. If the `.system` collection is not available the updates to the
@ -101,7 +109,8 @@ collecting and periodically updating the in-memory databases.
This handler also performs aggregation of metrics on per-collection level, and on a cluster level.
By default only these aggregated metrics are tracked - historic data from each node and each replica
in each collection is not collected separately. Aggregated databases are managed on the Overseer leader
node.
node but they are still accessible from other nodes even if they are not persisted - the handler redirects
the call from originating node to the current Overseer leader.
The handler assumes that a simple aggregation (sum of partial metric values from each resource) is
sufficient. This happens to make sense for the default built-in sets of metrics. Future extensions will
@ -135,6 +144,7 @@ databases.
`collectPeriod`:: integer, in seconds, default is 60. Metrics values will be collected and respective
databases updated every `collectPeriod` seconds.
[IMPORTANT]
====
Value of `collectPeriod` must be at least 1, and if it's changed then all previously existing databases
@ -142,9 +152,9 @@ with their historic data must be manually removed (new databases will be created
====
`syncPeriod`:: integer, in seconds, default is 60. Data from modified databases will be saved to Solr
every `syncPeriod` seconds. When accessing the databases via REST API the visibility of most recent
data depends on this period, because requests accessing the data from other nodes see only the
version of the data that is stored in the `.system` collection.
every `syncPeriod` seconds. When accessing the databases via REST API in `index` mode the visibility of
most recent data depends on this period, because requests accessing the data from other nodes see only
the version of the data that is stored in the `.system` collection.
=== Example configuration
Example `/clusterprops.json` file with metrics history configuration that turns on the collection of
@ -154,6 +164,7 @@ properties unrelated to metrics history API.
[source,json]
----
{
...
"metrics" : {
"history" : {
"enable" : true,
@ -161,42 +172,86 @@ properties unrelated to metrics history API.
"syncPeriod" : 300
}
}
...
}
----
== Metrics History API
Main entry point for accessing metrics history is `/admin/metrics/history` (or `/api/cluster/metrics/history` for
v2 API).
Main entry point for accessing metrics history is `/admin/metrics/history` (or `/api/cluster/metrics/history`
for v2 API).
The following sections describe actions available in this API. All calls have at least one
required parameter `action`.
All responses contain a section named `state`, which reports the current internal state of the API:
`enableReplicas`:: boolean, corresponds to the `enableReplicas` configuration setting.
`enableNodes`:: boolean, corresponds to the `enableNodes` configuration setting.
`mode`:: one of the following values:
* `inactive` - when metrics collection is disabled (but access to existing metrics history is still available).
* `memory` - when metrics history is kept only in memory because `.system` collection doesn't exist. In this mode
clients can access metrics history available on the node that received the request and on the Overseer leader.
* `index` - when metrics history is periodically stored in the `.system` collection. Data available in memory on
the node that accepted the request is retrieved from memory, any other data is retrieved from the
`.system` collection (so it's at least `syncPeriod` old).
Also, the response header section (`responseHeader`) contains a `zkConnected` boolean property that indicates
whether the current node is part of a SolrCloud cluster.
=== List databases (`action=list`)
This call produces a list of available databases. It supports the following parameters:
`rows`:: optional integer, default is 500. Maximum number of results to return
`rows`:: optional integer, default is 500. Maximum number of results to return.
Example:
In this SolrCloud example the API is in `memory` mode, and the request was made to a node that is
not Overseer leader. The API transparently forwarded the request to Overseer leader.
[source,bash]
----
curl http://localhost:8983/solr/admin/metrics/history?action=list&rows=10
curl http://localhost:7574/solr/admin/metrics/history?action=list&rows=10
----
[source,json]
----
{
"responseHeader": {
"status": 0,
"QTime": 16
"responseHeader": {
"zkConnected": true,
"status": 0,
"QTime": 9
},
"metrics": {
"solr.collection..system": {
"lastModified": 1528360138,
"node": "127.0.0.1:8983_solr"
},
"metrics": [
"solr.collection..system",
"solr.collection.gettingstarted",
"solr.jvm",
"solr.node"
]
"solr.collection.gettingstarted": {
"lastModified": 1528360138,
"node": "127.0.0.1:8983_solr"
},
"solr.jvm": {
"lastModified": 1528360138,
"node": "127.0.0.1:8983_solr"
},
"solr.node": {
"lastModified": 1528360138,
"node": "127.0.0.1:8983_solr"
}
},
"state": {
"enableReplicas": false,
"enableNodes": false,
"mode": "memory"
}
}
----
Note the presence of the `node` element in each section, which shows where the information is coming
from - when the API is in `memory` mode this indicates which results are local and which ones are retrieved
from the Overseer leader node. When the API is in `index` mode this element always shows the name of the node that
received the request (because the data is retrieved from the `.system` collection anyway).
Each section also contains a `lastModified` element, which holds the time when the
database was last updated. All timestamps returned from this API correspond to Unix epoch time in seconds.
=== Database status (`action=status`)
This call provides detailed status of the selected database.
@ -207,66 +262,71 @@ The following parameters are supported:
Example:
[source,bash]
----
curl http://localhost:8983/solr/admin/metrics/history?action=status&name=solr.collection.gettingstarted
curl http://localhost:7574/solr/admin/metrics/history?action=status&name=solr.collection.gettingstarted
----
[source,json]
----
{
"responseHeader": {
"status": 0,
"QTime": 38
},
"metrics": [
"solr.collection.gettingstarted",
[
"status",
{
"lastModified": 1527268438,
"step": 60,
"datasourceCount": 5,
"archiveCount": 5,
"datasourceNames": [
"numShards",
"numReplicas",
"QUERY./select.requests",
"UPDATE./update.requests",
"INDEX.sizeInBytes"
],
"datasources": [
{
"datasource": "DS:numShards:GAUGE:120:U:U",
"lastValue": 2
},
{
"datasource": "DS:QUERY./select.requests:COUNTER:120:U:U",
"lastValue": 8786
},
...
],
"archives": [
{
"archive": "RRA:AVERAGE:0.5:1:240",
"steps": 1,
"consolFun": "AVERAGE",
"xff": 0.5,
"startTime": 1527254040,
"endTime": 1527268380,
"rows": 240
},
{
"archive": "RRA:AVERAGE:0.5:10:288",
"steps": 10,
"consolFun": "AVERAGE",
"xff": 0.5,
"startTime": 1527096000,
"endTime": 1527268200,
"rows": 288
},
...
]
}
"responseHeader": {
"zkConnected": true,
"status": 0,
"QTime": 46
},
"metrics": {
"solr.collection.gettingstarted": {
"status": {
"lastModified": 1528318361,
"step": 60,
"datasourceCount": 5,
"archiveCount": 5,
"datasourceNames": [
"numShards",
"numReplicas",
"QUERY./select.requests",
"UPDATE./update.requests",
"INDEX.sizeInBytes"
],
"datasources": [
{
"datasource": "DS:numShards:GAUGE:120:U:U",
"lastValue": 2
},
{
"datasource": "DS:numReplicas:GAUGE:120:U:U",
"lastValue": 4
},
...
],
"archives": [
{
"archive": "RRA:AVERAGE:0.5:1:240",
"steps": 1,
"consolFun": "AVERAGE",
"xff": 0.5,
"startTime": 1528303980,
"endTime": 1528318320,
"rows": 240
},
{
"archive": "RRA:AVERAGE:0.5:10:288",
"steps": 10,
"consolFun": "AVERAGE",
"xff": 0.5,
"startTime": 1528146000,
"endTime": 1528318200,
"rows": 288
},
...
]
]
},
"node": "127.0.0.1:7574_solr"
}
},
"state": {
"enableReplicas": false,
"enableNodes": false,
"mode": "index"
}
}
----
@ -286,7 +346,7 @@ values (because points from all datasources in a given time series share the sam
* `graph` - data is returned as PNG images, Base64-encoded, containing graphs of each time series values over time.
In each case the response is structured in a similar way: archive identifiers are keys in a JSON map,
and timestamps / datapoints / graphs are values.
all data is placed in a `data` element, with timestamps / datapoints / graphs as values in lists or maps.
==== Examples
This is the output using the default `list` format:
@ -297,37 +357,49 @@ curl http://localhost:8983/solr/admin/metrics/history?action=get&name=solr.colle
[source,json]
----
{
"responseHeader": {
"status": 0,
"QTime": 36
},
"metrics": [
"solr.collection.gettingstarted",
[
"data",
{
"RRA:AVERAGE:0.5:1:240": {
"timestamps":1527254460,
"timestamps":1527254520,
"timestamps":1527254580,
...
"values": {
"numShards": "NaN",
"numShards": 2.0,
"numShards": 2.0,
...
"numReplicas": "NaN",
"numReplicas": 4.0,
"numReplicas": 4.0,
...
"QUERY./select.requests": "NaN",
"QUERY./select.requests": 123,
"QUERY./select.requests": 456,
...
}
},
"RRA:AVERAGE:0.5:10:288": {
...
"responseHeader": {
"zkConnected": true,
"status": 0,
"QTime": 4
},
"metrics": {
"solr.collection.gettingstarted": {
"data": {
"RRA:AVERAGE:0.5:1:240": {
"timestamps": [
1528304160,
1528304220,
...
],
"values": {
"numShards": [
"NaN",
2.0,
...
],
"numReplicas": [
"NaN",
4.0,
...
],
...
}
},
"RRA:AVERAGE:0.5:10:288": {
"timestamps": [
1528145400,
1528146000,
...
"lastModified": 1528318606,
"node": "127.0.0.1:8983_solr"
}
},
"state": {
"enableReplicas": false,
"enableNodes": false,
"mode": "index"
}
}
----
This is the output when using the `string` format:
@ -338,25 +410,24 @@ curl http://localhost:8983/solr/admin/metrics/history?action=get&name=solr.colle
[source,json]
----
{
"responseHeader": {
"status": 0,
"QTime": 11
},
"metrics": [
"solr.collection.gettingstarted",
[
"data",
{
"RRA:AVERAGE:0.5:1:240": {
"timestamps": "1527254820\n1527254880\n1527254940\n...",
"values": {
"numShards": "NaN\n2.0\n2.0\n2.0\n2.0\n2.0\n2.0\n...",
"numReplicas": "NaN\n4.0\n4.0\n4.0\n4.0\n4.0\n4.0\n...",
"QUERY./select.requests": "NaN\n123\n456\n789\n...",
...
}
},
"RRA:AVERAGE:0.5:10:288": {
"responseHeader": {
"zkConnected": true,
"status": 0,
"QTime": 2
},
"metrics": {
"solr.collection.gettingstarted": {
"data": {
"RRA:AVERAGE:0.5:1:240": {
"timestamps": "1527254820\n1527254880\n1527254940\n...",
"values": {
"numShards": "NaN\n2.0\n2.0\n2.0\n2.0\n2.0\n2.0\n...",
"numReplicas": "NaN\n4.0\n4.0\n4.0\n4.0\n4.0\n4.0\n...",
"QUERY./select.requests": "NaN\n123\n456\n789\n...",
...
}
},
"RRA:AVERAGE:0.5:10:288": {
...
----
@ -368,29 +439,28 @@ curl http://localhost:8983/solr/admin/metrics/history?action=get&name=solr.colle
[source,json]
----
{
"responseHeader": {
"status": 0,
"QTime": 2275
},
"metrics": [
"solr.collection.gettingstarted",
[
"data",
{
"RRA:AVERAGE:0.5:1:240": {
"values": {
"numShards": "iVBORw0KGgoAAAANSUhEUgAAAkQAAA...",
"numReplicas": "iVBORw0KGgoAAAANSUhEUgAAAkQA...",
"QUERY./select.requests": "iVBORw0KGgoAAAANS...",
...
}
},
"RRA:AVERAGE:0.5:10:288": {
"values": {
"numShards": "iVBORw0KGgoAAAANSUhEUgAAAkQAAA...",
...
},
...
"responseHeader": {
"zkConnected": true,
"status": 0,
"QTime": 2
},
"metrics": {
"solr.collection.gettingstarted": {
"data": {
"RRA:AVERAGE:0.5:1:240": {
"values": {
"numShards": "iVBORw0KGgoAAAANSUhEUgAAAkQAAA...",
"numReplicas": "iVBORw0KGgoAAAANSUhEUgAAAkQA...",
"QUERY./select.requests": "iVBORw0KGgoAAAANS...",
...
}
},
"RRA:AVERAGE:0.5:10:288": {
"values": {
"numShards": "iVBORw0KGgoAAAANSUhEUgAAAkQAAA...",
...
},
...
----
.Example 60 sec resolution history graph for `QUERY./select.requests` metric