diff --git a/docs/reference/cat.asciidoc b/docs/reference/cat.asciidoc
index 4c30d21693d..d557a8c930a 100644
--- a/docs/reference/cat.asciidoc
+++ b/docs/reference/cat.asciidoc
@@ -227,6 +227,8 @@ include::cat/alias.asciidoc[]
 
 include::cat/allocation.asciidoc[]
 
+include::cat/anomaly-detectors.asciidoc[]
+
 include::cat/count.asciidoc[]
 
 include::cat/dataframeanalytics.asciidoc[]
diff --git a/docs/reference/cat/anomaly-detectors.asciidoc b/docs/reference/cat/anomaly-detectors.asciidoc
new file mode 100644
index 00000000000..cc88b2c2b3b
--- /dev/null
+++ b/docs/reference/cat/anomaly-detectors.asciidoc
@@ -0,0 +1,280 @@
+[role="xpack"]
+[testenv="platinum"]
+[[cat-anomaly-detectors]]
+=== cat anomaly detectors API
+++++
+cat anomaly detectors
+++++
+
+Returns configuration and usage information about {anomaly-jobs}.
+
+[[cat-anomaly-detectors-request]]
+==== {api-request-title}
+
+`GET /_cat/ml/anomaly_detectors/<job_id>` +
+
+`GET /_cat/ml/anomaly_detectors`
+
+[[cat-anomaly-detectors-prereqs]]
+==== {api-prereq-title}
+
+* If the {es} {security-features} are enabled, you must have `monitor_ml`,
+`monitor`, `manage_ml`, or `manage` cluster privileges to use this API. See
+<<security-privileges>> and {ml-docs}/setup.html[Set up {ml-features}].
+
+
+[[cat-anomaly-detectors-desc]]
+==== {api-description-title}
+
+See {ml-docs}/ml-jobs.html[{anomaly-jobs-cap}].
+
+NOTE: This API returns a maximum of 10,000 jobs.
+
+[[cat-anomaly-detectors-path-params]]
+==== {api-path-parms-title}
+
+`<job_id>`::
+(Optional, string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
+
+[[cat-anomaly-detectors-query-params]]
+==== {api-query-parms-title}
+
+`allow_no_jobs`::
+(Optional, boolean)
+include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-jobs]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=bytes]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=http-format]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-h]
++
+If you do not specify which columns to include, the API returns the default
+columns. If you explicitly specify one or more columns, it returns only the
+specified columns.
++ +Valid columns are: + +`assignment_explanation`, `ae`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-anomaly-jobs] + +`buckets.count`, `bc`, `bucketsCount`::: +(Default) +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count-anomaly-jobs] + +`buckets.time.exp_avg`, `btea`, `bucketsTimeExpAvg`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average] + +`buckets.time.exp_avg_hour`, `bteah`, `bucketsTimeExpAvgHour`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average-hour] + +`buckets.time.max`, `btmax`, `bucketsTimeMax`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-maximum] + +`buckets.time.min`, `btmin`, `bucketsTimeMin`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-minimum] + +`buckets.time.total`, `btt`, `bucketsTimeTotal`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-total] + +`data.buckets`, `db`, `dataBuckets`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count] + +`data.earliest_record`, `der`, `dataEarliestRecord`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=earliest-record-timestamp] + +`data.empty_buckets`, `deb`, `dataEmptyBuckets`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=empty-bucket-count] + +`data.input_bytes`, `dib`, `dataInputBytes`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=input-bytes] + +`data.input_fields`, `dif`, `dataInputFields`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=input-field-count] + +`data.input_records`, `dir`, `dataInputRecords`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=input-record-count] + +`data.invalid_dates`, `did`, `dataInvalidDates`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=invalid-date-count] + +`data.last`, `dl`, `dataLast`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=last-data-time] + +`data.last_empty_bucket`, `dleb`, `dataLastEmptyBucket`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=latest-empty-bucket-timestamp] + +`data.last_sparse_bucket`, `dlsb`, `dataLastSparseBucket`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=latest-sparse-record-timestamp] + +`data.latest_record`, `dlr`, `dataLatestRecord`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=latest-record-timestamp] + +`data.missing_fields`, `dmf`, `dataMissingFields`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=missing-field-count] + +`data.out_of_order_timestamps`, `doot`, `dataOutOfOrderTimestamps`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=out-of-order-timestamp-count] + +`data.processed_fields`, `dpf`, `dataProcessedFields`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=processed-field-count] + +`data.processed_records`, `dpr`, `dataProcessedRecords`::: +(Default) +include::{docdir}/ml/ml-shared.asciidoc[tag=processed-record-count] + +`data.sparse_buckets`, `dsb`, `dataSparseBuckets`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=sparse-bucket-count] + +`forecasts.memory.avg`, `fmavg`, `forecastsMemoryAvg`::: +The average memory usage in bytes for forecasts related to the {anomaly-job}. + +`forecasts.memory.max`, `fmmax`, `forecastsMemoryMax`::: +The maximum memory usage in bytes for forecasts related to the {anomaly-job}. + +`forecasts.memory.min`, `fmmin`, `forecastsMemoryMin`::: +The minimum memory usage in bytes for forecasts related to the {anomaly-job}. + +`forecasts.memory.total`, `fmt`, `forecastsMemoryTotal`::: +The total memory usage in bytes for forecasts related to the {anomaly-job}. 
+ +`forecasts.records.avg`, `fravg`, `forecastsRecordsAvg`::: +The average number of `model_forecast` documents written for forecasts related +to the {anomaly-job}. + +`forecasts.records.max`, `frmax`, `forecastsRecordsMax`::: +The maximum number of `model_forecast` documents written for forecasts related +to the {anomaly-job}. + +`forecasts.records.min`, `frmin`, `forecastsRecordsMin`::: +The minimum number of `model_forecast` documents written for forecasts related +to the {anomaly-job}. + +`forecasts.records.total`, `frt`, `forecastsRecordsTotal`::: +The total number of `model_forecast` documents written for forecasts related to +the {anomaly-job}. + +`forecasts.time.avg`, `ftavg`, `forecastsTimeAvg`::: +The average runtime in milliseconds for forecasts related to the {anomaly-job}. + +`forecasts.time.max`, `ftmax`, `forecastsTimeMax`::: +The maximum runtime in milliseconds for forecasts related to the {anomaly-job}. + +`forecasts.time.min`, `ftmin`, `forecastsTimeMin`::: +The minimum runtime in milliseconds for forecasts related to the {anomaly-job}. + +`forecasts.time.total`, `ftt`, `forecastsTimeTotal`::: +The total runtime in milliseconds for forecasts related to the {anomaly-job}. + +`forecasts.total`, `ft`, `forecastsTotal`::: +(Default) +include::{docdir}/ml/ml-shared.asciidoc[tag=forecast-total] + +`id`::: +(Default) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] + +`model.bucket_allocation_failures`, `mbaf`, `modelBucketAllocationFailures`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-allocation-failures-count] + +`model.by_fields`, `mbf`, `modelByFields`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=total-by-field-count] + +`model.bytes`, `mb`, `modelBytes`::: +(Default) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes] + +`model.bytes_exceeded`, `mbe`, `modelBytesExceeded`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes-exceeded] + +`model.categorization_status`, `mcs`, `modelCategorizationStatus`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=categorization-status] + +`model.categorized_doc_count`, `mcdc`, `modelCategorizedDocCount`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=categorized-doc-count] + +`model.dead_category_count`, `mdcc`, `modelDeadCategoryCount`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=dead-category-count] + +`model.frequent_category_count`, `mfcc`, `modelFrequentCategoryCount`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=frequent-category-count] + +`model.log_time`, `mlt`, `modelLogTime`::: +The timestamp when the model stats were gathered, according to server time. 
+ +`model.memory_limit`, `mml`, `modelMemoryLimit`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-limit-anomaly-jobs] + +`model.memory_status`, `mms`, `modelMemoryStatus`::: +(Default) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-status] + +`model.over_fields`, `mof`, `modelOverFields`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=total-over-field-count] + +`model.partition_fields`, `mpf`, `modelPartitionFields`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=total-partition-field-count] + +`model.rare_category_count`, `mrcc`, `modelRareCategoryCount`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=rare-category-count] + +`model.timestamp`, `mt`, `modelTimestamp`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=model-timestamp] + +`model.total_category_count`, `mtcc`, `modelTotalCategoryCount`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=total-category-count] + +`node.address`, `na`, `nodeAddress`::: +The network address of the node. ++ +include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs] + +`node.ephemeral_id`, `ne`, `nodeEphemeralId`::: +The ephemeral ID of the node. ++ +include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs] + +`node.id`, `ni`, `nodeId`::: +The unique identifier of the node. ++ +include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs] + +`node.name`, `nn`, `nodeName`::: +The node name. ++ +include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs] + +`opened_time`, `ot`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=open-time] + +`state`, `s`::: +(Default) +include::{docdir}/ml/ml-shared.asciidoc[tag=state-anomaly-job] + +include::{docdir}/rest-api/common-parms.asciidoc[tag=help] + +include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-s] + +include::{docdir}/rest-api/common-parms.asciidoc[tag=time] + +include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-v] + +[[cat-anomaly-detectors-example]] +==== {api-examples-title} + +[source,console] +-------------------------------------------------- +GET _cat/ml/anomaly_detectors?h=id,s,dpr,mb&v +-------------------------------------------------- +// TEST[skip:kibana sample data] + +[source,console-result] +---- +id s dpr mb +high_sum_total_sales closed 14022 1.5mb +low_request_rate closed 1216 40.5kb +response_code_rates closed 28146 132.7kb +url_scanning closed 28146 501.6kb +---- +// TESTRESPONSE[skip:kibana sample data] diff --git a/docs/reference/cat/datafeeds.asciidoc b/docs/reference/cat/datafeeds.asciidoc index 95a830aa823..57645633757 100644 --- a/docs/reference/cat/datafeeds.asciidoc +++ b/docs/reference/cat/datafeeds.asciidoc @@ -22,12 +22,14 @@ Returns configuration and usage information about {dfeeds}. `monitor`, `manage_ml`, or `manage` cluster privileges to use this API. See <> and {ml-docs}/setup.html[Set up {ml-features}]. -//// + [[cat-datafeeds-desc]] ==== {api-description-title} -TBD: This API returns a maximum of 10,000 {dfeeds}. -//// +{dfeeds-cap} retrieve data from {es} for analysis by {anomaly-jobs}. For more +information, see {ml-docs}/ml-dfeeds.html[{dfeeds-cap}]. + +NOTE: This API returns a maximum of 10,000 jobs. [[cat-datafeeds-path-params]] ==== {api-path-parms-title} @@ -46,6 +48,60 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-datafeeds] include::{docdir}/rest-api/common-parms.asciidoc[tag=http-format] include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-h] ++ +If you do not specify which columns to include, the API returns the default +columns. If you explicitly specify one or more columns, it returns only the +specified columns. 
++ +Valid columns are: + +`assignment_explanation`, `ae`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-datafeeds] + +`buckets.count`, `bc`, `bucketsCount`::: +(Default) +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count] + +`id`::: +(Default) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] + +`node.address`, `na`, `nodeAddress`::: +The network address of the node. ++ +include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds] + +`node.ephemeral_id`, `ne`, `nodeEphemeralId`::: +The ephemeral ID of the node. ++ +include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds] + +`node.id`, `ni`, `nodeId`::: +The unique identifier of the node. ++ +include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds] + +`node.name`, `nn`, `nodeName`::: +The node name. ++ +include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds] + +`search.bucket_avg`, `sba`, `searchBucketAvg`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=search-bucket-avg] + +`search.count`, `sc`, `searchCount`::: +(Default) +include::{docdir}/ml/ml-shared.asciidoc[tag=search-count] + +`search.exp_avg_hour`, `seah`, `searchExpAvgHour`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=search-exp-avg-hour] + +`search.time`, `st`, `searchTime`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=search-time] + +`state`, `s`::: +(Default) +include::{docdir}/ml/ml-shared.asciidoc[tag=state-datafeed] include::{docdir}/rest-api/common-parms.asciidoc[tag=help] @@ -55,86 +111,6 @@ include::{docdir}/rest-api/common-parms.asciidoc[tag=time] include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-v] -[[cat-datafeeds-results]] -==== {api-response-body-title} - -`assignment_explanation`:: -include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation] -+ -To retrieve this information, specify the `ae` column in the `h` query parameter. - -`bucket.count`:: -include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count] -+ -To retrieve this information, specify the `bc` or `bucketCount` column in the -`h` query parameter. - -`id`:: -include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] -+ -To retrieve this information, specify the `id` column in the `h` query parameter. - -`node.address`:: -The network address of the node. -+ -include::{docdir}/ml/ml-shared.asciidoc[tag=node] -+ -To retrieve this information, specify the `na` or `nodeAddress` column in the -`h` query parameter. - -`node.ephemeral_id`:: -The ephemeral ID of the node. -+ -include::{docdir}/ml/ml-shared.asciidoc[tag=node] -+ -To retrieve this information, specify the `ne` or `nodeEphemeralId` column in -the `h` query parameter. - -`node.id`:: -The unique identifier of the node. -+ -include::{docdir}/ml/ml-shared.asciidoc[tag=node] -+ -To retrieve this information, specify the `ni` or `nodeId` column in the `h` -query parameter. - -`node.name`:: -The node name. -+ -include::{docdir}/ml/ml-shared.asciidoc[tag=node] -+ -To retrieve this information, specify the `nn` or `nodeName` column in the `h` -query parameter. - -`search.bucket_avg`:: -include::{docdir}/ml/ml-shared.asciidoc[tag=search-bucket-avg] -+ -To retrieve this information, specify the `sba` or `searchBucketAvg` column in -the `h` query parameter. - -`search.count`:: -include::{docdir}/ml/ml-shared.asciidoc[tag=search-count] -+ -To retrieve this information, specify the `sc` or `searchCount` column in the -`h` query parameter. 
- -`search.exp_avg_hour`:: -include::{docdir}/ml/ml-shared.asciidoc[tag=search-exp-avg-hour] -+ -To retrieve this information, specify the `seah` or `searchExpAvgHour` column in -the `h` query parameter. - -`search.time`:: -include::{docdir}/ml/ml-shared.asciidoc[tag=search-time] -+ -To retrieve this information, specify the `st` or `searchTime` column in the `h` -query parameter. - -`state`:: -include::{docdir}/ml/ml-shared.asciidoc[tag=state-datafeed] -+ -To retrieve this information, specify the `s` column in the `h` query parameter. - [[cat-datafeeds-example]] ==== {api-examples-title} @@ -146,7 +122,7 @@ GET _cat/ml/datafeeds?v [source,console-result] ---- -id state bucket.count search.count +id state buckets.count search.count datafeed-high_sum_total_sales stopped 743 7 datafeed-low_request_rate stopped 1457 3 datafeed-response_code_rates stopped 1460 18 diff --git a/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc index 45381156a7c..bf49722991a 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc @@ -68,7 +68,7 @@ informational; you cannot update their values. `assignment_explanation`:: (string) -include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation] +include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-datafeeds] `datafeed_id`:: (string) @@ -76,12 +76,18 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] `node`:: (object) -include::{docdir}/ml/ml-shared.asciidoc[tag=node] -`node`.`id`::: The unique identifier of the node. For example, "0-o0tOoRTwKFZifatTWKNw". +include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds] + +`node`.`id`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=node-id] + `node`.`name`::: The node name. For example, `0-o0tOo`. -`node`.`ephemeral_id`::: The node ephemeral ID. -`node`.`transport_address`::: The host and port where transport HTTP connections -are accepted. For example, `127.0.0.1:9300`. + +`node`.`ephemeral_id`::: +include::{docdir}/ml/ml-shared.asciidoc[tag=node-ephemeral-id] + +`node`.`transport_address`::: The host and port where transport HTTP connections are +accepted. For example, `127.0.0.1:9300`. `node`.`attributes`::: For example, `{"ml.machine_memory": "17179869184"}`. `state`:: diff --git a/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc index ea9f1849e91..33edc621c30 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc @@ -57,8 +57,8 @@ The API returns the following information about the operational progress of a job: `assignment_explanation`:: -(string) For open jobs only, contains messages relating to the selection of a -node to run the job. +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-anomaly-jobs] [[datacounts]]`data_counts`:: (object) An object that describes the quantity of input to the job and any @@ -67,85 +67,73 @@ a job. If a model snapshot is reverted or old results are deleted, the job counts are not reset. `data_counts`.`bucket_count`::: -(long) The number of bucket results produced by the job. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count-anomaly-jobs] `data_counts`.`earliest_record_timestamp`::: -(date) The timestamp of the earliest chronologically input document. 
+(date) +include::{docdir}/ml/ml-shared.asciidoc[tag=earliest-record-timestamp] `data_counts`.`empty_bucket_count`::: -(long) The number of buckets which did not contain any data. If your data -contains many empty buckets, consider increasing your `bucket_span` or using -functions that are tolerant to gaps in data such as `mean`, `non_null_sum` or -`non_zero_count`. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=empty-bucket-count] `data_counts`.`input_bytes`::: -(long) The number of raw bytes read by the job. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=input-bytes] `data_counts`.`input_field_count`::: -(long) The total number of fields in input documents posted to the job. This -count includes fields that are not used in the analysis. However, be aware that -if you are using a {dfeed}, it extracts only the required fields from the -documents it retrieves before posting them to the job. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=input-field-count] `data_counts`.`input_record_count`::: -(long) The number of data records read by the job. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=input-record-count] `data_counts`.`invalid_date_count`::: -(long) The number of records with either a missing date field or a date that -could not be parsed. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=invalid-date-count] `data_counts`.`job_id`::: (string) include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] `data_counts`.`last_data_time`::: -(date) The timestamp at which data was last analyzed, according to server time. +(date) +include::{docdir}/ml/ml-shared.asciidoc[tag=last-data-time] `data_counts`.`latest_empty_bucket_timestamp`::: -(date) The timestamp of the last bucket that did not contain any data. +(date) +include::{docdir}/ml/ml-shared.asciidoc[tag=latest-empty-bucket-timestamp] `data_counts`.`latest_record_timestamp`::: -(date) The timestamp of the latest chronologically input document. +(date) +include::{docdir}/ml/ml-shared.asciidoc[tag=latest-record-timestamp] `data_counts`.`latest_sparse_bucket_timestamp`::: -(date) The timestamp of the last bucket that was considered sparse. +(date) +include::{docdir}/ml/ml-shared.asciidoc[tag=latest-sparse-record-timestamp] `data_counts`.`missing_field_count`::: -(long) The number of input documents that are missing a field that the job is -configured to analyze. Input documents with missing fields are still processed -because it is possible that not all fields are missing. The value of -`processed_record_count` includes this count. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=missing-field-count] + --- -NOTE: If you are using {dfeeds} or posting data to the job in JSON format, a -high `missing_field_count` is often not an indication of data issues. It is not -necessarily a cause for concern. - --- +The value of `processed_record_count` includes this count. `data_counts`.`out_of_order_timestamp_count`::: -(long) The number of input documents that are out of time sequence and outside -of the latency window. This information is applicable only when you provide data -to the job by using the <>. These out of order -documents are discarded, since jobs require time series data to be in ascending -chronological order. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=out-of-order-timestamp-count] `data_counts`.`processed_field_count`::: -(long) The total number of fields in all the documents that have been processed -by the job. 
Only fields that are specified in the detector configuration object -contribute to this count. The time stamp is not included in this count. +include::{docdir}/ml/ml-shared.asciidoc[tag=processed-field-count] `data_counts`.`processed_record_count`::: -(long) The number of input documents that have been processed by the job. This -value includes documents with missing fields, since they are nonetheless -analyzed. If you use {dfeeds} and have aggregations in your search query, the -`processed_record_count` will be the number of aggregation results processed, -not the number of {es} documents. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=processed-record-count] `data_counts`.`sparse_bucket_count`::: -(long) The number of buckets that contained few data points compared to the -expected number of data points. If your data contains many sparse buckets, -consider using a longer `bucket_span`. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=sparse-bucket-count] [[forecastsstats]]`forecasts_stats`:: (object) An object that provides statistical information about forecasts @@ -171,8 +159,8 @@ related to this job. If there are no forecasts, this property is omitted. maximum, average and total. `forecasts_stats`.`records`::: -(object) The `avg`, `min`, `max` and `total` number of model_forecast documents -written for forecasts related to this job. If there are no forecasts, this +(object) The `avg`, `min`, `max` and `total` number of `model_forecast` documents +written for forecasts related to this job. If there are no forecasts, this property is omitted. `forecasts_stats`.`processing_time_ms`::: @@ -186,8 +174,8 @@ omitted. omitted. `forecasts_stats`.`total`::: -(long) The number of individual forecasts currently available for this job. A -value of `1` or more indicates that forecasts exist. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=forecast-total] `job_id`:: (string) @@ -198,38 +186,24 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] model. It has the following properties: `model_size_stats`.`bucket_allocation_failures_count`::: -(long) The number of buckets for which new entities in incoming data were not -processed due to insufficient model memory. This situation is also signified -by a `hard_limit: memory_status` property value. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-allocation-failures-count] `model_size_stats`.`categorized_doc_count`::: -(long) The number of documents that have had a field categorized. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=categorized-doc-count] `model_size_stats`.`categorization_status`::: -(string) The status of categorization for this job. -Contains one of the following values. -+ --- -* `ok`: Categorization is performing acceptably well (or not being -used at all). -* `warn`: Categorization is detecting a distribution of categories -that suggests the input data is inappropriate for categorization. -Problems could be that there is only one category, more than 90% of -categories are rare, the number of categories is greater than 50% of -the number of categorized documents, there are no frequently -matched categories, or more than 50% of categories are dead. - --- +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=categorization-status] `model_size_stats`.`dead_category_count`::: -(long) The number of categories created by categorization that will -never be assigned again because another category's definition -makes it a superset of the dead category. 
(Dead categories are a -side effect of the way categorization has no prior training.) +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=dead-category-count] `model_size_stats`.`frequent_category_count`::: -(long) The number of categories that match more than 1% of categorized -documents. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=frequent-category-count] `model_size_stats`.`job_id`::: (string) @@ -239,53 +213,47 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] (date) The timestamp of the `model_size_stats` according to server time. `model_size_stats`.`memory_status`::: -(string) The status of the mathematical models. This property can have one of -the following values: -+ --- -* `ok`: The models stayed below the configured value. -* `soft_limit`: The models used more than 60% of the configured memory limit and -older unused models will be pruned to free up space. -* `hard_limit`: The models used more space than the configured memory limit. As -a result, not all incoming data was processed. --- +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-status] `model_size_stats`.`model_bytes`::: -(long) The number of bytes of memory used by the models. This is the maximum -value since the last time the model was persisted. If the job is closed, -this value indicates the latest size. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes] `model_size_stats`.`model_bytes_exceeded`::: - (long) The number of bytes over the high limit for memory usage at the last - allocation failure. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes-exceeded] `model_size_stats`.`model_bytes_memory_limit`::: -(long) The upper limit for memory usage, checked on increasing values. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-limit-anomaly-jobs] `model_size_stats`.`rare_category_count`::: -(long) The number of categories that match just one categorized document. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=rare-category-count] `model_size_stats`.`result_type`::: (string) For internal use. The type of result. `model_size_stats`.`total_by_field_count`::: -(long) The number of `by` field values that were analyzed by the models. This -value is cumulative for all detectors. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=total-by-field-count] `model_size_stats`.`total_category_count`::: -(long) The number of categories created by categorization. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=total-category-count] `model_size_stats`.`total_over_field_count`::: -(long) The number of `over` field values that were analyzed by the models. This -value is cumulative for all detectors. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=total-over-field-count] `model_size_stats`.`total_partition_field_count`::: -(long) The number of `partition` field values that were analyzed by the models. -This value is cumulative for all detectors. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=total-partition-field-count] `model_size_stats`.`timestamp`::: -(date) The timestamp of the `model_size_stats` according to the timestamp of the -data. +(date) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-timestamp] [[stats-node]]`node`:: (object) Contains properties for the node that runs the job. This information is @@ -296,10 +264,12 @@ available only for open jobs. `{"ml.machine_memory": "17179869184"}`. `node`.`ephemeral_id`::: -(string) The ephemeral id of the node. 
+(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=node-ephemeral-id] `node`.`id`::: -(string) The unique identifier of the node. +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=node-id] `node`.`name`::: (string) The node name. @@ -308,24 +278,12 @@ available only for open jobs. (string) The host and port where transport HTTP connections are accepted. `open_time`:: -(string) For open jobs only, the elapsed time for which the job has been open. -For example, `28746386s`. +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=open-time] `state`:: -(string) The status of the job, which can be one of the following values: -+ --- -* `closed`: The job finished successfully with its model state persisted. The -job must be opened before it can accept further data. -* `closing`: The job close action is in progress and has not yet completed. A -closing job cannot accept further data. -* `failed`: The job did not finish successfully due to an error. This situation -can occur due to invalid input data. If the job had irrevocably failed, it must -be force closed and then deleted. If the {dfeed} can be corrected, the job can -be closed and then re-opened. -* `opened`: The job is available to receive and process data. -* `opening`: The job open action is in progress and has not yet completed. --- +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=state-anomaly-job] [[timingstats]]`timing_stats`:: (object) An object that provides statistical information about timing aspect of @@ -335,28 +293,32 @@ this job. It has the following properties: (double) Average of all bucket processing times in milliseconds. `timing_stats`.`bucket_count`::: -(long) The number of buckets processed. +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count] `timing_stats`.`exponential_average_bucket_processing_time_ms`::: -(double) Exponential moving average of all bucket processing times in -milliseconds. +(double) +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average] `timing_stats`.`exponential_average_bucket_processing_time_per_hour_ms`::: -(double) Exponentially-weighted moving average of bucket processing times -calculated in a 1 hour time window. +(double) +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average-hour] `timing_stats`.`job_id`::: (string) include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] `timing_stats`.`maximum_bucket_processing_time_ms`::: -(double) Maximum among all bucket processing times in milliseconds. - +(double) +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-maximum] + `timing_stats`.`minimum_bucket_processing_time_ms`::: -(double) Minimum among all bucket processing times in milliseconds. +(double) +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-minimum] `timing_stats`.`total_bucket_processing_time_ms`::: -(double) Sum of all bucket processing times in milliseconds. +(double) +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-total] [[ml-get-job-stats-response-codes]] ==== {api-response-codes-title} diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc index 59957d1450a..adddef76475 100644 --- a/docs/reference/ml/ml-shared.asciidoc +++ b/docs/reference/ml/ml-shared.asciidoc @@ -136,9 +136,14 @@ tag::analyzed-fields-includes[] An array of strings that defines the fields that will be included in the analysis. 
end::analyzed-fields-includes[] -tag::assignment-explanation[] +tag::assignment-explanation-anomaly-jobs[] +For open {anomaly-jobs} only, contains messages relating to the selection +of a node to run the job. +end::assignment-explanation-anomaly-jobs[] + +tag::assignment-explanation-datafeeds[] For started {dfeeds} only, contains messages relating to the selection of a node. -end::assignment-explanation[] +end::assignment-explanation-datafeeds[] tag::assignment-explanation-dfanalytics[] Contains messages relating to the selection of a node. @@ -157,10 +162,20 @@ so do not set the `background_persist_interval` value too low. -- end::background-persist-interval[] +tag::bucket-allocation-failures-count[] +The number of buckets for which new entities in incoming data were not processed +due to insufficient model memory. This situation is also signified by a +`hard_limit: memory_status` property value. +end::bucket-allocation-failures-count[] + tag::bucket-count[] The number of buckets processed. end::bucket-count[] +tag::bucket-count-anomaly-jobs[] +The number of bucket results produced by the job. +end::bucket-count-anomaly-jobs[] + tag::bucket-span[] The size of the interval that the analysis is aggregated into, typically between `5m` and `1h`. The default value is `5m`. If the {anomaly-job} uses a {dfeed} @@ -174,6 +189,27 @@ The length of the bucket in seconds. This value matches the `bucket_span` that is specified in the job. end::bucket-span-results[] +tag::bucket-time-exponential-average[] +Exponential moving average of all bucket processing times, in milliseconds. +end::bucket-time-exponential-average[] + +tag::bucket-time-exponential-average-hour[] +Exponentially-weighted moving average of bucket processing times +calculated in a 1 hour time window, in milliseconds. +end::bucket-time-exponential-average-hour[] + +tag::bucket-time-maximum[] +Maximum among all bucket processing times, in milliseconds. +end::bucket-time-maximum[] + +tag::bucket-time-minimum[] +Minimum among all bucket processing times, in milliseconds. +end::bucket-time-minimum[] + +tag::bucket-time-total[] +Sum of all bucket processing times, in milliseconds. +end::bucket-time-total[] + tag::by-field-name[] The field used to split the data. In particular, this property is used for analyzing the splits with respect to their own history. It is used for finding @@ -251,6 +287,24 @@ customize the tokenizer or post-tokenization filtering, use the `pattern_replace` character filters. The effect is exactly the same. end::categorization-filters[] +tag::categorization-status[] +The status of categorization for the job. Contains one of the following values: ++ +-- +* `ok`: Categorization is performing acceptably well (or not being used at all). +* `warn`: Categorization is detecting a distribution of categories that suggests +the input data is inappropriate for categorization. Problems could be that there +is only one category, more than 90% of categories are rare, the number of +categories is greater than 50% of the number of categorized documents, there are +no frequently matched categories, or more than 50% of categories are dead. + +-- +end::categorization-status[] + +tag::categorized-doc-count[] +The number of documents that have had a field categorized. +end::categorized-doc-count[] + tag::char-filter[] One or more <>. In addition to the built-in character filters, other plugins can provide more character filters. @@ -482,6 +536,13 @@ Identifier for the {dfeed}. It can be a {dfeed} identifier or a wildcard expression. 
end::datafeed-id-wildcard[] +tag::dead-category-count[] +The number of categories created by categorization that will never be assigned +again because another category's definition makes it a superset of the dead +category. (Dead categories are a side effect of the way categorization has no +prior training.) +end::dead-category-count[] + tag::decompress-definition[] Specifies whether the included model definition should be returned as a JSON map (`true`) or in a custom compressed format (`false`). Defaults to `true`. @@ -562,6 +623,17 @@ A unique identifier for the detector. This identifier is based on the order of the detectors in the `analysis_config`, starting at zero. end::detector-index[] +tag::earliest-record-timestamp[] +The timestamp of the earliest chronologically input document. +end::earliest-record-timestamp[] + +tag::empty-bucket-count[] +The number of buckets which did not contain any data. If your data +contains many empty buckets, consider increasing your `bucket_span` or using +functions that are tolerant to gaps in data such as `mean`, `non_null_sum` or +`non_zero_count`. +end::empty-bucket-count[] + tag::eta[] Advanced configuration option. The shrinkage applied to the weights. Smaller values result in larger forests which have better generalization error. However, @@ -628,6 +700,11 @@ tag::filter-id[] A string that uniquely identifies a filter. end::filter-id[] +tag::forecast-total[] +The number of individual forecasts currently available for the job. A value of +`1` or more indicates that forecasts exist. +end::forecast-total[] + tag::frequency[] The interval at which scheduled queries are made while the {dfeed} runs in real time. The default value is either the bucket span for short bucket spans, or, @@ -638,6 +715,10 @@ bucket results. If the {dfeed} uses aggregations, this value must be divisible by the interval of the date histogram aggregation. end::frequency[] +tag::frequent-category-count[] +The number of categories that match more than 1% of categorized documents. +end::frequent-category-count[] + tag::from[] Skips the specified number of {dfanalytics-jobs}. The default value is `0`. end::from[] @@ -698,6 +779,26 @@ is available as part of the input data. When you use multiple detectors, the use of influencers is recommended as it aggregates results for each influencer entity. end::influencers[] +tag::input-bytes[] +The number of bytes of input data posted to the {anomaly-job}. +end::input-bytes[] + +tag::input-field-count[] +The total number of fields in input documents posted to the {anomaly-job}. This +count includes fields that are not used in the analysis. However, be aware that +if you are using a {dfeed}, it extracts only the required fields from the +documents it retrieves before posting them to the job. +end::input-field-count[] + +tag::input-record-count[] +The number of input documents posted to the {anomaly-job}. +end::input-record-count[] + +tag::invalid-date-count[] +The number of input documents with either a missing date field or a date that +could not be parsed. +end::invalid-date-count[] + tag::is-interim[] If `true`, this is an interim result. In other words, the results are calculated based on partial input data. @@ -768,6 +869,10 @@ relevant relationships between the features and the {depvar}. The smaller this parameter the larger individual trees will be and the longer train will take. end::lambda[] +tag::last-data-time[] +The timestamp at which data was last analyzed, according to server time. 
+end::last-data-time[] + tag::latency[] The size of the window in which to expect data that is out of time order. The default value is 0 (no latency). If you specify a non-zero value, it must be @@ -781,6 +886,18 @@ the <> API. -- end::latency[] +tag::latest-empty-bucket-timestamp[] +The timestamp of the last bucket that did not contain any data. +end::latest-empty-bucket-timestamp[] + +tag::latest-record-timestamp[] +The timestamp of the latest chronologically input document. +end::latest-record-timestamp[] + +tag::latest-sparse-record-timestamp[] +The timestamp of the last bucket that was considered sparse. +end::latest-sparse-record-timestamp[] + tag::max-empty-searches[] If a real-time {dfeed} has never seen any data (including during any initial training period) then it will automatically stop itself and close its associated @@ -818,6 +935,19 @@ ensemble method. Available methods are `lof`, `ldof`, `distance_kth_nn`, `distance_knn`. end::method[] +tag::missing-field-count[] +The number of input documents that are missing a field that the {anomaly-job} is +configured to analyze. Input documents with missing fields are still processed +because it is possible that not all fields are missing. ++ +-- +NOTE: If you are using {dfeeds} or posting data to the job in JSON format, a +high `missing_field_count` is often not an indication of data issues. It is not +necessarily a cause for concern. + +-- +end::missing-field-count[] + tag::mode[] There are three available modes: + @@ -829,6 +959,17 @@ recommended value. -- end::mode[] +tag::model-bytes[] +The number of bytes of memory used by the models. This is the maximum value +since the last time the model was persisted. If the job is closed, this value +indicates the latest size. +end::model-bytes[] + +tag::model-bytes-exceeded[] +The number of bytes over the high limit for memory usage at the last allocation +failure. +end::model-bytes-exceeded[] + tag::model-id[] The unique identifier of the trained {infer} model. end::model-id[] @@ -858,6 +999,10 @@ see <>. -- end::model-memory-limit[] +tag::model-memory-limit-anomaly-jobs[] +The upper limit for model memory usage, checked on increasing values. +end::model-memory-limit-anomaly-jobs[] + tag::model-memory-limit-dfa[] The approximate maximum amount of memory resources that are permitted for analytical processing. The default value for {dfanalytics-jobs} is `1gb`. If @@ -867,6 +1012,19 @@ setting, an error occurs when you try to create {dfanalytics-jobs} that have <>. end::model-memory-limit-dfa[] +tag::model-memory-status[] +The status of the mathematical models, which can have one of the following +values: ++ +-- +* `ok`: The models stayed below the configured value. +* `soft_limit`: The models used more than 60% of the configured memory limit +and older unused models will be pruned to free up space. +* `hard_limit`: The models used more space than the configured memory limit. +As a result, not all incoming data was processed. +-- +end::model-memory-status[] + tag::model-plot-config[] This advanced configuration option stores model information along with the results. It provides a more detailed view into {anomaly-detect}. @@ -904,6 +1062,10 @@ The default value is `1`, which means snapshots that are one day (twenty-four ho older than the newest snapshot are deleted. end::model-snapshot-retention-days[] +tag::model-timestamp[] +The timestamp of the last record when the model stats were gathered. 
+end::model-timestamp[] + tag::multivariate-by-fields[] This functionality is reserved for internal use. It is not supported for use in customer environments and is not subject to the support SLA of official GA @@ -934,10 +1096,27 @@ improve diversity in the ensemble. Therefore, only override this if you are confident that the value you choose is appropriate for the data set. end::n-neighbors[] -tag::node[] +tag::node-address[] +The network address of the node. +end::node-address[] + +tag::node-datafeeds[] For started {dfeeds} only, this information pertains to the node upon which the {dfeed} is started. -end::node[] +end::node-datafeeds[] + +tag::node-ephemeral-id[] +The ephemeral ID of the node. +end::node-ephemeral-id[] + +tag::node-id[] +The unique identifier of the node. +end::node-id[] + +tag::node-jobs[] +Contains properties for the node that runs the job. This information is +available only for open jobs. +end::node-jobs[] tag::num-top-classes[] Defines the number of categories for which the predicted @@ -946,12 +1125,17 @@ total number of categories (in the {version} version of the {stack}, it's two) to predict then we will report all category probabilities. Defaults to 2. end::num-top-classes[] -tag::over-field-name[] -The field used to split the data. In particular, this property is used for -analyzing the splits with respect to the history of all splits. It is used for -finding unusual values in the population of all splits. For more information, -see {ml-docs}/ml-configuring-pop.html[Performing population analysis]. -end::over-field-name[] +tag::open-time[] +For open jobs only, the elapsed time for which the job has been open. +end::open-time[] + +tag::out-of-order-timestamp-count[] +The number of input documents that are out of time sequence and outside +of the latency window. This information is applicable only when you provide data +to the {anomaly-job} by using the <>. These out of +order documents are discarded, since jobs require time series data to be in +ascending chronological order. +end::out-of-order-timestamp-count[] tag::outlier-fraction[] Sets the proportion of the data set that is assumed to be outlying prior to @@ -959,6 +1143,13 @@ Sets the proportion of the data set that is assumed to be outlying prior to outliers and 95% are inliers. end::outlier-fraction[] +tag::over-field-name[] +The field used to split the data. In particular, this property is used for +analyzing the splits with respect to the history of all splits. It is used for +finding unusual values in the population of all splits. For more information, +see {ml-docs}/ml-configuring-pop.html[Performing population analysis]. +end::over-field-name[] + tag::partition-field-name[] The field used to segment the analysis. When you use this property, you have completely independent baselines for each value of this field. @@ -969,6 +1160,20 @@ Defines the name of the prediction field in the results. Defaults to `_prediction`. end::prediction-field-name[] +tag::processed-field-count[] +The total number of fields in all the documents that have been processed by the +{anomaly-job}. Only fields that are specified in the detector configuration +object contribute to this count. The timestamp is not included in this count. +end::processed-field-count[] + +tag::processed-record-count[] +The number of input documents that have been processed by the {anomaly-job}. +This value includes documents with missing fields, since they are nonetheless +analyzed. 
If you use {dfeeds} and have aggregations in your search query, the +`processed_record_count` is the number of aggregation results processed, not the +number of {es} documents. +end::processed-record-count[] + tag::query[] The {es} query domain-specific language (DSL). This value corresponds to the query object in an {es} search POST body. All the options that are supported by @@ -993,6 +1198,10 @@ assuming other related parameters (e.g. `source`, `analyzed_fields`, etc.) are the same. end::randomize-seed[] +tag::rare-category-count[] +The number of categories that match just one categorized document. +end::rare-category-count[] + tag::renormalization-window-days[] Advanced configuration option. The period over which adjustments to the score are applied, as new data is seen. The default value is the longer of 30 days or @@ -1086,6 +1295,12 @@ The configuration of how to source the analysis data. It requires an excluded from the destination. end::source-put-dfa[] +tag::sparse-bucket-count[] +The number of buckets that contained few data points compared to the expected +number of data points. If your data contains many sparse buckets, consider using +a longer `bucket_span`. +end::sparse-bucket-count[] + tag::standardization-enabled[] If `true`, then the following operation is performed on the columns before computing outlier scores: (x_i - mean(x_i)) / sd(x_i). Defaults to `true`. For @@ -1093,6 +1308,25 @@ more information, see https://en.wikipedia.org/wiki/Feature_scaling#Standardization_(Z-score_Normalization)[this wiki page about standardization]. end::standardization-enabled[] +tag::state-anomaly-job[] +The status of the {anomaly-job}, which can be one of the following values: ++ +-- +* `closed`: The job finished successfully with its model state persisted. The +job must be opened before it can accept further data. +* `closing`: The job close action is in progress and has not yet completed. A +closing job cannot accept further data. +* `failed`: The job did not finish successfully due to an error. This situation +can occur due to invalid input data, a fatal error occurring during the analysis, +or an external interaction such as the process being killed by the Linux out of +memory (OOM) killer. If the job had irrevocably failed, it must be force closed +and then deleted. If the {dfeed} can be corrected, the job can be closed and +then re-opened. +* `opened`: The job is available to receive and process data. +* `opening`: The job open action is in progress and has not yet completed. +-- +end::state-anomaly-job[] + tag::state-datafeed[] The status of the {dfeed}, which can be one of the following values: + @@ -1168,6 +1402,25 @@ that tokenizer but change the character or token filters, specify `"tokenizer": "ml_classic"` in your `categorization_analyzer`. end::tokenizer[] +tag::total-by-field-count[] +The number of `by` field values that were analyzed by the models. This value is +cumulative for all detectors in the job. +end::total-by-field-count[] + +tag::total-category-count[] +The number of categories created by categorization. +end::total-category-count[] + +tag::total-over-field-count[] +The number of `over` field values that were analyzed by the models. This value +is cumulative for all detectors in the job. +end::total-over-field-count[] + +tag::total-partition-field-count[] +The number of `partition` field values that were analyzed by the models. This +value is cumulative for all detectors in the job. 
+end::total-partition-field-count[] + tag::training-percent[] Defines what percentage of the eligible documents that will be used for training. Documents that are ignored by the analysis (for example diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java index 446574323e0..d830ff21bde 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java @@ -82,9 +82,9 @@ public class RestCatDatafeedsAction extends AbstractCatAction { .build()); // Timing stats - table.addCell("bucket.count", + table.addCell("buckets.count", TableColumnAttributeBuilder.builder("bucket count") - .setAliases("bc", "bucketCount") + .setAliases("bc", "bucketsCount") .build()); table.addCell("search.count", TableColumnAttributeBuilder.builder("number of searches ran by the datafeed") diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java index 64cd39edf30..8227c0c8f58 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java @@ -97,7 +97,7 @@ public class RestCatJobsAction extends AbstractCatAction { .build()); table.addCell("data.processed_fields", TableColumnAttributeBuilder.builder("number of processed fields", false) - .setAliases("dpr", "dataProcessedFields") + .setAliases("dpf", "dataProcessedFields") .build()); table.addCell("data.input_bytes", TableColumnAttributeBuilder.builder("total input bytes", false) @@ -223,55 +223,55 @@ public class RestCatJobsAction extends AbstractCatAction { .build()); // Forecast Stats - table.addCell("forecast." + ForecastStats.Fields.TOTAL, - TableColumnAttributeBuilder.builder("total number of forecasts").setAliases("ft", "forecastTotal").build()); - table.addCell("forecast.memory.min", + table.addCell("forecasts." + ForecastStats.Fields.TOTAL, + TableColumnAttributeBuilder.builder("total number of forecasts").setAliases("ft", "forecastsTotal").build()); + table.addCell("forecasts.memory.min", TableColumnAttributeBuilder.builder("minimum memory used by forecasts", false) - .setAliases("fmmin", "forecastMemoryMin") + .setAliases("fmmin", "forecastsMemoryMin") .build()); - table.addCell("forecast.memory.max", + table.addCell("forecasts.memory.max", TableColumnAttributeBuilder.builder("maximum memory used by forecasts", false) .setAliases("fmmax", "forecastsMemoryMax") .build()); - table.addCell("forecast.memory.avg", + table.addCell("forecasts.memory.avg", TableColumnAttributeBuilder.builder("average memory used by forecasts", false) - .setAliases("fmavg", "forecastMemoryAvg") + .setAliases("fmavg", "forecastsMemoryAvg") .build()); - table.addCell("forecast.memory.total", + table.addCell("forecasts.memory.total", TableColumnAttributeBuilder.builder("total memory used by all forecasts", false) - .setAliases("fmt", "forecastMemoryTotal") + .setAliases("fmt", "forecastsMemoryTotal") .build()); - table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".min", + table.addCell("forecasts." 
+ ForecastStats.Fields.RECORDS + ".min", TableColumnAttributeBuilder.builder("minimum record count for forecasts", false) - .setAliases("frmin", "forecastRecordsMin") + .setAliases("frmin", "forecastsRecordsMin") .build()); - table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".max", + table.addCell("forecasts." + ForecastStats.Fields.RECORDS + ".max", TableColumnAttributeBuilder.builder("maximum record count for forecasts", false) - .setAliases("frmax", "forecastRecordsMax") + .setAliases("frmax", "forecastsRecordsMax") .build()); - table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".avg", + table.addCell("forecasts." + ForecastStats.Fields.RECORDS + ".avg", TableColumnAttributeBuilder.builder("average record count for forecasts", false) - .setAliases("fravg", "forecastRecordsAvg") + .setAliases("fravg", "forecastsRecordsAvg") .build()); - table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".total", + table.addCell("forecasts." + ForecastStats.Fields.RECORDS + ".total", TableColumnAttributeBuilder.builder("total record count for all forecasts", false) - .setAliases("frt", "forecastRecordsTotal") + .setAliases("frt", "forecastsRecordsTotal") .build()); - table.addCell("forecast.time.min", + table.addCell("forecasts.time.min", TableColumnAttributeBuilder.builder("minimum runtime for forecasts", false) - .setAliases("ftmin", "forecastTimeMin") + .setAliases("ftmin", "forecastsTimeMin") .build()); - table.addCell("forecast.time.max", + table.addCell("forecasts.time.max", TableColumnAttributeBuilder.builder("maximum run time for forecasts", false) - .setAliases("ftmax", "forecastTimeMax") + .setAliases("ftmax", "forecastsTimeMax") .build()); - table.addCell("forecast.time.avg", + table.addCell("forecasts.time.avg", TableColumnAttributeBuilder.builder("average runtime for all forecasts (milliseconds)", false) - .setAliases("ftavg", "forecastTimeAvg") + .setAliases("ftavg", "forecastsTimeAvg") .build()); - table.addCell("forecast.time.total", + table.addCell("forecasts.time.total", TableColumnAttributeBuilder.builder("total runtime for all forecasts", false) - .setAliases("ftt", "forecastTimeTotal").build()); + .setAliases("ftt", "forecastsTimeTotal").build()); //Node info table.addCell("node.id", @@ -292,29 +292,29 @@ public class RestCatJobsAction extends AbstractCatAction { .build()); //Timing Stats - table.addCell("bucket.count", + table.addCell("buckets.count", TableColumnAttributeBuilder.builder("bucket count") - .setAliases("bc", "bucketCount") + .setAliases("bc", "bucketsCount") .build()); - table.addCell("bucket.time.total", + table.addCell("buckets.time.total", TableColumnAttributeBuilder.builder("total bucket processing time", false) - .setAliases("btt", "bucketTimeTotal") + .setAliases("btt", "bucketsTimeTotal") .build()); - table.addCell("bucket.time.min", + table.addCell("buckets.time.min", TableColumnAttributeBuilder.builder("minimum bucket processing time", false) - .setAliases("btmin", "bucketTimeMin") + .setAliases("btmin", "bucketsTimeMin") .build()); - table.addCell("bucket.time.max", + table.addCell("buckets.time.max", TableColumnAttributeBuilder.builder("maximum bucket processing time", false) - .setAliases("btmax", "bucketTimeMax") + .setAliases("btmax", "bucketsTimeMax") .build()); - table.addCell("bucket.time.exp_avg", + table.addCell("buckets.time.exp_avg", TableColumnAttributeBuilder.builder("exponential average bucket processing time (milliseconds)", false) - .setAliases("btea", "bucketTimeExpAvg") + .setAliases("btea", "bucketsTimeExpAvg") 
.build()); - table.addCell("bucket.time.exp_avg_hour", + table.addCell("buckets.time.exp_avg_hour", TableColumnAttributeBuilder.builder("exponential average bucket processing time by hour (milliseconds)", false) - .setAliases("bteah", "bucketTimeExpAvgHour") + .setAliases("bteah", "bucketsTimeExpAvgHour") .build()); table.endHeaders(); diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml_jobs.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml_jobs.json index d773e6bbf5e..f62a46ba341 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml_jobs.json +++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml_jobs.json @@ -1,7 +1,7 @@ { "cat.ml_jobs":{ "documentation":{ - "url":"http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-get-job-stats.html" + "url":"http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-anomaly-detectors.html" }, "stability":"stable", "url":{ diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/datafeed_cat_apis.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/datafeed_cat_apis.yml index 4437a31b5fd..89274c59884 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/datafeed_cat_apis.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/datafeed_cat_apis.yml @@ -86,7 +86,7 @@ setup: datafeed_id: datafeed-job-stats-test - match: $body: | - / #id state bucket.count search.count + / #id state buckets.count search.count ^ (datafeed\-job\-stats\-test \s+ \w+ \s+ \d+ \s+ \d+ \n)+ $/ - do: @@ -95,7 +95,7 @@ setup: datafeed_id: datafeed-job-stats-test - match: $body: | - /^ id \s+ state \s+ bucket\.count \s+ search\.count \n + /^ id \s+ state \s+ buckets\.count \s+ search\.count \n (datafeed\-job\-stats\-test \s+ \w+ \s+ \d+ \s+ \d+ \n)+ $/ - do: diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/job_cat_apis.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/job_cat_apis.yml index bb13c3a5cc5..a82ce200320 100644 --- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/job_cat_apis.yml +++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/job_cat_apis.yml @@ -90,7 +90,7 @@ setup: job_id: job-stats-test - match: $body: | - / #id state data.processed_records model.bytes model.memory_status forecast.total bucket.count + / #id state data.processed_records model.bytes model.memory_status forecasts.total buckets.count ^ (job\-stats\-test \s+ \w+ \s+ \d+ \s+ .*? \s+ \w+ \s+ \d+ \s+ \d+ \n)+ $/ - do: @@ -99,7 +99,7 @@ setup: job_id: job-stats-test - match: $body: | - /^ id \s+ state \s+ data\.processed_records \s+ model\.bytes \s+ model\.memory_status \s+ forecast\.total \s+ bucket\.count \n + /^ id \s+ state \s+ data\.processed_records \s+ model\.bytes \s+ model\.memory_status \s+ forecasts\.total \s+ buckets\.count \n (job\-stats\-test \s+ \w+ \s+ \d+ \s+ .*? \s+ \w+ \s+ \d+ \s+ \d+ \n)+ $/ - do:
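A quick way to exercise the renamed columns once this patch is applied — a minimal sketch only, assuming the same Kibana sample-data jobs and {dfeeds} used in the examples above (job names and output will differ on other clusters):

[source,console]
--------------------------------------------------
GET _cat/ml/anomaly_detectors?v&h=id,state,buckets.count,forecasts.total
GET _cat/ml/datafeeds?v&h=id,state,buckets.count,search.count
--------------------------------------------------
// TEST[skip:illustrative sketch only]

Both requests use the pluralized column names (`buckets.count`, `forecasts.total`) introduced by this change; the old singular names (`bucket.count`, `forecast.total`) are no longer registered by `RestCatJobsAction` and `RestCatDatafeedsAction`.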