diff --git a/docs/reference/cat.asciidoc b/docs/reference/cat.asciidoc
index 4c30d21693d..d557a8c930a 100644
--- a/docs/reference/cat.asciidoc
+++ b/docs/reference/cat.asciidoc
@@ -227,6 +227,8 @@ include::cat/alias.asciidoc[]
include::cat/allocation.asciidoc[]
+include::cat/anomaly-detectors.asciidoc[]
+
include::cat/count.asciidoc[]
include::cat/dataframeanalytics.asciidoc[]
diff --git a/docs/reference/cat/anomaly-detectors.asciidoc b/docs/reference/cat/anomaly-detectors.asciidoc
new file mode 100644
index 00000000000..cc88b2c2b3b
--- /dev/null
+++ b/docs/reference/cat/anomaly-detectors.asciidoc
@@ -0,0 +1,280 @@
+[role="xpack"]
+[testenv="platinum"]
+[[cat-anomaly-detectors]]
+=== cat anomaly detectors API
+++++
+cat anomaly detectors
+++++
+
+Returns configuration and usage information about {anomaly-jobs}.
+
+[[cat-anomaly-detectors-request]]
+==== {api-request-title}
+
+`GET /_cat/ml/anomaly_detectors/<job_id>` +
+
+`GET /_cat/ml/anomaly_detectors`
+
+[[cat-anomaly-detectors-prereqs]]
+==== {api-prereq-title}
+
+* If the {es} {security-features} are enabled, you must have `monitor_ml`,
+`monitor`, `manage_ml`, or `manage` cluster privileges to use this API. See
+<<security-privileges>> and {ml-docs}/setup.html[Set up {ml-features}].
+
+
+[[cat-anomaly-detectors-desc]]
+==== {api-description-title}
+
+See {ml-docs}/ml-jobs.html[{anomaly-jobs-cap}].
+
+NOTE: This API returns a maximum of 10,000 jobs.
+
+[[cat-anomaly-detectors-path-params]]
+==== {api-path-parms-title}
+
+`<job_id>`::
+(Optional, string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
+
+[[cat-anomaly-detectors-query-params]]
+==== {api-query-parms-title}
+
+`allow_no_jobs`::
+(Optional, boolean)
+include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-jobs]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=bytes]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=http-format]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-h]
++
+If you do not specify which columns to include, the API returns the default
+columns. If you explicitly specify one or more columns, it returns only the
+specified columns.
++
+Valid columns are:
+
+`assignment_explanation`, `ae`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-anomaly-jobs]
+
+`buckets.count`, `bc`, `bucketsCount`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count-anomaly-jobs]
+
+`buckets.time.exp_avg`, `btea`, `bucketsTimeExpAvg`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average]
+
+`buckets.time.exp_avg_hour`, `bteah`, `bucketsTimeExpAvgHour`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average-hour]
+
+`buckets.time.max`, `btmax`, `bucketsTimeMax`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-maximum]
+
+`buckets.time.min`, `btmin`, `bucketsTimeMin`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-minimum]
+
+`buckets.time.total`, `btt`, `bucketsTimeTotal`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-total]
+
+`data.buckets`, `db`, `dataBuckets`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count]
+
+`data.earliest_record`, `der`, `dataEarliestRecord`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=earliest-record-timestamp]
+
+`data.empty_buckets`, `deb`, `dataEmptyBuckets`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=empty-bucket-count]
+
+`data.input_bytes`, `dib`, `dataInputBytes`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-bytes]
+
+`data.input_fields`, `dif`, `dataInputFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-field-count]
+
+`data.input_records`, `dir`, `dataInputRecords`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-record-count]
+
+`data.invalid_dates`, `did`, `dataInvalidDates`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=invalid-date-count]
+
+`data.last`, `dl`, `dataLast`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=last-data-time]
+
+`data.last_empty_bucket`, `dleb`, `dataLastEmptyBucket`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-empty-bucket-timestamp]
+
+`data.last_sparse_bucket`, `dlsb`, `dataLastSparseBucket`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-sparse-record-timestamp]
+
+`data.latest_record`, `dlr`, `dataLatestRecord`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-record-timestamp]
+
+`data.missing_fields`, `dmf`, `dataMissingFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=missing-field-count]
+
+`data.out_of_order_timestamps`, `doot`, `dataOutOfOrderTimestamps`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=out-of-order-timestamp-count]
+
+`data.processed_fields`, `dpf`, `dataProcessedFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=processed-field-count]
+
+`data.processed_records`, `dpr`, `dataProcessedRecords`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=processed-record-count]
+
+`data.sparse_buckets`, `dsb`, `dataSparseBuckets`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=sparse-bucket-count]
+
+`forecasts.memory.avg`, `fmavg`, `forecastsMemoryAvg`:::
+The average memory usage in bytes for forecasts related to the {anomaly-job}.
+
+`forecasts.memory.max`, `fmmax`, `forecastsMemoryMax`:::
+The maximum memory usage in bytes for forecasts related to the {anomaly-job}.
+
+`forecasts.memory.min`, `fmmin`, `forecastsMemoryMin`:::
+The minimum memory usage in bytes for forecasts related to the {anomaly-job}.
+
+`forecasts.memory.total`, `fmt`, `forecastsMemoryTotal`:::
+The total memory usage in bytes for forecasts related to the {anomaly-job}.
+
+`forecasts.records.avg`, `fravg`, `forecastsRecordsAvg`:::
+The average number of `model_forecast` documents written for forecasts related
+to the {anomaly-job}.
+
+`forecasts.records.max`, `frmax`, `forecastsRecordsMax`:::
+The maximum number of `model_forecast` documents written for forecasts related
+to the {anomaly-job}.
+
+`forecasts.records.min`, `frmin`, `forecastsRecordsMin`:::
+The minimum number of `model_forecast` documents written for forecasts related
+to the {anomaly-job}.
+
+`forecasts.records.total`, `frt`, `forecastsRecordsTotal`:::
+The total number of `model_forecast` documents written for forecasts related to
+the {anomaly-job}.
+
+`forecasts.time.avg`, `ftavg`, `forecastsTimeAvg`:::
+The average runtime in milliseconds for forecasts related to the {anomaly-job}.
+
+`forecasts.time.max`, `ftmax`, `forecastsTimeMax`:::
+The maximum runtime in milliseconds for forecasts related to the {anomaly-job}.
+
+`forecasts.time.min`, `ftmin`, `forecastsTimeMin`:::
+The minimum runtime in milliseconds for forecasts related to the {anomaly-job}.
+
+`forecasts.time.total`, `ftt`, `forecastsTimeTotal`:::
+The total runtime in milliseconds for forecasts related to the {anomaly-job}.
+
+`forecasts.total`, `ft`, `forecastsTotal`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=forecast-total]
+
+`id`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
+
+`model.bucket_allocation_failures`, `mbaf`, `modelBucketAllocationFailures`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-allocation-failures-count]
+
+`model.by_fields`, `mbf`, `modelByFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-by-field-count]
+
+`model.bytes`, `mb`, `modelBytes`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes]
+
+`model.bytes_exceeded`, `mbe`, `modelBytesExceeded`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes-exceeded]
+
+`model.categorization_status`, `mcs`, `modelCategorizationStatus`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=categorization-status]
+
+`model.categorized_doc_count`, `mcdc`, `modelCategorizedDocCount`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=categorized-doc-count]
+
+`model.dead_category_count`, `mdcc`, `modelDeadCategoryCount`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=dead-category-count]
+
+`model.frequent_category_count`, `mfcc`, `modelFrequentCategoryCount`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=frequent-category-count]
+
+`model.log_time`, `mlt`, `modelLogTime`:::
+The timestamp when the model stats were gathered, according to server time.
+
+`model.memory_limit`, `mml`, `modelMemoryLimit`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-limit-anomaly-jobs]
+
+`model.memory_status`, `mms`, `modelMemoryStatus`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-status]
+
+`model.over_fields`, `mof`, `modelOverFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-over-field-count]
+
+`model.partition_fields`, `mpf`, `modelPartitionFields`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-partition-field-count]
+
+`model.rare_category_count`, `mrcc`, `modelRareCategoryCount`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=rare-category-count]
+
+`model.timestamp`, `mt`, `modelTimestamp`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-timestamp]
+
+`model.total_category_count`, `mtcc`, `modelTotalCategoryCount`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-category-count]
+
+`node.address`, `na`, `nodeAddress`:::
+The network address of the node.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs]
+
+`node.ephemeral_id`, `ne`, `nodeEphemeralId`:::
+The ephemeral ID of the node.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs]
+
+`node.id`, `ni`, `nodeId`:::
+The unique identifier of the node.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs]
+
+`node.name`, `nn`, `nodeName`:::
+The node name.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-jobs]
+
+`opened_time`, `ot`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=open-time]
+
+`state`, `s`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=state-anomaly-job]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=help]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-s]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=time]
+
+include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-v]
+
+[[cat-anomaly-detectors-example]]
+==== {api-examples-title}
+
+[source,console]
+--------------------------------------------------
+GET _cat/ml/anomaly_detectors?h=id,s,dpr,mb&v
+--------------------------------------------------
+// TEST[skip:kibana sample data]
+
+[source,console-result]
+----
+id                    s      dpr   mb
+high_sum_total_sales  closed 14022 1.5mb
+low_request_rate      closed 1216  40.5kb
+response_code_rates   closed 28146 132.7kb
+url_scanning          closed 28146 501.6kb
+----
+// TESTRESPONSE[skip:kibana sample data]
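+
+If you omit the `h` parameter, the API returns the default columns
+(`id`, `state`, `data.processed_records`, `model.bytes`, `model.memory_status`,
+`forecasts.total`, and `buckets.count`). For example:
+
+[source,console]
+--------------------------------------------------
+GET _cat/ml/anomaly_detectors?v
+--------------------------------------------------
+// TEST[skip:kibana sample data]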
diff --git a/docs/reference/cat/datafeeds.asciidoc b/docs/reference/cat/datafeeds.asciidoc
index 95a830aa823..57645633757 100644
--- a/docs/reference/cat/datafeeds.asciidoc
+++ b/docs/reference/cat/datafeeds.asciidoc
@@ -22,12 +22,14 @@ Returns configuration and usage information about {dfeeds}.
`monitor`, `manage_ml`, or `manage` cluster privileges to use this API. See
<<security-privileges>> and {ml-docs}/setup.html[Set up {ml-features}].
-////
+
[[cat-datafeeds-desc]]
==== {api-description-title}
-TBD: This API returns a maximum of 10,000 {dfeeds}.
-////
+{dfeeds-cap} retrieve data from {es} for analysis by {anomaly-jobs}. For more
+information, see {ml-docs}/ml-dfeeds.html[{dfeeds-cap}].
+
+NOTE: This API returns a maximum of 10,000 {dfeeds}.
[[cat-datafeeds-path-params]]
==== {api-path-parms-title}
@@ -46,6 +48,60 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-datafeeds]
include::{docdir}/rest-api/common-parms.asciidoc[tag=http-format]
include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-h]
++
+If you do not specify which columns to include, the API returns the default
+columns. If you explicitly specify one or more columns, it returns only the
+specified columns.
++
+Valid columns are:
+
+`assignment_explanation`, `ae`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-datafeeds]
+
+`buckets.count`, `bc`, `bucketsCount`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count]
+
+`id`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id]
+
+`node.address`, `na`, `nodeAddress`:::
+The network address of the node.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds]
+
+`node.ephemeral_id`, `ne`, `nodeEphemeralId`:::
+The ephemeral ID of the node.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds]
+
+`node.id`, `ni`, `nodeId`:::
+The unique identifier of the node.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds]
+
+`node.name`, `nn`, `nodeName`:::
+The node name.
++
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds]
+
+`search.bucket_avg`, `sba`, `searchBucketAvg`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=search-bucket-avg]
+
+`search.count`, `sc`, `searchCount`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=search-count]
+
+`search.exp_avg_hour`, `seah`, `searchExpAvgHour`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=search-exp-avg-hour]
+
+`search.time`, `st`, `searchTime`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=search-time]
+
+`state`, `s`:::
+(Default)
+include::{docdir}/ml/ml-shared.asciidoc[tag=state-datafeed]
include::{docdir}/rest-api/common-parms.asciidoc[tag=help]
@@ -55,86 +111,6 @@ include::{docdir}/rest-api/common-parms.asciidoc[tag=time]
include::{docdir}/rest-api/common-parms.asciidoc[tag=cat-v]
-[[cat-datafeeds-results]]
-==== {api-response-body-title}
-
-`assignment_explanation`::
-include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation]
-+
-To retrieve this information, specify the `ae` column in the `h` query parameter.
-
-`bucket.count`::
-include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count]
-+
-To retrieve this information, specify the `bc` or `bucketCount` column in the
-`h` query parameter.
-
-`id`::
-include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id]
-+
-To retrieve this information, specify the `id` column in the `h` query parameter.
-
-`node.address`::
-The network address of the node.
-+
-include::{docdir}/ml/ml-shared.asciidoc[tag=node]
-+
-To retrieve this information, specify the `na` or `nodeAddress` column in the
-`h` query parameter.
-
-`node.ephemeral_id`::
-The ephemeral ID of the node.
-+
-include::{docdir}/ml/ml-shared.asciidoc[tag=node]
-+
-To retrieve this information, specify the `ne` or `nodeEphemeralId` column in
-the `h` query parameter.
-
-`node.id`::
-The unique identifier of the node.
-+
-include::{docdir}/ml/ml-shared.asciidoc[tag=node]
-+
-To retrieve this information, specify the `ni` or `nodeId` column in the `h`
-query parameter.
-
-`node.name`::
-The node name.
-+
-include::{docdir}/ml/ml-shared.asciidoc[tag=node]
-+
-To retrieve this information, specify the `nn` or `nodeName` column in the `h`
-query parameter.
-
-`search.bucket_avg`::
-include::{docdir}/ml/ml-shared.asciidoc[tag=search-bucket-avg]
-+
-To retrieve this information, specify the `sba` or `searchBucketAvg` column in
-the `h` query parameter.
-
-`search.count`::
-include::{docdir}/ml/ml-shared.asciidoc[tag=search-count]
-+
-To retrieve this information, specify the `sc` or `searchCount` column in the
-`h` query parameter.
-
-`search.exp_avg_hour`::
-include::{docdir}/ml/ml-shared.asciidoc[tag=search-exp-avg-hour]
-+
-To retrieve this information, specify the `seah` or `searchExpAvgHour` column in
-the `h` query parameter.
-
-`search.time`::
-include::{docdir}/ml/ml-shared.asciidoc[tag=search-time]
-+
-To retrieve this information, specify the `st` or `searchTime` column in the `h`
-query parameter.
-
-`state`::
-include::{docdir}/ml/ml-shared.asciidoc[tag=state-datafeed]
-+
-To retrieve this information, specify the `s` column in the `h` query parameter.
-
[[cat-datafeeds-example]]
==== {api-examples-title}
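+
+To limit the output to specific columns, use the `h` query parameter. For
+example, the following request returns only the identifier, state, and search
+count of each {dfeed}:
+
+[source,console]
+--------------------------------------------------
+GET _cat/ml/datafeeds?h=id,s,sc&v
+--------------------------------------------------
+// TEST[skip:kibana sample data]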
@@ -146,7 +122,7 @@ GET _cat/ml/datafeeds?v
[source,console-result]
----
-id state bucket.count search.count
+id state buckets.count search.count
datafeed-high_sum_total_sales stopped 743 7
datafeed-low_request_rate stopped 1457 3
datafeed-response_code_rates stopped 1460 18
diff --git a/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc
index 45381156a7c..bf49722991a 100644
--- a/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc
+++ b/docs/reference/ml/anomaly-detection/apis/get-datafeed-stats.asciidoc
@@ -68,7 +68,7 @@ informational; you cannot update their values.
`assignment_explanation`::
(string)
-include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation]
+include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-datafeeds]
`datafeed_id`::
(string)
@@ -76,12 +76,18 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id]
`node`::
(object)
-include::{docdir}/ml/ml-shared.asciidoc[tag=node]
-`node`.`id`::: The unique identifier of the node. For example, "0-o0tOoRTwKFZifatTWKNw".
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-datafeeds]
+
+`node`.`id`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-id]
+
`node`.`name`::: The node name. For example, `0-o0tOo`.
-`node`.`ephemeral_id`::: The node ephemeral ID.
-`node`.`transport_address`::: The host and port where transport HTTP connections
-are accepted. For example, `127.0.0.1:9300`.
+
+`node`.`ephemeral_id`:::
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-ephemeral-id]
+
+`node`.`transport_address`::: The host and port where transport HTTP connections are
+accepted. For example, `127.0.0.1:9300`.
`node`.`attributes`::: For example, `{"ml.machine_memory": "17179869184"}`.
`state`::
diff --git a/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc
index ea9f1849e91..33edc621c30 100644
--- a/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc
+++ b/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc
@@ -57,8 +57,8 @@ The API returns the following information about the operational progress of a
job:
`assignment_explanation`::
-(string) For open jobs only, contains messages relating to the selection of a
-node to run the job.
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=assignment-explanation-anomaly-jobs]
[[datacounts]]`data_counts`::
(object) An object that describes the quantity of input to the job and any
@@ -67,85 +67,73 @@ a job. If a model snapshot is reverted or old results are deleted, the job
counts are not reset.
`data_counts`.`bucket_count`:::
-(long) The number of bucket results produced by the job.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count-anomaly-jobs]
`data_counts`.`earliest_record_timestamp`:::
-(date) The timestamp of the earliest chronologically input document.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=earliest-record-timestamp]
`data_counts`.`empty_bucket_count`:::
-(long) The number of buckets which did not contain any data. If your data
-contains many empty buckets, consider increasing your `bucket_span` or using
-functions that are tolerant to gaps in data such as `mean`, `non_null_sum` or
-`non_zero_count`.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=empty-bucket-count]
`data_counts`.`input_bytes`:::
-(long) The number of raw bytes read by the job.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-bytes]
`data_counts`.`input_field_count`:::
-(long) The total number of fields in input documents posted to the job. This
-count includes fields that are not used in the analysis. However, be aware that
-if you are using a {dfeed}, it extracts only the required fields from the
-documents it retrieves before posting them to the job.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-field-count]
`data_counts`.`input_record_count`:::
-(long) The number of data records read by the job.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=input-record-count]
`data_counts`.`invalid_date_count`:::
-(long) The number of records with either a missing date field or a date that
-could not be parsed.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=invalid-date-count]
`data_counts`.`job_id`:::
(string)
include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
`data_counts`.`last_data_time`:::
-(date) The timestamp at which data was last analyzed, according to server time.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=last-data-time]
`data_counts`.`latest_empty_bucket_timestamp`:::
-(date) The timestamp of the last bucket that did not contain any data.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-empty-bucket-timestamp]
`data_counts`.`latest_record_timestamp`:::
-(date) The timestamp of the latest chronologically input document.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-record-timestamp]
`data_counts`.`latest_sparse_bucket_timestamp`:::
-(date) The timestamp of the last bucket that was considered sparse.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=latest-sparse-record-timestamp]
`data_counts`.`missing_field_count`:::
-(long) The number of input documents that are missing a field that the job is
-configured to analyze. Input documents with missing fields are still processed
-because it is possible that not all fields are missing. The value of
-`processed_record_count` includes this count.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=missing-field-count]
+
---
-NOTE: If you are using {dfeeds} or posting data to the job in JSON format, a
-high `missing_field_count` is often not an indication of data issues. It is not
-necessarily a cause for concern.
-
---
+The value of `processed_record_count` includes this count.
`data_counts`.`out_of_order_timestamp_count`:::
-(long) The number of input documents that are out of time sequence and outside
-of the latency window. This information is applicable only when you provide data
-to the job by using the <<ml-post-data,post data API>>. These out of order
-documents are discarded, since jobs require time series data to be in ascending
-chronological order.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=out-of-order-timestamp-count]
`data_counts`.`processed_field_count`:::
-(long) The total number of fields in all the documents that have been processed
-by the job. Only fields that are specified in the detector configuration object
-contribute to this count. The time stamp is not included in this count.
+include::{docdir}/ml/ml-shared.asciidoc[tag=processed-field-count]
`data_counts`.`processed_record_count`:::
-(long) The number of input documents that have been processed by the job. This
-value includes documents with missing fields, since they are nonetheless
-analyzed. If you use {dfeeds} and have aggregations in your search query, the
-`processed_record_count` will be the number of aggregation results processed,
-not the number of {es} documents.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=processed-record-count]
`data_counts`.`sparse_bucket_count`:::
-(long) The number of buckets that contained few data points compared to the
-expected number of data points. If your data contains many sparse buckets,
-consider using a longer `bucket_span`.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=sparse-bucket-count]
[[forecastsstats]]`forecasts_stats`::
(object) An object that provides statistical information about forecasts
@@ -171,8 +159,8 @@ related to this job. If there are no forecasts, this property is omitted.
maximum, average and total.
`forecasts_stats`.`records`:::
-(object) The `avg`, `min`, `max` and `total` number of model_forecast documents
-written for forecasts related to this job. If there are no forecasts, this
+(object) The `avg`, `min`, `max` and `total` number of `model_forecast` documents
+written for forecasts related to this job. If there are no forecasts, this
property is omitted.
`forecasts_stats`.`processing_time_ms`:::
@@ -186,8 +174,8 @@ omitted.
omitted.
`forecasts_stats`.`total`:::
-(long) The number of individual forecasts currently available for this job. A
-value of `1` or more indicates that forecasts exist.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=forecast-total]
`job_id`::
(string)
@@ -198,38 +186,24 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
model. It has the following properties:
`model_size_stats`.`bucket_allocation_failures_count`:::
-(long) The number of buckets for which new entities in incoming data were not
-processed due to insufficient model memory. This situation is also signified
-by a `hard_limit: memory_status` property value.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-allocation-failures-count]
`model_size_stats`.`categorized_doc_count`:::
-(long) The number of documents that have had a field categorized.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=categorized-doc-count]
`model_size_stats`.`categorization_status`:::
-(string) The status of categorization for this job.
-Contains one of the following values.
-+
---
-* `ok`: Categorization is performing acceptably well (or not being
-used at all).
-* `warn`: Categorization is detecting a distribution of categories
-that suggests the input data is inappropriate for categorization.
-Problems could be that there is only one category, more than 90% of
-categories are rare, the number of categories is greater than 50% of
-the number of categorized documents, there are no frequently
-matched categories, or more than 50% of categories are dead.
-
---
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=categorization-status]
`model_size_stats`.`dead_category_count`:::
-(long) The number of categories created by categorization that will
-never be assigned again because another category's definition
-makes it a superset of the dead category. (Dead categories are a
-side effect of the way categorization has no prior training.)
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=dead-category-count]
`model_size_stats`.`frequent_category_count`:::
-(long) The number of categories that match more than 1% of categorized
-documents.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=frequent-category-count]
`model_size_stats`.`job_id`:::
(string)
@@ -239,53 +213,47 @@ include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
(date) The timestamp of the `model_size_stats` according to server time.
`model_size_stats`.`memory_status`:::
-(string) The status of the mathematical models. This property can have one of
-the following values:
-+
---
-* `ok`: The models stayed below the configured value.
-* `soft_limit`: The models used more than 60% of the configured memory limit and
-older unused models will be pruned to free up space.
-* `hard_limit`: The models used more space than the configured memory limit. As
-a result, not all incoming data was processed.
---
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-status]
`model_size_stats`.`model_bytes`:::
-(long) The number of bytes of memory used by the models. This is the maximum
-value since the last time the model was persisted. If the job is closed,
-this value indicates the latest size.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes]
`model_size_stats`.`model_bytes_exceeded`:::
- (long) The number of bytes over the high limit for memory usage at the last
- allocation failure.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-bytes-exceeded]
`model_size_stats`.`model_bytes_memory_limit`:::
-(long) The upper limit for memory usage, checked on increasing values.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-limit-anomaly-jobs]
`model_size_stats`.`rare_category_count`:::
-(long) The number of categories that match just one categorized document.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=rare-category-count]
`model_size_stats`.`result_type`:::
(string) For internal use. The type of result.
`model_size_stats`.`total_by_field_count`:::
-(long) The number of `by` field values that were analyzed by the models. This
-value is cumulative for all detectors.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-by-field-count]
`model_size_stats`.`total_category_count`:::
-(long) The number of categories created by categorization.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-category-count]
`model_size_stats`.`total_over_field_count`:::
-(long) The number of `over` field values that were analyzed by the models. This
-value is cumulative for all detectors.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-over-field-count]
`model_size_stats`.`total_partition_field_count`:::
-(long) The number of `partition` field values that were analyzed by the models.
-This value is cumulative for all detectors.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=total-partition-field-count]
`model_size_stats`.`timestamp`:::
-(date) The timestamp of the `model_size_stats` according to the timestamp of the
-data.
+(date)
+include::{docdir}/ml/ml-shared.asciidoc[tag=model-timestamp]
[[stats-node]]`node`::
(object) Contains properties for the node that runs the job. This information is
@@ -296,10 +264,12 @@ available only for open jobs.
`{"ml.machine_memory": "17179869184"}`.
`node`.`ephemeral_id`:::
-(string) The ephemeral id of the node.
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-ephemeral-id]
`node`.`id`:::
-(string) The unique identifier of the node.
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=node-id]
`node`.`name`:::
(string) The node name.
@@ -308,24 +278,12 @@ available only for open jobs.
(string) The host and port where transport HTTP connections are accepted.
`open_time`::
-(string) For open jobs only, the elapsed time for which the job has been open.
-For example, `28746386s`.
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=open-time]
`state`::
-(string) The status of the job, which can be one of the following values:
-+
---
-* `closed`: The job finished successfully with its model state persisted. The
-job must be opened before it can accept further data.
-* `closing`: The job close action is in progress and has not yet completed. A
-closing job cannot accept further data.
-* `failed`: The job did not finish successfully due to an error. This situation
-can occur due to invalid input data. If the job had irrevocably failed, it must
-be force closed and then deleted. If the {dfeed} can be corrected, the job can
-be closed and then re-opened.
-* `opened`: The job is available to receive and process data.
-* `opening`: The job open action is in progress and has not yet completed.
---
+(string)
+include::{docdir}/ml/ml-shared.asciidoc[tag=state-anomaly-job]
[[timingstats]]`timing_stats`::
(object) An object that provides statistical information about timing aspect of
@@ -335,28 +293,32 @@ this job. It has the following properties:
(double) Average of all bucket processing times in milliseconds.
`timing_stats`.`bucket_count`:::
-(long) The number of buckets processed.
+(long)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-count]
`timing_stats`.`exponential_average_bucket_processing_time_ms`:::
-(double) Exponential moving average of all bucket processing times in
-milliseconds.
+(double)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average]
`timing_stats`.`exponential_average_bucket_processing_time_per_hour_ms`:::
-(double) Exponentially-weighted moving average of bucket processing times
-calculated in a 1 hour time window.
+(double)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-exponential-average-hour]
`timing_stats`.`job_id`:::
(string)
include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
`timing_stats`.`maximum_bucket_processing_time_ms`:::
-(double) Maximum among all bucket processing times in milliseconds.
-
+(double)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-maximum]
+
`timing_stats`.`minimum_bucket_processing_time_ms`:::
-(double) Minimum among all bucket processing times in milliseconds.
+(double)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-minimum]
`timing_stats`.`total_bucket_processing_time_ms`:::
-(double) Sum of all bucket processing times in milliseconds.
+(double)
+include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-time-total]
[[ml-get-job-stats-response-codes]]
==== {api-response-codes-title}
diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc
index 59957d1450a..adddef76475 100644
--- a/docs/reference/ml/ml-shared.asciidoc
+++ b/docs/reference/ml/ml-shared.asciidoc
@@ -136,9 +136,14 @@ tag::analyzed-fields-includes[]
An array of strings that defines the fields that will be included in the analysis.
end::analyzed-fields-includes[]
-tag::assignment-explanation[]
+tag::assignment-explanation-anomaly-jobs[]
+For open {anomaly-jobs} only, contains messages relating to the selection
+of a node to run the job.
+end::assignment-explanation-anomaly-jobs[]
+
+tag::assignment-explanation-datafeeds[]
For started {dfeeds} only, contains messages relating to the selection of a node.
-end::assignment-explanation[]
+end::assignment-explanation-datafeeds[]
tag::assignment-explanation-dfanalytics[]
Contains messages relating to the selection of a node.
@@ -157,10 +162,20 @@ so do not set the `background_persist_interval` value too low.
--
end::background-persist-interval[]
+tag::bucket-allocation-failures-count[]
+The number of buckets for which new entities in incoming data were not processed
+due to insufficient model memory. This situation is also signified by a
+`hard_limit: memory_status` property value.
+end::bucket-allocation-failures-count[]
+
tag::bucket-count[]
The number of buckets processed.
end::bucket-count[]
+tag::bucket-count-anomaly-jobs[]
+The number of bucket results produced by the job.
+end::bucket-count-anomaly-jobs[]
+
tag::bucket-span[]
The size of the interval that the analysis is aggregated into, typically between
`5m` and `1h`. The default value is `5m`. If the {anomaly-job} uses a {dfeed}
@@ -174,6 +189,27 @@ The length of the bucket in seconds. This value matches the `bucket_span`
that is specified in the job.
end::bucket-span-results[]
+tag::bucket-time-exponential-average[]
+Exponential moving average of all bucket processing times, in milliseconds.
+end::bucket-time-exponential-average[]
+
+tag::bucket-time-exponential-average-hour[]
+Exponentially-weighted moving average of bucket processing times
+calculated in a 1 hour time window, in milliseconds.
+end::bucket-time-exponential-average-hour[]
+
+tag::bucket-time-maximum[]
+Maximum among all bucket processing times, in milliseconds.
+end::bucket-time-maximum[]
+
+tag::bucket-time-minimum[]
+Minimum among all bucket processing times, in milliseconds.
+end::bucket-time-minimum[]
+
+tag::bucket-time-total[]
+Sum of all bucket processing times, in milliseconds.
+end::bucket-time-total[]
+
tag::by-field-name[]
The field used to split the data. In particular, this property is used for
analyzing the splits with respect to their own history. It is used for finding
@@ -251,6 +287,24 @@ customize the tokenizer or post-tokenization filtering, use the
`pattern_replace` character filters. The effect is exactly the same.
end::categorization-filters[]
+tag::categorization-status[]
+The status of categorization for the job. Contains one of the following values:
++
+--
+* `ok`: Categorization is performing acceptably well (or not being used at all).
+* `warn`: Categorization is detecting a distribution of categories that suggests
+the input data is inappropriate for categorization. Problems could be that there
+is only one category, more than 90% of categories are rare, the number of
+categories is greater than 50% of the number of categorized documents, there are
+no frequently matched categories, or more than 50% of categories are dead.
+
+--
+end::categorization-status[]
+
+tag::categorized-doc-count[]
+The number of documents that have had a field categorized.
+end::categorized-doc-count[]
+
tag::char-filter[]
One or more <<analysis-charfilters,character filters>>. In addition to the
built-in character filters, other plugins can provide more character filters.
@@ -482,6 +536,13 @@ Identifier for the {dfeed}. It can be a {dfeed} identifier or a wildcard
expression.
end::datafeed-id-wildcard[]
+tag::dead-category-count[]
+The number of categories created by categorization that will never be assigned
+again because another category's definition makes it a superset of the dead
+category. (Dead categories are a side effect of the way categorization has no
+prior training.)
+end::dead-category-count[]
+
tag::decompress-definition[]
Specifies whether the included model definition should be returned as a JSON map
(`true`) or in a custom compressed format (`false`). Defaults to `true`.
@@ -562,6 +623,17 @@ A unique identifier for the detector. This identifier is based on the order of
the detectors in the `analysis_config`, starting at zero.
end::detector-index[]
+tag::earliest-record-timestamp[]
+The timestamp of the earliest chronologically input document.
+end::earliest-record-timestamp[]
+
+tag::empty-bucket-count[]
+The number of buckets which did not contain any data. If your data
+contains many empty buckets, consider increasing your `bucket_span` or using
+functions that are tolerant to gaps in data such as `mean`, `non_null_sum` or
+`non_zero_count`.
+end::empty-bucket-count[]
+
tag::eta[]
Advanced configuration option. The shrinkage applied to the weights. Smaller
values result in larger forests which have better generalization error. However,
@@ -628,6 +700,11 @@ tag::filter-id[]
A string that uniquely identifies a filter.
end::filter-id[]
+tag::forecast-total[]
+The number of individual forecasts currently available for the job. A value of
+`1` or more indicates that forecasts exist.
+end::forecast-total[]
+
tag::frequency[]
The interval at which scheduled queries are made while the {dfeed} runs in real
time. The default value is either the bucket span for short bucket spans, or,
@@ -638,6 +715,10 @@ bucket results. If the {dfeed} uses aggregations, this value must be divisible
by the interval of the date histogram aggregation.
end::frequency[]
+tag::frequent-category-count[]
+The number of categories that match more than 1% of categorized documents.
+end::frequent-category-count[]
+
tag::from[]
Skips the specified number of {dfanalytics-jobs}. The default value is `0`.
end::from[]
@@ -698,6 +779,26 @@ is available as part of the input data. When you use multiple detectors, the use
of influencers is recommended as it aggregates results for each influencer entity.
end::influencers[]
+tag::input-bytes[]
+The number of bytes of input data posted to the {anomaly-job}.
+end::input-bytes[]
+
+tag::input-field-count[]
+The total number of fields in input documents posted to the {anomaly-job}. This
+count includes fields that are not used in the analysis. However, be aware that
+if you are using a {dfeed}, it extracts only the required fields from the
+documents it retrieves before posting them to the job.
+end::input-field-count[]
+
+tag::input-record-count[]
+The number of input documents posted to the {anomaly-job}.
+end::input-record-count[]
+
+tag::invalid-date-count[]
+The number of input documents with either a missing date field or a date that
+could not be parsed.
+end::invalid-date-count[]
+
tag::is-interim[]
If `true`, this is an interim result. In other words, the results are calculated
based on partial input data.
@@ -768,6 +869,10 @@ relevant relationships between the features and the {depvar}. The smaller this
parameter the larger individual trees will be and the longer train will take.
end::lambda[]
+tag::last-data-time[]
+The timestamp at which data was last analyzed, according to server time.
+end::last-data-time[]
+
tag::latency[]
The size of the window in which to expect data that is out of time order. The
default value is 0 (no latency). If you specify a non-zero value, it must be
the <<ml-post-data,post data>> API.
--
end::latency[]
+tag::latest-empty-bucket-timestamp[]
+The timestamp of the last bucket that did not contain any data.
+end::latest-empty-bucket-timestamp[]
+
+tag::latest-record-timestamp[]
+The timestamp of the latest chronologically input document.
+end::latest-record-timestamp[]
+
+tag::latest-sparse-record-timestamp[]
+The timestamp of the last bucket that was considered sparse.
+end::latest-sparse-record-timestamp[]
+
tag::max-empty-searches[]
If a real-time {dfeed} has never seen any data (including during any initial
training period) then it will automatically stop itself and close its associated
@@ -818,6 +935,19 @@ ensemble method. Available methods are `lof`, `ldof`, `distance_kth_nn`,
`distance_knn`.
end::method[]
+tag::missing-field-count[]
+The number of input documents that are missing a field that the {anomaly-job} is
+configured to analyze. Input documents with missing fields are still processed
+because it is possible that not all fields are missing.
++
+--
+NOTE: If you are using {dfeeds} or posting data to the job in JSON format, a
+high `missing_field_count` is often not an indication of data issues. It is not
+necessarily a cause for concern.
+
+--
+end::missing-field-count[]
+
tag::mode[]
There are three available modes:
+
@@ -829,6 +959,17 @@ recommended value.
--
end::mode[]
+tag::model-bytes[]
+The number of bytes of memory used by the models. This is the maximum value
+since the last time the model was persisted. If the job is closed, this value
+indicates the latest size.
+end::model-bytes[]
+
+tag::model-bytes-exceeded[]
+The number of bytes over the high limit for memory usage at the last allocation
+failure.
+end::model-bytes-exceeded[]
+
tag::model-id[]
The unique identifier of the trained {infer} model.
end::model-id[]
@@ -858,6 +999,10 @@ see <>.
--
end::model-memory-limit[]
+tag::model-memory-limit-anomaly-jobs[]
+The upper limit for model memory usage, checked on increasing values.
+end::model-memory-limit-anomaly-jobs[]
+
tag::model-memory-limit-dfa[]
The approximate maximum amount of memory resources that are permitted for
analytical processing. The default value for {dfanalytics-jobs} is `1gb`. If
@@ -867,6 +1012,19 @@ setting, an error occurs when you try to create {dfanalytics-jobs} that have
<>.
end::model-memory-limit-dfa[]
+tag::model-memory-status[]
+The status of the mathematical models, which can have one of the following
+values:
++
+--
+* `ok`: The models stayed below the configured value.
+* `soft_limit`: The models used more than 60% of the configured memory limit
+and older unused models will be pruned to free up space.
+* `hard_limit`: The models used more space than the configured memory limit.
+As a result, not all incoming data was processed.
+--
+end::model-memory-status[]
+
tag::model-plot-config[]
This advanced configuration option stores model information along with the
results. It provides a more detailed view into {anomaly-detect}.
@@ -904,6 +1062,10 @@ The default value is `1`, which means snapshots that are one day (twenty-four ho
older than the newest snapshot are deleted.
end::model-snapshot-retention-days[]
+tag::model-timestamp[]
+The timestamp of the last record when the model stats were gathered.
+end::model-timestamp[]
+
tag::multivariate-by-fields[]
This functionality is reserved for internal use. It is not supported for use in
customer environments and is not subject to the support SLA of official GA
@@ -934,10 +1096,27 @@ improve diversity in the ensemble. Therefore, only override this if you are
confident that the value you choose is appropriate for the data set.
end::n-neighbors[]
-tag::node[]
+tag::node-address[]
+The network address of the node.
+end::node-address[]
+
+tag::node-datafeeds[]
For started {dfeeds} only, this information pertains to the node upon which the
{dfeed} is started.
-end::node[]
+end::node-datafeeds[]
+
+tag::node-ephemeral-id[]
+The ephemeral ID of the node.
+end::node-ephemeral-id[]
+
+tag::node-id[]
+The unique identifier of the node.
+end::node-id[]
+
+tag::node-jobs[]
+Contains properties for the node that runs the job. This information is
+available only for open jobs.
+end::node-jobs[]
tag::num-top-classes[]
Defines the number of categories for which the predicted
@@ -946,12 +1125,17 @@ total number of categories (in the {version} version of the {stack}, it's two)
to predict then we will report all category probabilities. Defaults to 2.
end::num-top-classes[]
-tag::over-field-name[]
-The field used to split the data. In particular, this property is used for
-analyzing the splits with respect to the history of all splits. It is used for
-finding unusual values in the population of all splits. For more information,
-see {ml-docs}/ml-configuring-pop.html[Performing population analysis].
-end::over-field-name[]
+tag::open-time[]
+For open jobs only, the elapsed time for which the job has been open.
+end::open-time[]
+
+tag::out-of-order-timestamp-count[]
+The number of input documents that are out of time sequence and outside
+of the latency window. This information is applicable only when you provide data
+to the {anomaly-job} by using the <<ml-post-data,post data API>>. These out of
+order documents are discarded, since jobs require time series data to be in
+ascending chronological order.
+end::out-of-order-timestamp-count[]
tag::outlier-fraction[]
Sets the proportion of the data set that is assumed to be outlying prior to
@@ -959,6 +1143,13 @@ Sets the proportion of the data set that is assumed to be outlying prior to
outliers and 95% are inliers.
end::outlier-fraction[]
+tag::over-field-name[]
+The field used to split the data. In particular, this property is used for
+analyzing the splits with respect to the history of all splits. It is used for
+finding unusual values in the population of all splits. For more information,
+see {ml-docs}/ml-configuring-pop.html[Performing population analysis].
+end::over-field-name[]
+
tag::partition-field-name[]
The field used to segment the analysis. When you use this property, you have
completely independent baselines for each value of this field.
@@ -969,6 +1160,20 @@ Defines the name of the prediction field in the results.
Defaults to `_prediction`.
end::prediction-field-name[]
+tag::processed-field-count[]
+The total number of fields in all the documents that have been processed by the
+{anomaly-job}. Only fields that are specified in the detector configuration
+object contribute to this count. The timestamp is not included in this count.
+end::processed-field-count[]
+
+tag::processed-record-count[]
+The number of input documents that have been processed by the {anomaly-job}.
+This value includes documents with missing fields, since they are nonetheless
+analyzed. If you use {dfeeds} and have aggregations in your search query, the
+`processed_record_count` is the number of aggregation results processed, not the
+number of {es} documents.
+end::processed-record-count[]
+
tag::query[]
The {es} query domain-specific language (DSL). This value corresponds to the
query object in an {es} search POST body. All the options that are supported by
@@ -993,6 +1198,10 @@ assuming other related parameters (e.g. `source`, `analyzed_fields`, etc.) are
the same.
end::randomize-seed[]
+tag::rare-category-count[]
+The number of categories that match just one categorized document.
+end::rare-category-count[]
+
tag::renormalization-window-days[]
Advanced configuration option. The period over which adjustments to the score
are applied, as new data is seen. The default value is the longer of 30 days or
@@ -1086,6 +1295,12 @@ The configuration of how to source the analysis data. It requires an
excluded from the destination.
end::source-put-dfa[]
+tag::sparse-bucket-count[]
+The number of buckets that contained few data points compared to the expected
+number of data points. If your data contains many sparse buckets, consider using
+a longer `bucket_span`.
+end::sparse-bucket-count[]
+
tag::standardization-enabled[]
If `true`, then the following operation is performed on the columns before
computing outlier scores: (x_i - mean(x_i)) / sd(x_i). Defaults to `true`. For
@@ -1093,6 +1308,25 @@ more information, see
https://en.wikipedia.org/wiki/Feature_scaling#Standardization_(Z-score_Normalization)[this wiki page about standardization].
end::standardization-enabled[]
+tag::state-anomaly-job[]
+The status of the {anomaly-job}, which can be one of the following values:
++
+--
+* `closed`: The job finished successfully with its model state persisted. The
+job must be opened before it can accept further data.
+* `closing`: The job close action is in progress and has not yet completed. A
+closing job cannot accept further data.
+* `failed`: The job did not finish successfully due to an error. This situation
+can occur due to invalid input data, a fatal error occurring during the analysis,
+or an external interaction such as the process being killed by the Linux out of
+memory (OOM) killer. If the job had irrevocably failed, it must be force closed
+and then deleted. If the {dfeed} can be corrected, the job can be closed and
+then re-opened.
+* `opened`: The job is available to receive and process data.
+* `opening`: The job open action is in progress and has not yet completed.
+--
+end::state-anomaly-job[]
+
tag::state-datafeed[]
The status of the {dfeed}, which can be one of the following values:
+
@@ -1168,6 +1402,25 @@ that tokenizer but change the character or token filters, specify
`"tokenizer": "ml_classic"` in your `categorization_analyzer`.
end::tokenizer[]
+tag::total-by-field-count[]
+The number of `by` field values that were analyzed by the models. This value is
+cumulative for all detectors in the job.
+end::total-by-field-count[]
+
+tag::total-category-count[]
+The number of categories created by categorization.
+end::total-category-count[]
+
+tag::total-over-field-count[]
+The number of `over` field values that were analyzed by the models. This value
+is cumulative for all detectors in the job.
+end::total-over-field-count[]
+
+tag::total-partition-field-count[]
+The number of `partition` field values that were analyzed by the models. This
+value is cumulative for all detectors in the job.
+end::total-partition-field-count[]
+
tag::training-percent[]
Defines what percentage of the eligible documents that will
be used for training. Documents that are ignored by the analysis (for example
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java
index 446574323e0..d830ff21bde 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java
@@ -82,9 +82,9 @@ public class RestCatDatafeedsAction extends AbstractCatAction {
.build());
// Timing stats
- table.addCell("bucket.count",
+ table.addCell("buckets.count",
TableColumnAttributeBuilder.builder("bucket count")
- .setAliases("bc", "bucketCount")
+ .setAliases("bc", "bucketsCount")
.build());
table.addCell("search.count",
TableColumnAttributeBuilder.builder("number of searches ran by the datafeed")
diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java
index 64cd39edf30..8227c0c8f58 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java
@@ -97,7 +97,7 @@ public class RestCatJobsAction extends AbstractCatAction {
.build());
table.addCell("data.processed_fields",
TableColumnAttributeBuilder.builder("number of processed fields", false)
- .setAliases("dpr", "dataProcessedFields")
+ .setAliases("dpf", "dataProcessedFields")
.build());
table.addCell("data.input_bytes",
TableColumnAttributeBuilder.builder("total input bytes", false)
@@ -223,55 +223,55 @@ public class RestCatJobsAction extends AbstractCatAction {
.build());
// Forecast Stats
- table.addCell("forecast." + ForecastStats.Fields.TOTAL,
- TableColumnAttributeBuilder.builder("total number of forecasts").setAliases("ft", "forecastTotal").build());
- table.addCell("forecast.memory.min",
+ table.addCell("forecasts." + ForecastStats.Fields.TOTAL,
+ TableColumnAttributeBuilder.builder("total number of forecasts").setAliases("ft", "forecastsTotal").build());
+ table.addCell("forecasts.memory.min",
TableColumnAttributeBuilder.builder("minimum memory used by forecasts", false)
- .setAliases("fmmin", "forecastMemoryMin")
+ .setAliases("fmmin", "forecastsMemoryMin")
.build());
- table.addCell("forecast.memory.max",
+ table.addCell("forecasts.memory.max",
TableColumnAttributeBuilder.builder("maximum memory used by forecasts", false)
.setAliases("fmmax", "forecastsMemoryMax")
.build());
- table.addCell("forecast.memory.avg",
+ table.addCell("forecasts.memory.avg",
TableColumnAttributeBuilder.builder("average memory used by forecasts", false)
- .setAliases("fmavg", "forecastMemoryAvg")
+ .setAliases("fmavg", "forecastsMemoryAvg")
.build());
- table.addCell("forecast.memory.total",
+ table.addCell("forecasts.memory.total",
TableColumnAttributeBuilder.builder("total memory used by all forecasts", false)
- .setAliases("fmt", "forecastMemoryTotal")
+ .setAliases("fmt", "forecastsMemoryTotal")
.build());
- table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".min",
+ table.addCell("forecasts." + ForecastStats.Fields.RECORDS + ".min",
TableColumnAttributeBuilder.builder("minimum record count for forecasts", false)
- .setAliases("frmin", "forecastRecordsMin")
+ .setAliases("frmin", "forecastsRecordsMin")
.build());
- table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".max",
+ table.addCell("forecasts." + ForecastStats.Fields.RECORDS + ".max",
TableColumnAttributeBuilder.builder("maximum record count for forecasts", false)
- .setAliases("frmax", "forecastRecordsMax")
+ .setAliases("frmax", "forecastsRecordsMax")
.build());
- table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".avg",
+ table.addCell("forecasts." + ForecastStats.Fields.RECORDS + ".avg",
TableColumnAttributeBuilder.builder("average record count for forecasts", false)
- .setAliases("fravg", "forecastRecordsAvg")
+ .setAliases("fravg", "forecastsRecordsAvg")
.build());
- table.addCell("forecast." + ForecastStats.Fields.RECORDS + ".total",
+ table.addCell("forecasts." + ForecastStats.Fields.RECORDS + ".total",
TableColumnAttributeBuilder.builder("total record count for all forecasts", false)
- .setAliases("frt", "forecastRecordsTotal")
+ .setAliases("frt", "forecastsRecordsTotal")
.build());
- table.addCell("forecast.time.min",
+ table.addCell("forecasts.time.min",
TableColumnAttributeBuilder.builder("minimum runtime for forecasts", false)
- .setAliases("ftmin", "forecastTimeMin")
+ .setAliases("ftmin", "forecastsTimeMin")
.build());
- table.addCell("forecast.time.max",
+ table.addCell("forecasts.time.max",
TableColumnAttributeBuilder.builder("maximum run time for forecasts", false)
- .setAliases("ftmax", "forecastTimeMax")
+ .setAliases("ftmax", "forecastsTimeMax")
.build());
- table.addCell("forecast.time.avg",
+ table.addCell("forecasts.time.avg",
TableColumnAttributeBuilder.builder("average runtime for all forecasts (milliseconds)", false)
- .setAliases("ftavg", "forecastTimeAvg")
+ .setAliases("ftavg", "forecastsTimeAvg")
.build());
- table.addCell("forecast.time.total",
+ table.addCell("forecasts.time.total",
TableColumnAttributeBuilder.builder("total runtime for all forecasts", false)
- .setAliases("ftt", "forecastTimeTotal").build());
+ .setAliases("ftt", "forecastsTimeTotal").build());
//Node info
table.addCell("node.id",
@@ -292,29 +292,29 @@ public class RestCatJobsAction extends AbstractCatAction {
.build());
//Timing Stats
- table.addCell("bucket.count",
+ table.addCell("buckets.count",
TableColumnAttributeBuilder.builder("bucket count")
- .setAliases("bc", "bucketCount")
+ .setAliases("bc", "bucketsCount")
.build());
- table.addCell("bucket.time.total",
+ table.addCell("buckets.time.total",
TableColumnAttributeBuilder.builder("total bucket processing time", false)
- .setAliases("btt", "bucketTimeTotal")
+ .setAliases("btt", "bucketsTimeTotal")
.build());
- table.addCell("bucket.time.min",
+ table.addCell("buckets.time.min",
TableColumnAttributeBuilder.builder("minimum bucket processing time", false)
- .setAliases("btmin", "bucketTimeMin")
+ .setAliases("btmin", "bucketsTimeMin")
.build());
- table.addCell("bucket.time.max",
+ table.addCell("buckets.time.max",
TableColumnAttributeBuilder.builder("maximum bucket processing time", false)
- .setAliases("btmax", "bucketTimeMax")
+ .setAliases("btmax", "bucketsTimeMax")
.build());
- table.addCell("bucket.time.exp_avg",
+ table.addCell("buckets.time.exp_avg",
TableColumnAttributeBuilder.builder("exponential average bucket processing time (milliseconds)", false)
- .setAliases("btea", "bucketTimeExpAvg")
+ .setAliases("btea", "bucketsTimeExpAvg")
.build());
- table.addCell("bucket.time.exp_avg_hour",
+ table.addCell("buckets.time.exp_avg_hour",
TableColumnAttributeBuilder.builder("exponential average bucket processing time by hour (milliseconds)", false)
- .setAliases("bteah", "bucketTimeExpAvgHour")
+ .setAliases("bteah", "bucketsTimeExpAvgHour")
.build());
table.endHeaders();
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml_jobs.json b/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml_jobs.json
index d773e6bbf5e..f62a46ba341 100644
--- a/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml_jobs.json
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml_jobs.json
@@ -1,7 +1,7 @@
{
"cat.ml_jobs":{
"documentation":{
- "url":"http://www.elastic.co/guide/en/elasticsearch/reference/current/ml-get-job-stats.html"
+ "url":"http://www.elastic.co/guide/en/elasticsearch/reference/current/cat-anomaly-detectors.html"
},
"stability":"stable",
"url":{
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/datafeed_cat_apis.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/datafeed_cat_apis.yml
index 4437a31b5fd..89274c59884 100644
--- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/datafeed_cat_apis.yml
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/datafeed_cat_apis.yml
@@ -86,7 +86,7 @@ setup:
datafeed_id: datafeed-job-stats-test
- match:
$body: |
- / #id state bucket.count search.count
+ / #id state buckets.count search.count
^ (datafeed\-job\-stats\-test \s+ \w+ \s+ \d+ \s+ \d+ \n)+ $/
- do:
@@ -95,7 +95,7 @@ setup:
datafeed_id: datafeed-job-stats-test
- match:
$body: |
- /^ id \s+ state \s+ bucket\.count \s+ search\.count \n
+ /^ id \s+ state \s+ buckets\.count \s+ search\.count \n
(datafeed\-job\-stats\-test \s+ \w+ \s+ \d+ \s+ \d+ \n)+ $/
- do:
diff --git a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/job_cat_apis.yml b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/job_cat_apis.yml
index bb13c3a5cc5..a82ce200320 100644
--- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/job_cat_apis.yml
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/job_cat_apis.yml
@@ -90,7 +90,7 @@ setup:
job_id: job-stats-test
- match:
$body: |
- / #id state data.processed_records model.bytes model.memory_status forecast.total bucket.count
+ / #id state data.processed_records model.bytes model.memory_status forecasts.total buckets.count
^ (job\-stats\-test \s+ \w+ \s+ \d+ \s+ .*? \s+ \w+ \s+ \d+ \s+ \d+ \n)+ $/
- do:
@@ -99,7 +99,7 @@ setup:
job_id: job-stats-test
- match:
$body: |
- /^ id \s+ state \s+ data\.processed_records \s+ model\.bytes \s+ model\.memory_status \s+ forecast\.total \s+ bucket\.count \n
+ /^ id \s+ state \s+ data\.processed_records \s+ model\.bytes \s+ model\.memory_status \s+ forecasts\.total \s+ buckets\.count \n
(job\-stats\-test \s+ \w+ \s+ \d+ \s+ .*? \s+ \w+ \s+ \d+ \s+ \d+ \n)+ $/
- do: