From 72840c0cb2b7d67f03c9d8e9a5bdfae287d88fbc Mon Sep 17 00:00:00 2001 From: Lisa Cawley Date: Fri, 27 Dec 2019 13:30:26 -0800 Subject: [PATCH] [7.x][DOCS] Move anomaly detection job resource definitions into APIs (#50490) --- docs/build.gradle | 1 + .../anomaly-detection/apis/close-job.asciidoc | 26 +- .../apis/datafeedresource.asciidoc | 2 +- .../apis/delete-job.asciidoc | 11 +- .../anomaly-detection/apis/flush-job.asciidoc | 3 +- .../anomaly-detection/apis/forecast.asciidoc | 5 +- .../apis/get-bucket.asciidoc | 3 +- .../apis/get-category.asciidoc | 3 +- .../apis/get-influencer.asciidoc | 3 +- .../apis/get-job-stats.asciidoc | 20 +- .../anomaly-detection/apis/get-job.asciidoc | 113 ++-- .../apis/get-overall-buckets.asciidoc | 9 +- .../apis/get-record.asciidoc | 3 +- .../apis/get-snapshot.asciidoc | 3 +- .../apis/jobresource.asciidoc | 561 ------------------ .../anomaly-detection/apis/open-job.asciidoc | 3 +- .../anomaly-detection/apis/post-data.asciidoc | 3 +- .../apis/put-calendar-job.asciidoc | 4 +- .../apis/put-datafeed.asciidoc | 7 +- .../apis/put-filter.asciidoc | 2 +- .../anomaly-detection/apis/put-job.asciidoc | 211 +++++-- .../apis/revert-snapshot.asciidoc | 49 +- .../apis/update-datafeed.asciidoc | 83 ++- .../apis/update-job.asciidoc | 177 ++++-- .../apis/update-snapshot.asciidoc | 3 +- .../apis/validate-detector.asciidoc | 2 +- .../apis/validate-job.asciidoc | 2 +- .../ml/anomaly-detection/categories.asciidoc | 52 +- .../ml/anomaly-detection/functions.asciidoc | 4 +- .../functions/count.asciidoc | 12 +- .../anomaly-detection/functions/geo.asciidoc | 4 +- .../anomaly-detection/functions/info.asciidoc | 4 +- .../functions/metric.asciidoc | 24 +- .../anomaly-detection/functions/rare.asciidoc | 8 +- .../anomaly-detection/functions/sum.asciidoc | 8 +- .../anomaly-detection/functions/time.asciidoc | 8 +- docs/reference/ml/ml-shared.asciidoc | 49 +- docs/reference/redirects.asciidoc | 9 + docs/reference/rest-api/defs.asciidoc | 1 - 
docs/reference/settings/ml-settings.asciidoc | 2 +- 40 files changed, 569 insertions(+), 928 deletions(-) delete mode 100644 docs/reference/ml/anomaly-detection/apis/jobresource.asciidoc diff --git a/docs/build.gradle b/docs/build.gradle index fece645fa27..2477733fca4 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -29,6 +29,7 @@ buildRestTests.expectedUnconvertedCandidates = [ 'reference/ml/anomaly-detection/apis/get-category.asciidoc', 'reference/ml/anomaly-detection/apis/get-influencer.asciidoc', 'reference/ml/anomaly-detection/apis/get-job-stats.asciidoc', + 'reference/ml/anomaly-detection/apis/get-job.asciidoc', 'reference/ml/anomaly-detection/apis/get-overall-buckets.asciidoc', 'reference/ml/anomaly-detection/apis/get-record.asciidoc', 'reference/ml/anomaly-detection/apis/get-snapshot.asciidoc', diff --git a/docs/reference/ml/anomaly-detection/apis/close-job.asciidoc b/docs/reference/ml/anomaly-detection/apis/close-job.asciidoc index bdbbc88a501..3eb69fa27c1 100644 --- a/docs/reference/ml/anomaly-detection/apis/close-job.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/close-job.asciidoc @@ -60,33 +60,23 @@ results the job might have recently produced or might produce in the future. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {anomaly-job}. It can be a job - identifier, a group name, or a wildcard expression. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection-wildcard] [[ml-close-job-query-parms]] ==== {api-query-parms-title} `allow_no_jobs`:: - (Optional, boolean) Specifies what to do when the request: -+ --- -* Contains wildcard expressions and there are no jobs that match. -* Contains the `_all` string or no identifiers and there are no matches. -* Contains wildcard expressions and there are only partial matches. - -The default value is `true`, which returns an empty `jobs` array -when there are no matches and the subset of results when there are partial -matches. 
If this parameter is `false`, the request returns a `404` status code -when there are no matches or only partial matches. --- +(Optional, boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-jobs] `force`:: - (Optional, boolean) Use to close a failed job, or to forcefully close a job - which has not responded to its initial close request. +(Optional, boolean) Use to close a failed job, or to forcefully close a job +which has not responded to its initial close request. `timeout`:: - (Optional, <>) Controls the time to wait until a job - has closed. The default value is 30 minutes. +(Optional, <>) Controls the time to wait until a job +has closed. The default value is 30 minutes. [[ml-close-job-response-codes]] ==== {api-response-codes-title} diff --git a/docs/reference/ml/anomaly-detection/apis/datafeedresource.asciidoc b/docs/reference/ml/anomaly-detection/apis/datafeedresource.asciidoc index 656e7948fbb..88fad9b4ed0 100644 --- a/docs/reference/ml/anomaly-detection/apis/datafeedresource.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/datafeedresource.asciidoc @@ -50,7 +50,7 @@ A {dfeed} resource has the following properties: `script_fields`:: (object) Specifies scripts that evaluate custom expressions and returns script fields to the {dfeed}. - The <> in a job can contain + The detector configuration objects in a job can contain functions that use these script fields. For more information, see {ml-docs}/ml-configuring-transform.html[Transforming data with script fields]. diff --git a/docs/reference/ml/anomaly-detection/apis/delete-job.asciidoc b/docs/reference/ml/anomaly-detection/apis/delete-job.asciidoc index 096939184ea..4ad8cfe3f1f 100644 --- a/docs/reference/ml/anomaly-detection/apis/delete-job.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/delete-job.asciidoc @@ -39,18 +39,19 @@ separated list. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {anomaly-job}. 
+(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] [[ml-delete-job-query-parms]] ==== {api-query-parms-title} `force`:: - (Optional, boolean) Use to forcefully delete an opened job; this method is - quicker than closing and deleting the job. +(Optional, boolean) Use to forcefully delete an opened job; this method is +quicker than closing and deleting the job. `wait_for_completion`:: - (Optional, boolean) Specifies whether the request should return immediately or - wait until the job deletion completes. Defaults to `true`. +(Optional, boolean) Specifies whether the request should return immediately or +wait until the job deletion completes. Defaults to `true`. [[ml-delete-job-example]] ==== {api-examples-title} diff --git a/docs/reference/ml/anomaly-detection/apis/flush-job.asciidoc b/docs/reference/ml/anomaly-detection/apis/flush-job.asciidoc index 7afef6eabde..f6e81a3b261 100644 --- a/docs/reference/ml/anomaly-detection/apis/flush-job.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/flush-job.asciidoc @@ -37,7 +37,8 @@ opened again before analyzing further data. ==== {api-path-parms-title} ``:: -(string) Required. Identifier for the job. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] [[ml-flush-job-query-parms]] ==== {api-query-parms-title} diff --git a/docs/reference/ml/anomaly-detection/apis/forecast.asciidoc b/docs/reference/ml/anomaly-detection/apis/forecast.asciidoc index 351880f6384..61b89818431 100644 --- a/docs/reference/ml/anomaly-detection/apis/forecast.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/forecast.asciidoc @@ -29,7 +29,7 @@ See {ml-docs}/ml-overview.html#ml-forecasting[Forecasting the future]. =============================== * If you use an `over_field_name` property in your job, you cannot create a -forecast. For more information about this property, see <>. +forecast. For more information about this property, see <>. 
* The job must be open when you create a forecast. Otherwise, an error occurs. =============================== @@ -37,7 +37,8 @@ forecast. For more information about this property, see <>. ==== {api-path-parms-title} `<job_id>`:: - (Required, string) Identifier for the job. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] [[ml-forecast-request-body]] ==== {api-request-body-title} diff --git a/docs/reference/ml/anomaly-detection/apis/get-bucket.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-bucket.asciidoc index 91c473ebec9..027de1385e8 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-bucket.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-bucket.asciidoc @@ -36,7 +36,8 @@ bucket. ==== {api-path-parms-title} `<job_id>`:: - (Required, string) Identifier for the {anomaly-job}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] `<timestamp>`:: (Optional, string) The timestamp of a single bucket result. If you do not diff --git a/docs/reference/ml/anomaly-detection/apis/get-category.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-category.asciidoc index 782efdbb195..914ca5daa16 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-category.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-category.asciidoc @@ -35,7 +35,8 @@ For more information about categories, see ==== {api-path-parms-title} `<job_id>`:: - (Required, string) Identifier for the {anomaly-job}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] `<category_id>`:: (Optional, long) Identifier for the category. If you do not specify this diff --git a/docs/reference/ml/anomaly-detection/apis/get-influencer.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-influencer.asciidoc index a2da47720c9..2165d8ef9f7 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-influencer.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-influencer.asciidoc @@ -27,7 +27,8 @@ privileges.
See <> and ==== {api-path-parms-title} `<job_id>`:: - (Required, string) Identifier for the {anomaly-job}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] [[ml-get-influencer-request-body]] ==== {api-request-body-title} diff --git a/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc index 9c7bcc6e7b3..2978cf47f5e 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-job-stats.asciidoc @@ -40,26 +40,15 @@ IMPORTANT: This API returns a maximum of 10,000 jobs. ==== {api-path-parms-title} `<job_id>`:: - (Optional, string) An identifier for the {anomaly-job}. It can be a - job identifier, a group name, or a wildcard expression. If you do not specify - one of these options, the API returns statistics for all {anomaly-jobs}. +(Optional, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection-default] [[ml-get-job-stats-query-parms]] ==== {api-query-parms-title} `allow_no_jobs`:: - (Optional, boolean) Specifies what to do when the request: -+ --- -* Contains wildcard expressions and there are no jobs that match. -* Contains the `_all` string or no identifiers and there are no matches. -* Contains wildcard expressions and there are only partial matches. - -The default value is `true`, which returns an empty `jobs` array -when there are no matches and the subset of results when there are partial -matches. If this parameter is `false`, the request returns a `404` status code -when there are no matches or only partial matches. --- +(Optional, boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-jobs] [[ml-get-job-stats-results]] ==== {api-response-body-title} @@ -68,7 +57,6 @@ The API returns the following information: `jobs`:: (array) An array of {anomaly-job} statistics objects. - For more information, see <>.
[[ml-get-job-stats-response-codes]] ==== {api-response-codes-title} diff --git a/docs/reference/ml/anomaly-detection/apis/get-job.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-job.asciidoc index a816bcd3e1d..dfb5b6f8098 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-job.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-job.asciidoc @@ -40,35 +40,40 @@ IMPORTANT: This API returns a maximum of 10,000 jobs. ==== {api-path-parms-title} ``:: - (Optional, string) Identifier for the {anomaly-job}. It can be a job - identifier, a group name, or a wildcard expression. If you do not specify one - of these options, the API returns information for all {anomaly-jobs}. +(Optional, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection-default] [[ml-get-job-query-parms]] ==== {api-query-parms-title} `allow_no_jobs`:: - (Optional, boolean) Specifies what to do when the request: -+ --- -* Contains wildcard expressions and there are no jobs that match. -* Contains the `_all` string or no identifiers and there are no matches. -* Contains wildcard expressions and there are only partial matches. - -The default value is `true`, which returns an empty `jobs` array -when there are no matches and the subset of results when there are partial -matches. If this parameter is `false`, the request returns a `404` status code -when there are no matches or only partial matches. --- +(Optional, boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-jobs] [[ml-get-job-results]] ==== {api-response-body-title} -The API returns the following information: +The API returns an array of {anomaly-job} resources. For the full list of +properties, see <>. -`jobs`:: - (array) An array of {anomaly-job} resources. - For more information, see <>. +`create_time`:: +(string) The time the job was created. For example, `1491007356077`. This +property is informational; you cannot change its value. 
+ +`finished_time`:: +(string) If the job closed or failed, this is the time the job finished. +Otherwise, it is `null`. This property is informational; you cannot change its +value. + +`job_type`:: +(string) Reserved for future use, currently set to `anomaly_detector`. + +`job_version`:: +(string) The version of {es} that existed on the node when the job was created. + +`model_snapshot_id`:: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-snapshot-id] [[ml-get-job-response-codes]] ==== {api-response-codes-title} @@ -80,53 +85,65 @@ The API returns the following information: [[ml-get-job-example]] ==== {api-examples-title} -The following example gets configuration information for the `total-requests` job: - [source,console] -------------------------------------------------- -GET _ml/anomaly_detectors/total-requests +GET _ml/anomaly_detectors/high_sum_total_sales -------------------------------------------------- -// TEST[skip:setup:server_metrics_job] +// TEST[skip:Kibana sample data] The API returns the following results: -[source,console-result] +[source,js] ---- { "count": 1, "jobs": [ { - "job_id": "total-requests", - "job_type": "anomaly_detector", - "job_version": "7.0.0-alpha1", - "description": "Total sum of requests", - "create_time": 1517011406091, - "analysis_config": { - "bucket_span": "10m", - "detectors": [ + "job_id" : "high_sum_total_sales", + "job_type" : "anomaly_detector", + "job_version" : "7.5.0", + "groups" : [ + "kibana_sample_data", + "kibana_sample_ecommerce" + ], + "description" : "Find customers spending an unusually high amount in an hour", + "create_time" : 1577221534700, + "analysis_config" : { + "bucket_span" : "1h", + "detectors" : [ { - "detector_description": "Sum of total", - "function": "sum", - "field_name": "total", - "detector_index": 0 + "detector_description" : "High total sales", + "function" : "high_sum", + "field_name" : "taxful_total_price", + "over_field_name" : "customer_full_name.keyword", + 
"detector_index" : 0 } ], - "influencers": [ ] + "influencers" : [ + "customer_full_name.keyword", + "category.keyword" + ] }, - "analysis_limits": { - "model_memory_limit": "1024mb", - "categorization_examples_limit": 4 + "analysis_limits" : { + "model_memory_limit" : "10mb", + "categorization_examples_limit" : 4 }, - "data_description": { - "time_field": "timestamp", - "time_format": "epoch_ms" + "data_description" : { + "time_field" : "order_date", + "time_format" : "epoch_ms" }, - "model_snapshot_retention_days": 1, - "results_index_name": "shared", - "allow_lazy_open": false + "model_plot_config" : { + "enabled" : true + }, + "model_snapshot_retention_days" : 1, + "custom_settings" : { + "created_by" : "ml-module-sample", + ... + }, + "model_snapshot_id" : "1575402237", + "results_index_name" : "shared", + "allow_lazy_open" : false } ] } ---- -// TESTRESPONSE[s/"7.0.0-alpha1"/$body.$_path/] -// TESTRESPONSE[s/1517011406091/$body.$_path/] diff --git a/docs/reference/ml/anomaly-detection/apis/get-overall-buckets.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-overall-buckets.asciidoc index 4295a0685a3..a678aa51442 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-overall-buckets.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-overall-buckets.asciidoc @@ -55,16 +55,15 @@ a span equal to the jobs' largest bucket span. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {anomaly-job}. It can be a job - identifier, a group name, a comma-separated list of jobs or groups, or a - wildcard expression. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection-wildcard-list] [[ml-get-overall-buckets-request-body]] ==== {api-request-body-title} `allow_no_jobs`:: - (Optional, boolean) If `false` and the `job_id` does not match any - {anomaly-jobs}, an error occurs. The default value is `true`. 
+(Optional, boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=allow-no-jobs] `bucket_span`:: (Optional, string) The span of the overall buckets. Must be greater or equal diff --git a/docs/reference/ml/anomaly-detection/apis/get-record.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-record.asciidoc index a850524872c..b5bbb15580e 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-record.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-record.asciidoc @@ -26,7 +26,8 @@ privileges. See <> and <>. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {anomaly-job}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] [[ml-get-record-request-body]] ==== {api-request-body-title} diff --git a/docs/reference/ml/anomaly-detection/apis/get-snapshot.asciidoc b/docs/reference/ml/anomaly-detection/apis/get-snapshot.asciidoc index 04d09b50d33..94b67f6f98b 100644 --- a/docs/reference/ml/anomaly-detection/apis/get-snapshot.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/get-snapshot.asciidoc @@ -26,7 +26,8 @@ Retrieves information about model snapshots. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {anomaly-job}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] ``:: (Optional, string) Identifier for the model snapshot. If you do not specify diff --git a/docs/reference/ml/anomaly-detection/apis/jobresource.asciidoc b/docs/reference/ml/anomaly-detection/apis/jobresource.asciidoc deleted file mode 100644 index ca5b5c8daf8..00000000000 --- a/docs/reference/ml/anomaly-detection/apis/jobresource.asciidoc +++ /dev/null @@ -1,561 +0,0 @@ -[role="xpack"] -[testenv="platinum"] -[[ml-job-resource]] -=== Job resources - -A job resource has the following properties: - -`analysis_config`:: - (object) The analysis configuration, which specifies how to analyze the data. - See <>. 
- -`analysis_limits`:: - (object) Defines approximate limits on the memory resource requirements for the job. - See <>. - -`background_persist_interval`:: - (time units) Advanced configuration option. - The time between each periodic persistence of the model. - The default value is a randomized value between 3 to 4 hours, which avoids - all jobs persisting at exactly the same time. The smallest allowed value is - 1 hour. -+ --- -TIP: For very large models (several GB), persistence could take 10-20 minutes, -so do not set the `background_persist_interval` value too low. - --- - -`create_time`:: - (string) The time the job was created. For example, `1491007356077`. This - property is informational; you cannot change its value. - -`custom_settings`:: - (object) Advanced configuration option. Contains custom meta data about the - job. For example, it can contain custom URL information as shown in - {ml-docs}/ml-configuring-url.html[Adding custom URLs to {ml} results]. - -`data_description`:: - (object) Describes the data format and how APIs parse timestamp fields. - See <>. - -`description`:: - (string) An optional description of the job. - -`finished_time`:: - (string) If the job closed or failed, this is the time the job finished, - otherwise it is `null`. This property is informational; you cannot change its - value. - -`groups`:: - (array of strings) A list of job groups. A job can belong to no groups or - many. For example, `["group1", "group2"]`. - -`job_id`:: - (string) The unique identifier for the job. This identifier can contain - lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It - must start and end with alphanumeric characters. This property is - informational; you cannot change the identifier for existing jobs. - -`job_type`:: - (string) Reserved for future use, currently set to `anomaly_detector`. - -`job_version`:: - (string) The version of {es} that existed on the node when the job was created. 
- -`model_plot_config`:: - (object) Configuration properties for storing additional model information. - See <>. - -`model_snapshot_id`:: - (string) A numerical character string that uniquely identifies the model - snapshot. For example, `1491007364`. This property is informational; you - cannot change its value. For more information about model snapshots, see - <>. - -`model_snapshot_retention_days`:: - (long) The time in days that model snapshots are retained for the job. - Older snapshots are deleted. The default value is `1`, which means snapshots - are retained for one day (twenty-four hours). - -`renormalization_window_days`:: - (long) Advanced configuration option. - The period over which adjustments to the score are applied, as new data is seen. - The default value is the longer of 30 days or 100 `bucket_spans`. - -`results_index_name`:: - (string) The name of the index in which to store the {ml} results. - The default value is `shared`, - which corresponds to the index name `.ml-anomalies-shared` - -`results_retention_days`:: - (long) Advanced configuration option. - The number of days for which job results are retained. - Once per day at 00:30 (server time), results older than this period are - deleted from Elasticsearch. The default value is null, which means results - are retained. - -`allow_lazy_open`:: - (boolean) Advanced configuration option. - Whether this job should be allowed to open when there is insufficient - {ml} node capacity for it to be immediately assigned to a node. - The default is `false`, which means that the <> - will return an error if a {ml} node with capacity to run the - job cannot immediately be found. (However, this is also subject to - the cluster-wide `xpack.ml.max_lazy_ml_nodes` setting - see - <>.) If this option is set to `true` then - the <> will not return an error, and the job will - wait in the `opening` state until sufficient {ml} node capacity - is available. 
- -[[ml-analysisconfig]] -==== Analysis Configuration Objects - -An analysis configuration object has the following properties: - -`bucket_span`:: - (time units) The size of the interval that the analysis is aggregated into, - typically between `5m` and `1h`. The default value is `5m`. For more - information about time units, see <>. - -`categorization_field_name`:: - (string) If this property is specified, the values of the specified field will - be categorized. The resulting categories must be used in a detector by setting - `by_field_name`, `over_field_name`, or `partition_field_name` to the keyword - `mlcategory`. For more information, see - {ml-docs}/ml-configuring-categories.html[Categorizing log messages]. - -`categorization_filters`:: - (array of strings) If `categorization_field_name` is specified, - you can also define optional filters. This property expects an array of - regular expressions. The expressions are used to filter out matching sequences - from the categorization field values. You can use this functionality to fine - tune the categorization by excluding sequences from consideration when - categories are defined. For example, you can exclude SQL statements that - appear in your log files. For more information, see - {ml-docs}/ml-configuring-categories.html[Categorizing log messages]. - This property cannot be used at the same time as `categorization_analyzer`. - If you only want to define simple regular expression filters that are applied - prior to tokenization, setting this property is the easiest method. - If you also want to customize the tokenizer or post-tokenization filtering, - use the `categorization_analyzer` property instead and include the filters as - `pattern_replace` character filters. The effect is exactly the same. - -`categorization_analyzer`:: - (object or string) If `categorization_field_name` is specified, you can also - define the analyzer that is used to interpret the categorization field. 
This - property cannot be used at the same time as `categorization_filters`. See - <>. - -`detectors`:: - (array) An array of detector configuration objects, - which describe the anomaly detectors that are used in the job. - See <>. + -+ --- -NOTE: If the `detectors` array does not contain at least one detector, -no analysis can occur and an error is returned. - --- - -`influencers`:: - (array of strings) A comma separated list of influencer field names. - Typically these can be the by, over, or partition fields that are used in the - detector configuration. You might also want to use a field name that is not - specifically named in a detector, but is available as part of the input data. - When you use multiple detectors, the use of influencers is recommended as it - aggregates results for each influencer entity. - -`latency`:: - (time units) The size of the window in which to expect data that is out of - time order. The default value is 0 (no latency). If you specify a non-zero - value, it must be greater than or equal to one second. For more information - about time units, see <>. -+ --- -NOTE: Latency is only applicable when you send data by using -the <> API. - --- - -`multivariate_by_fields`:: - (boolean) This functionality is reserved for internal use. It is not supported - for use in customer environments and is not subject to the support SLA of - official GA features. -+ --- -If set to `true`, the analysis will automatically find correlations -between metrics for a given `by` field value and report anomalies when those -correlations cease to hold. For example, suppose CPU and memory usage on host A -is usually highly correlated with the same metrics on host B. Perhaps this -correlation occurs because they are running a load-balanced application. -If you enable this property, then anomalies will be reported when, for example, -CPU usage on host A is high and the value of CPU usage on host B is low. 
-That is to say, you'll see an anomaly when the CPU of host A is unusual given -the CPU of host B. - -NOTE: To use the `multivariate_by_fields` property, you must also specify -`by_field_name` in your detector. - --- - -`summary_count_field_name`:: - (string) If this property is specified, the data that is fed to the job is - expected to be pre-summarized. This property value is the name of the field - that contains the count of raw data points that have been summarized. The same - `summary_count_field_name` applies to all detectors in the job. -+ --- - -NOTE: The `summary_count_field_name` property cannot be used with the `metric` -function. - --- - -After you create a job, you cannot change the analysis configuration object; all -the properties are informational. - -[float] -[[ml-detectorconfig]] -==== Detector Configuration Objects - -Detector configuration objects specify which data fields a job analyzes. -They also specify which analytical functions are used. -You can specify multiple detectors for a job. -Each detector has the following properties: - -`by_field_name`:: - (string) The field used to split the data. - In particular, this property is used for analyzing the splits with respect to their own history. - It is used for finding unusual values in the context of the split. - -`detector_description`:: - (string) A description of the detector. For example, `Low event rate`. - -`detector_index`:: - (integer) A unique identifier for the detector. This identifier is based on - the order of the detectors in the `analysis_config`, starting at zero. You can - use this identifier when you want to update a specific detector. - -`exclude_frequent`:: - (string) Contains one of the following values: `all`, `none`, `by`, or `over`. - If set, frequent entities are excluded from influencing the anomaly results. - Entities can be considered frequent over time or frequent in a population. 
- If you are working with both over and by fields, then you can set `exclude_frequent` - to `all` for both fields, or to `by` or `over` for those specific fields. - -`field_name`:: - (string) The field that the detector uses in the function. If you use an event rate - function such as `count` or `rare`, do not specify this field. + -+ --- -NOTE: The `field_name` cannot contain double quotes or backslashes. - --- - -`function`:: - (string) The analysis function that is used. - For example, `count`, `rare`, `mean`, `min`, `max`, and `sum`. For more - information, see {ml-docs}/ml-functions.html[Function reference]. - -`over_field_name`:: - (string) The field used to split the data. - In particular, this property is used for analyzing the splits with respect to - the history of all splits. It is used for finding unusual values in the - population of all splits. For more information, see - {ml-docs}/ml-configuring-pop.html[Performing population analysis]. - -`partition_field_name`:: - (string) The field used to segment the analysis. - When you use this property, you have completely independent baselines for each value of this field. - -`use_null`:: - (boolean) Defines whether a new series is used as the null series - when there is no value for the by or partition fields. The default value is `false`. - -`custom_rules`:: - (array) An array of custom rule objects, which enable customizing how the detector works. - For example, a rule may dictate to the detector conditions under which results should be skipped. - For more information see <>. + -+ --- -IMPORTANT: Field names are case sensitive, for example a field named 'Bytes' -is different from one named 'bytes'. - --- - -After you create a job, the only properties you can change in the detector -configuration object are the `detector_description` and the `custom_rules`; -all other properties are informational. 
- -[float] -[[ml-datadescription]] -==== Data Description Objects - -The data description defines the format of the input data when you send data to -the job by using the <> API. Note that when configure -a {dfeed}, these properties are automatically set. - -When data is received via the <> API, it is not stored -in {es}. Only the results for anomaly detection are retained. - -A data description object has the following properties: - -`format`:: - (string) Only `JSON` format is supported at this time. - -`time_field`:: - (string) The name of the field that contains the timestamp. - The default value is `time`. - -`time_format`:: - (string) The time format, which can be `epoch`, `epoch_ms`, or a custom pattern. - The default value is `epoch`, which refers to UNIX or Epoch time (the number of seconds - since 1 Jan 1970). - The value `epoch_ms` indicates that time is measured in milliseconds since the epoch. - The `epoch` and `epoch_ms` time formats accept either integer or real values. + -+ --- -NOTE: Custom patterns must conform to the Java `DateTimeFormatter` class. -When you use date-time formatting patterns, it is recommended that you provide -the full date, time and time zone. For example: `yyyy-MM-dd'T'HH:mm:ssX`. -If the pattern that you specify is not sufficient to produce a complete timestamp, -job creation fails. - --- - -[float] -[[ml-categorizationanalyzer]] -==== Categorization Analyzer - -The categorization analyzer specifies how the `categorization_field` is -interpreted by the categorization process. The syntax is very similar to that -used to define the `analyzer` in the <>. - -The `categorization_analyzer` field can be specified either as a string or as -an object. - -If it is a string it must refer to a <> or -one added by another plugin. - -If it is an object it has the following properties: - -`char_filter`:: - (array of strings or objects) One or more - <>. 
In addition to the built-in - character filters, other plugins can provide more character filters. This - property is optional. If it is not specified, no character filters are applied - prior to categorization. If you are customizing some other aspect of the - analyzer and you need to achieve the equivalent of `categorization_filters` - (which are not permitted when some other aspect of the analyzer is customized), - add them here as - <>. - -`tokenizer`:: - (string or object) The name or definition of the - <> to use after character filters are applied. - This property is compulsory if `categorization_analyzer` is specified as an - object. Machine learning provides a tokenizer called `ml_classic` that - tokenizes in the same way as the non-customizable tokenizer in older versions - of the product. If you want to use that tokenizer but change the character or - token filters, specify `"tokenizer": "ml_classic"` in your - `categorization_analyzer`. - -`filter`:: - (array of strings or objects) One or more - <>. In addition to the built-in token - filters, other plugins can provide more token filters. This property is - optional. If it is not specified, no token filters are applied prior to - categorization. 
- -If you omit the `categorization_analyzer`, the following default values are used: - -[source,console] --------------------------------------------------- -POST _ml/anomaly_detectors/_validate -{ - "analysis_config" : { - "categorization_analyzer" : { - "tokenizer" : "ml_classic", - "filter" : [ - { "type" : "stop", "stopwords": [ - "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", - "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun", - "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December", - "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", - "GMT", "UTC" - ] } - ] - }, - "categorization_field_name": "message", - "detectors" :[{ - "function":"count", - "by_field_name": "mlcategory" - }] - }, - "data_description" : { - } -} --------------------------------------------------- - -If you specify any part of the `categorization_analyzer`, however, any omitted -sub-properties are _not_ set to default values. - -If you are categorizing non-English messages in a language where words are -separated by spaces, you might get better results if you change the day or month -words in the stop token filter to the appropriate words in your language. If you -are categorizing messages in a language where words are not separated by spaces, -you must use a different tokenizer as well in order to get sensible -categorization results. - -It is important to be aware that analyzing for categorization of machine -generated log messages is a little different from tokenizing for search. -Features that work well for search, such as stemming, synonym substitution, and -lowercasing are likely to make the results of categorization worse. However, in -order for drill down from {ml} results to work correctly, the tokens that the -categorization analyzer produces must be similar to those produced by the search -analyzer. 
If they are sufficiently similar, when you search for the tokens that -the categorization analyzer produces then you find the original document that -the categorization field value came from. - -For more information, see -{ml-docs}/ml-configuring-categories.html[Categorizing log messages]. - -[float] -[[ml-detector-custom-rule]] -==== Detector Custom Rule - -{ml-docs}/ml-rules.html[Custom rules] enable you to customize the way detectors -operate. - -A custom rule has the following properties: - -`actions`:: - (array) The set of actions to be triggered when the rule applies. - If more than one action is specified the effects of all actions are combined. - The available actions include: + - `skip_result`::: The result will not be created. This is the default value. - Unless you also specify `skip_model_update`, the model will be updated as - usual with the corresponding series value. - `skip_model_update`::: The value for that series will not be used to update - the model. Unless you also specify `skip_result`, the results will be created - as usual. This action is suitable when certain values are expected to be - consistently anomalous and they affect the model in a way that negatively - impacts the rest of the results. - -`scope`:: - (object) An optional scope of series where the rule applies. By default, the - scope includes all series. Scoping is allowed for any of the fields that are - also specified in `by_field_name`, `over_field_name`, or `partition_field_name`. - To add a scope for a field, add the field name as a key in the scope object and - set its value to an object with the following properties: -`filter_id`::: - (string) The id of the filter to be used. - -`filter_type`::: - (string) Either `include` (the rule applies for values in the filter) - or `exclude` (the rule applies for values not in the filter). Defaults - to `include`. - -`conditions`:: - (array) An optional array of numeric conditions when the rule applies. 
- Multiple conditions are combined together with a logical `AND`. -+ --- -NOTE: If your detector uses `lat_long`, `metric`, `rare`, or `freq_rare` -functions, you can only specify `conditions` that apply to `time`. - - -A condition has the following properties: - -`applies_to`::: - (string) Specifies the result property to which the condition applies. - The available options are `actual`, `typical`, `diff_from_typical`, `time`. -`operator`::: - (string) Specifies the condition operator. The available options are - `gt` (greater than), `gte` (greater than or equals), `lt` (less than) and `lte` (less than or equals). -`value`::: - (double) The value that is compared against the `applies_to` field using the `operator`. --- - -A rule is required to either have a non-empty scope or at least one condition. -For more examples see -{ml-docs}/ml-configuring-detector-custom-rules.html[Configuring detector custom rules]. - -[float] -[[ml-apilimits]] -==== Analysis Limits - -Limits can be applied for the resources required to hold the mathematical models in memory. -These limits are approximate and can be set per job. They do not control the -memory used by other processes, for example the Elasticsearch Java processes. -If necessary, you can increase the limits after the job is created. - -The `analysis_limits` object has the following properties: - -`categorization_examples_limit`:: - (long) The maximum number of examples stored per category in memory and - in the results data store. The default value is 4. If you increase this value, - more examples are available, however it requires that you have more storage available. - If you set this value to `0`, no examples are stored. + -+ --- -NOTE: The `categorization_examples_limit` only applies to analysis that uses categorization. -For more information, see -{ml-docs}/ml-configuring-categories.html[Categorizing log messages]. 
- --- - -`model_memory_limit`:: - (long or string) The approximate maximum amount of memory resources that are - required for analytical processing. Once this limit is approached, data pruning - becomes more aggressive. Upon exceeding this limit, new entities are not - modeled. The default value for jobs created in version 6.1 and later is `1024mb`. - This value will need to be increased for jobs that are expected to analyze high - cardinality fields, but the default is set to a relatively small size to ensure - that high resource usage is a conscious decision. The default value for jobs - created in versions earlier than 6.1 is `4096mb`. -+ --- -If you specify a number instead of a string, the units are assumed to be MiB. -Specifying a string is recommended for clarity. If you specify a byte size unit -of `b` or `kb` and the number does not equate to a discrete number of megabytes, -it is rounded down to the closest MiB. The minimum valid value is 1 MiB. If you -specify a value less than 1 MiB, an error occurs. For more information about -supported byte size units, see <>. - -If your `elasticsearch.yml` file contains an `xpack.ml.max_model_memory_limit` -setting, an error occurs when you try to create jobs that have -`model_memory_limit` values greater than that setting. For more information, -see <>. --- - -[float] -[[ml-apimodelplotconfig]] -==== Model Plot Config - -This advanced configuration option stores model information along with the -results. It provides a more detailed view into anomaly detection. - -WARNING: If you enable model plot it can add considerable overhead to the performance -of the system; it is not feasible for jobs with many entities. - -Model plot provides a simplified and indicative view of the model and its bounds. -It does not display complex features such as multivariate correlations or multimodal data. -As such, anomalies may occasionally be reported which cannot be seen in the model plot. 
- -Model plot config can be configured when the job is created or updated later. It must be -disabled if performance issues are experienced. - -The `model_plot_config` object has the following properties: - -`enabled`:: - (boolean) If true, enables calculation and storage of the model bounds for - each entity that is being analyzed. By default, this is not enabled. - -`terms`:: - experimental[] (string) Limits data collection to this comma separated list of - partition or by field values. If terms are not specified or it is an empty - string, no filtering is applied. For example, "CPU,NetworkIn,DiskWrites". - Wildcards are not supported. Only the specified `terms` can be viewed when - using the Single Metric Viewer. diff --git a/docs/reference/ml/anomaly-detection/apis/open-job.asciidoc b/docs/reference/ml/anomaly-detection/apis/open-job.asciidoc index 5914ec502f1..3651834480f 100644 --- a/docs/reference/ml/anomaly-detection/apis/open-job.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/open-job.asciidoc @@ -37,7 +37,8 @@ data is received. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {anomaly-job}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] [[ml-open-job-request-body]] ==== {api-request-body-title} diff --git a/docs/reference/ml/anomaly-detection/apis/post-data.asciidoc b/docs/reference/ml/anomaly-detection/apis/post-data.asciidoc index a1e2120728a..cfd3d4ca67f 100644 --- a/docs/reference/ml/anomaly-detection/apis/post-data.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/post-data.asciidoc @@ -53,7 +53,8 @@ or a comma-separated list. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the job. 
+(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] [[ml-post-data-query-parms]] ==== {api-query-parms-title} diff --git a/docs/reference/ml/anomaly-detection/apis/put-calendar-job.asciidoc b/docs/reference/ml/anomaly-detection/apis/put-calendar-job.asciidoc index 7ba652b60a1..1030a8a7762 100644 --- a/docs/reference/ml/anomaly-detection/apis/put-calendar-job.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/put-calendar-job.asciidoc @@ -27,8 +27,8 @@ Adds an {anomaly-job} to a calendar. (Required, string) Identifier for the calendar. ``:: - (Required, string) An identifier for the {anomaly-jobs}. It can be a job - identifier, a group name, or a comma-separated list of jobs or groups. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection-list] [[ml-put-calendar-job-example]] ==== {api-examples-title} diff --git a/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc index 899f8cfe5cd..ca3b9d61ba7 100644 --- a/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/put-datafeed.asciidoc @@ -97,10 +97,9 @@ not be set to `false` on any ML node. `script_fields`:: (Optional, object) Specifies scripts that evaluate custom expressions and - returns script fields to the {dfeed}. The - <> in a job can contain - functions that use these script fields. For more information, see - <>. + returns script fields to the {dfeed}. The detector configuration objects in a + job can contain functions that use these script fields. For more information, + see <>. 
`scroll_size`:: (Optional, unsigned integer) The `size` parameter that is used in {es} diff --git a/docs/reference/ml/anomaly-detection/apis/put-filter.asciidoc b/docs/reference/ml/anomaly-detection/apis/put-filter.asciidoc index 7f14b826cf4..86245d84dbb 100644 --- a/docs/reference/ml/anomaly-detection/apis/put-filter.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/put-filter.asciidoc @@ -25,7 +25,7 @@ Instantiates a filter. A {ml-docs}/ml-rules.html[filter] contains a list of strings. It can be used by one or more jobs. Specifically, filters are referenced in -the `custom_rules` property of <>. +the `custom_rules` property of detector configuration objects. [[ml-put-filter-path-parms]] ==== {api-path-parms-title} diff --git a/docs/reference/ml/anomaly-detection/apis/put-job.asciidoc b/docs/reference/ml/anomaly-detection/apis/put-job.asciidoc index 90d5972b3ee..9f38d08d334 100644 --- a/docs/reference/ml/anomaly-detection/apis/put-job.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/put-job.asciidoc @@ -32,64 +32,201 @@ a job directly to the `.ml-config` index using the {es} index API. If {es} ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the job. This identifier can contain - lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It - must start and end with alphanumeric characters. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection-define] [[ml-put-job-request-body]] ==== {api-request-body-title} -`analysis_config`:: - (Required, object) The analysis configuration, which specifies how to analyze - the data. See <>. +`allow_lazy_open`:: +(Optional, boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=allow-lazy-open] -`analysis_limits`:: - (Optional, object) Specifies runtime limits for the job. See - <>. 
+[[put-analysisconfig]]`analysis_config`:: +(Required, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=analysis-config] + +`analysis_config`.`bucket_span`::: +(<>) +include::{docdir}/ml/ml-shared.asciidoc[tag=bucket-span] + +`analysis_config`.`categorization_field_name`::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=categorization-field-name] + +`analysis_config`.`categorization_filters`::: +(array of strings) +include::{docdir}/ml/ml-shared.asciidoc[tag=categorization-filters] + +`analysis_config`.`categorization_analyzer`::: +(object or string) +include::{docdir}/ml/ml-shared.asciidoc[tag=categorization-analyzer] + +`analysis_config`.`detectors`::: +(array) An array of detector configuration objects. Detector configuration +objects specify which data fields a job analyzes. They also specify which +analytical functions are used. You can specify multiple detectors for a job. ++ +-- +NOTE: If the `detectors` array does not contain at least one detector, +no analysis can occur and an error is returned. 
+ +A detector has the following properties: +-- + +`analysis_config`.`detectors`.`by_field_name`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=by-field-name] + +`analysis_config`.`detectors`.`custom_rules`:::: ++ +-- +(array) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules] + +`analysis_config`.`detectors`.`custom_rules`.`actions`::: +(array) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-actions] + +`analysis_config`.`detectors`.`custom_rules`.`scope`::: +(object) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-scope] + +`analysis_config`.`detectors`.`custom_rules`.`scope`.`filter_id`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-scope-filter-id] + +`analysis_config`.`detectors`.`custom_rules`.`scope`.`filter_type`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-scope-filter-type] + +`analysis_config`.`detectors`.`custom_rules`.`conditions`::: +(array) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-conditions] + +`analysis_config`.`detectors`.`custom_rules`.`conditions`.`applies_to`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-conditions-applies-to] + +`analysis_config`.`detectors`.`custom_rules`.`conditions`.`operator`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-conditions-operator] + +`analysis_config`.`detectors`.`custom_rules`.`conditions`.`value`:::: +(double) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-conditions-value] +-- + +`analysis_config`.`detectors`.`detector_description`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=detector-description] + +`analysis_config`.`detectors`.`detector_index`:::: +(integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=detector-index] + +`analysis_config`.`detectors`.`exclude_frequent`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=exclude-frequent] + +`analysis_config`.`detectors`.`field_name`:::: +(string) 
+include::{docdir}/ml/ml-shared.asciidoc[tag=detector-field-name] + +`analysis_config`.`detectors`.`function`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=function] + +`analysis_config`.`detectors`.`over_field_name`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=over-field-name] + +`analysis_config`.`detectors`.`partition_field_name`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=partition-field-name] + +`analysis_config`.`detectors`.`use_null`:::: +(boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=use-null] + +`analysis_config`.`influencers`::: +(array of strings) +include::{docdir}/ml/ml-shared.asciidoc[tag=influencers] + +`analysis_config`.`latency`::: +(<>) +include::{docdir}/ml/ml-shared.asciidoc[tag=latency] + +`analysis_config`.`multivariate_by_fields`::: +(boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=multivariate-by-fields] + +`analysis_config`.`summary_count_field_name`::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=summary-count-field-name] + +[[put-analysislimits]]`analysis_limits`:: +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=analysis-limits] ++ +-- +The `analysis_limits` object has the following properties: +-- + +`analysis_limits`.`categorization_examples_limit`::: +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=categorization-examples-limit] + +`analysis_limits`.`model_memory_limit`::: +(long or string) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-limit] `background_persist_interval`:: - (Optional, <>) Advanced configuration option. The time - between each periodic persistence of the model. See <>. +(Optional, <>) +include::{docdir}/ml/ml-shared.asciidoc[tag=background-persist-interval] -`custom_settings`:: - (Optional, object) Advanced configuration option. Contains custom meta data - about the job. See <>. 
+[[put-customsettings]]`custom_settings`:: +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-settings] -`data_description`:: - (Required, object) Describes the format of the input data. This object is - required, but it can be empty (`{}`). See - <>. +[[put-datadescription]]`data_description`:: +(Required, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=data-description] `description`:: - (Optional, string) A description of the job. +(Optional, string) A description of the job. `groups`:: - (Optional, array of strings) A list of job groups. See <>. +(Optional, array of strings) +include::{docdir}/ml/ml-shared.asciidoc[tag=groups] `model_plot_config`:: - (Optional, object) Advanced configuration option. Specifies to store model - information along with the results. This adds overhead to the performance of - the system and is not feasible for jobs with many entities, see - <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-plot-config] + +`model_plot_config`.`enabled`::: +(boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-plot-config-enabled] + +`model_plot_config`.`terms`::: +experimental[] (string) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-plot-config-terms] `model_snapshot_retention_days`:: - (Optional, long) The time in days that model snapshots are retained for the - job. Older snapshots are deleted. The default value is `1`, which means - snapshots are retained for one day (twenty-four hours). +(Optional, long) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-snapshot-retention-days] `renormalization_window_days`:: - (Optional, long) Advanced configuration option. The period over which - adjustments to the score are applied, as new data is seen. See - <>. +(Optional, long) +include::{docdir}/ml/ml-shared.asciidoc[tag=renormalization-window-days] `results_index_name`:: - (Optional, string) A text string that affects the name of the {ml} results - index. 
The default value is `shared`, which generates an index named - `.ml-anomalies-shared`. +(Optional, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=results-index-name] `results_retention_days`:: - (Optional, long) Advanced configuration option. The number of days for which - job results are retained. See <>. +(Optional, long) +include::{docdir}/ml/ml-shared.asciidoc[tag=results-retention-days] [[ml-put-job-example]] ==== {api-examples-title} @@ -125,7 +262,7 @@ When the job is created, you receive the following results: { "job_id" : "total-requests", "job_type" : "anomaly_detector", - "job_version" : "7.4.0", + "job_version" : "7.5.0", "description" : "Total sum of requests", "create_time" : 1562352500629, "analysis_config" : { @@ -153,5 +290,5 @@ When the job is created, you receive the following results: "allow_lazy_open" : false } ---- -// TESTRESPONSE[s/"job_version" : "7.4.0"/"job_version" : $body.job_version/] +// TESTRESPONSE[s/"job_version" : "7.5.0"/"job_version" : $body.job_version/] // TESTRESPONSE[s/1562352500629/$body.$_path/] diff --git a/docs/reference/ml/anomaly-detection/apis/revert-snapshot.asciidoc b/docs/reference/ml/anomaly-detection/apis/revert-snapshot.asciidoc index f04db39e25e..4e77758cde6 100644 --- a/docs/reference/ml/anomaly-detection/apis/revert-snapshot.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/revert-snapshot.asciidoc @@ -36,10 +36,12 @@ Friday or a critical system failure. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the job. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] ``:: - (Required, string) Identifier for the model snapshot. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=snapshot-id] [[ml-revert-snapshot-request-body]] ==== {api-request-body-title} @@ -56,45 +58,44 @@ If you want to resend data, then delete the intervening results. 
[[ml-revert-snapshot-example]] ==== {api-examples-title} -The following example reverts to the `1491856080` snapshot for the -`it_ops_new_kpi` job: - [source,console] -------------------------------------------------- POST -_ml/anomaly_detectors/it_ops_new_kpi/model_snapshots/1491856080/_revert +_ml/anomaly_detectors/high_sum_total_sales/model_snapshots/1577221697/_revert { "delete_intervening_results": true } -------------------------------------------------- -// TEST[skip:todo] +// TEST[skip:Kibana sample data] When the operation is complete, you receive the following results: [source,js] ---- { "model": { - "job_id": "it_ops_new_kpi", - "min_version": "6.3.0", - "timestamp": 1491856080000, - "description": "State persisted due to job close at 2017-04-10T13:28:00-0700", - "snapshot_id": "1491856080", + "job_id": "high_sum_total_sales", + "min_version": "6.4.0", + "timestamp": 1577221697000, + "description": "Periodic background persist at 2019-12-24T21:08:17+0000", + "snapshot_id": "1577221697", "snapshot_doc_count": 1, "model_size_stats": { - "job_id": "it_ops_new_kpi", + "job_id": "high_sum_total_sales", "result_type": "model_size_stats", - "model_bytes": 29518, - "total_by_field_count": 3, - "total_over_field_count": 0, - "total_partition_field_count": 2, - "bucket_allocation_failures_count": 0, - "memory_status": "ok", - "log_time": 1491856080000, - "timestamp": 1455318000000 + "model_bytes": 1325334, + "model_bytes_exceeded" : 0, + "model_bytes_memory_limit" : 10485760, + "total_by_field_count" : 3, + "total_over_field_count" : 2361, + "total_partition_field_count" : 2, + "bucket_allocation_failures_count" : 0, + "memory_status" : "ok", + "log_time" : 1577221697000, + "timestamp" : 1577217600000 }, - "latest_record_time_stamp": 1455318669000, - "latest_result_time_stamp": 1455318000000, - "retain": false + "latest_record_time_stamp" : 1577221286000, + "latest_result_time_stamp" : 1577217600000, + "retain" : false } } ---- diff --git 
a/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc b/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc index 732f23202b1..cf3e772bd53 100644 --- a/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/update-datafeed.asciidoc @@ -39,7 +39,8 @@ using those same roles. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {dfeed}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=datafeed-id] [[ml-update-datafeed-request-body]] ==== {api-request-body-title} @@ -47,70 +48,58 @@ using those same roles. The following properties can be updated after the {dfeed} is created: `aggregations`:: - (Optional, object) If set, the {dfeed} performs aggregation searches. For more - information, see <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=aggregations] `chunking_config`:: - (Optional, object) Specifies how data searches are split into time chunks. See - <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=chunking-config] `delayed_data_check_config`:: - (Optional, object) Specifies whether the data feed checks for missing data and - the size of the window. See <>. +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=delayed-data-check-config] `frequency`:: - (Optional, <>) The interval at which scheduled queries - are made while the {dfeed} runs in real time. The default value is either the - bucket span for short bucket spans, or, for longer bucket spans, a sensible - fraction of the bucket span. For example: `150s`. +(Optional, <>) +include::{docdir}/ml/ml-shared.asciidoc[tag=frequency] `indices`:: - (Optional, array) An array of index names. Wildcards are supported. For - example: `["it_ops_metrics", "server*"]`. +(Optional, array) +include::{docdir}/ml/ml-shared.asciidoc[tag=indices] -`query`:: - (Optional, object) The {es} query domain-specific language (DSL). 
This value - corresponds to the query object in an {es} search POST body. All the options - that are supported by {es} can be used, as this object is passed verbatim to - {es}. By default, this property has the following value: - `{"match_all": {"boost": 1}}`. +`max_empty_searches`:: +(Optional, integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=max-empty-searches] + -- -WARNING: If you change the query, then the analyzed data will also be changed, -therefore the required time to learn might be long and the understandability of -the results is unpredictable. -If you want to make significant changes to the source data, we would recommend -you clone it and create a second job containing the amendments. Let both run in -parallel and close one when you are satisfied with the results of the other job. +The special value `-1` unsets this setting. +-- + +`query`:: +(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=query] ++ +-- +WARNING: If you change the query, the analyzed data is also changed. Therefore, +the required time to learn might be long and the understandability of the +results is unpredictable. If you want to make significant changes to the source +data, we recommend that you clone the job and create a second job containing the +amendments. Let both run in parallel and close one when you are satisfied with +the results of the other job. + -- `query_delay`:: - (Optional, <>) The number of seconds behind real-time - that data is queried. For example, if data from 10:04 a.m. might not be - searchable in {es} until 10:06 a.m., set this property to 120 seconds. The - default value is `60s`. +(Optional, <>) +include::{docdir}/ml/ml-shared.asciidoc[tag=query-delay] `script_fields`:: - (Optional, object) Specifies scripts that evaluate custom expressions and - returns script fields to the {dfeed}. The - <> in a job can contain - functions that use these script fields. For more information, see - <>. 
+(Optional, object) +include::{docdir}/ml/ml-shared.asciidoc[tag=script-fields] `scroll_size`:: - (Optional, unsigned integer) The `size` parameter that is used in {es} - searches. The default value is `1000`. - -`max_empty_searches`:: - (Optional, integer) If a real-time {dfeed} has never seen any data (including - during any initial training period) then it will automatically stop itself - and close its associated job after this many real-time searches that return - no documents. In other words, it will stop after `frequency` times - `max_empty_searches` of real-time operation. If not set - then a {dfeed} with no end time that sees no data will remain started until - it is explicitly stopped. The special value `-1` unsets this setting. - -For more information about these properties, see <>. +(Optional, unsigned integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=scroll-size] [[ml-update-datafeed-example]] diff --git a/docs/reference/ml/anomaly-detection/apis/update-job.asciidoc b/docs/reference/ml/anomaly-detection/apis/update-job.asciidoc index 2af969dc993..62ea01d9992 100644 --- a/docs/reference/ml/anomaly-detection/apis/update-job.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/update-job.asciidoc @@ -25,72 +25,135 @@ Updates certain properties of an {anomaly-job}. ==== {api-path-parms-title} ``:: - (Required, string) Identifier for the {anomaly-job}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] [[ml-update-job-request-body]] ==== {api-request-body-title} The following properties can be updated after the job is created: -[cols="<,<,<",options="header",] -|======================================================================= -|Name |Description |Requires Restart - -|`analysis_limits.model_memory_limit` |The approximate maximum amount of -memory resources required for analytical processing. See <>. You -can update the `analysis_limits` only while the job is closed. 
The -`model_memory_limit` property value cannot be decreased below the current usage. -| Yes - -|`background_persist_interval` |Advanced configuration option. The time between -each periodic persistence of the model. See <>. | Yes - -|`custom_settings` |Contains custom meta data about the job. | No - -|`description` |A description of the job. See <>. | No - -|`detectors` |An array of detector update objects. | No - -|`detector_index` |The identifier of the detector to update (integer).| No - -|`detectors.description` |The new description for the detector.| No - -|`detectors.custom_rules` |The new list of <> -for the detector. | No - -|`groups` |A list of job groups. See <>. | No - -|`model_plot_config.enabled` |If true, enables calculation and storage of the -model bounds for each entity that is being analyzed. -See <>. | No - -|`model_snapshot_retention_days` |The time in days that model snapshots are -retained for the job. See <>. | No - -|`renormalization_window_days` |Advanced configuration option. The period over -which adjustments to the score are applied, as new data is seen. -See <>. | Yes - -|`results_retention_days` |Advanced configuration option. The number of days -for which job results are retained. See <>. | No - -|`allow_lazy_open` |Advanced configuration option. Whether to allow the job to be -opened when no {ml} node has sufficient capacity. See <>. | Yes - -|======================================================================= - -For those properties that have `Requires Restart` set to `Yes` in this table, -if the job is open when you make the update, you must stop the data feed, close -the job, then reopen the job and restart the data feed for the changes to take -effect. - -[NOTE] +`allow_lazy_open`:: +(boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=allow-lazy-open] ++ -- -* If the `memory_status` property in the `model_size_stats` object has a value -of `hard_limit`, this means that it was unable to process some data. 
You might -want to re-run this job with an increased `model_memory_limit`. +NOTE: If the job is open when you make the update, you must stop the {dfeed}, +close the job, then reopen the job and restart the {dfeed} for the changes to take effect. + -- +[[update-analysislimits]]`analysis_limits`.`model_memory_limit`:: +(long or string) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-memory-limit] ++ +-- +NOTE: You can update the `analysis_limits` only while the job is closed. The +`model_memory_limit` property value cannot be decreased below the current usage. + +TIP: If the `memory_status` property in the +`model_size_stats` object has a value of `hard_limit`, +this means that it was unable to process some data. You might want to re-run the +job with an increased `model_memory_limit`. + +-- + +`background_persist_interval`:: +(<<time-units,time units>>) +include::{docdir}/ml/ml-shared.asciidoc[tag=background-persist-interval] ++ +-- +NOTE: If the job is open when you make the update, you must stop the {dfeed}, +close the job, then reopen the job and restart the {dfeed} for the changes to take effect. + +-- + +[[update-customsettings]]`custom_settings`:: +(object) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-settings] + +`description`:: +(string) A description of the job. + +`detectors`:: +(array) An array of detector update objects.
+ +`detectors`.`custom_rules`::: ++ +-- +(array) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules] + +`detectors`.`custom_rules`.`actions`::: +(array) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-actions] + +`detectors`.`custom_rules`.`scope`::: +(object) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-scope] + +`detectors`.`custom_rules`.`scope`.`filter_id`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-scope-filter-id] + +`detectors`.`custom_rules`.`scope`.`filter_type`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-scope-filter-type] + +`detectors`.`custom_rules`.`conditions`::: +(array) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-conditions] + +`detectors`.`custom_rules`.`conditions`.`applies_to`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-conditions-applies-to] + +`detectors`.`custom_rules`.`conditions`.`operator`:::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-conditions-operator] + +`detectors`.`custom_rules`.`conditions`.`value`:::: +(double) +include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules-conditions-value] +-- + +`detectors`.`description`::: +(string) +include::{docdir}/ml/ml-shared.asciidoc[tag=detector-description] + +`detectors`.`detector_index`::: +(integer) +include::{docdir}/ml/ml-shared.asciidoc[tag=detector-index] + +`groups`:: +(array of strings) +include::{docdir}/ml/ml-shared.asciidoc[tag=groups] + +`model_plot_config`:: +(object) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-plot-config] + +`model_plot_config`.`enabled`::: +(boolean) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-plot-config-enabled] + +`model_snapshot_retention_days`:: +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=model-snapshot-retention-days] + +`renormalization_window_days`:: +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=renormalization-window-days] ++ +-- +NOTE: If the job is open when you make 
the update, you must stop the {dfeed}, +close the job, then reopen the job and restart the {dfeed} for the changes to take effect. + -- +`results_retention_days`:: +(long) +include::{docdir}/ml/ml-shared.asciidoc[tag=results-retention-days] + [[ml-update-job-example]] ==== {api-examples-title} diff --git a/docs/reference/ml/anomaly-detection/apis/update-snapshot.asciidoc b/docs/reference/ml/anomaly-detection/apis/update-snapshot.asciidoc index 1eb3e78e69e..10f7228fd9b 100644 --- a/docs/reference/ml/anomaly-detection/apis/update-snapshot.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/update-snapshot.asciidoc @@ -25,7 +25,8 @@ Updates certain properties of a snapshot. ==== {api-path-parms-title} `<job_id>`:: - (Required, string) Identifier for the {anomaly-job}. +(Required, string) +include::{docdir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection] `<snapshot_id>`:: (Required, string) Identifier for the model snapshot. diff --git a/docs/reference/ml/anomaly-detection/apis/validate-detector.asciidoc b/docs/reference/ml/anomaly-detection/apis/validate-detector.asciidoc index c931c1ef358..c6e6f630b8b 100644 --- a/docs/reference/ml/anomaly-detection/apis/validate-detector.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/validate-detector.asciidoc @@ -30,7 +30,7 @@ before you create an {anomaly-job}. ==== {api-request-body-title} For a list of the properties that you can specify in the body of this API, -see <>. +see detector configuration objects. [[ml-valid-detector-example]] ==== {api-examples-title} diff --git a/docs/reference/ml/anomaly-detection/apis/validate-job.asciidoc b/docs/reference/ml/anomaly-detection/apis/validate-job.asciidoc index 27382568908..75cec11c4c4 100644 --- a/docs/reference/ml/anomaly-detection/apis/validate-job.asciidoc +++ b/docs/reference/ml/anomaly-detection/apis/validate-job.asciidoc @@ -30,7 +30,7 @@ create the job. ==== {api-request-body-title} For a list of the properties that you can specify in the body of this API, -see <>. +see <>.
[[ml-valid-job-example]] ==== {api-examples-title} diff --git a/docs/reference/ml/anomaly-detection/categories.asciidoc b/docs/reference/ml/anomaly-detection/categories.asciidoc index a5e4d055416..79c34950915 100644 --- a/docs/reference/ml/anomaly-detection/categories.asciidoc +++ b/docs/reference/ml/anomaly-detection/categories.asciidoc @@ -144,7 +144,39 @@ language. The optional `categorization_analyzer` property allows even greater customization of how categorization interprets the categorization field value. It can refer to a built-in {es} analyzer or a combination of zero or more character filters, -a tokenizer, and zero or more token filters. +a tokenizer, and zero or more token filters. If you omit the +`categorization_analyzer`, the following default values are used: + +[source,console] +-------------------------------------------------- +POST _ml/anomaly_detectors/_validate +{ + "analysis_config" : { + "categorization_analyzer" : { + "tokenizer" : "ml_classic", + "filter" : [ + { "type" : "stop", "stopwords": [ + "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", + "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun", + "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December", + "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", + "GMT", "UTC" + ] } + ] + }, + "categorization_field_name": "message", + "detectors" :[{ + "function":"count", + "by_field_name": "mlcategory" + }] + }, + "data_description" : { + } +} +-------------------------------------------------- + +If you specify any part of the `categorization_analyzer`, however, any omitted +sub-properties are _not_ set to default values. 
The `ml_classic` tokenizer and the day and month stopword filter are more or less equivalent to the following analyzer, which is defined using only built-in {es} @@ -208,8 +240,22 @@ difference in behavior is that this custom analyzer does not include accented letters in tokens whereas the `ml_classic` tokenizer does, although that could be fixed by using more complex regular expressions. -For more information about the `categorization_analyzer` property, see -{ref}/ml-job-resource.html#ml-categorizationanalyzer[Categorization analyzer]. +If you are categorizing non-English messages in a language where words are +separated by spaces, you might get better results if you change the day or month +words in the stop token filter to the appropriate words in your language. If you +are categorizing messages in a language where words are not separated by spaces, +you must use a different tokenizer as well in order to get sensible +categorization results. + +It is important to be aware that analyzing for categorization of machine +generated log messages is a little different from tokenizing for search. +Features that work well for search, such as stemming, synonym substitution, and +lowercasing are likely to make the results of categorization worse. However, in +order for drill down from {ml} results to work correctly, the tokens that the +categorization analyzer produces must be similar to those produced by the search +analyzer. If they are sufficiently similar, when you search for the tokens that +the categorization analyzer produces then you find the original document that +the categorization field value came from. 
NOTE: To add the `categorization_analyzer` property in {kib}, you must use the **Edit JSON** tab and copy the `categorization_analyzer` object from one of the diff --git a/docs/reference/ml/anomaly-detection/functions.asciidoc b/docs/reference/ml/anomaly-detection/functions.asciidoc index d821a3ff4c0..e90a1281d6c 100644 --- a/docs/reference/ml/anomaly-detection/functions.asciidoc +++ b/docs/reference/ml/anomaly-detection/functions.asciidoc @@ -7,12 +7,10 @@ flexible ways to analyze data for anomalies. When you create {anomaly-jobs}, you specify one or more detectors, which define the type of analysis that needs to be done. If you are creating your job by -using {ml} APIs, you specify the functions in -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +using {ml} APIs, you specify the functions in detector configuration objects. If you are creating your job in {kib}, you specify the functions differently depending on whether you are creating single metric, multi-metric, or advanced jobs. -//For a demonstration of creating jobs in {kib}, see <>. Most functions detect anomalies in both low and high values. In statistical terminology, they apply a two-sided test. Some functions offer low and high diff --git a/docs/reference/ml/anomaly-detection/functions/count.asciidoc b/docs/reference/ml/anomaly-detection/functions/count.asciidoc index fe81fc5f596..f1f39ef0840 100644 --- a/docs/reference/ml/anomaly-detection/functions/count.asciidoc +++ b/docs/reference/ml/anomaly-detection/functions/count.asciidoc @@ -39,8 +39,8 @@ These functions support the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, -see {ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. 
.Example 1: Analyzing events with the count function [source,console] @@ -164,8 +164,8 @@ These functions support the following properties: * `by_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, -see {ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. For example, if you have the following number of events per bucket: @@ -233,8 +233,8 @@ These functions support the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, -see {ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 6: Analyzing users with the distinct_count function [source,console] diff --git a/docs/reference/ml/anomaly-detection/functions/geo.asciidoc b/docs/reference/ml/anomaly-detection/functions/geo.asciidoc index 20b8e6816ef..59b42172c8b 100644 --- a/docs/reference/ml/anomaly-detection/functions/geo.asciidoc +++ b/docs/reference/ml/anomaly-detection/functions/geo.asciidoc @@ -25,8 +25,8 @@ This function supports the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, -see {ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. 
.Example 1: Analyzing transactions with the lat_long function [source,console] diff --git a/docs/reference/ml/anomaly-detection/functions/info.asciidoc b/docs/reference/ml/anomaly-detection/functions/info.asciidoc index 18eb6d9f4e9..61913e539ce 100644 --- a/docs/reference/ml/anomaly-detection/functions/info.asciidoc +++ b/docs/reference/ml/anomaly-detection/functions/info.asciidoc @@ -28,8 +28,8 @@ These functions support the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 1: Analyzing subdomain strings with the info_content function [source,js] diff --git a/docs/reference/ml/anomaly-detection/functions/metric.asciidoc b/docs/reference/ml/anomaly-detection/functions/metric.asciidoc index cb44b61849a..35aba1ee40b 100644 --- a/docs/reference/ml/anomaly-detection/functions/metric.asciidoc +++ b/docs/reference/ml/anomaly-detection/functions/metric.asciidoc @@ -34,8 +34,8 @@ This function supports the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 1: Analyzing minimum transactions with the min function [source,js] @@ -69,8 +69,8 @@ This function supports the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. 
+For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 2: Analyzing maximum response times with the max function [source,js] @@ -131,8 +131,8 @@ These functions support the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 4: Analyzing response times with the median function [source,js] @@ -169,8 +169,8 @@ These functions support the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 5: Analyzing response times with the mean function [source,js] @@ -237,8 +237,8 @@ This function supports the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 8: Analyzing response times with the metric function [source,js] @@ -274,8 +274,8 @@ These functions support the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. 
+For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 9: Analyzing response times with the varp function [source,js] diff --git a/docs/reference/ml/anomaly-detection/functions/rare.asciidoc b/docs/reference/ml/anomaly-detection/functions/rare.asciidoc index 94931191a26..f56b0fb8d07 100644 --- a/docs/reference/ml/anomaly-detection/functions/rare.asciidoc +++ b/docs/reference/ml/anomaly-detection/functions/rare.asciidoc @@ -46,8 +46,8 @@ This function supports the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 1: Analyzing status codes with the rare function [source,js] @@ -105,8 +105,8 @@ This function supports the following properties: * `over_field_name` (required) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. 
.Example 3: Analyzing URI values in a population with the freq_rare function [source,js] diff --git a/docs/reference/ml/anomaly-detection/functions/sum.asciidoc b/docs/reference/ml/anomaly-detection/functions/sum.asciidoc index 260fc3f726c..387769c80f3 100644 --- a/docs/reference/ml/anomaly-detection/functions/sum.asciidoc +++ b/docs/reference/ml/anomaly-detection/functions/sum.asciidoc @@ -35,8 +35,8 @@ These functions support the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 1: Analyzing total expenses with the sum function [source,js] @@ -91,8 +91,8 @@ These functions support the following properties: * `by_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. NOTE: Population analysis (that is to say, use of the `over_field_name` property) is not applicable for this function. diff --git a/docs/reference/ml/anomaly-detection/functions/time.asciidoc b/docs/reference/ml/anomaly-detection/functions/time.asciidoc index 22cab11151d..cdf11cba447 100644 --- a/docs/reference/ml/anomaly-detection/functions/time.asciidoc +++ b/docs/reference/ml/anomaly-detection/functions/time.asciidoc @@ -53,8 +53,8 @@ This function supports the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. 
+For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 1: Analyzing events with the time_of_day function [source,js] @@ -84,8 +84,8 @@ This function supports the following properties: * `over_field_name` (optional) * `partition_field_name` (optional) -For more information about those properties, see -{ref}/ml-job-resource.html#ml-detectorconfig[Detector configuration objects]. +For more information about those properties, see the +{ref}/ml-put-job.html#ml-put-job-request-body[create {anomaly-jobs} API]. .Example 2: Analyzing events with the time_of_week function [source,js] diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc index 52e4d459600..3df4e0dc4ee 100644 --- a/docs/reference/ml/ml-shared.asciidoc +++ b/docs/reference/ml/ml-shared.asciidoc @@ -143,7 +143,7 @@ tag::categorization-analyzer[] If `categorization_field_name` is specified, you can also define the analyzer that is used to interpret the categorization field. This property cannot be used at the same time as `categorization_filters`. The categorization analyzer -specifies how the `categorization_field` is interpreted by the categorization +specifies how the categorization field is interpreted by the categorization process. The syntax is very similar to that used to define the `analyzer` in the <>. For more information, see {ml-docs}/ml-configuring-categories.html[Categorizing log messages]. @@ -170,7 +170,7 @@ end::categorization-analyzer[] tag::categorization-examples-limit[] The maximum number of examples stored per category in memory and in the results -data store. The default value is 4. If you increase this value, more examples +data store. The default value is `4`. If you increase this value, more examples are available, however it requires that you have more storage available. If you set this value to `0`, no examples are stored. 
+ @@ -486,51 +486,6 @@ optionally `results_field` (`ml` by default). results of the analysis. Default to `ml`. end::dest[] -tag::detector[] -A detector has the following properties: - -`by_field_name`:::: -(string) -include::{docdir}/ml/ml-shared.asciidoc[tag=by-field-name] - -`custom_rules`:::: -(array) -include::{docdir}/ml/ml-shared.asciidoc[tag=custom-rules] - -`detector_description`:::: -(string) -include::{docdir}/ml/ml-shared.asciidoc[tag=detector-description] - -`detector_index`:::: -(integer) -include::{docdir}/ml/ml-shared.asciidoc[tag=detector-index] - -`exclude_frequent`:::: -(string) -include::{docdir}/ml/ml-shared.asciidoc[tag=exclude-frequent] - -`field_name`:::: -(string) -include::{docdir}/ml/ml-shared.asciidoc[tag=detector-field-name] - -`function`:::: -(string) -include::{docdir}/ml/ml-shared.asciidoc[tag=function] - -`over_field_name`:::: -(string) -include::{docdir}/ml/ml-shared.asciidoc[tag=over-field-name] - -`partition_field_name`:::: -(string) -include::{docdir}/ml/ml-shared.asciidoc[tag=partition-field-name] - -`use_null`:::: -(boolean) -include::{docdir}/ml/ml-shared.asciidoc[tag=use-null] - -end::detector[] - tag::detector-description[] A description of the detector. For example, `Low event rate`. end::detector-description[] diff --git a/docs/reference/redirects.asciidoc b/docs/reference/redirects.asciidoc index e6d2b6be2e6..4ec0564daf8 100644 --- a/docs/reference/redirects.asciidoc +++ b/docs/reference/redirects.asciidoc @@ -456,3 +456,12 @@ See <>. This page was deleted. See <>, <>, <>, <>. + +[role="exclude",id="ml-job-resource"] +=== Job resources + +This page was deleted. +[[ml-analysisconfig]] +See the details in +[[ml-apimodelplotconfig]] +<>, <>, and <>. 
diff --git a/docs/reference/rest-api/defs.asciidoc b/docs/reference/rest-api/defs.asciidoc index 265641db622..f5ecefbd2ac 100644 --- a/docs/reference/rest-api/defs.asciidoc +++ b/docs/reference/rest-api/defs.asciidoc @@ -16,7 +16,6 @@ These resource definitions are used in APIs related to {ml-features} and include::{es-repo-dir}/ml/anomaly-detection/apis/datafeedresource.asciidoc[] include::{es-repo-dir}/ml/df-analytics/apis/analysisobjects.asciidoc[] include::{es-repo-dir}/ml/anomaly-detection/apis/jobcounts.asciidoc[] -include::{es-repo-dir}/ml/anomaly-detection/apis/jobresource.asciidoc[] include::{es-repo-dir}/ml/anomaly-detection/apis/snapshotresource.asciidoc[] include::{xes-repo-dir}/rest-api/security/role-mapping-resources.asciidoc[] include::{es-repo-dir}/ml/anomaly-detection/apis/resultsresource.asciidoc[] diff --git a/docs/reference/settings/ml-settings.asciidoc b/docs/reference/settings/ml-settings.asciidoc index 52d0d8eb28b..8829a328f79 100644 --- a/docs/reference/settings/ml-settings.asciidoc +++ b/docs/reference/settings/ml-settings.asciidoc @@ -81,7 +81,7 @@ The maximum `model_memory_limit` property value that can be set for any job on this node. If you try to create a job with a `model_memory_limit` property value that is greater than this setting value, an error occurs. Existing jobs are not affected when you update this setting. For more information about the -`model_memory_limit` property, see <>. +`model_memory_limit` property, see <>. `xpack.ml.max_open_jobs` (<>):: The maximum number of jobs that can run simultaneously on a node. Defaults to