From 1a6f813d5ae8d5f51ebae86518d346e82b3471d0 Mon Sep 17 00:00:00 2001 From: lcawley Date: Tue, 11 Apr 2017 18:52:47 -0700 Subject: [PATCH] [DOCS] Update all ML API examples with latest build output Original commit: elastic/x-pack-elasticsearch@f9fa3b813afc415486183895bc7168684edff0ee --- docs/en/rest-api/ml/close-job.asciidoc | 19 +-- docs/en/rest-api/ml/datafeedresource.asciidoc | 19 ++- docs/en/rest-api/ml/delete-datafeed.asciidoc | 1 + docs/en/rest-api/ml/delete-job.asciidoc | 4 +- docs/en/rest-api/ml/delete-snapshot.asciidoc | 2 +- docs/en/rest-api/ml/flush-job.asciidoc | 30 ++--- docs/en/rest-api/ml/get-bucket.asciidoc | 17 +-- docs/en/rest-api/ml/get-category.asciidoc | 4 +- .../rest-api/ml/get-datafeed-stats.asciidoc | 44 ++++--- docs/en/rest-api/ml/get-datafeed.asciidoc | 47 +++++--- docs/en/rest-api/ml/get-influencer.asciidoc | 5 +- docs/en/rest-api/ml/get-job-stats.asciidoc | 82 ++++++------- docs/en/rest-api/ml/get-job.asciidoc | 58 +++++---- docs/en/rest-api/ml/get-record.asciidoc | 47 +++----- docs/en/rest-api/ml/get-snapshot.asciidoc | 30 ++--- docs/en/rest-api/ml/jobcounts.asciidoc | 113 +++++++++++------- docs/en/rest-api/ml/jobresource.asciidoc | 21 ++-- docs/en/rest-api/ml/open-job.asciidoc | 5 +- docs/en/rest-api/ml/post-data.asciidoc | 40 ++++--- docs/en/rest-api/ml/preview-datafeed.asciidoc | 9 +- docs/en/rest-api/ml/put-datafeed.asciidoc | 60 +++++----- docs/en/rest-api/ml/put-job.asciidoc | 29 +++-- docs/en/rest-api/ml/resultsresource.asciidoc | 70 +++++++++-- docs/en/rest-api/ml/revert-snapshot.asciidoc | 3 +- docs/en/rest-api/ml/snapshotresource.asciidoc | 5 +- docs/en/rest-api/ml/start-datafeed.asciidoc | 32 ++--- docs/en/rest-api/ml/stop-datafeed.asciidoc | 2 +- docs/en/rest-api/ml/update-datafeed.asciidoc | 97 +++++++++------ docs/en/rest-api/ml/update-job.asciidoc | 5 +- docs/en/rest-api/ml/update-snapshot.asciidoc | 3 +- .../en/rest-api/ml/validate-detector.asciidoc | 12 +- docs/en/rest-api/ml/validate-job.asciidoc | 32 ++--- 32 files 
changed, 538 insertions(+), 409 deletions(-) diff --git a/docs/en/rest-api/ml/close-job.asciidoc b/docs/en/rest-api/ml/close-job.asciidoc index f5c416a1349..830f11bd2b7 100644 --- a/docs/en/rest-api/ml/close-job.asciidoc +++ b/docs/en/rest-api/ml/close-job.asciidoc @@ -1,8 +1,9 @@ +//lcawley Verified example output 2017-04-11 [[ml-close-job]] ==== Close Jobs -An anomaly detection job must be opened in order for it to be ready to receive and analyze data. -A job may be opened and closed multiple times throughout its lifecycle. +The close job API enables you to close a job. +A job can be opened and closed multiple times throughout its lifecycle. ===== Request @@ -10,24 +11,24 @@ A job may be opened and closed multiple times throughout its lifecycle. ===== Description -A job can be closed once all data has been analyzed. +//A job can be closed once all data has been analyzed. When you close a job, it runs housekeeping tasks such as pruning the model history, -flushing buffers, calculating final results and persisting the internal models. +flushing buffers, calculating final results and persisting the model snapshots. Depending upon the size of the job, it could take several minutes to close and the equivalent time to re-open. -Once closed, the anomaly detection job has almost no overhead on the cluster -(except for maintaining its meta data). A closed job is blocked for receiving -data and analysis operations, however you can still explore and navigate results. +After it is closed, the job has almost no overhead on the cluster except for +maintaining its meta data. A closed job cannot receive data or perform analysis +operations, but you can still explore and navigate results. -//NOTE: +//NOTE: TBD //OUTDATED?: If using the {prelert} UI, the job will be automatically closed when stopping a datafeed job. 
===== Path Parameters `job_id` (required):: - (+string+) Identifier for the job + (+string+) Identifier for the job ===== Query Parameters diff --git a/docs/en/rest-api/ml/datafeedresource.asciidoc b/docs/en/rest-api/ml/datafeedresource.asciidoc index b6cb365aa14..daa96a38b36 100644 --- a/docs/en/rest-api/ml/datafeedresource.asciidoc +++ b/docs/en/rest-api/ml/datafeedresource.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04-11 [[ml-datafeed-resource]] ==== Data Feed Resources @@ -14,6 +15,10 @@ A data feed resource has the following properties: "min_doc_count": 0}, "aggregations": {"events_per_min": {"sum": { "field": "events_per_min"}}}}}`. +`chunking_config`:: + (+object+) TBD. + For example: {"mode": "manual", "time_span": "30000000ms"} + `datafeed_id`:: (+string+) A numerical character string that uniquely identifies the data feed. @@ -41,6 +46,7 @@ A data feed resource has the following properties: `types` (required):: (+array+) TBD. For example: ["network","sql","kpi"] +[float] [[ml-datafeed-counts]] ==== Data Feed Counts @@ -48,14 +54,19 @@ The get data feed statistics API provides information about the operational progress of a data feed. For example: `assigment_explanation`:: - TBD. For example: "" + TBD. For example: " " + +`datafeed_id`:: + (+string+) A numerical character string that uniquely identifies the data feed. `node`:: (+object+) TBD The node that is running the query? - For example: `{"id": "0-o0tOoRTwKFZifatTWKNw","name": "0-o0tOo", - "ephemeral_id": "DOZltLxLS_SzYpW6hQ9hyg","transport_address": "127.0.0.1:9300", - "attributes": {"max_running_jobs": "10"}} + `id`::: TBD. For example, "0-o0tOoRTwKFZifatTWKNw". + `name`::: TBD. For example, "0-o0tOo". + `ephemeral_id`::: TBD. For example, "DOZltLxLS_SzYpW6hQ9hyg". + `transport_address`::: TBD. For example, "127.0.0.1:9300". + `attributes`::: TBD. For example, {"max_running_jobs": "10"}. 
`state`:: (+string+) The status of the data feed, diff --git a/docs/en/rest-api/ml/delete-datafeed.asciidoc b/docs/en/rest-api/ml/delete-datafeed.asciidoc index a5f14c95d91..8c937fa0fed 100644 --- a/docs/en/rest-api/ml/delete-datafeed.asciidoc +++ b/docs/en/rest-api/ml/delete-datafeed.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04-11 [[ml-delete-datafeed]] ==== Delete Data Feeds diff --git a/docs/en/rest-api/ml/delete-job.asciidoc b/docs/en/rest-api/ml/delete-job.asciidoc index 5199b846177..7b207091ab2 100644 --- a/docs/en/rest-api/ml/delete-job.asciidoc +++ b/docs/en/rest-api/ml/delete-job.asciidoc @@ -1,3 +1,4 @@ +//lcawley: Verified example output 2017-04-11 [[ml-delete-job]] ==== Delete Jobs @@ -17,7 +18,7 @@ IMPORTANT: Deleting a job must be done via this API only. Do not delete the privileges are granted to anyone over the `.ml-*` indices. Before you can delete a job, you must delete the data feeds that are associated with it. -//See <<>>. +See <>. It is not currently possible to delete multiple jobs using wildcards or a comma separated list. @@ -25,6 +26,7 @@ It is not currently possible to delete multiple jobs using wildcards or a comma `job_id` (required):: (+string+) Identifier for the job + //// ===== Responses diff --git a/docs/en/rest-api/ml/delete-snapshot.asciidoc b/docs/en/rest-api/ml/delete-snapshot.asciidoc index f2008ce7966..51682425f96 100644 --- a/docs/en/rest-api/ml/delete-snapshot.asciidoc +++ b/docs/en/rest-api/ml/delete-snapshot.asciidoc @@ -1,7 +1,7 @@ [[ml-delete-snapshot]] ==== Delete Model Snapshots -The delete model snapshot API allows you to delete an existing model snapshot. +The delete model snapshot API enables you to delete an existing model snapshot. 
===== Request diff --git a/docs/en/rest-api/ml/flush-job.asciidoc b/docs/en/rest-api/ml/flush-job.asciidoc index 79eaf8a020a..401a8d57948 100644 --- a/docs/en/rest-api/ml/flush-job.asciidoc +++ b/docs/en/rest-api/ml/flush-job.asciidoc @@ -1,15 +1,16 @@ +//lcawley: Verified example output 2017-04-11 [[ml-flush-job]] ==== Flush Jobs -The flush job API forces any buffered data to be processed by the {ml} job. +The flush job API forces any buffered data to be processed by the job. ===== Request + `POST _xpack/ml/anomaly_detectors//_flush` ===== Description -The flush job API is only applicable when sending data for analysis using the POST `_data` API. -Depending on the content of the buffer, then it might additionally calculate new results. +The flush job API is only applicable when sending data for analysis using the <>. Depending on the content of the buffer, then it might additionally calculate new results. Both flush and close operations are similar, however the flush is more efficient if you are expecting to send more data for analysis. When flushing, the job remains open and is available to continue analyzing data. @@ -22,21 +23,20 @@ A close operation additionally prunes and persists the model state to disk and t ===== Query Parameters -`calc_interim`:: - (+boolean+; default: ++false++) If true (default false), will calculate interim - results for the most recent bucket or all buckets within the latency period - -`start`:: - (+string+; default: ++null++) When used in conjunction with `calc_interim`, - specifies the range of buckets on which to calculate interim results +`advance_time`:: + (+string+) Specifies that no data prior to the date `advance_time` is expected. `end`:: - (+string+; default: ++null++) When used in conjunction with `calc_interim`, - specifies the range of buckets on which to calculate interim results + (+string+) When used in conjunction with `calc_interim`, specifies the range + of buckets on which to calculate interim results. 
+`calc_interim`:: + (+boolean+) If true, calculates the interim results for the most recent bucket + or all buckets within the latency period. -`advance_time`:: - (+string+; default: ++null++) Specifies that no data prior to the date `advance_time` is expected +`start`:: + (+string+) When used in conjunction with `calc_interim`, specifies the range of + buckets on which to calculate interim results. //// ===== Responses @@ -49,7 +49,7 @@ A close operation additionally prunes and persists the model state to disk and t //// ===== Examples -The following example flushes the `event_rate` job: +The following example flushes the `farequote` job: [source,js] -------------------------------------------------- diff --git a/docs/en/rest-api/ml/get-bucket.asciidoc b/docs/en/rest-api/ml/get-bucket.asciidoc index 34915dae582..76f48522f84 100644 --- a/docs/en/rest-api/ml/get-bucket.asciidoc +++ b/docs/en/rest-api/ml/get-bucket.asciidoc @@ -1,7 +1,8 @@ +//lcawley Verified example output 2017-04-11 [[ml-get-bucket]] ==== Get Buckets -The get bucket API allows you to retrieve information about buckets in the +The get bucket API enables you to retrieve information about buckets in the results from a job. 
===== Request @@ -93,9 +94,9 @@ score and time constraints: { "job_id": "it-ops-kpi", "timestamp": 1454943900000, - "anomaly_score": 87.2526, + "anomaly_score": 94.1706, "bucket_span": 300, - "initial_anomaly_score": 83.3831, + "initial_anomaly_score": 94.1706, "record_count": 1, "event_count": 153, "is_interim": false, @@ -104,17 +105,17 @@ score and time constraints: "job_id": "it-ops-kpi", "result_type": "bucket_influencer", "influencer_field_name": "bucket_time", - "initial_anomaly_score": 83.3831, - "anomaly_score": 87.2526, - "raw_anomaly_score": 2.02204, - "probability": 0.0000109783, + "initial_anomaly_score": 94.1706, + "anomaly_score": 94.1706, + "raw_anomaly_score": 2.32119, + "probability": 0.00000575042, "timestamp": 1454943900000, "bucket_span": 300, "sequence_num": 2, "is_interim": false } ], - "processing_time_ms": 3, + "processing_time_ms": 2, "partition_scores": [], "result_type": "bucket" } diff --git a/docs/en/rest-api/ml/get-category.asciidoc b/docs/en/rest-api/ml/get-category.asciidoc index 393edb34e94..34b4f552016 100644 --- a/docs/en/rest-api/ml/get-category.asciidoc +++ b/docs/en/rest-api/ml/get-category.asciidoc @@ -1,7 +1,9 @@ +//lcawley Verified example output 2017-04-11 [[ml-get-category]] ==== Get Categories -The get categories API allows you to retrieve information about the categories in the results for a job. +The get categories API enables you to retrieve information +about the categories in the results for a job. 
===== Request diff --git a/docs/en/rest-api/ml/get-datafeed-stats.asciidoc b/docs/en/rest-api/ml/get-datafeed-stats.asciidoc index 6203bdd193f..e037cf4d043 100644 --- a/docs/en/rest-api/ml/get-datafeed-stats.asciidoc +++ b/docs/en/rest-api/ml/get-datafeed-stats.asciidoc @@ -1,7 +1,8 @@ +//lcawley Verified example output 2017-04-11 [[ml-get-datafeed-stats]] ==== Get Data Feed Statistics -The get data feed statistics API allows you to retrieve usage information for +The get data feed statistics API enables you to retrieve usage information for data feeds. ===== Request @@ -10,7 +11,6 @@ data feeds. `GET _xpack/ml/datafeeds//_stats` - ===== Description If the data feed is stopped, the only information you receive is the @@ -25,24 +25,12 @@ If the data feed is stopped, the only information you receive is the ===== Results -The API returns the following usage information: +The API returns the following information: -`assigment_explanation`:: - TBD - For example: "" +`datafeeds`:: + (+array+) An array of data feed count objects. + For more information, see <>. -`datafeed_id`:: - (+string+) A numerical character string that uniquely identifies the data feed. - -`node`:: - (+object+) TBD - -`state`:: - (+string+) The status of the data feed, which can be one of the following values: - * `started`: The data feed is actively receiving data. - * `stopped`: The data feed is stopped and will not receive data until - it is re-started. -//failed? 
//// ===== Responses @@ -55,7 +43,17 @@ The API returns the following usage information: //// ===== Examples -.Example results for a started job +The following example gets usage information for the +`datafeed-farequote` data feed: + +[source,js] +-------------------------------------------------- +GET _xpack/ml/datafeeds/datafeed-farequote/_stats +-------------------------------------------------- +// CONSOLE +// TEST[skip:todo] + +The API returns the following results: ---- { "count": 1, @@ -64,15 +62,15 @@ The API returns the following usage information: "datafeed_id": "datafeed-farequote", "state": "started", "node": { - "id": "0-o0tOoRTwKFZifatTWKNw", - "name": "0-o0tOo", - "ephemeral_id": "DOZltLxLS_SzYpW6hQ9hyg", + "id": "IO_gxe2_S8mrzu7OpmK5Jw", + "name": "IO_gxe2", + "ephemeral_id": "KHMWPZoMToOzSsZY9lDDgQ", "transport_address": "127.0.0.1:9300", "attributes": { "max_running_jobs": "10" } }, - "assigment_explanation": "" + "assignment_explanation": "" } ] } diff --git a/docs/en/rest-api/ml/get-datafeed.asciidoc b/docs/en/rest-api/ml/get-datafeed.asciidoc index bafc9877b21..eea0b5b3203 100644 --- a/docs/en/rest-api/ml/get-datafeed.asciidoc +++ b/docs/en/rest-api/ml/get-datafeed.asciidoc @@ -1,7 +1,8 @@ +//lcawley Verified example output 2017-04-11 [[ml-get-datafeed]] ==== Get Data Feeds -The get data feeds API allows you to retrieve configuration information for +The get data feeds API enables you to retrieve configuration information for data feeds. ===== Request @@ -23,14 +24,13 @@ OUTDATED?: The get job API can also be applied to all jobs by using `_all` as th ===== Results -The API returns information about the data feed resource. -For more information, see <>. +The API returns the following information: + +`datafeeds`:: + (+array+) An array of data feed objects. + For more information, see <>. 
//// -===== Query Parameters - -None - ===== Responses 200 @@ -42,22 +42,32 @@ None //// ===== Examples -.Example results for a single data feed +The following example gets configuration information for the +`datafeed-it-ops-kpi` data feed: + +[source,js] +-------------------------------------------------- +GET _xpack/ml/datafeeds/datafeed-it-ops-kpi +-------------------------------------------------- +// CONSOLE +// TEST[skip:todo] + +The API returns the following results: ---- { "count": 1, "datafeeds": [ { - "datafeed_id": "datafeed-it-ops", - "job_id": "it-ops", + "datafeed_id": "datafeed-it-ops-kpi", + "job_id": "it-ops-kpi", "query_delay": "60s", "frequency": "150s", "indexes": [ "it_ops_metrics" ], "types": [ - "network", "kpi", + "network", "sql" ], "query": { @@ -66,8 +76,8 @@ None } }, "aggregations": { - "@timestamp": { - "histogram": { + "buckets": { + "date_histogram": { "field": "@timestamp", "interval": 30000, "offset": 0, @@ -82,11 +92,20 @@ None "sum": { "field": "events_per_min" } + }, + "@timestamp": { + "max": { + "field": "@timestamp" + } } } } }, - "scroll_size": 1000 + "scroll_size": 1000, + "chunking_config": { + "mode": "manual", + "time_span": "30000000ms" + } } ] } diff --git a/docs/en/rest-api/ml/get-influencer.asciidoc b/docs/en/rest-api/ml/get-influencer.asciidoc index 14deea536f8..ca12d67644d 100644 --- a/docs/en/rest-api/ml/get-influencer.asciidoc +++ b/docs/en/rest-api/ml/get-influencer.asciidoc @@ -1,7 +1,7 @@ [[ml-get-influencer]] ==== Get Influencers -The get influencers API allows you to retrieve information about the influencers +The get influencers API enables you to retrieve information about the influencers in a job. ===== Request @@ -11,7 +11,6 @@ in a job. 
//// ===== Description - //// ===== Path Parameters @@ -83,7 +82,7 @@ In this example, the API returns the following information, sorted based on the influencer score in descending order: ---- { - "count": 22, + "count": 28, "influencers": [ { "job_id": "it_ops_new_kpi", diff --git a/docs/en/rest-api/ml/get-job-stats.asciidoc b/docs/en/rest-api/ml/get-job-stats.asciidoc index f40a763c47e..9d5def3d996 100644 --- a/docs/en/rest-api/ml/get-job-stats.asciidoc +++ b/docs/en/rest-api/ml/get-job-stats.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04-11 [[ml-get-job-stats]] ==== Get Job Statistics @@ -12,7 +13,6 @@ The get jobs API allows you to retrieve usage information for jobs. //// ===== Description -TBD //// ===== Path Parameters @@ -23,34 +23,11 @@ TBD ===== Results -The API returns the following usage information: +The API returns the following information: -`job_id`:: - (+string+) A numerical character string that uniquely identifies the job. - -`data_counts`:: - (+object+) An object that describes the number of records processed and any related error counts. - See <>. - -`model_size_stats`:: - (+object+) An object that provides information about the size and contents of the model. - See <> - -`state`:: - (+string+) The status of the job, which can be one of the following values: - `open`::: The job is actively receiving and processing data. - `closed`::: The job finished successfully with its model state persisted. - The job is still available to accept further data. - `closing`::: TBD - `failed`::: The job did not finish successfully due to an error. - This situation can occur due to invalid input data. In this case, - sending corrected data to a failed job re-opens the job and - resets it to an open state. - -NOTE: If you send data in a periodic cycle and close the job at the end of -each transaction, the job is marked as closed in the intervals between -when data is sent. 
For example, if data is sent every minute and it takes -1 second to process, the job has a closed state for 59 seconds. +`jobs`:: + (+array+) An array of job count objects. + For more information, see <>. //// ===== Responses @@ -64,41 +41,52 @@ when data is sent. For example, if data is sent every minute and it takes //// ===== Examples -.Example results for a single job +The following example gets usage information for the `farequote` job: + +[source,js] +-------------------------------------------------- +GET _xpack/ml/anomaly_detectors/farequote/_stats +-------------------------------------------------- +// CONSOLE +// TEST[skip:todo] + +In this example, the API returns a single result that matches the specified +score and time constraints: ---- { "count": 1, "jobs": [ { - "job_id": "it-ops-kpi", + "job_id": "farequote", "data_counts": { - "job_id": "it-ops", - "processed_record_count": 43272, - "processed_field_count": 86544, - "input_bytes": 2846163, - "input_field_count": 86544, + "job_id": "farequote", + "processed_record_count": 86275, + "processed_field_count": 172550, + "input_bytes": 6744714, + "input_field_count": 172550, "invalid_date_count": 0, "missing_field_count": 0, "out_of_order_timestamp_count": 0, "empty_bucket_count": 0, - "sparse_bucket_count": 0, - "bucket_count": 4329, - "earliest_record_timestamp": 1454020560000, - "latest_record_timestamp": 1455318900000, - "last_data_time": 1491235405945, - "input_record_count": 43272 + "sparse_bucket_count": 15, + "bucket_count": 1528, + "earliest_record_timestamp": 1454803200000, + "latest_record_timestamp": 1455235196000, + "last_data_time": 1491948163685, + "latest_sparse_bucket_timestamp": 1455174900000, + "input_record_count": 86275 }, "model_size_stats": { - "job_id": "it-ops", + "job_id": "farequote", "result_type": "model_size_stats", - "model_bytes": 25586, - "total_by_field_count": 3, + "model_bytes": 387594, + "total_by_field_count": 21, "total_over_field_count": 0, - 
"total_partition_field_count": 2, + "total_partition_field_count": 20, "bucket_allocation_failures_count": 0, "memory_status": "ok", - "log_time": 1491235406000, - "timestamp": 1455318600000 + "log_time": 1491948163000, + "timestamp": 1455234600000 }, "state": "closed" } diff --git a/docs/en/rest-api/ml/get-job.asciidoc b/docs/en/rest-api/ml/get-job.asciidoc index 8200dc9894e..b8e7c9a54c0 100644 --- a/docs/en/rest-api/ml/get-job.asciidoc +++ b/docs/en/rest-api/ml/get-job.asciidoc @@ -1,17 +1,18 @@ +//lcawley Verified example output 2017-04-11 [[ml-get-job]] ==== Get Jobs -The get jobs API allows you to retrieve configuration information about jobs. +The get jobs API enables you to retrieve configuration information for jobs. ===== Request `GET _xpack/ml/anomaly_detectors/` + `GET _xpack/ml/anomaly_detectors/` + //// ===== Description -OUTDATED?: The get job API can also be applied to all jobs by using `_all` as the job name. //// ===== Path Parameters @@ -21,14 +22,13 @@ OUTDATED?: The get job API can also be applied to all jobs by using `_all` as th ===== Results -The API returns information about the job resource. For more information, see -<>. +The API returns the following information: + +`jobs`:: + (+array+) An array of job resources. + For more information, see <>. 
//// -===== Query Parameters - -None - ===== Responses 200 @@ -40,40 +40,48 @@ None //// ===== Examples -.Example results for a single job +The following example gets configuration information for the `farequote` job: + +[source,js] +-------------------------------------------------- +GET _xpack/ml/anomaly_detectors/farequote +-------------------------------------------------- +// CONSOLE +// TEST[skip:todo] + +In this example, the API returns a single result that matches the specified +score and time constraints: ---- { "count": 1, "jobs": [ - { - "job_id": "it-ops-kpi", - "description": "First simple job", - "create_time": 1491007356077, - "finished_time": 1491007365347, + { + "job_id": "farequote", + "job_type": "anomaly_detector", + "description": "Multi-metric job", + "create_time": 1491948149563, + "finished_time": 1491948166289, "analysis_config": { "bucket_span": "5m", - "latency": "0ms", - "summary_count_field_name": "doc_count", "detectors": [ { - "detector_description": "low_sum(events_per_min)", - "function": "low_sum", - "field_name": "events_per_min", + "detector_description": "mean(responsetime)", + "function": "mean", + "field_name": "responsetime", + "partition_field_name": "airline", "detector_rules": [] } ], - "influencers": [], - "use_per_partition_normalization": false + "influencers": [ + "airline" + ] }, "data_description": { "time_field": "@timestamp", "time_format": "epoch_ms" }, - "model_plot_config": { - "enabled": true - }, "model_snapshot_retention_days": 1, - "model_snapshot_id": "1491007364", + "model_snapshot_id": "1491948163", "results_index_name": "shared" } ] diff --git a/docs/en/rest-api/ml/get-record.asciidoc b/docs/en/rest-api/ml/get-record.asciidoc index 32675a25608..6cf5e557958 100644 --- a/docs/en/rest-api/ml/get-record.asciidoc +++ b/docs/en/rest-api/ml/get-record.asciidoc @@ -1,7 +1,8 @@ [[ml-get-record]] +//lcawley Verified example output 2017-04-11 ==== Get Records -The get records API allows you to retrieve anomaly 
records for a job. +The get records API enables you to retrieve anomaly records for a job. ===== Request @@ -10,7 +11,6 @@ The get records API allows you to retrieve anomaly records for a job. //// ===== Description - //// ===== Path Parameters @@ -21,7 +21,6 @@ The get records API allows you to retrieve anomaly records for a job. `desc`:: (+boolean+) If true, the results are sorted in descending order. -//TBD: Using the "sort" value? `end`:: (+string+) Returns records with timestamps earlier than this time. @@ -77,54 +76,42 @@ The following example gets bucket information for the `it-ops-kpi` job: [source,js] -------------------------------------------------- -GET _xpack/ml/anomaly_detectors/it-ops-kpi/results/buckets +GET _xpack/ml/anomaly_detectors/it-ops-kpi/results/records { "sort": "record_score", "desc": true, - "start": "1454944200000" + "start": "1454944100000" } -------------------------------------------------- // CONSOLE // TEST[skip:todo] -In this example, the API returns a single result that matches the specified -score and time constraints: +In this example, the API returns twelve results for the specified +time constraints: ---- { - "count": 6, + "count": 12, "records": [ { - "job_id": "it_ops_new_kpi", + "job_id": "it-ops-kpi", "result_type": "record", - "probability": 0.000113075, - "record_score": 86.9677, - "initial_record_score": 82.8891, - "bucket_span": 600, + "probability": 0.00000332668, + "record_score": 72.9929, + "initial_record_score": 65.7923, + "bucket_span": 300, "detector_index": 0, "sequence_num": 1, "is_interim": false, "timestamp": 1454944200000, - "partition_field_name": "kpi_indicator", - "partition_field_value": "online_purchases", - "function": "low_non_zero_count", - "function_description": "count", + "function": "low_sum", + "function_description": "sum", "typical": [ - 3582.53 + 1806.48 ], "actual": [ - 575 + 288 ], - "influencers": [ - { - "influencer_field_name": "kpi_indicator", - "influencer_field_values": [ - 
"online_purchases" - ] - } - ], - "kpi_indicator": [ - "online_purchases" - ] + "field_name": "events_per_min" }, ... ] diff --git a/docs/en/rest-api/ml/get-snapshot.asciidoc b/docs/en/rest-api/ml/get-snapshot.asciidoc index 77da8b78b4d..4618e317e68 100644 --- a/docs/en/rest-api/ml/get-snapshot.asciidoc +++ b/docs/en/rest-api/ml/get-snapshot.asciidoc @@ -1,7 +1,8 @@ +//lcawley Verified example output 2017-04-11 [[ml-get-snapshot]] ==== Get Model Snapshots -The get model snapshots API allows you to retrieve information about model snapshots. +The get model snapshots API enables you to retrieve information about model snapshots. ===== Request @@ -11,7 +12,6 @@ The get model snapshots API allows you to retrieve information about model snaps //// ===== Description -OUTDATED?: The get job API can also be applied to all jobs by using `_all` as the job name. //// ===== Path Parameters @@ -74,7 +74,7 @@ The following example gets model snapshot information for the [source,js] -------------------------------------------------- -GET _xpack/ml/anomaly_detectors/it_ops_new_logs/model_snapshots +GET _xpack/ml/anomaly_detectors/farequote/model_snapshots { "start": "1491852977000" } @@ -88,25 +88,25 @@ In this example, the API provides a single result: "count": 1, "model_snapshots": [ { - "job_id": "it_ops_new_logs", - "timestamp": 1491852978000, - "description": "State persisted due to job close at 2017-04-10T12:36:18-0700", - "snapshot_id": "1491852978", + "job_id": "farequote", + "timestamp": 1491948163000, + "description": "State persisted due to job close at 2017-04-11T15:02:43-0700", + "snapshot_id": "1491948163", "snapshot_doc_count": 1, "model_size_stats": { - "job_id": "it_ops_new_logs", + "job_id": "farequote", "result_type": "model_size_stats", - "model_bytes": 100393, - "total_by_field_count": 13, + "model_bytes": 387594, + "total_by_field_count": 21, "total_over_field_count": 0, - "total_partition_field_count": 2, + "total_partition_field_count": 20, 
"bucket_allocation_failures_count": 0, "memory_status": "ok", - "log_time": 1491852978000, - "timestamp": 1455229800000 + "log_time": 1491948163000, + "timestamp": 1455234600000 }, - "latest_record_time_stamp": 1455232663000, - "latest_result_time_stamp": 1455229800000, + "latest_record_time_stamp": 1455235196000, + "latest_result_time_stamp": 1455234900000, "retain": false } ] diff --git a/docs/en/rest-api/ml/jobcounts.asciidoc b/docs/en/rest-api/ml/jobcounts.asciidoc index 5e3e955bd6d..4758c1cc9f6 100644 --- a/docs/en/rest-api/ml/jobcounts.asciidoc +++ b/docs/en/rest-api/ml/jobcounts.asciidoc @@ -1,33 +1,56 @@ +//lcawley Verified example output 2017-04-11 [[ml-jobcounts]] ==== Job Counts -The `data_counts` object provides information about the operational progress of a job. -It describes the number of records processed and any related error counts. +The get job statistics API provides information about the operational +progress of a job. NOTE: Job count values are cumulative for the lifetime of a job. If a model snapshot is reverted or old results are deleted, the job counts are not reset. -[[ml-datacounts]] -===== Data Counts Objects - -A `data_counts` object has the following properties: +`data_counts`:: + (+object+) An object that describes the number of records processed and any related error counts. + See <>. `job_id`:: (+string+) A numerical character string that uniquely identifies the job. -`processed_record_count`:: - (+long+) The number of records that have been processed by the job. - This value includes records with missing fields, since they are nonetheless analyzed. - + - The following records are not processed: - * Records not in chronological order and outside the latency window - * Records with invalid timestamp - * Records filtered by an exclude transform +`model_size_stats`:: + (+object+) An object that provides information about the size and contents of the model. 
+ See <> -`processed_field_count`:: - (+long+) The total number of fields in all the records that have been processed by the job. - Only fields that are specified in the detector configuration object contribute to this count. - The time stamp is not included in this count. +`state`:: + (+string+) The status of the job, which can be one of the following values: + `open`::: The job is actively receiving and processing data. + `closed`::: The job finished successfully with its model state persisted. + The job is still available to accept further data. + `closing`::: TBD + `failed`::: The job did not finish successfully due to an error. + This situation can occur due to invalid input data. In this case, + sending corrected data to a failed job re-opens the job and + resets it to an open state. + +NOTE: If you send data in a periodic cycle and close the job at the end of +each transaction, the job is marked as closed in the intervals between +when data is sent. For example, if data is sent every minute and it takes +1 second to process, the job has a closed state for 59 seconds. + +[float] +[[ml-datacounts]] +===== Data Counts Objects + +The `data_counts` object describes the number of records processed +and any related error counts. It has the following properties: + +`bucket_count`:: + (+long+) The number of bucket results produced by the job. + +`earliest_record_timestamp`:: + (+string+) The timestamp of the earliest chronologically ordered record. + The datetime string is in ISO 8601 format. + +`empty_bucket_count`:: + TBD `input_bytes`:: (+long+) The number of raw bytes read by the job. @@ -36,9 +59,27 @@ A `data_counts` object has the following properties: (+long+) The total number of record fields read by the job. This count includes fields that are not used in the analysis. +`input_record_count`:: + (+long+) The number of data records read by the job. 
+ `invalid_date_count`:: (+long+) The number of records with either a missing date field or a date that could not be parsed. +`job_id`:: + (+string+) A numerical character string that uniquely identifies the job. + +`last_data_time`:: + (++) TBD + +`latest_record_timestamp`:: + (+string+) The timestamp of the last chronologically ordered record. + If the records are not in strict chronological order, this value might not be + the same as the timestamp of the last record. + The datetime string is in ISO 8601 format. + +`latest_sparse_bucket_timestamp`:: + (++) TBD + `missing_field_count`:: (+long+) The number of records that are missing a field that the job is configured to analyze. Records with missing fields are still processed because it is possible that not all fields are missing. @@ -48,32 +89,24 @@ A `data_counts` object has the following properties: (+long+) The number of records that are out of time sequence and outside of the latency window. These records are discarded, since jobs require time series data to be in ascending chronological order. -`empty_bucket_count`:: - TBD +`processed_field_count`:: + (+long+) The total number of fields in all the records that have been processed by the job. + Only fields that are specified in the detector configuration object contribute to this count. + The time stamp is not included in this count. + +`processed_record_count`:: + (+long+) The number of records that have been processed by the job. + This value includes records with missing fields, since they are nonetheless analyzed. + + + The following records are not processed: + * Records not in chronological order and outside the latency window + * Records with invalid timestamp + * Records filtered by an exclude transform `sparse_bucket_count`:: TBD -`bucket_count`:: - (+long+) The number of bucket results produced by the job. - -`earliest_record_timestamp`:: - (+string+) The timestamp of the earliest chronologically ordered record. 
- The datetime string is in ISO 8601 format. - -`latest_record_timestamp`:: - (+string+) The timestamp of the last chronologically ordered record. - If the records are not in strict chronological order, this value might not be - the same as the timestamp of the last record. - The datetime string is in ISO 8601 format. - -`last_data_time`:: - TBD - -`input_record_count`:: - (+long+) The number of data records read by the job. - - +[float] [[ml-modelsizestats]] ===== Model Size Stats Objects diff --git a/docs/en/rest-api/ml/jobresource.asciidoc b/docs/en/rest-api/ml/jobresource.asciidoc index 774132480d1..3267ec8dd22 100644 --- a/docs/en/rest-api/ml/jobresource.asciidoc +++ b/docs/en/rest-api/ml/jobresource.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04-11 [[ml-job-resource]] ==== Job Resources @@ -26,16 +27,19 @@ A job resource has the following properties: `job_id`:: (+string+) A numerical character string that uniquely identifies the job. +`job_type`:: + (+string+) TBD. For example: "anomaly_detector". + `model_plot_config`:: TBD `enabled`:: TBD. For example, `true`. `model_snapshot_id`:: - TBD. For example, `1491007364`. - + (+string+) A numerical character string that uniquely identifies the model + snapshot. For example, `1491007364`. `model_snapshot_retention_days`:: - (+long+) The time in days that model snapshots are retained for the job. Older snapshots are deleted. - The default value is 1 day. + (+long+) The time in days that model snapshots are retained for the job. + Older snapshots are deleted. The default value is 1 day. `results_index_name`:: TBD. For example, `shared`. @@ -46,8 +50,8 @@ A job resource has the following properties: An analysis configuration object has the following properties: `bucket_span` (required):: - (+unsigned integer+) The size of the interval that the analysis is aggregated into, measured in seconds. - The default value is 300 seconds (5 minutes). 
+ (+unsigned integer+) The size of the interval that the analysis is aggregated into, measured in seconds. The default value is 5 minutes. +//TBD: Is this now measured in minutes? `categorization_field_name`:: (+string+) If not null, the values of the specified field will be categorized. @@ -78,8 +82,7 @@ and an error is returned. the use of influencers is recommended as it aggregates results for each influencer entity. `latency`:: - (+unsigned integer+) The size of the window, in seconds, in which to expect data that is out of time order. - The default value is 0 seconds (no latency). + (+unsigned integer+) The size of the window, in seconds, in which to expect data that is out of time order. The default value is 0 milliseconds (no latency). NOTE: Latency is only applicable when you send data by using the <> API. @@ -127,7 +130,7 @@ Each detector has the following properties: (+string+) A description of the detector. For example, `low_sum(events_per_min)`. `detector_rules`:: - TBD + (+array+) TBD `exclude_frequent`:: (+string+) Contains one of the following values: `all`, `none`, `by`, or `over`. diff --git a/docs/en/rest-api/ml/open-job.asciidoc b/docs/en/rest-api/ml/open-job.asciidoc index edab37abd13..93360dd053a 100644 --- a/docs/en/rest-api/ml/open-job.asciidoc +++ b/docs/en/rest-api/ml/open-job.asciidoc @@ -1,8 +1,9 @@ +//lcawley Verified example output 2017-04-11 [[ml-open-job]] ==== Open Jobs -An anomaly detection job must be opened in order for it to be ready to receive and analyze data. -A job may be opened and closed multiple times throughout its lifecycle. +A job must be opened in order for it to be ready to receive and analyze data. +A job can be opened and closed multiple times throughout its lifecycle. 
===== Request diff --git a/docs/en/rest-api/ml/post-data.asciidoc b/docs/en/rest-api/ml/post-data.asciidoc index e6acdc394c1..eb40f87be1d 100644 --- a/docs/en/rest-api/ml/post-data.asciidoc +++ b/docs/en/rest-api/ml/post-data.asciidoc @@ -1,3 +1,4 @@ +//lcawley: Verified example output 2017-04-11 [[ml-post-data]] ==== Post Data to Jobs @@ -17,10 +18,9 @@ many small uploads, rather than queueing data to upload larger files. IMPORTANT: Data can only be accepted from a single connection. - Do not attempt to access the data endpoint from different threads at the same time. - Use a single connection synchronously to send data, close, flush or delete a single job. - + - It is not currently possible to post data to multiple jobs using wildcards or a comma separated list. +Use a single connection synchronously to send data, close, flush, or delete a single job. +It is not currently possible to post data to multiple jobs using wildcards +or a comma separated list. ===== Path Parameters @@ -30,10 +30,10 @@ IMPORTANT: Data can only be accepted from a single connection. 
===== Request Body `reset_start`:: - (+string+; default: ++null++) Specifies the start of the bucket resetting range + (+string+) Specifies the start of the bucket resetting range `reset_end`:: - (+string+; default: ++null++) Specifies the end of the bucket resetting range" + (+string+) Specifies the end of the bucket resetting range //// ===== Responses @@ -55,30 +55,34 @@ The following example posts data from the farequote.json file to the `farequote` [source,js] -------------------------------------------------- $ curl -s -H "Content-type: application/json" --X POST http:\/\/localhost:9200/_xpack/ml/anomaly_detectors/farequote --data-binary @farequote.json +-X POST http:\/\/localhost:9200/_xpack/ml/anomaly_detectors/it_ops_new_kpi/_data +--data-binary @it_ops_new_kpi.json -------------------------------------------------- // CONSOLE // TEST[skip:todo] +//TBD: Create example of how to post a small data example in Kibana? + When the data is sent, you receive information about the operational progress of the job. 
For example: ---- { - "job_id":"farequote", - "processed_record_count":86275, - "processed_field_count":172550, - "input_bytes":8678202, - "input_field_count":258825, + "job_id":"it_ops_new_kpi", + "processed_record_count":21435, + "processed_field_count":64305, + "input_bytes":2589063, + "input_field_count":85740, "invalid_date_count":0, "missing_field_count":0, "out_of_order_timestamp_count":0, - "empty_bucket_count":0, + "empty_bucket_count":16, "sparse_bucket_count":0, - "bucket_count":1440, - "earliest_record_timestamp":1454803200000, - "latest_record_timestamp":1455235196000, - "last_data_time":1491436182038, - "input_record_count":86275 + "bucket_count":2165, + "earliest_record_timestamp":1454020569000, + "latest_record_timestamp":1455318669000, + "last_data_time":1491952300658, + "latest_empty_bucket_timestamp":1454541600000, + "input_record_count":21435 } ---- diff --git a/docs/en/rest-api/ml/preview-datafeed.asciidoc b/docs/en/rest-api/ml/preview-datafeed.asciidoc index d7e7e98a163..c311291e145 100644 --- a/docs/en/rest-api/ml/preview-datafeed.asciidoc +++ b/docs/en/rest-api/ml/preview-datafeed.asciidoc @@ -1,7 +1,8 @@ +//lcawley: Verified example output 2017-04-11 [[ml-preview-datafeed]] ==== Preview Data Feeds -The preview data feed API allows you to preview a data feed. +The preview data feed API enables you to preview a data feed. ===== Request @@ -10,8 +11,7 @@ The preview data feed API allows you to preview a data feed. ===== Description -TBD -//How much data does it return? +//TBD: How much data does it return? The API returns example data by using the current data feed settings. ===== Path Parameters @@ -52,14 +52,17 @@ The data that is returned for this example is as follows: [ { "@timestamp": 1454803200000, + "airline": "AAL", "responsetime": 132.20460510253906 }, { "@timestamp": 1454803200000, + "airline": "JZA", "responsetime": 990.4628295898438 }, { "@timestamp": 1454803200000, + "airline": "JBU", "responsetime": 877.5927124023438 }, ... 
diff --git a/docs/en/rest-api/ml/put-datafeed.asciidoc b/docs/en/rest-api/ml/put-datafeed.asciidoc index bea283f592d..e08b5468b23 100644 --- a/docs/en/rest-api/ml/put-datafeed.asciidoc +++ b/docs/en/rest-api/ml/put-datafeed.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04-11 [[ml-put-datafeed]] ==== Create Data Feeds @@ -7,7 +8,6 @@ The create data feed API enables you to instantiate a data feed. `PUT _xpack/ml/datafeeds/` - ===== Description You must create a job before you create a data feed. You can associate only one @@ -20,36 +20,37 @@ data feed to each job. ===== Request Body -aggregations:: - (+object+) TBD. For example: {"@timestamp": {"histogram": {"field": "@timestamp", - "interval": 30000,"offset": 0,"order": {"_key": "asc"},"keyed": false, - "min_doc_count": 0}, "aggregations": {"events_per_min": {"sum": { - "field": "events_per_min"}}}}} +`aggregations`:: + (+object+) TBD. -frequency:: +`chunking_config`:: + (+object+) TBD. + For example: {"mode": "manual", "time_span": "30000000ms"} + +`frequency`:: TBD: For example: "150s" -indexes (required):: +`indexes` (required):: (+array+) An array of index names. For example: ["it_ops_metrics"] -job_id (required):: +`job_id` (required):: (+string+) A numerical character string that uniquely identifies the job. -query:: +`query`:: (+object+) The query that retrieves the data. By default, this property has the following value: `{"match_all": {"boost": 1}}`. -query_delay:: +`query_delay`:: TBD. For example: "60s" -scroll_size:: +`scroll_size`:: TBD. For example, 1000 -types (required):: +`types` (required):: TBD. For example: ["network","sql","kpi"] For more information about these properties, -see <>. +see <>. 
//// ===== Responses @@ -72,21 +73,13 @@ The following example creates the `datafeed-it-ops-kpi` data feed: PUT _xpack/ml/datafeeds/datafeed-it-ops-kpi { "job_id": "it-ops-kpi", - "query": - { - "match_all": - { - "boost": 1 - } - }, - "indexes": [ - "it_ops_metrics" - ], - "types": [ - "kpi", - "sql", - "network" - ] + "indexes": ["it_ops_metrics"], + "types": ["kpi","network","sql"], + "query": { + "match_all": { + "boost": 1 + } + } } -------------------------------------------------- // CONSOLE @@ -103,14 +96,17 @@ When the data feed is created, you receive the following results: ], "types": [ "kpi", - "sql", - "network" + "network", + "sql" ], "query": { "match_all": { "boost": 1 } }, - "scroll_size": 1000 + "scroll_size": 1000, + "chunking_config": { + "mode": "auto" + } } ---- diff --git a/docs/en/rest-api/ml/put-job.asciidoc b/docs/en/rest-api/ml/put-job.asciidoc index 1aee3fd3736..9639af528e9 100644 --- a/docs/en/rest-api/ml/put-job.asciidoc +++ b/docs/en/rest-api/ml/put-job.asciidoc @@ -1,7 +1,8 @@ +//lcawley Verified example output 2017-04-11 [[ml-put-job]] ==== Create Jobs -The create job API enables you to instantiate a {ml} job. +The create job API enables you to instantiate a job. ===== Request @@ -10,7 +11,6 @@ The create job API enables you to instantiate a {ml} job. //// ===== Description -TBD //// ===== Path Parameters @@ -20,23 +20,28 @@ TBD ===== Request Body -`description`:: - (+string+) An optional description of the job. - `analysis_config`:: (+object+) The analysis configuration, which specifies how to analyze the data. See <>. +`analysis_limits`:: + Optionally specifies runtime limits for the job. See <>. + `data_description`:: (+object+) Describes the format of the input data. See <>. -`analysis_limits`:: - Optionally specifies runtime limits for the job. See <>. +`description`:: + (+string+) An optional description of the job. + +`model_snapshot_retention_days`:: + (+long+) The time in days that model snapshots are retained for the job. 
+ Older snapshots are deleted. The default value is 1 day. + +`results_index_name`:: + (+string+) TBD. For example, `shared`. //// -This expects data to be sent in JSON format using the POST `_data` API. - ===== Responses TBD @@ -83,8 +88,9 @@ When the job is created, you receive the following results: ---- { "job_id": "it-ops-kpi", + "job_type": "anomaly_detector", "description": "First simple job", - "create_time": 1491247016391, + "create_time": 1491948238874, "analysis_config": { "bucket_span": "5m", "latency": "0ms", @@ -96,8 +102,7 @@ When the job is created, you receive the following results: "detector_rules": [] } ], - "influencers": [], - "use_per_partition_normalization": false + "influencers": [] }, "data_description": { "time_field": "@timestamp", diff --git a/docs/en/rest-api/ml/resultsresource.asciidoc b/docs/en/rest-api/ml/resultsresource.asciidoc index 93c0b96a0e7..826a9ba8823 100644 --- a/docs/en/rest-api/ml/resultsresource.asciidoc +++ b/docs/en/rest-api/ml/resultsresource.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04-11 [[ml-results-resource]] ==== Results Resources @@ -63,10 +64,9 @@ A record object has the following properties: `detector_index`:: (+number+) A unique identifier for the detector. -//`fieldName`:: -// TBD: This field did not appear in my results, but it might be a valid property. -// (+string+) Certain functions require a field to operate on. For those functions, -// this is the name of the field to be analyzed. +`field_name`:: + (+string+) Certain functions require a field to operate on. + For those functions, this is the name of the field to be analyzed. `function`:: (+string+) The function in which the anomaly occurs. @@ -90,8 +90,9 @@ A record object has the following properties: `job_id`:: (+string+) A numerical character string that uniquely identifies the job. -`kpi_indicator`:: - (++) TBD. For example, ["online_purchases"] +//`kpi_indicator`:: +// (++) TBD. 
For example, ["online_purchases"] +// I did not receive this in later tests. Is it still valid? `partition_field_name`:: (+string+) The name of the partition field that was used in the analysis, if @@ -154,9 +155,6 @@ An influencer object has the following properties: // Same as for buckets? i.e. (+unsigned integer+) The length of the bucket in seconds. // This value is equal to the `bucket_span` value in the job configuration. -`job_id`:: - (+string+) A numerical character string that uniquely identifies the job. - `influencer_score`:: (+number+) An anomaly score for the influencer in this bucket time interval. The score is calculated based upon a sophisticated aggregation of the anomalies @@ -176,6 +174,9 @@ An influencer object has the following properties: (+boolean+) If true, then this is an interim result. In other words, it is calculated based on partial input data. +`job_id`:: + (+string+) A numerical character string that uniquely identifies the job. + `kpi_indicator`:: (++) TBD. For example, "online_purchases". @@ -188,7 +189,54 @@ An influencer object has the following properties: `result_type`:: (++) TBD. For example, "influencer". -//TBD: How is this different from the "bucket_influencer" type? + +`sequence_num`:: + (++) TBD. For example, 2. + +`timestamp`:: + (+date+) Influencers are produced in buckets. This value is the start time + of the bucket, specified in ISO 8601 format. For example, 1454943900000. + +A bucket influencer object has the following properties: + +`anomaly_score`:: + (+number+) TBD +//It is unclear how this differs from the influencer_score. +//An anomaly score for the influencer in this bucket time interval. +//The score is calculated based upon a sophisticated aggregation of the anomalies +//in the bucket for this entity. For example: 94.1386. + +`bucket_span`:: + (++) TBD. For example, 300. +//// +// Same as for buckets? i.e. (+unsigned integer+) The length of the bucket in seconds. 
+// This value is equal to the `bucket_span` value in the job configuration. +//// +`initial_anomaly_score`:: + (++) TBD. For example, 83.3831. + +`influencer_field_name`:: + (+string+) The field name of the influencer. + +`is_interim`:: + (+boolean+) If true, then this is an interim result. + In other words, it is calculated based on partial input data. + +`job_id`:: + (+string+) A numerical character string that uniquely identifies the job. + +`probability`:: + (+number+) The probability that the influencer has this behavior. + This value is in the range 0 to 1. For example, 0.0000109783. +// For example, 0.03 means 3%. This value is held to a high precision of over +//300 decimal places. In scientific notation, a value of 3.24E-300 is highly +//unlikely and therefore highly anomalous. + +`raw_anomaly_score`:: + (++) TBD. For example, 2.32119. + +`result_type`:: + (++) TBD. For example, "bucket_influencer". `sequence_num`:: (++) TBD. For example, 2. @@ -227,7 +275,7 @@ A bucket resource has the following properties: `bucket_influencers`:: (+array+) An array of influencer objects. - For more information, see <>. + For more information, see <>. `bucket_span`:: (+unsigned integer+) The length of the bucket in seconds. This value is diff --git a/docs/en/rest-api/ml/revert-snapshot.asciidoc b/docs/en/rest-api/ml/revert-snapshot.asciidoc index fca27ccbf9e..ee31f03f998 100644 --- a/docs/en/rest-api/ml/revert-snapshot.asciidoc +++ b/docs/en/rest-api/ml/revert-snapshot.asciidoc @@ -1,7 +1,8 @@ +//lcawley Verified example output 2017-04-11 [[ml-revert-snapshot]] ==== Revert Model Snapshots -The revert model snapshot API allows you to revert to a specific snapshot. +The revert model snapshot API enables you to revert to a specific snapshot. 
===== Request diff --git a/docs/en/rest-api/ml/snapshotresource.asciidoc b/docs/en/rest-api/ml/snapshotresource.asciidoc index 7a03ed16842..8887697c956 100644 --- a/docs/en/rest-api/ml/snapshotresource.asciidoc +++ b/docs/en/rest-api/ml/snapshotresource.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04-11 [[ml-snapshot-resource]] ==== Model Snapshot Resources @@ -31,7 +32,7 @@ A model snapshot resource has the following properties: (++) TBD. For example: 1455229800000. `model_size_stats`:: - (+object+) TBD. + (+object+) TBD. See <>. `retain`:: (+boolean+) TBD. For example: false. @@ -47,6 +48,8 @@ A model snapshot resource has the following properties: (+date+) The creation timestamp for the snapshot, specified in ISO 8601 format. For example: 1491852978000. +[float] +[[ml-snapshot-stats]] ===== Model Size Statistics The `model_size_stats` object has the following properties: diff --git a/docs/en/rest-api/ml/start-datafeed.asciidoc b/docs/en/rest-api/ml/start-datafeed.asciidoc index 6079eaaec2f..165677a4551 100644 --- a/docs/en/rest-api/ml/start-datafeed.asciidoc +++ b/docs/en/rest-api/ml/start-datafeed.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04 [[ml-start-datafeed]] ==== Start Data Feeds @@ -10,6 +11,9 @@ A data feed can be opened and closed multiple times throughout its lifecycle. ===== Description +NOTE: Before you can start a data feed, the job must be open. Otherwise, an error +occurs. + When you start a data feed, you can specify a start time. This allows you to include a training period, providing you have this data available in {es}. If you want to analyze from the beginning of a dataset, you can specify any date @@ -23,6 +27,19 @@ job analyzes data from the start time until the end time, at which point the analysis stops. This scenario is useful for a one-off batch analysis. If you do not specify an end time, the data feed runs continuously. 
+The `start` and `end` times can be specified by using one of the +following formats: + + +- ISO 8601 format with milliseconds, for example `2017-01-22T06:00:00.000Z` +- ISO 8601 format without milliseconds, for example `2017-01-22T06:00:00+00:00` +- Seconds from the Epoch, for example `1390370400` + +Date-time arguments using either of the ISO 8601 formats must have a time zone +designator, where Z is accepted as an abbreviation for UTC time. + +NOTE: When a URL is expected (for example, in browsers), the `+` used in time +zone designators must be encoded as `%2B`. + If the system restarts, any jobs that had data feeds running are also restarted. When a stopped data feed is restarted, it continues processing input data from @@ -33,9 +50,6 @@ because the job might not have completely processed all data for that millisecon If you specify a `start` value that is earlier than the timestamp of the latest processed record, that value is ignored. -NOTE: Before you can start a data feed, the job must be open. Otherwise, an error -occurs. - ===== Path Parameters `feed_id` (required):: @@ -51,18 +65,6 @@ occurs. (+string+) The time that the data feed should begin. This value is inclusive. The default value is an empty string. -These `start` and `end` times can be specified by using one of the -following formats: -* ISO 8601 format with milliseconds, for example `2017-01-22T06:00:00.000Z` -* ISO 8601 format without milliseconds, for example `2017-01-22T06:00:00+00:00` -* Seconds from the Epoch, for example `1390370400` - -NOTE: When a URL is expected (for example, in browsers), the `+`` used in time -zone designators has to be encoded as `%2B`. - -Date-time arguments using either of the ISO 8601 formats must have a time zone -designator, where Z is accepted as an abbreviation for UTC time. - `timeout`:: (+time+) Controls the amount of time to wait until a data feed starts. The default value is 20 seconds. 
diff --git a/docs/en/rest-api/ml/stop-datafeed.asciidoc b/docs/en/rest-api/ml/stop-datafeed.asciidoc index 762357517fc..a771066972e 100644 --- a/docs/en/rest-api/ml/stop-datafeed.asciidoc +++ b/docs/en/rest-api/ml/stop-datafeed.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04-11 [[ml-stop-datafeed]] ==== Stop Data Feeds @@ -11,7 +12,6 @@ A data feed can be opened and closed multiple times throughout its lifecycle. //// ===== Description -TBD //// ===== Path Parameters diff --git a/docs/en/rest-api/ml/update-datafeed.asciidoc b/docs/en/rest-api/ml/update-datafeed.asciidoc index 9f72dca586d..e2ef2daa4c9 100644 --- a/docs/en/rest-api/ml/update-datafeed.asciidoc +++ b/docs/en/rest-api/ml/update-datafeed.asciidoc @@ -1,7 +1,8 @@ +//lcawley Verified example output 2017-04 [[ml-update-datafeed]] ==== Update Data Feeds -The update data feed API allows you to update certain properties of a data feed. +The update data feed API enables you to update certain properties of a data feed. ===== Request @@ -10,8 +11,6 @@ The update data feed API allows you to update certain properties of a data feed. //// ===== Description -TBD - //// ===== Path Parameters @@ -22,29 +21,33 @@ TBD The following properties can be updated after the data feed is created: -aggregations:: +`aggregations`:: (+object+) TBD. -frequency:: +`chunking_config`:: + (+object+) TBD. + For example: {"mode": "manual", "time_span": "30000000ms"} + +`frequency`:: TBD: For example: "150s" -indexes (required):: +`indexes` (required):: (+array+) An array of index names. For example: ["it_ops_metrics"] -job_id:: +`job_id`:: (+string+) A numerical character string that uniquely identifies the job. -query:: +`query`:: (+object+) The query that retrieves the data. By default, this property has the following value: `{"match_all": {"boost": 1}}`. -query_delay:: +`query_delay`:: TBD. For example: "60s" -scroll_size:: +`scroll_size`:: TBD. For example, 1000 -types (required):: +`types` (required):: TBD. 
For example: ["network","sql","kpi"] For more information about these properties, @@ -68,30 +71,41 @@ The following example updates the `it-ops-kpi` job: [source,js] -------------------------------------------------- -POST _xpack/ml/datafeeds/datafeed-it-ops-kpi3/_update +POST _xpack/ml/datafeeds/datafeed-it-ops-kpi/_update { + "query_delay": "60s", + "frequency": "150s", "aggregations": { - "@timestamp": { - "histogram": { - "field": "@timestamp", - "interval": 30000, - "offset": 0, - "order": { - "_key": "asc" - }, - "keyed": false, - "min_doc_count": 0 - }, - "aggregations": { - "events_per_min": { - "sum": { - "field": "events_per_min" + "buckets": { + "date_histogram": { + "field": "@timestamp", + "interval": 30000, + "offset": 0, + "order": { + "_key": "asc" + }, + "keyed": false, + "min_doc_count": 0 + }, + "aggregations": { + "events_per_min": { + "sum": { + "field": "events_per_min" + } + }, + "@timestamp": { + "max": { + "field": "@timestamp" + } + } } } - } - } - }, - "frequency": "160s" + }, + "scroll_size": 1000, + "chunking_config": { + "mode": "manual", + "time_span": "30000000ms" + } } -------------------------------------------------- // CONSOLE @@ -102,12 +116,12 @@ When the data feed is updated, you receive the following results: { "datafeed_id": "datafeed-it-ops-kpi", "job_id": "it-ops-kpi", - "query_delay": "1m", - "frequency": "160s", -... + "query_delay": "60s", + "frequency": "150s", + ... 
"aggregations": { - "@timestamp": { - "histogram": { + "buckets": { + "date_histogram": { "field": "@timestamp", "interval": 30000, "offset": 0, @@ -122,10 +136,19 @@ When the data feed is updated, you receive the following results: "sum": { "field": "events_per_min" } + }, + "@timestamp": { + "max": { + "field": "@timestamp" + } } } } }, - "scroll_size": 1000 + "scroll_size": 1000, + "chunking_config": { + "mode": "manual", + "time_span": "30000000ms" + } } ---- diff --git a/docs/en/rest-api/ml/update-job.asciidoc b/docs/en/rest-api/ml/update-job.asciidoc index e0a934442e4..c11116b8d5c 100644 --- a/docs/en/rest-api/ml/update-job.asciidoc +++ b/docs/en/rest-api/ml/update-job.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04-11 [[ml-update-job]] ==== Update Jobs @@ -59,7 +60,7 @@ The following example updates the `it-ops-kpi` job: [source,js] -------------------------------------------------- -PUT _xpack/ml/anomaly_detectors/it-ops-kpi/_update +POST _xpack/ml/anomaly_detectors/it-ops-kpi/_update { "description":"New description", "analysis_limits":{ @@ -74,10 +75,12 @@ When the job is updated, you receive the following results: ---- { "job_id": "it-ops-kpi", + "job_type": "anomaly_detector", "description": "New description", ... "analysis_limits": { "model_memory_limit": 8192 + }, ... } ---- diff --git a/docs/en/rest-api/ml/update-snapshot.asciidoc b/docs/en/rest-api/ml/update-snapshot.asciidoc index aca7e82a72c..0bd15fcb6c0 100644 --- a/docs/en/rest-api/ml/update-snapshot.asciidoc +++ b/docs/en/rest-api/ml/update-snapshot.asciidoc @@ -1,7 +1,8 @@ +//lcawley Verified example output 2017-04-11 [[ml-update-snapshot]] ==== Update Model Snapshots -The update model snapshot API allows you to update certain properties of a snapshot. +The update model snapshot API enables you to update certain properties of a snapshot. 
===== Request diff --git a/docs/en/rest-api/ml/validate-detector.asciidoc b/docs/en/rest-api/ml/validate-detector.asciidoc index 8331cee33b0..49e852b16f3 100644 --- a/docs/en/rest-api/ml/validate-detector.asciidoc +++ b/docs/en/rest-api/ml/validate-detector.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04-11 [[ml-valid-detector]] ==== Validate Detectors @@ -9,14 +10,11 @@ The validate detectors API validates detector configuration information. ===== Description -TBD +This API enables you to validate the detector configuration before you create a job. //// ===== Path Parameters -`job_id` (required):: -(+string+) Identifier for the job //// ===== Request Body @@ -41,9 +39,9 @@ The following example validates detector configuration information: -------------------------------------------------- POST _xpack/ml/anomaly_detectors/_validate/detector { - "function":"metric", - "field_name":"responsetime", - "by_field_name":"airline" + "function": "metric", + "field_name": "responsetime", + "by_field_name": "airline" } -------------------------------------------------- // CONSOLE diff --git a/docs/en/rest-api/ml/validate-job.asciidoc b/docs/en/rest-api/ml/validate-job.asciidoc index 80ba9e63904..8e2039b93ce 100644 --- a/docs/en/rest-api/ml/validate-job.asciidoc +++ b/docs/en/rest-api/ml/validate-job.asciidoc @@ -1,3 +1,4 @@ +//lcawley Verified example output 2017-04-11 [[ml-valid-job]] ==== Validate Jobs @@ -9,29 +10,16 @@ The validate jobs API validates job configuration information. ===== Description -TBD +This API enables you to validate the job configuration before you create the job. //// ===== Path Parameters -`job_id` (required):: //// ===== Request Body -`description`:: - (+string+) An optional description of the job. - -`analysis_config`:: - (+object+) The analysis configuration, which specifies how to analyze the data. - See <>. - -`data_description`:: - (+object+) Describes the format of the input data. - See <>. 
- -`analysis_limits`:: - Optionally specifies runtime limits for the job. See <>. +For a list of the properties that you can specify in the body of this API, +see <>. //// ===== Responses @@ -56,14 +44,14 @@ POST _xpack/ml/anomaly_detectors/_validate "bucket_span": "300S", "detectors" :[ { - "function":"metric", - "field_name":"responsetime", - "by_field_name":"airline"}], - "influencers" : [ "airline" ] + "function": "metric", + "field_name": "responsetime", + "by_field_name": "airline"}], + "influencers": [ "airline" ] }, "data_description" : { - "time_field":"time", - "time_format":"yyyy-MM-dd'T'HH:mm:ssX" + "time_field": "time", + "time_format": "yyyy-MM-dd'T'HH:mm:ssX" } } --------------------------------------------------