From 7242267f5de3c23c6b804c24c1536af6a2327974 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Fri, 5 Jul 2019 13:34:05 +0200 Subject: [PATCH] [DOCS] Adds data frame analytics APIs to the ML APIs (#43875) This PR adds the reference documentation pages of the data frame analytics APIs (PUT, START, STOP, GET, GET stats, DELETE, Evaluate) to the ML APIs pool. --- docs/build.gradle | 39 ++++++ .../ml/apis/delete-dfanalytics.asciidoc | 52 +++++++ .../ml/apis/evaluate-dfanalytics.asciidoc | 105 ++++++++++++++ .../ml/apis/get-dfanalytics-stats.asciidoc | 91 +++++++++++++ .../ml/apis/get-dfanalytics.asciidoc | 106 +++++++++++++++ docs/reference/ml/apis/ml-api.asciidoc | 17 +++ .../ml/apis/put-dfanalytics.asciidoc | 128 ++++++++++++++++++ .../ml/apis/start-dfanalytics.asciidoc | 61 +++++++++ .../ml/apis/stop-dfanalytics.asciidoc | 81 +++++++++++ 9 files changed, 680 insertions(+) create mode 100644 docs/reference/ml/apis/delete-dfanalytics.asciidoc create mode 100644 docs/reference/ml/apis/evaluate-dfanalytics.asciidoc create mode 100644 docs/reference/ml/apis/get-dfanalytics-stats.asciidoc create mode 100644 docs/reference/ml/apis/get-dfanalytics.asciidoc create mode 100644 docs/reference/ml/apis/put-dfanalytics.asciidoc create mode 100644 docs/reference/ml/apis/start-dfanalytics.asciidoc create mode 100644 docs/reference/ml/apis/stop-dfanalytics.asciidoc diff --git a/docs/build.gradle b/docs/build.gradle index 08cb2de9713..d13f4ca3b2e 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -1146,3 +1146,42 @@ buildRestTests.setups['kibana_sample_data_ecommerce'] = ''' number_of_shards: 1 number_of_replicas: 0 ''' +buildRestTests.setups['setup_logdata'] = ''' + - do: + indices.create: + index: logdata + body: + settings: + number_of_shards: 1 + number_of_replicas: 1 + mappings: + properties: + grade: + type: byte + - do: + bulk: + index: logdata + refresh: true + body: | + {"index":{}} + {"grade": 100, "weight": 2} + {"index":{}} + 
{"grade": 50, "weight": 3} +''' +buildRestTests.setups['logdata_job'] = buildRestTests.setups['setup_logdata'] + ''' + - do: + ml.put_data_frame_analytics: + id: "loganalytics" + body: > + { + "source": { + "index": "logdata" + }, + "dest": { + "index": "logdata_out" + }, + "analysis": { + "outlier_detection": {} + } + } +''' diff --git a/docs/reference/ml/apis/delete-dfanalytics.asciidoc b/docs/reference/ml/apis/delete-dfanalytics.asciidoc new file mode 100644 index 00000000000..9904cf1fa49 --- /dev/null +++ b/docs/reference/ml/apis/delete-dfanalytics.asciidoc @@ -0,0 +1,52 @@ +[role="xpack"] +[testenv="platinum"] +[[delete-dfanalytics]] +=== Delete {dfanalytics-jobs} API +[subs="attributes"] +++++ +Delete {dfanalytics-jobs} +++++ + +experimental[] + +Deletes an existing {dfanalytics-job}. + +[[ml-delete-dfanalytics-request]] +==== {api-request-title} + +`DELETE _ml/data_frame/analytics/` + +[[ml-delete-dfanalytics-prereq]] +==== {api-prereq-title} + +* You must have `machine_learning_admin` built-in role to use this API. For more +information, see {stack-ov}/security-privileges.html[Security privileges] and +{stack-ov}/built-in-roles.html[Built-in roles]. + +[[ml-delete-dfanalytics-path-params]] +==== {api-path-parms-title} + +`` (Required):: + (string) Identifier for the {dfanalytics-job} you want to delete. 
+ +[[ml-delete-dfanalytics-example]] +==== {api-examples-title} + +The following example deletes the `loganalytics` {dfanalytics-job}: + +[source,js] +-------------------------------------------------- +DELETE _ml/data_frame/analytics/loganalytics +-------------------------------------------------- +// CONSOLE +// TEST[skip:TBD] + +The API returns the following result: + +[source,js] +---- +{ + "acknowledged" : true +} +---- +// TESTRESPONSE \ No newline at end of file diff --git a/docs/reference/ml/apis/evaluate-dfanalytics.asciidoc b/docs/reference/ml/apis/evaluate-dfanalytics.asciidoc new file mode 100644 index 00000000000..9c779f939e2 --- /dev/null +++ b/docs/reference/ml/apis/evaluate-dfanalytics.asciidoc @@ -0,0 +1,105 @@ +[role="xpack"] +[testenv="platinum"] +[[evaluate-dfanalytics]] +=== Evaluate {dfanalytics} API + +[subs="attributes"] +++++ +Evaluate {dfanalytics} +++++ + +experimental[] + +Evaluates the executed analysis on an index that is already annotated with a +field that contains the results of the analytics (the `ground truth`) for each +{dataframe} row. Evaluation is typically done via calculating a set of metrics +that capture various aspects of the quality of the results over the data for +which we have the `ground truth`. For different types of analyses different +metrics are suitable. This API packages together commonly used metrics for +various analyses. + +[[ml-evaluate-dfanalytics-request]] +==== {api-request-title} + +`POST _ml/data_frame/_evaluate` + +[[ml-evaluate-dfanalytics-prereq]] +==== {api-prereq-title} + +* You must have `monitor_ml` privilege to use this API. For more +information, see {stack-ov}/security-privileges.html[Security privileges] and +{stack-ov}/built-in-roles.html[Built-in roles]. + +[[ml-evaluate-dfanalytics-request-body]] +==== {api-request-body-title} + +`index` (Required):: + (object) Defines the `index` in which the evaluation will be performed. 
+ +`evaluation` (Required):: + (object) Defines the type of evaluation you want to perform. For example: + `binary_soft_classification`. + See Evaluate API resources. + +[[ml-evaluate-dfanalytics-example]] +==== {api-examples-title} + +[source,js] +-------------------------------------------------- +POST _ml/data_frame/_evaluate +{ + "index": "my_analytics_dest_index", + "evaluation": { + "binary_soft_classification": { + "actual_field": "is_outlier", + "predicted_probability_field": "ml.outlier_score" + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[skip:TBD] + +The API returns the following results: + +[source,js] +---- +{ + "binary_soft_classification": { + "auc_roc": { + "score": 0.92584757746414444 + }, + "confusion_matrix": { + "0.25": { + "tp": 5, + "fp": 9, + "tn": 204, + "fn": 5 + }, + "0.5": { + "tp": 1, + "fp": 5, + "tn": 208, + "fn": 9 + }, + "0.75": { + "tp": 0, + "fp": 4, + "tn": 209, + "fn": 10 + } + }, + "precision": { + "0.25": 0.35714285714285715, + "0.5": 0.16666666666666666, + "0.75": 0 + }, + "recall": { + "0.25": 0.5, + "0.5": 0.1, + "0.75": 0 + } + } +} +---- +// TESTRESPONSE \ No newline at end of file diff --git a/docs/reference/ml/apis/get-dfanalytics-stats.asciidoc b/docs/reference/ml/apis/get-dfanalytics-stats.asciidoc new file mode 100644 index 00000000000..01014a11e4d --- /dev/null +++ b/docs/reference/ml/apis/get-dfanalytics-stats.asciidoc @@ -0,0 +1,91 @@ +[role="xpack"] +[testenv="platinum"] +[[get-dfanalytics-stats]] +=== Get {dfanalytics-jobs} statistics API +[subs="attributes"] +++++ +Get {dfanalytics-jobs} stats +++++ + +experimental[] + +Retrieves usage information for {dfanalytics-jobs}. 
+ +[[ml-get-dfanalytics-stats-request]] +==== {api-request-title} + +`GET _ml/data_frame/analytics/<data_frame_analytics_id>/_stats` + + +`GET _ml/data_frame/analytics/<data_frame_analytics_id>,<data_frame_analytics_id>/_stats` + + +`GET _ml/data_frame/analytics/_stats` + + +`GET _ml/data_frame/analytics/_all/_stats` + + +`GET _ml/data_frame/analytics/*/_stats` + +[[ml-get-dfanalytics-stats-prereq]] +==== {api-prereq-title} + +* You must have `monitor_ml` privilege to use this API. For more +information, see {stack-ov}/security-privileges.html[Security privileges] and +{stack-ov}/built-in-roles.html[Built-in roles]. + +[[ml-get-dfanalytics-stats-path-params]] +==== {api-path-parms-title} + +`<data_frame_analytics_id>` (Optional):: + (string) Identifier for the {dfanalytics-job}. If you do not specify one of + these options, the API returns information for the first hundred + {dfanalytics-jobs}. + +`allow_no_match` (Optional):: + (boolean) If `false` and the `data_frame_analytics_id` does not match any + {dfanalytics-job} an error will be returned. The default value is `true`. + +[[ml-get-dfanalytics-stats-query-params]] +==== {api-query-parms-title} + +`from` (Optional):: + (integer) Skips the specified number of {dfanalytics-jobs}. The default value + is `0`. + +`size` (Optional):: + (integer) Specifies the maximum number of {dfanalytics-jobs} to obtain. The + default value is `100`. + +[discrete] +[[ml-get-dfanalytics-stats-response-body]] +==== {api-response-body-title} + +The API returns the following information: + +`data_frame_analytics`:: + (array) An array of statistics objects for {dfanalytics-jobs}, which are + sorted by the `id` value in ascending order. 
+ +[[ml-get-dfanalytics-stats-example]] +==== {api-examples-title} + +[source,js] +-------------------------------------------------- +GET _ml/data_frame/analytics/loganalytics/_stats +-------------------------------------------------- +// CONSOLE +// TEST[skip:TBD] + +The API returns the following results: + +[source,js] +---- +{ + "count": 1, + "data_frame_analytics": [ + { + "id": "loganalytics", + "state": "stopped" + } + ] +} +---- +// TESTRESPONSE diff --git a/docs/reference/ml/apis/get-dfanalytics.asciidoc b/docs/reference/ml/apis/get-dfanalytics.asciidoc new file mode 100644 index 00000000000..edf14060cad --- /dev/null +++ b/docs/reference/ml/apis/get-dfanalytics.asciidoc @@ -0,0 +1,106 @@ +[role="xpack"] +[testenv="platinum"] +[[get-dfanalytics]] +=== Get {dfanalytics-jobs} API +[subs="attributes"] +++++ +Get {dfanalytics-jobs} +++++ + +experimental[] + +Retrieves configuration information for {dfanalytics-jobs}. + +[[ml-get-dfanalytics-request]] +==== {api-request-title} + +`GET _ml/data_frame/analytics/<data_frame_analytics_id>` + + +`GET _ml/data_frame/analytics/<data_frame_analytics_id>,<data_frame_analytics_id>` + + +`GET _ml/data_frame/analytics/` + + +`GET _ml/data_frame/analytics/_all` + +[[ml-get-dfanalytics-prereq]] +==== {api-prereq-title} + +* You must have `monitor_ml` privilege to use this API. For more +information, see {stack-ov}/security-privileges.html[Security privileges] and +{stack-ov}/built-in-roles.html[Built-in roles]. + +[[ml-get-dfanalytics-desc]] +==== {api-description-title} + +You can get information for multiple {dfanalytics-jobs} in a single API request +by using a comma-separated list of {dfanalytics-jobs} or a wildcard expression. +You can get information for all {dfanalytics-jobs} by using _all, by specifying +`*` as the `<data_frame_analytics_id>`, or by omitting the +`<data_frame_analytics_id>`. + +[[ml-get-dfanalytics-path-params]] +==== {api-path-parms-title} + +`<data_frame_analytics_id>` (Optional):: + (string) Identifier for the {dfanalytics-job}. If you do not specify one of + these options, the API returns information for the first hundred + {dfanalytics-jobs}. 
+ +`allow_no_match` (Optional):: + (boolean) If `false` and the `data_frame_analytics_id` does not match any + {dfanalytics-job} an error will be returned. The default value is `true`. + +[[ml-get-dfanalytics-query-params]] +==== {api-query-parms-title} + +`from` (Optional):: + (integer) Skips the specified number of {dfanalytics-jobs}. The default value + is `0`. + +`size` (Optional):: + (integer) Specifies the maximum number of {dfanalytics-jobs} to obtain. The + default value is `100`. + +[[ml-get-dfanalytics-example]] +==== {api-examples-title} + +The following example gets configuration information for the `loganalytics` +{dfanalytics-job}: + +[source,js] +-------------------------------------------------- +GET _ml/data_frame/analytics/loganalytics +-------------------------------------------------- +// CONSOLE +// TEST[skip:TBD] + +The API returns the following results: + +[source,js] +---- +{ + "count": 1, + "data_frame_analytics": [ + { + "id": "loganalytics", + "source": { + "index": "logdata", + "query": { + "match_all": {} + } + }, + "dest": { + "index": "logdata_out", + "results_field": "ml" + }, + "analysis": { + "outlier_detection": {} + }, + "model_memory_limit": "1gb", + "create_time": 1562265491319, + "version" : "8.0.0" + } + ] +} +---- +// TESTRESPONSE diff --git a/docs/reference/ml/apis/ml-api.asciidoc b/docs/reference/ml/apis/ml-api.asciidoc index 7933dea85ce..54a7b4e60c1 100644 --- a/docs/reference/ml/apis/ml-api.asciidoc +++ b/docs/reference/ml/apis/ml-api.asciidoc @@ -34,7 +34,16 @@ machine learning APIs and in advanced job configuration options in Kibana. 
* <> * <> +[float] +[[ml-api-dfanalytics-endpoint]] +=== {dfanalytics-cap} APIs +* <> or +<> +* <> or +<> +* <> or <> +* <> [float] [[ml-api-job-endpoint]] @@ -104,6 +113,7 @@ include::put-calendar.asciidoc[] include::put-datafeed.asciidoc[] include::put-filter.asciidoc[] include::put-job.asciidoc[] +include::put-dfanalytics.asciidoc[] //DELETE include::delete-calendar.asciidoc[] include::delete-datafeed.asciidoc[] @@ -114,6 +124,9 @@ include::delete-job.asciidoc[] include::delete-calendar-job.asciidoc[] include::delete-snapshot.asciidoc[] include::delete-expired-data.asciidoc[] +include::delete-dfanalytics.asciidoc[] +//EVALUATE +include::evaluate-dfanalytics.asciidoc[] //FIND include::find-file-structure.asciidoc[] //FLUSH @@ -135,6 +148,8 @@ include::get-snapshot.asciidoc[] include::get-calendar-event.asciidoc[] include::get-filter.asciidoc[] include::get-record.asciidoc[] +include::get-dfanalytics.asciidoc[] +include::get-dfanalytics-stats.asciidoc[] //OPEN include::open-job.asciidoc[] //POST @@ -146,7 +161,9 @@ include::revert-snapshot.asciidoc[] //SET/START/STOP include::set-upgrade-mode.asciidoc[] include::start-datafeed.asciidoc[] +include::start-dfanalytics.asciidoc[] include::stop-datafeed.asciidoc[] +include::stop-dfanalytics.asciidoc[] //UPDATE include::update-datafeed.asciidoc[] include::update-filter.asciidoc[] diff --git a/docs/reference/ml/apis/put-dfanalytics.asciidoc b/docs/reference/ml/apis/put-dfanalytics.asciidoc new file mode 100644 index 00000000000..8499950c2fb --- /dev/null +++ b/docs/reference/ml/apis/put-dfanalytics.asciidoc @@ -0,0 +1,128 @@ +[role="xpack"] +[testenv="platinum"] +[[put-dfanalytics]] +=== Create {dfanalytics-jobs} API +[subs="attributes"] +++++ +Create {dfanalytics-jobs} +++++ + +experimental[] + +Instantiates a {dfanalytics-job}. 
+ +[[ml-put-dfanalytics-request]] +==== {api-request-title} + +`PUT _ml/data_frame/analytics/<data_frame_analytics_id>` + +[[ml-put-dfanalytics-prereq]] +==== {api-prereq-title} + +* You must have the `machine_learning_admin` built-in role to use this API. You must +also have `read` and `view_index_metadata` privileges on the source index and +`read`, `create_index`, and `index` privileges on the destination index. For +more information, see {stack-ov}/security-privileges.html[Security privileges] +and {stack-ov}/built-in-roles.html[Built-in roles]. + +[[ml-put-dfanalytics-desc]] +==== {api-description-title} + +This API creates a {dfanalytics-job} that performs an analysis on the source +index and stores the outcome in a destination index. + +The destination index will be automatically created if it does not exist. The +`index.number_of_shards` and `index.number_of_replicas` settings of the source +index will be copied over to the destination index. When the source index matches +multiple indices, these settings will be set to the maximum values found in the +source indices. + +The API also attempts to copy the mappings of the source indices over to the +destination index; however, if the mappings of any of the fields don't match +among the source indices, the attempt fails with an error message. + +If the destination index already exists, then it will be used as is. This makes +it possible to set up the destination index in advance with custom settings +and mappings. + +[[ml-put-dfanalytics-path-params]] +==== {api-path-parms-title} + +`<data_frame_analytics_id>` (Required):: + (string) A string that uniquely identifies the + {dfanalytics-job}. This identifier can contain lowercase alphanumeric characters + (a-z and 0-9), hyphens, and underscores. It must start and end with alphanumeric + characters. + +[[ml-put-dfanalytics-request-body]] +==== {api-request-body-title} + +`source` (Required):: + (object) The source configuration, consisting of `index` and optionally a + `query`. 
+ +`dest` (Required):: + (object) The destination configuration, consisting of `index` and optionally + `results_field` (`ml` by default). + +`analysis` (Required):: + (object) Defines the type of {dfanalytics} you want to perform on your source + index. For example: `outlier_detection`. + See {oldetection} resources. + +`analyzed_fields` (Optional):: + (object) You can specify `includes` and/or `excludes` patterns. If + `analyzed_fields` is not set, only the relevant fields will be included. For + example, all the numeric fields for {oldetection}. + +[[ml-put-dfanalytics-example]] +==== {api-examples-title} + +The following example creates the `loganalytics` {dfanalytics-job}, the analysis +type is `outlier_detection`: + +[source,js] +-------------------------------------------------- +PUT _ml/data_frame/analytics/loganalytics +{ + "source": { + "index": "logdata" + }, + "dest": { + "index": "logdata_out" + }, + "analysis": { + "outlier_detection": { + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[setup:setup_logdata] + +The API returns the following result: + +[source,js] +---- +{ + "id": "loganalytics", + "source": { + "index": ["logdata"], + "query": { + "match_all": {} + } + }, + "dest": { + "index": "logdata_out", + "results_field": "ml" + }, + "analysis": { + "outlier_detection": {} + }, + "model_memory_limit": "1gb", + "create_time" : 1562265491319, + "version" : "8.0.0" +} +---- +// TESTRESPONSE[s/1562265491319/$body.$_path/] +// TESTRESPONSE[s/"version": "8.0.0"/"version": $body.version/] \ No newline at end of file diff --git a/docs/reference/ml/apis/start-dfanalytics.asciidoc b/docs/reference/ml/apis/start-dfanalytics.asciidoc new file mode 100644 index 00000000000..4b2c774ae3b --- /dev/null +++ b/docs/reference/ml/apis/start-dfanalytics.asciidoc @@ -0,0 +1,61 @@ +[role="xpack"] +[testenv="platinum"] +[[start-dfanalytics]] +=== Start {dfanalytics-jobs} API + +[subs="attributes"] +++++ +Start {dfanalytics-jobs} 
+++++ + +experimental[] + +Starts a {dfanalytics-job}. + +[[ml-start-dfanalytics-request]] +==== {api-request-title} + +`POST _ml/data_frame/analytics/<data_frame_analytics_id>/_start` + +[[ml-start-dfanalytics-prereq]] +==== {api-prereq-title} + +* You must have `machine_learning_admin` built-in role to use this API. You must +also have `read` and `view_index_metadata` privileges on the source index and +`read`, `create_index`, and `index` privileges on the destination index. For +more information, see {stack-ov}/security-privileges.html[Security privileges] +and {stack-ov}/built-in-roles.html[Built-in roles]. + +[[ml-start-dfanalytics-path-params]] +==== {api-path-parms-title} + +`<data_frame_analytics_id>` (Required):: + (string) Identifier for the {dfanalytics-job}. This identifier can contain + lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It + must start and end with alphanumeric characters. + +`timeout` (Optional):: + (time) Controls the amount of time to wait until the {dfanalytics-job} starts. + The default value is 20 seconds. 
+ +[[ml-start-dfanalytics-example]] +==== {api-examples-title} + +The following example starts the `loganalytics` {dfanalytics-job}: + +[source,js] +-------------------------------------------------- +POST _ml/data_frame/analytics/loganalytics/_start +-------------------------------------------------- +// CONSOLE +// TEST[skip:setup:logdata_job] + +When the {dfanalytics-job} starts, you receive the following results: + +[source,js] +---- +{ + "acknowledged" : true +} +---- +// TESTRESPONSE \ No newline at end of file diff --git a/docs/reference/ml/apis/stop-dfanalytics.asciidoc b/docs/reference/ml/apis/stop-dfanalytics.asciidoc new file mode 100644 index 00000000000..70b1d8454f8 --- /dev/null +++ b/docs/reference/ml/apis/stop-dfanalytics.asciidoc @@ -0,0 +1,81 @@ +[role="xpack"] +[testenv="platinum"] +[[stop-dfanalytics]] +=== Stop {dfanalytics-jobs} API + +[subs="attributes"] +++++ +Stop {dfanalytics-jobs} +++++ + +experimental[] + +Stops one or more {dfanalytics-jobs}. + +[[ml-stop-dfanalytics-request]] +==== {api-request-title} + +`POST _ml/data_frame/analytics/<data_frame_analytics_id>/_stop` + + +`POST _ml/data_frame/analytics/<data_frame_analytics_id>,<data_frame_analytics_id>/_stop` + + +`POST _ml/data_frame/analytics/_all/_stop` + +[[ml-stop-dfanalytics-prereq]] +==== {api-prereq-title} + +* You must have `machine_learning_admin` built-in role to use this API. For more +information, see {stack-ov}/security-privileges.html[Security privileges] and +{stack-ov}/built-in-roles.html[Built-in roles]. + +[[ml-stop-dfanalytics-desc]] +==== {api-description-title} + +A {dfanalytics-job} can be started and stopped multiple times throughout its +lifecycle. + +You can stop multiple {dfanalytics-jobs} in a single API request by using a +comma-separated list of {dfanalytics-jobs} or a wildcard expression. You can +stop all {dfanalytics-jobs} by using `_all` or by specifying `*` as the +`<data_frame_analytics_id>`. + +[[ml-stop-dfanalytics-path-params]] +==== {api-path-parms-title} + +`<data_frame_analytics_id>` (Required):: + (string) Identifier for the {dfanalytics-job}. 
This identifier can contain + lowercase alphanumeric characters (a-z and 0-9), hyphens, and underscores. It + must start and end with alphanumeric characters. + +`timeout` (Optional):: + (time) Controls the amount of time to wait until the {dfanalytics-job} stops. + The default value is 20 seconds. + +`force` (Optional):: + (boolean) If `true`, the {dfanalytics-job} is stopped forcefully. + +`allow_no_match` (Optional):: + (boolean) If `false` and the `data_frame_analytics_id` does not match any + {dfanalytics-job} an error will be returned. The default value is `true`. + +[[ml-stop-dfanalytics-example]] +==== {api-examples-title} + +The following example stops the `loganalytics` {dfanalytics-job}: + +[source,js] +-------------------------------------------------- +POST _ml/data_frame/analytics/loganalytics/_stop +-------------------------------------------------- +// CONSOLE +// TEST[skip:TBD] + +When the {dfanalytics-job} stops, you receive the following results: + +[source,js] +---- +{ + "stopped" : true +} +---- +// TESTRESPONSE \ No newline at end of file