[7.x] [ML] Add _cat/ml/data_frame/analytics API (#52260) (#52312)

2020-02-13 16:55:47 +01:00 · 2020-02-13 16:55:47 +01:00 · 0da3af7581
parent ea6f0e39bc
commit 0da3af7581
7 changed files with 390 additions and 21 deletions
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java
@ -254,6 +254,7 @@ import org.elasticsearch.xpack.ml.rest.calendar.RestGetCalendarsAction;
 import org.elasticsearch.xpack.ml.rest.calendar.RestPostCalendarEventAction;
 import org.elasticsearch.xpack.ml.rest.calendar.RestPutCalendarAction;
 import org.elasticsearch.xpack.ml.rest.calendar.RestPutCalendarJobAction;
+import org.elasticsearch.xpack.ml.rest.cat.RestCatDataFrameAnalyticsAction;
 import org.elasticsearch.xpack.ml.rest.cat.RestCatDatafeedsAction;
 import org.elasticsearch.xpack.ml.rest.cat.RestCatJobsAction;
 import org.elasticsearch.xpack.ml.rest.cat.RestCatTrainedModelsAction;
@ -788,7 +789,8 @@ public class MachineLearning extends Plugin implements SystemIndexPlugin, Analys
            // CAT Handlers
            new RestCatJobsAction(),
            new RestCatTrainedModelsAction(),
-            new RestCatDatafeedsAction()
+            new RestCatDatafeedsAction(),
+            new RestCatDataFrameAnalyticsAction()
        );
    }

--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDataFrameAnalyticsAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDataFrameAnalyticsAction.java
@ -0,0 +1,196 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.rest.cat;
+
+import org.elasticsearch.client.node.NodeClient;
+import org.elasticsearch.cluster.metadata.MetaData;
+import org.elasticsearch.cluster.node.DiscoveryNode;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.common.Table;
+import org.elasticsearch.rest.RestRequest;
+import org.elasticsearch.rest.RestResponse;
+import org.elasticsearch.rest.action.RestActionListener;
+import org.elasticsearch.rest.action.RestResponseListener;
+import org.elasticsearch.rest.action.cat.AbstractCatAction;
+import org.elasticsearch.rest.action.cat.RestTable;
+import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsAction;
+import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsStatsAction;
+import org.elasticsearch.xpack.core.ml.action.GetDataFrameAnalyticsStatsAction.Response.Stats;
+import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
+import org.elasticsearch.xpack.core.ml.utils.PhaseProgress;
+
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+
+import static java.util.Arrays.asList;
+import static java.util.Collections.unmodifiableList;
+import static java.util.stream.Collectors.joining;
+import static java.util.stream.Collectors.toMap;
+import static org.elasticsearch.rest.RestRequest.Method.GET;
+
+public class RestCatDataFrameAnalyticsAction extends AbstractCatAction {
+
+    @Override
+    public List<Route> routes() {
+        return unmodifiableList(asList(
+            new Route(GET, "_cat/ml/data_frame/analytics/{" + DataFrameAnalyticsConfig.ID.getPreferredName() + "}"),
+            new Route(GET, "_cat/ml/data_frame/analytics")));
+    }
+
+    @Override
+    public String getName() {
+        return "cat_ml_get_data_frame_analytics_action";
+    }
+
+    @Override
+    protected RestChannelConsumer doCatRequest(RestRequest restRequest, NodeClient client) {
+        String dataFrameAnalyticsId = restRequest.param(DataFrameAnalyticsConfig.ID.getPreferredName());
+        if (Strings.isNullOrEmpty(dataFrameAnalyticsId)) {
+            dataFrameAnalyticsId = MetaData.ALL;
+        }
+
+        GetDataFrameAnalyticsAction.Request getRequest = new GetDataFrameAnalyticsAction.Request(dataFrameAnalyticsId);
+        getRequest.setAllowNoResources(
+            restRequest.paramAsBoolean(
+                GetDataFrameAnalyticsAction.Request.ALLOW_NO_MATCH.getPreferredName(), getRequest.isAllowNoResources()));
+
+        GetDataFrameAnalyticsStatsAction.Request getStatsRequest = new GetDataFrameAnalyticsStatsAction.Request(dataFrameAnalyticsId);
+        getStatsRequest.setAllowNoMatch(true);
+
+        return channel -> client.execute(
+                GetDataFrameAnalyticsAction.INSTANCE, getRequest, new RestActionListener<GetDataFrameAnalyticsAction.Response>(channel) {
+            @Override
+            public void processResponse(GetDataFrameAnalyticsAction.Response getResponse) {
+                client.execute(
+                        GetDataFrameAnalyticsStatsAction.INSTANCE,
+                        getStatsRequest,
+                        new RestResponseListener<GetDataFrameAnalyticsStatsAction.Response>(channel) {
+                    @Override
+                    public RestResponse buildResponse(GetDataFrameAnalyticsStatsAction.Response getStatsResponse) throws Exception {
+                        return RestTable.buildResponse(buildTable(getResponse, getStatsResponse), channel);
+                    }
+                });
+            }
+        });
+    }
+
+    @Override
+    protected void documentation(StringBuilder sb) {
+        sb.append("/_cat/ml/data_frame/analytics\n");
+        sb.append("/_cat/ml/data_frame/analytics/{").append(DataFrameAnalyticsConfig.ID.getPreferredName()).append("}\n");
+    }
+
+    @Override
+    protected Table getTableWithHeader(RestRequest unused) {
+        return getTableWithHeader();
+    }
+
+    private static Table getTableWithHeader() {
+        return new Table()
+            .startHeaders()
+            // DFA config info
+            .addCell("id", TableColumnAttributeBuilder.builder("the id").build())
+            .addCell("type",
+                TableColumnAttributeBuilder.builder("analysis type")
+                    .setAliases("t")
+                    .build())
+            .addCell("create_time",
+                TableColumnAttributeBuilder.builder("job creation time")
+                    .setAliases("ct", "createTime")
+                    .build())
+            .addCell("version",
+                TableColumnAttributeBuilder.builder("the version of Elasticsearch when the analytics was created", false)
+                    .setAliases("v")
+                    .build())
+            .addCell("source_index",
+                TableColumnAttributeBuilder.builder("source index", false)
+                    .setAliases("si", "sourceIndex")
+                    .build())
+            .addCell("dest_index",
+                TableColumnAttributeBuilder.builder("destination index", false)
+                    .setAliases("di", "destIndex")
+                    .build())
+            .addCell("description",
+                TableColumnAttributeBuilder.builder("description", false)
+                    .setAliases("d")
+                    .build())
+            .addCell("model_memory_limit",
+                TableColumnAttributeBuilder.builder("model memory limit", false)
+                    .setAliases("mml", "modelMemoryLimit")
+                    .build())
+            // DFA stats info
+            .addCell("state",
+                TableColumnAttributeBuilder.builder("job state")
+                    .setAliases("s")
+                    .setTextAlignment(TableColumnAttributeBuilder.TextAlign.RIGHT)
+                    .build())
+            .addCell("failure_reason",
+                TableColumnAttributeBuilder.builder("failure reason", false)
+                    .setAliases("fr", "failureReason")
+                    .build())
+            .addCell("progress",
+                TableColumnAttributeBuilder.builder("progress", false)
+                    .setAliases("p")
+                    .build())
+            .addCell("assignment_explanation",
+                TableColumnAttributeBuilder.builder("why the job is or is not assigned to a node", false)
+                    .setAliases("ae", "assignmentExplanation")
+                    .build())
+            // Node info
+            .addCell("node.id",
+                TableColumnAttributeBuilder.builder("id of the assigned node", false)
+                    .setAliases("ni", "nodeId")
+                    .build())
+            .addCell("node.name",
+                TableColumnAttributeBuilder.builder("name of the assigned node", false)
+                    .setAliases("nn", "nodeName")
+                    .build())
+            .addCell("node.ephemeral_id",
+                TableColumnAttributeBuilder.builder("ephemeral id of the assigned node", false)
+                    .setAliases("ne", "nodeEphemeralId")
+                    .build())
+            .addCell("node.address",
+                TableColumnAttributeBuilder.builder("network address of the assigned node", false)
+                    .setAliases("na", "nodeAddress")
+                    .build())
+            .endHeaders();
+    }
+
+    private static Table buildTable(GetDataFrameAnalyticsAction.Response getResponse,
+                                    GetDataFrameAnalyticsStatsAction.Response getStatsResponse) {
+        Map<String, Stats> statsById = getStatsResponse.getResponse().results().stream().collect(toMap(Stats::getId, Function.identity()));
+        Table table = getTableWithHeader();
+        for (DataFrameAnalyticsConfig config : getResponse.getResources().results()) {
+            Stats stats = statsById.get(config.getId());
+            DiscoveryNode node = stats == null ? null : stats.getNode();
+            table
+                .startRow()
+                .addCell(config.getId())
+                .addCell(config.getAnalysis().getWriteableName())
+                .addCell(config.getCreateTime())
+                .addCell(config.getVersion())
+                .addCell(String.join(",", config.getSource().getIndex()))
+                .addCell(config.getDest().getIndex())
+                .addCell(config.getDescription())
+                .addCell(config.getModelMemoryLimit())
+                .addCell(stats == null ? null : stats.getState())
+                .addCell(stats == null ? null : stats.getFailureReason())
+                .addCell(stats == null ? null : progressToString(stats.getProgress()))
+                .addCell(stats == null ? null : stats.getAssignmentExplanation())
+                .addCell(node == null ? null : node.getId())
+                .addCell(node == null ? null : node.getName())
+                .addCell(node == null ? null : node.getEphemeralId())
+                .addCell(node == null ? null : node.getAddress().toString())
+                .endRow();
+        }
+        return table;
+    }
+
+    private static String progressToString(List<PhaseProgress> phases) {
+        return phases.stream().map(p -> p.getPhase() + ":" + p.getProgressPercent()).collect(joining(","));
+    }
+}
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatDatafeedsAction.java
@ -70,9 +70,9 @@ public class RestCatDatafeedsAction extends AbstractCatAction {
        table.startHeaders();

        // Datafeed Info
-        table.addCell("id", TableColumnAttributeBuilder.builder().setDescription("the datafeed_id").build());
-        table.addCell("state", TableColumnAttributeBuilder.builder()
-            .setDescription("the datafeed state")
+        table.addCell("id", TableColumnAttributeBuilder.builder("the datafeed_id").build());
+        table.addCell("state",
+            TableColumnAttributeBuilder.builder("the datafeed state")
                .setAliases("s")
                .setTextAlignment(TableColumnAttributeBuilder.TextAlign.RIGHT)
                .build());
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatJobsAction.java
@ -75,17 +75,15 @@ public class RestCatJobsAction extends AbstractCatAction {
        table.startHeaders();

        // Job Info
-        table.addCell("id", TableColumnAttributeBuilder.builder().setDescription("the job_id").build());
-        table.addCell("state", TableColumnAttributeBuilder.builder()
-            .setDescription("the job state")
+        table.addCell("id", TableColumnAttributeBuilder.builder("the job_id").build());
+        table.addCell("state",
+            TableColumnAttributeBuilder.builder("the job state")
                .setAliases("s")
                .setTextAlignment(TableColumnAttributeBuilder.TextAlign.RIGHT)
                .build());
        table.addCell("opened_time",
-            TableColumnAttributeBuilder.builder()
-                .setDescription("the amount of time the job has been opened")
+            TableColumnAttributeBuilder.builder("the amount of time the job has been opened", false)
                .setAliases("ot")
-                .setDisplayByDefault(false)
                .build());
        table.addCell("assignment_explanation",
            TableColumnAttributeBuilder.builder("why the job is or is not assigned to a node", false)
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatTrainedModelsAction.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/cat/RestCatTrainedModelsAction.java
@ -130,17 +130,15 @@ public class RestCatTrainedModelsAction extends AbstractCatAction {
        table.startHeaders();

        // Trained Model Info
-        table.addCell("id", TableColumnAttributeBuilder.builder().setDescription("the trained model id").build());
+        table.addCell("id", TableColumnAttributeBuilder.builder("the trained model id").build());
        table.addCell("created_by", TableColumnAttributeBuilder.builder("who created the model", false)
            .setAliases("c", "createdBy")
            .setTextAlignment(TableColumnAttributeBuilder.TextAlign.RIGHT)
            .build());
-        table.addCell("heap_size", TableColumnAttributeBuilder.builder()
-            .setDescription("the estimated heap size to keep the model in memory")
+        table.addCell("heap_size", TableColumnAttributeBuilder.builder("the estimated heap size to keep the model in memory")
            .setAliases("hs","modelHeapSize")
            .build());
-        table.addCell("operations", TableColumnAttributeBuilder.builder()
-            .setDescription("the estimated number of operations to use the model")
+        table.addCell("operations", TableColumnAttributeBuilder.builder("the estimated number of operations to use the model")
            .setAliases("o", "modelOperations")
            .build());
        table.addCell("license", TableColumnAttributeBuilder.builder("The license level of the model", false)
--- a/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml_data_frame_analytics.json
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/api/cat.ml_data_frame_analytics.json
@ -0,0 +1,89 @@
+{
+  "cat.ml_data_frame_analytics":{
+    "documentation":{
+      "url":"http://www.elastic.co/guide/en/elasticsearch/reference/current/get-dfanalytics-stats.html"
+    },
+    "stability":"stable",
+    "url":{
+      "paths":[
+        {
+          "path":"/_cat/ml/data_frame/analytics",
+          "methods":[
+            "GET"
+          ]
+        },
+        {
+          "path":"/_cat/ml/data_frame/analytics/{id}",
+          "methods":[
+            "GET"
+          ],
+          "parts":{
+            "id":{
+              "type":"string",
+              "description":"The ID of the data frame analytics to fetch"
+            }
+          }
+        }
+      ]
+    },
+    "params":{
+      "allow_no_match":{
+        "type":"boolean",
+        "required":false,
+        "description":"Whether to ignore if a wildcard expression matches no configs. (This includes `_all` string or when no configs have been specified)"
+      },
+      "bytes":{
+        "type":"enum",
+        "description":"The unit in which to display byte values",
+        "options":[
+          "b",
+          "k",
+          "kb",
+          "m",
+          "mb",
+          "g",
+          "gb",
+          "t",
+          "tb",
+          "p",
+          "pb"
+        ]
+      },
+      "format":{
+        "type":"string",
+        "description":"a short version of the Accept header, e.g. json, yaml"
+      },
+      "h":{
+        "type":"list",
+        "description":"Comma-separated list of column names to display"
+      },
+      "help":{
+        "type":"boolean",
+        "description":"Return help information",
+        "default":false
+      },
+      "s":{
+        "type":"list",
+        "description":"Comma-separated list of column names or column aliases to sort by"
+      },
+      "time":{
+        "type":"enum",
+        "description":"The unit in which to display time values",
+        "options":[
+          "d (Days)",
+          "h (Hours)",
+          "m (Minutes)",
+          "s (Seconds)",
+          "ms (Milliseconds)",
+          "micros (Microseconds)",
+          "nanos (Nanoseconds)"
+        ]
+      },
+      "v":{
+        "type":"boolean",
+        "description":"Verbose mode. Display column headers",
+        "default":false
+      }
+    }
+  }
+}
--- a/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_cat_apis.yml
+++ b/x-pack/plugin/src/test/resources/rest-api-spec/test/ml/data_frame_analytics_cat_apis.yml
@ -0,0 +1,86 @@
+setup:
+  - skip:
+      features: headers
+  - do:
+      indices.create:
+        index: index-source
+  - do:
+      headers:
+        Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
+      ml.put_data_frame_analytics:
+        id: dfa-outlier-detection-job
+        body:  >
+          {
+            "source": { "index": "index-source" },
+            "dest": { "index": "index-dest-od" },
+            "analysis": {"outlier_detection": {}}
+          }
+  - do:
+      headers:
+        Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
+      ml.put_data_frame_analytics:
+        id: dfa-regression-job
+        body:  >
+          {
+            "source": { "index": "index-source" },
+            "dest": { "index": "index-dest-r" },
+            "analysis": { "regression": { "dependent_variable": "dep_var" } }
+          }
+  - do:
+      headers:
+        Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
+      ml.put_data_frame_analytics:
+        id: dfa-classification-job
+        body:  >
+          {
+            "source": { "index": "index-source" },
+            "dest": { "index": "index-dest-c" },
+            "analysis": { "classification": { "dependent_variable": "dep_var" } }
+          }
+
+---
+"Test cat data frame analytics single job":
+
+  - do:
+      cat.ml_data_frame_analytics:
+        id: dfa-outlier-detection-job
+  - match:
+      $body: |
+        / #id                               type                  create_time state
+        ^ (dfa\-outlier\-detection\-job \s+ outlier_detection \s+ [^\s]+ \s+  stopped \n)+  $/
+
+---
+"Test cat data frame analytics single job with header":
+  - do:
+      cat.ml_data_frame_analytics:
+        id: dfa-outlier-detection-job
+        v: true
+  - match:
+      $body: |
+        /^  id                           \s+ type              \s+ create_time \s+ state   \n
+           (dfa\-outlier\-detection\-job \s+ outlier_detection \s+ [^\s]+      \s+ stopped \n)+  $/
+
+---
+"Test cat data frame analytics all jobs with header":
+  - do:
+      cat.ml_data_frame_analytics:
+        v: true
+  - match:
+      $body: |
+        /^  id                           \s+ type              \s+ create_time \s+ state   \n
+           (dfa\-classification\-job     \s+ classification    \s+ [^\s]+      \s+ stopped \n)+
+           (dfa\-outlier\-detection\-job \s+ outlier_detection \s+ [^\s]+      \s+ stopped \n)+
+           (dfa\-regression\-job         \s+ regression        \s+ [^\s]+      \s+ stopped \n)+  $/
+
+---
+"Test cat data frame analytics all jobs with header and column selection":
+  - do:
+      cat.ml_data_frame_analytics:
+        v: true
+        h: id,t,s,p,source_index,dest_index
+  - match:
+      $body: |
+        /^  id                           \s+ t                 \s+ s       \s+ p                                                         \s+ source_index \s+ dest_index    \n
+           (dfa\-classification\-job     \s+ classification    \s+ stopped \s+ reindexing:0,loading_data:0,analyzing:0,writing_results:0 \s+ index-source \s+ index-dest-c  \n)+
+           (dfa\-outlier\-detection\-job \s+ outlier_detection \s+ stopped \s+ reindexing:0,loading_data:0,analyzing:0,writing_results:0 \s+ index-source \s+ index-dest-od \n)+
+           (dfa\-regression\-job         \s+ regression        \s+ stopped \s+ reindexing:0,loading_data:0,analyzing:0,writing_results:0 \s+ index-source \s+ index-dest-r  \n)+  $/