[ML][Data Frame] improve pivot nested field validations (#43548) (#43636)

* [ML][Data Frame] improve pivot nested field validations

* addressing pr comments
This commit is contained in:
Benjamin Trent 2019-06-26 13:35:51 -05:00 committed by GitHub
parent c00e305d79
commit 52e26bbc42
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 14 deletions

View File

@ -33,6 +33,8 @@ import java.util.List;
import java.util.Map;
import java.util.Objects;
import static org.elasticsearch.action.ValidateActions.addValidationError;
public class PreviewDataFrameTransformAction extends Action<PreviewDataFrameTransformAction.Response> {
public static final PreviewDataFrameTransformAction INSTANCE = new PreviewDataFrameTransformAction();
@ -94,9 +96,16 @@ public class PreviewDataFrameTransformAction extends Action<PreviewDataFrameTran
@Override
public ActionRequestValidationException validate() {
return null;
ActionRequestValidationException validationException = null;
if(config.getPivotConfig() != null) {
for(String failure : config.getPivotConfig().aggFieldValidation()) {
validationException = addValidationError(failure, validationException);
}
}
return validationException;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
return this.config.toXContent(builder, params);

View File

@ -75,7 +75,7 @@ setup:
"pivot": {
"group_by": {
"airline": {"terms": {"field": "airline"}},
"by-hour": {"date_histogram": {"fixed_interval": "1h", "field": "time", "format": "yyyy-MM-dd HH"}}},
"by-hour": {"date_histogram": {"fixed_interval": "1h", "field": "time"}}},
"aggs": {
"avg_response": {"avg": {"field": "responsetime"}},
"time.max": {"max": {"field": "time"}},
@ -84,17 +84,17 @@ setup:
}
}
- match: { preview.0.airline: foo }
- match: { preview.0.by-hour: "2017-02-18 00" }
- match: { preview.0.by-hour: 1487376000000 }
- match: { preview.0.avg_response: 1.0 }
- match: { preview.0.time.max: "2017-02-18T00:30:00.000Z" }
- match: { preview.0.time.min: "2017-02-18T00:00:00.000Z" }
- match: { preview.1.airline: bar }
- match: { preview.1.by-hour: "2017-02-18 01" }
- match: { preview.1.by-hour: 1487379600000 }
- match: { preview.1.avg_response: 42.0 }
- match: { preview.1.time.max: "2017-02-18T01:00:00.000Z" }
- match: { preview.1.time.min: "2017-02-18T01:00:00.000Z" }
- match: { preview.2.airline: foo }
- match: { preview.2.by-hour: "2017-02-18 01" }
- match: { preview.2.by-hour: 1487379600000 }
- match: { preview.2.avg_response: 42.0 }
- match: { preview.2.time.max: "2017-02-18T01:01:00.000Z" }
- match: { preview.2.time.min: "2017-02-18T01:01:00.000Z" }
@ -123,22 +123,22 @@ setup:
"pivot": {
"group_by": {
"airline": {"terms": {"field": "airline"}},
"by-hour": {"date_histogram": {"fixed_interval": "1h", "field": "time", "format": "yyyy-MM-dd HH"}}},
"by-hour": {"date_histogram": {"fixed_interval": "1h", "field": "time"}}},
"aggs": {
"avg_response": {"avg": {"field": "responsetime"}}
}
}
}
- match: { preview.0.airline: foo }
- match: { preview.0.by-hour: "2017-02-18 00" }
- match: { preview.0.by-hour: 1487376000000 }
- match: { preview.0.avg_response: 1.0 }
- match: { preview.0.my_field: 42 }
- match: { preview.1.airline: bar }
- match: { preview.1.by-hour: "2017-02-18 01" }
- match: { preview.1.by-hour: 1487379600000 }
- match: { preview.1.avg_response: 42.0 }
- match: { preview.1.my_field: 42 }
- match: { preview.2.airline: foo }
- match: { preview.2.by-hour: "2017-02-18 01" }
- match: { preview.2.by-hour: 1487379600000 }
- match: { preview.2.avg_response: 42.0 }
- match: { preview.2.my_field: 42 }
@ -166,7 +166,7 @@ setup:
"pivot": {
"group_by": {
"airline": {"terms": {"field": "airline"}},
"by-hour": {"date_histogram": {"fixed_interval": "1h", "field": "time", "format": "yyyy-MM-dd HH"}}},
"by-hour": {"date_histogram": {"fixed_interval": "1h", "field": "time"}}},
"aggs": {"avg_response": {"avg": {"field": "responsetime"}}}
}
}
@ -180,7 +180,7 @@ setup:
"source": { "index": "airline-data" },
"pivot": {
"group_by": {
"time": {"date_histogram": {"fixed_interval": "1h", "field": "time", "format": "yyyy-MM-DD HH"}}},
"time": {"date_histogram": {"fixed_interval": "1h", "field": "time"}}},
"aggs": {
"avg_response": {"avg": {"field": "responsetime"}},
"time.min": {"min": {"field": "time"}}
@ -189,20 +189,43 @@ setup:
}
- do:
catch: /mixed object types of nested and non-nested fields \[time.min\]/
catch: /field \[time\] cannot be both an object and a field/
data_frame.preview_data_frame_transform:
body: >
{
"source": { "index": "airline-data" },
"pivot": {
"group_by": {
"time": {"date_histogram": {"fixed_interval": "1h", "field": "time", "format": "yyyy-MM-DD HH"}}},
"time": {"date_histogram": {"fixed_interval": "1h", "field": "time"}}},
"aggs": {
"avg_response": {"avg": {"field": "responsetime"}},
"time.min": {"min": {"field": "time"}}
}
}
}
- do:
catch: /field \[super_metric\] cannot be both an object and a field/
data_frame.preview_data_frame_transform:
body: >
{
"source": { "index": "airline-data" },
"pivot": {
"group_by": {
"time": {"date_histogram": {"fixed_interval": "1h", "field": "time"}}},
"aggs": {
"avg_response": {"avg": {"field": "responsetime"}},
"super_metric.time": {"min": {"field": "time"}},
"super_metric": {
"scripted_metric": {
"init_script": "",
"map_script": "",
"combine_script": "",
"reduce_script": "return ['value1': 1, 'value2':2]"
}
}
}
}
}
---
"Test preview with missing pipeline":
- do:
@ -214,7 +237,7 @@ setup:
"dest": { "pipeline": "missing-pipeline" },
"pivot": {
"group_by": {
"time": {"date_histogram": {"fixed_interval": "1h", "field": "time", "format": "yyyy-MM-DD HH"}}},
"time": {"date_histogram": {"fixed_interval": "1h", "field": "time"}}},
"aggs": {
"avg_response": {"avg": {"field": "responsetime"}},
"time.min": {"min": {"field": "time"}}