[ML] Map new fields as keywords in results index (elastic/x-pack-elasticsearch#1387)

Each job introduces new fields to the results index matching
the analysis terms. When the job is created, mappings for those
fields are added explicitly. However, once rollover is introduced,
newly rolled indices will not receive those explicit mappings.
This commit prepares for that by adding a dynamic template that
maps new fields as keyword.

Relates elastic/x-pack-elasticsearch#827

Original commit: elastic/x-pack-elasticsearch@8f6cd09a71
This commit is contained in:
Dimitris Athanasiou 2017-05-11 13:47:31 +01:00 committed by GitHub
parent c4c57e6497
commit 69f9fa8ae9
4 changed files with 71 additions and 2 deletions

View File

@ -249,7 +249,8 @@ public class MachineLearningTemplateRegistry extends AbstractComponent implemen
}
void putJobResultsIndexTemplate(BiConsumer<Boolean, Exception> listener) {
try (XContentBuilder resultsMapping = ElasticsearchMappings.resultsMapping();
try (XContentBuilder defaultMapping = ElasticsearchMappings.defaultMapping();
XContentBuilder resultsMapping = ElasticsearchMappings.resultsMapping();
XContentBuilder categoryDefinitionMapping = ElasticsearchMappings.categoryDefinitionMapping();
XContentBuilder dataCountsMapping = ElasticsearchMappings.dataCountsMapping();
XContentBuilder modelSnapshotMapping = ElasticsearchMappings.modelSnapshotMapping()) {
@ -257,6 +258,7 @@ public class MachineLearningTemplateRegistry extends AbstractComponent implemen
PutIndexTemplateRequest templateRequest = new PutIndexTemplateRequest(AnomalyDetectorsIndex.jobResultsIndexPrefix());
templateRequest.patterns(Collections.singletonList(AnomalyDetectorsIndex.jobResultsIndexPrefix() + "*"));
templateRequest.settings(mlResultsIndexSettings());
templateRequest.mapping(MapperService.DEFAULT_MAPPING, defaultMapping);
templateRequest.mapping(Result.TYPE.getPreferredName(), resultsMapping);
templateRequest.mapping(CategoryDefinition.TYPE.getPreferredName(), categoryDefinitionMapping);
templateRequest.mapping(DataCounts.TYPE.getPreferredName(), dataCountsMapping);

View File

@ -6,6 +6,7 @@
package org.elasticsearch.xpack.ml.job.persistence;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.xpack.ml.job.config.Job;
import org.elasticsearch.xpack.ml.job.process.autodetect.state.CategorizerState;
import org.elasticsearch.xpack.ml.job.process.autodetect.state.DataCounts;
@ -90,6 +91,33 @@ public class ElasticsearchMappings {
private ElasticsearchMappings() {
}
/**
 * Builds the mapping registered under {@link MapperService#DEFAULT_MAPPING}.
 * <p>
 * The mapping holds one dynamic template, {@code strings_as_keywords},
 * whose {@code match} pattern is {@code *}; every dynamically added field
 * is therefore mapped as a keyword. Without it, the per-job term fields
 * would be added automatically as fields of type 'text' to the index
 * mappings of newly rolled indices.
 *
 * @return The builder containing the default mapping
 * @throws IOException On write error
 */
public static XContentBuilder defaultMapping() throws IOException {
    XContentBuilder mapping = jsonBuilder();

    // Root object -> default mapping type -> list of dynamic templates.
    mapping.startObject();
    mapping.startObject(MapperService.DEFAULT_MAPPING);
    mapping.startArray("dynamic_templates");

    // The only template entry: any field name maps to a keyword.
    mapping.startObject();
    mapping.startObject("strings_as_keywords");
    mapping.field("match", "*");
    mapping.startObject("mapping");
    mapping.field(TYPE, KEYWORD);
    mapping.endObject();
    mapping.endObject();
    mapping.endObject();

    // Close the template list, the default mapping type and the root object.
    mapping.endArray();
    mapping.endObject();
    mapping.endObject();

    return mapping;
}
/**
* Create the Elasticsearch mapping for results objects
* {@link Bucket}s, {@link AnomalyRecord}s, {@link Influencer} and

View File

@ -262,11 +262,12 @@ public class MachineLearningTemplateRegistryTests extends ESTestCase {
PutIndexTemplateRequest request = captor.getValue();
assertNotNull(request);
assertEquals(templateRegistry.mlResultsIndexSettings().build(), request.settings());
assertTrue(request.mappings().containsKey("_default_"));
assertTrue(request.mappings().containsKey(Result.TYPE.getPreferredName()));
assertTrue(request.mappings().containsKey(CategoryDefinition.TYPE.getPreferredName()));
assertTrue(request.mappings().containsKey(DataCounts.TYPE.getPreferredName()));
assertTrue(request.mappings().containsKey(ModelSnapshot.TYPE.getPreferredName()));
assertEquals(4, request.mappings().size());
assertEquals(5, request.mappings().size());
assertEquals(Collections.singletonList(AnomalyDetectorsIndex.jobResultsIndexPrefix() + "*"), request.patterns());
assertEquals(new Integer(Version.CURRENT.id), request.version());
});

View File

@ -0,0 +1,38 @@
---
"Test new fields are mapped as keyword":
# Step 1: create a job with a single count detector split by the field "foo".
- do:
xpack.ml.put_job:
job_id: ml-anomalies-default-mappings-job
body: >
{
"analysis_config" : {
"bucket_span": "1h",
"detectors" :[{"function":"count","by_field_name":"foo"}]
},
"data_description" : {
"time_field":"time"
}
}
- match: { job_id: "ml-anomalies-default-mappings-job" }
# Step 2: index a document of type "result" into .ml-anomalies-shared
# containing "new_field", a field that the job configuration above does not mention.
- do:
index:
index: .ml-anomalies-shared
type: result
id: "new_doc"
body: >
{
"new_field": "bar"
}
# Step 3: refresh the index before inspecting it.
- do:
indices.refresh:
index: .ml-anomalies-shared
# Step 4: fetch the mapping of "new_field" and check it was mapped as keyword.
- do:
indices.get_field_mapping:
index: .ml-anomalies-shared
type: result
fields: new_field
- match: {\.ml-anomalies-shared.mappings.result.new_field.mapping.new_field.type: keyword}