From f085b935f63928508ed26c6c56963d13f7e14ed8 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Wed, 14 Dec 2016 16:32:36 +0000 Subject: [PATCH] Custom all mapping for interesting result fields (elastic/elasticsearch#535) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Check use of mappings * Add unit tests for JobProvider.createJobRelatedIndices * Remove ‘index: no’ from mappings as no longer required The entire type mapping has ‘enabled: false’ * Restore “index.analysis.analyzer.default.type” setting * Remove include_in_all from nested mappings * Add audit and usage mappings to the job index * Revert ‘Restore “index.analysis.analyzer.default.type” setting’ Original commit: elastic/x-pack-elasticsearch@c7d62e0c7e12af73d8cece15a29b26c4d91c22e2 --- .../persistence/ElasticsearchMappings.java | 547 +++++++++--------- .../prelert/job/persistence/JobProvider.java | 30 +- .../job/results/ReservedFieldNames.java | 3 + .../ElasticsearchMappingsTests.java | 4 - .../job/persistence/JobProviderTests.java | 56 +- .../job/persistence/MockClientBuilder.java | 11 +- 6 files changed, 363 insertions(+), 288 deletions(-) diff --git a/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/persistence/ElasticsearchMappings.java b/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/persistence/ElasticsearchMappings.java index acbf62af3db..e3c368756e4 100644 --- a/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/persistence/ElasticsearchMappings.java +++ b/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/persistence/ElasticsearchMappings.java @@ -45,27 +45,32 @@ import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; * It is expected that indexes to which these mappings are applied have their * default analyzer set to "keyword", which does not tokenise fields. 
The * index-wide default analyzer cannot be set via these mappings, so needs to be - * set in the index settings during index creation. Then the _all field has its - * analyzer set to "whitespace" by these mappings, so that _all gets tokenised + * set in the index settings during index creation. For the results mapping the + * _all field is disabled and a custom all field is used in its place. The index + * settings must have {@code "index.query.default_field": "all_field_values" } set + * for the queries to use the custom all field. The custom all field has its + * analyzer set to "whitespace" by these mappings, so that it gets tokenised * using whitespace. */ public class ElasticsearchMappings { /** * String constants used in mappings */ - static final String INDEX = "index"; - static final String NO = "false"; static final String ALL = "_all"; static final String ENABLED = "enabled"; static final String ANALYZER = "analyzer"; static final String WHITESPACE = "whitespace"; - static final String INCLUDE_IN_ALL = "include_in_all"; static final String NESTED = "nested"; static final String COPY_TO = "copy_to"; static final String PROPERTIES = "properties"; static final String TYPE = "type"; static final String DYNAMIC = "dynamic"; + /** + * Name of the custom 'all' field for results + */ + public static final String ALL_FIELD_VALUES = "all_field_values"; + /** * Name of the Elasticsearch field by which documents are sorted by default */ @@ -88,10 +93,26 @@ public class ElasticsearchMappings { /** * Create the Elasticsearch mapping for results objects - * {@link Bucket}s, {@link AnomalyRecord}s, {@link Influencer}, - * {@link BucketInfluencer} and {@link CategoryDefinition} + * {@link Bucket}s, {@link AnomalyRecord}s, {@link Influencer} and + * {@link BucketInfluencer} * - * The '_all' field is disabled as the document isn't meant to be searched. + * The mapping has a custom all field containing the *_FIELD_VALUE fields + * e.g. BY_FIELD_VALUE, OVER_FIELD_VALUE, etc. 
The custom all field {@link #ALL_FIELD_VALUES} + * must be set in the index settings. A custom all field is preferred over the usual + * '_all' field as most fields do not belong in '_all', disabling '_all' and + * using a custom all field simplifies the mapping. + * + * These fields are copied to the custom all field + * * * @param termFieldNames All the term fields (by, over, partition) and influencers * included in the mapping @@ -105,17 +126,17 @@ public class ElasticsearchMappings { .startObject(Result.TYPE.getPreferredName()) .startObject(ALL) .field(ENABLED, false) - // analyzer must be specified even though _all is disabled - // because all types in the same index must have the same - // analyzer for a given field - .field(ANALYZER, WHITESPACE) .endObject() .startObject(PROPERTIES) + .startObject(ALL_FIELD_VALUES) + .field(TYPE, TEXT) + .field(ANALYZER, WHITESPACE) + .endObject() .startObject(Result.RESULT_TYPE.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(Job.ID.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(Bucket.TIMESTAMP.getPreferredName()) .field(TYPE, DATE) @@ -148,7 +169,7 @@ public class ElasticsearchMappings { .field(TYPE, NESTED) .startObject(PROPERTIES) .startObject(AnomalyRecord.PARTITION_FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyRecord.PARTITION_FIELD_VALUE.getPreferredName()) .field(TYPE, KEYWORD) @@ -169,7 +190,7 @@ public class ElasticsearchMappings { .field(TYPE, NESTED) .startObject(PROPERTIES) .startObject(BucketInfluencer.INFLUENCER_FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(BucketInfluencer.RAW_ANOMALY_SCORE.getPreferredName()) .field(TYPE, DOUBLE) @@ -177,7 +198,7 @@ public class 
ElasticsearchMappings { .endObject() .endObject() .startObject(BucketInfluencer.INFLUENCER_FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() // per-partition max probabilities mapping @@ -195,16 +216,16 @@ public class ElasticsearchMappings { // Model Debug Output .startObject(ModelDebugOutput.DEBUG_FEATURE.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(ModelDebugOutput.DEBUG_LOWER.getPreferredName()) - .field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false) + .field(TYPE, DOUBLE) .endObject() .startObject(ModelDebugOutput.DEBUG_UPPER.getPreferredName()) - .field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false) + .field(TYPE, DOUBLE) .endObject() .startObject(ModelDebugOutput.DEBUG_MEDIAN.getPreferredName()) - .field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false) + .field(TYPE, DOUBLE) .endObject(); addAnomalyRecordFieldsToMapping(builder); @@ -240,94 +261,101 @@ public class ElasticsearchMappings { private static XContentBuilder addAnomalyRecordFieldsToMapping(XContentBuilder builder) throws IOException { builder.startObject(AnomalyRecord.DETECTOR_INDEX.getPreferredName()) - .field(TYPE, INTEGER).field(INCLUDE_IN_ALL, false) + .field(TYPE, INTEGER) .endObject() .startObject(AnomalyRecord.SEQUENCE_NUM.getPreferredName()) - .field(TYPE, INTEGER).field(INCLUDE_IN_ALL, false) + .field(TYPE, INTEGER) .endObject() .startObject(AnomalyRecord.ACTUAL.getPreferredName()) - .field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false) + .field(TYPE, DOUBLE) .endObject() .startObject(AnomalyRecord.TYPICAL.getPreferredName()) - .field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false) + .field(TYPE, DOUBLE) .endObject() .startObject(AnomalyRecord.PROBABILITY.getPreferredName()) - .field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false) + .field(TYPE, DOUBLE) .endObject() .startObject(AnomalyRecord.FUNCTION.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, 
false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyRecord.FUNCTION_DESCRIPTION.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyRecord.BY_FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyRecord.BY_FIELD_VALUE.getPreferredName()) .field(TYPE, KEYWORD) + .field(COPY_TO, ALL_FIELD_VALUES) .endObject() .startObject(AnomalyRecord.FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyRecord.PARTITION_FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyRecord.PARTITION_FIELD_VALUE.getPreferredName()) .field(TYPE, KEYWORD) + .field(COPY_TO, ALL_FIELD_VALUES) .endObject() .startObject(AnomalyRecord.OVER_FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyRecord.OVER_FIELD_VALUE.getPreferredName()) .field(TYPE, KEYWORD) + .field(COPY_TO, ALL_FIELD_VALUES) .endObject() .startObject(AnomalyRecord.NORMALIZED_PROBABILITY.getPreferredName()) - .field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false) + .field(TYPE, DOUBLE) .endObject() .startObject(AnomalyRecord.INITIAL_NORMALIZED_PROBABILITY.getPreferredName()) - .field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false) + .field(TYPE, DOUBLE) .endObject() .startObject(AnomalyRecord.CAUSES.getPreferredName()) .field(TYPE, NESTED) .startObject(PROPERTIES) .startObject(AnomalyCause.ACTUAL.getPreferredName()) - .field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false) + .field(TYPE, DOUBLE) .endObject() .startObject(AnomalyCause.TYPICAL.getPreferredName()) - .field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false) + .field(TYPE, DOUBLE) .endObject() .startObject(AnomalyCause.PROBABILITY.getPreferredName()) - .field(TYPE, 
DOUBLE).field(INCLUDE_IN_ALL, false) + .field(TYPE, DOUBLE) .endObject() .startObject(AnomalyCause.FUNCTION.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyCause.FUNCTION_DESCRIPTION.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyCause.BY_FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyCause.BY_FIELD_VALUE.getPreferredName()) .field(TYPE, KEYWORD) + .field(COPY_TO, ALL_FIELD_VALUES) .endObject() .startObject(AnomalyCause.CORRELATED_BY_FIELD_VALUE.getPreferredName()) .field(TYPE, KEYWORD) + .field(COPY_TO, ALL_FIELD_VALUES) .endObject() .startObject(AnomalyCause.FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyCause.PARTITION_FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyCause.PARTITION_FIELD_VALUE.getPreferredName()) .field(TYPE, KEYWORD) + .field(COPY_TO, ALL_FIELD_VALUES) .endObject() .startObject(AnomalyCause.OVER_FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(AnomalyCause.OVER_FIELD_VALUE.getPreferredName()) .field(TYPE, KEYWORD) + .field(COPY_TO, ALL_FIELD_VALUES) .endObject() .endObject() .endObject() @@ -336,10 +364,11 @@ public class ElasticsearchMappings { .field(TYPE, NESTED) .startObject(PROPERTIES) .startObject(Influence.INFLUENCER_FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(Influence.INFLUENCER_FIELD_VALUES.getPreferredName()) .field(TYPE, KEYWORD) + .field(COPY_TO, ALL_FIELD_VALUES) .endObject() .endObject() .endObject(); @@ -349,146 
+378,144 @@ public class ElasticsearchMappings { private static XContentBuilder addInfluencerFieldsToMapping(XContentBuilder builder) throws IOException { builder.startObject(Influencer.INFLUENCER_FIELD_NAME.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject() .startObject(Influencer.INFLUENCER_FIELD_VALUE.getPreferredName()) - .field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false) + .field(TYPE, KEYWORD) .endObject(); return builder; } + /** + * {@link DataCounts} mapping. + * The type is disabled so {@link DataCounts} aren't searchable and + * the '_all' field is disabled + * + * @return The builder + * @throws IOException On builder write error + */ public static XContentBuilder dataCountsMapping() throws IOException { return jsonBuilder() .startObject() - .startObject(DataCounts.TYPE.getPreferredName()) - .startObject(ALL) - .field(ENABLED, false) - // analyzer must be specified even though _all is disabled - // because all types in the same index must have the same - // analyzer for a given field - .field(ANALYZER, WHITESPACE) - .endObject() - .startObject(PROPERTIES) - .startObject(Job.ID.getPreferredName()) - .field(TYPE, KEYWORD) - .endObject() - .startObject(DataCounts.PROCESSED_RECORD_COUNT.getPreferredName()) - .field(TYPE, LONG) - .endObject() - .startObject(DataCounts.PROCESSED_FIELD_COUNT.getPreferredName()) - .field(TYPE, LONG) - .endObject() - .startObject(DataCounts.INPUT_BYTES.getPreferredName()) - .field(TYPE, LONG) - .endObject() - .startObject(DataCounts.INPUT_RECORD_COUNT.getPreferredName()) - .field(TYPE, LONG) - .endObject() - .startObject(DataCounts.INPUT_FIELD_COUNT.getPreferredName()) - .field(TYPE, LONG) - .endObject() - .startObject(DataCounts.INVALID_DATE_COUNT.getPreferredName()) - .field(TYPE, LONG) - .endObject() - .startObject(DataCounts.MISSING_FIELD_COUNT.getPreferredName()) - .field(TYPE, LONG) - .endObject() - 
.startObject(DataCounts.OUT_OF_ORDER_TIME_COUNT.getPreferredName()) - .field(TYPE, LONG) - .endObject() - .startObject(DataCounts.EARLIEST_RECORD_TIME.getPreferredName()) - .field(TYPE, DATE) - .endObject() - .startObject(DataCounts.LATEST_RECORD_TIME.getPreferredName()) - .field(TYPE, DATE) - .endObject() - .endObject() - .endObject() + .startObject(DataCounts.TYPE.getPreferredName()) + .field(ENABLED, false) + .startObject(ALL) + .field(ENABLED, false) + .endObject() + .startObject(PROPERTIES) + .startObject(Job.ID.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(DataCounts.PROCESSED_RECORD_COUNT.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(DataCounts.PROCESSED_FIELD_COUNT.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(DataCounts.INPUT_BYTES.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(DataCounts.INPUT_RECORD_COUNT.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(DataCounts.INPUT_FIELD_COUNT.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(DataCounts.INVALID_DATE_COUNT.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(DataCounts.MISSING_FIELD_COUNT.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(DataCounts.OUT_OF_ORDER_TIME_COUNT.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(DataCounts.EARLIEST_RECORD_TIME.getPreferredName()) + .field(TYPE, DATE) + .endObject() + .startObject(DataCounts.LATEST_RECORD_TIME.getPreferredName()) + .field(TYPE, DATE) + .endObject() + .endObject() + .endObject() .endObject(); } + /** + * {@link CategorizerState} mapping. 
+ * The type is disabled so {@link CategorizerState} is not searchable and + * the '_all' field is disabled + * + * @return The builder + * @throws IOException On builder write error + */ public static XContentBuilder categorizerStateMapping() throws IOException { return jsonBuilder() .startObject() - .startObject(CategorizerState.TYPE) - .field(ENABLED, false) - .startObject(ALL) - .field(ENABLED, false) - // analyzer must be specified even though _all is disabled - // because all types in the same index must have the same - // analyzer for a given field - .field(ANALYZER, WHITESPACE) - .endObject() - .endObject() + .startObject(CategorizerState.TYPE) + .field(ENABLED, false) + .startObject(ALL) + .field(ENABLED, false) + .endObject() + .endObject() .endObject(); } /** * Create the Elasticsearch mapping for {@linkplain Quantiles}. - * The '_all' field is disabled as the document isn't meant to be searched. + * The type is disabled as is the '_all' field as the document isn't meant to be searched. *

- * The quantile state string is not searchable (index = 'no') as it could be + * The quantile state string is not searchable (enabled = false) as it could be * very large. */ public static XContentBuilder quantilesMapping() throws IOException { return jsonBuilder() .startObject() - .startObject(Quantiles.TYPE.getPreferredName()) - .startObject(ALL) - .field(ENABLED, false) - // analyzer must be specified even though _all is disabled - // because all types in the same index must have the same - // analyzer for a given field - .field(ANALYZER, WHITESPACE) - .endObject() - .startObject(PROPERTIES) - .startObject(Job.ID.getPreferredName()) - .field(TYPE, KEYWORD) - .endObject() - .startObject(Quantiles.TIMESTAMP.getPreferredName()) - .field(TYPE, DATE) - .endObject() - .startObject(Quantiles.QUANTILE_STATE.getPreferredName()) - .field(TYPE, TEXT).field(INDEX, NO) - .endObject() - .endObject() - .endObject() + .startObject(Quantiles.TYPE.getPreferredName()) + .field(ENABLED, false) + .startObject(ALL) + .field(ENABLED, false) + .endObject() + .endObject() .endObject(); } + /** + * Create the Elasticsearch mapping for {@linkplain CategoryDefinition}. + * The '_all' field is disabled as the document isn't meant to be searched. 
+ * + * @return The builder + * @throws IOException On builder error + */ public static XContentBuilder categoryDefinitionMapping() throws IOException { return jsonBuilder() .startObject() - .startObject(CategoryDefinition.TYPE.getPreferredName()) - .startObject(ALL) - .field(ENABLED, false) - // analyzer must be specified even though _all is disabled - // because all types in the same index must have the same - // analyzer for a given field - .field(ANALYZER, WHITESPACE) - .endObject() - .startObject(PROPERTIES) - .startObject(CategoryDefinition.CATEGORY_ID.getPreferredName()) - .field(TYPE, LONG) - .endObject() - .startObject(Job.ID.getPreferredName()) - .field(TYPE, KEYWORD) - .endObject() - .startObject(CategoryDefinition.TERMS.getPreferredName()) - .field(TYPE, TEXT).field(INDEX, NO) - .endObject() - .startObject(CategoryDefinition.REGEX.getPreferredName()) - .field(TYPE, TEXT).field(INDEX, NO) - .endObject() - .startObject(CategoryDefinition.MAX_MATCHING_LENGTH.getPreferredName()) - .field(TYPE, LONG) - .endObject() - .startObject(CategoryDefinition.EXAMPLES.getPreferredName()) - .field(TYPE, TEXT).field(INDEX, NO) - .endObject() - .endObject() - .endObject() + .startObject(CategoryDefinition.TYPE.getPreferredName()) + .startObject(ALL) + .field(ENABLED, false) + .endObject() + .startObject(PROPERTIES) + .startObject(CategoryDefinition.CATEGORY_ID.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(Job.ID.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(CategoryDefinition.TERMS.getPreferredName()) + .field(TYPE, TEXT) + .endObject() + .startObject(CategoryDefinition.REGEX.getPreferredName()) + .field(TYPE, TEXT) + .endObject() + .startObject(CategoryDefinition.MAX_MATCHING_LENGTH.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(CategoryDefinition.EXAMPLES.getPreferredName()) + .field(TYPE, TEXT) + .endObject() + .endObject() + .endObject() .endObject(); } @@ -502,90 +529,79 @@ public class 
ElasticsearchMappings { public static XContentBuilder modelStateMapping() throws IOException { return jsonBuilder() .startObject() - .startObject(ModelState.TYPE.getPreferredName()) - .field(ENABLED, false) - .startObject(ALL) - .field(ENABLED, false) - // analyzer must be specified even though _all is disabled - // because all types in the same index must have the same - // analyzer for a given field - .field(ANALYZER, WHITESPACE) - .endObject() - .endObject() + .startObject(ModelState.TYPE.getPreferredName()) + .field(ENABLED, false) + .startObject(ALL) + .field(ENABLED, false) + .endObject() + .endObject() .endObject(); } /** - * Create the Elasticsearch mapping for {@linkplain ModelState}. - * The model state could potentially be huge (over a gigabyte in size) - * so all analysis by Elasticsearch is disabled. The only way to - * retrieve the model state is by knowing the ID of a particular - * document or by searching for all documents of this type. + * Create the Elasticsearch mapping for {@linkplain ModelSnapshot}. 
+ * The '_all' field is disabled but the type is searchable */ public static XContentBuilder modelSnapshotMapping() throws IOException { XContentBuilder builder = jsonBuilder() .startObject() - .startObject(ModelSnapshot.TYPE.getPreferredName()) - .startObject(ALL) - .field(ENABLED, false) - // analyzer must be specified even though _all is disabled - // because all types in the same index must have the same - // analyzer for a given field - .field(ANALYZER, WHITESPACE) - .endObject() - .startObject(PROPERTIES) - .startObject(Job.ID.getPreferredName()) - .field(TYPE, KEYWORD) - .endObject() - .startObject(ModelSnapshot.TIMESTAMP.getPreferredName()) - .field(TYPE, DATE) - .endObject() - // "description" is analyzed so that it has the same - // mapping as a user field of the same name - this means - // it doesn't have to be a reserved field name - .startObject(ModelSnapshot.DESCRIPTION.getPreferredName()) - .field(TYPE, TEXT) - .endObject() - .startObject(ModelSnapshot.RESTORE_PRIORITY.getPreferredName()) - .field(TYPE, LONG) - .endObject() - .startObject(ModelSnapshot.SNAPSHOT_ID.getPreferredName()) - .field(TYPE, KEYWORD) - .endObject() - .startObject(ModelSnapshot.SNAPSHOT_DOC_COUNT.getPreferredName()) - .field(TYPE, INTEGER) - .endObject() - .startObject(ModelSizeStats.RESULT_TYPE_FIELD.getPreferredName()) - .startObject(PROPERTIES) - .startObject(Job.ID.getPreferredName()) - .field(TYPE, KEYWORD) - .endObject(); + .startObject(ModelSnapshot.TYPE.getPreferredName()) + .startObject(ALL) + .field(ENABLED, false) + .endObject() + .startObject(PROPERTIES) + .startObject(Job.ID.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(ModelSnapshot.TIMESTAMP.getPreferredName()) + .field(TYPE, DATE) + .endObject() + // "description" is analyzed so that it has the same + // mapping as a user field of the same name - this means + // it doesn't have to be a reserved field name + .startObject(ModelSnapshot.DESCRIPTION.getPreferredName()) + .field(TYPE, 
TEXT) + .endObject() + .startObject(ModelSnapshot.RESTORE_PRIORITY.getPreferredName()) + .field(TYPE, LONG) + .endObject() + .startObject(ModelSnapshot.SNAPSHOT_ID.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(ModelSnapshot.SNAPSHOT_DOC_COUNT.getPreferredName()) + .field(TYPE, INTEGER) + .endObject() + .startObject(ModelSizeStats.RESULT_TYPE_FIELD.getPreferredName()) + .startObject(PROPERTIES) + .startObject(Job.ID.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject(); addModelSizeStatsFieldsToMapping(builder); - builder.endObject() - .endObject() - .startObject(Quantiles.TYPE.getPreferredName()) - .startObject(PROPERTIES) - .startObject(Job.ID.getPreferredName()) - .field(TYPE, KEYWORD) - .endObject() - .startObject(Quantiles.TIMESTAMP.getPreferredName()) - .field(TYPE, DATE) - .endObject() - .startObject(Quantiles.QUANTILE_STATE.getPreferredName()) - .field(TYPE, TEXT).field(INDEX, NO) - .endObject() - .endObject() - .endObject() - .startObject(ModelSnapshot.LATEST_RECORD_TIME.getPreferredName()) - .field(TYPE, DATE) - .endObject() - .startObject(ModelSnapshot.LATEST_RESULT_TIME.getPreferredName()) - .field(TYPE, DATE) - .endObject() - .endObject() - .endObject() + builder.endObject() + .endObject() + .startObject(Quantiles.TYPE.getPreferredName()) + .startObject(PROPERTIES) + .startObject(Job.ID.getPreferredName()) + .field(TYPE, KEYWORD) + .endObject() + .startObject(Quantiles.TIMESTAMP.getPreferredName()) + .field(TYPE, DATE) + .endObject() + .startObject(Quantiles.QUANTILE_STATE.getPreferredName()) + .field(TYPE, TEXT) + .endObject() + .endObject() + .endObject() + .startObject(ModelSnapshot.LATEST_RECORD_TIME.getPreferredName()) + .field(TYPE, DATE) + .endObject() + .startObject(ModelSnapshot.LATEST_RESULT_TIME.getPreferredName()) + .field(TYPE, DATE) + .endObject() + .endObject() + .endObject() .endObject(); return builder; @@ -616,68 +632,71 @@ public class ElasticsearchMappings { 
.startObject(ModelSizeStats.MEMORY_STATUS_FIELD.getPreferredName()) .field(TYPE, KEYWORD) .endObject() - .startObject(ModelSizeStats.LOG_TIME_FIELD.getPreferredName()) - .field(TYPE, DATE) + .startObject(ModelSizeStats.LOG_TIME_FIELD.getPreferredName()) + .field(TYPE, DATE) .endObject(); return builder; } /** - * The Elasticsearch mappings for the usage documents + * The Elasticsearch mappings for the usage documents. + * The '_all' field is disabled but the type is searchable */ public static XContentBuilder usageMapping() throws IOException { return jsonBuilder() .startObject() - .startObject(Usage.TYPE) - .startObject(ALL) - .field(ENABLED, false) - // analyzer must be specified even though _all is disabled - // because all types in the same index must have the same - // analyzer for a given field - .field(ANALYZER, WHITESPACE) - .endObject() - .startObject(PROPERTIES) - .startObject(Usage.TIMESTAMP) - .field(TYPE, DATE) - .endObject() - .startObject(Usage.INPUT_BYTES) - .field(TYPE, LONG) - .endObject() - .startObject(Usage.INPUT_FIELD_COUNT) - .field(TYPE, LONG) - .endObject() - .startObject(Usage.INPUT_RECORD_COUNT) - .field(TYPE, LONG) - .endObject() - .endObject() - .endObject() + .startObject(Usage.TYPE) + .startObject(ALL) + .field(ENABLED, false) + .endObject() + .startObject(PROPERTIES) + .startObject(Usage.TIMESTAMP) + .field(TYPE, DATE) + .endObject() + .startObject(Usage.INPUT_BYTES) + .field(TYPE, LONG) + .endObject() + .startObject(Usage.INPUT_FIELD_COUNT) + .field(TYPE, LONG) + .endObject() + .startObject(Usage.INPUT_RECORD_COUNT) + .field(TYPE, LONG) + .endObject() + .endObject() + .endObject() .endObject(); } public static XContentBuilder auditMessageMapping() throws IOException { return jsonBuilder() .startObject() - .startObject(AuditMessage.TYPE.getPreferredName()) - .startObject(PROPERTIES) - .startObject(AuditMessage.TIMESTAMP.getPreferredName()) - .field(TYPE, DATE) - .endObject() - .endObject() - .endObject() + 
.startObject(AuditMessage.TYPE.getPreferredName()) + .startObject(ALL) + .field(ENABLED, false) + .endObject() + .startObject(PROPERTIES) + .startObject(AuditMessage.TIMESTAMP.getPreferredName()) + .field(TYPE, DATE) + .endObject() + .endObject() + .endObject() .endObject(); } public static XContentBuilder auditActivityMapping() throws IOException { return jsonBuilder() .startObject() - .startObject(AuditActivity.TYPE.getPreferredName()) - .startObject(PROPERTIES) - .startObject(AuditActivity.TIMESTAMP.getPreferredName()) - .field(TYPE, DATE) - .endObject() - .endObject() - .endObject() + .startObject(AuditActivity.TYPE.getPreferredName()) + .startObject(ALL) + .field(ENABLED, false) + .endObject() + .startObject(PROPERTIES) + .startObject(AuditActivity.TIMESTAMP.getPreferredName()) + .field(TYPE, DATE) + .endObject() + .endObject() + .endObject() .endObject(); } } diff --git a/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/persistence/JobProvider.java b/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/persistence/JobProvider.java index 90653a9858b..5d507a090b7 100644 --- a/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/persistence/JobProvider.java +++ b/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/persistence/JobProvider.java @@ -29,6 +29,8 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.IndexNotFoundException; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.ConstantScoreQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; @@ -47,6 +49,8 @@ import org.elasticsearch.xpack.prelert.job.Job; import org.elasticsearch.xpack.prelert.job.ModelSizeStats; import 
org.elasticsearch.xpack.prelert.job.ModelSnapshot; import org.elasticsearch.xpack.prelert.job.ModelState; +import org.elasticsearch.xpack.prelert.job.audit.AuditActivity; +import org.elasticsearch.xpack.prelert.job.audit.AuditMessage; import org.elasticsearch.xpack.prelert.job.audit.Auditor; import org.elasticsearch.xpack.prelert.job.persistence.BucketsQueryBuilder.BucketsQuery; import org.elasticsearch.xpack.prelert.job.persistence.InfluencersQueryBuilder.InfluencersQuery; @@ -71,7 +75,6 @@ import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Objects; import java.util.Optional; import java.util.function.BiConsumer; @@ -90,11 +93,7 @@ public class JobProvider { */ private static final String PRELERT_INFO_INDEX = "prelert-int"; - private static final String SETTING_TRANSLOG_DURABILITY = "index.translog.durability"; private static final String ASYNC = "async"; - private static final String SETTING_MAPPER_DYNAMIC = "index.mapper.dynamic"; - private static final String SETTING_DEFAULT_ANALYZER_TYPE = "index.analysis.analyzer.default.type"; - private static final String KEYWORD = "keyword"; private static final List SECONDARY_SORT = Arrays.asList( AnomalyRecord.ANOMALY_SCORE.getPreferredName(), @@ -157,7 +156,7 @@ public class JobProvider { * @return An Elasticsearch builder initialised with the desired settings * for Prelert indexes. 
*/ - private Settings.Builder prelertIndexSettings() { + Settings.Builder prelertIndexSettings() { return Settings.builder() // Our indexes are small and one shard puts the // least possible burden on Elasticsearch @@ -166,12 +165,12 @@ public class JobProvider { // Sacrifice durability for performance: in the event of power // failure we can lose the last 5 seconds of changes, but it's // much faster - .put(SETTING_TRANSLOG_DURABILITY, ASYNC) + .put(IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.getKey(), ASYNC) // We need to allow fields not mentioned in the mappings to // pick up default mappings and be used in queries - .put(SETTING_MAPPER_DYNAMIC, true) - // By default "analyzed" fields won't be tokenised - .put(SETTING_DEFAULT_ANALYZER_TYPE, KEYWORD); + .put(MapperService.INDEX_MAPPER_DYNAMIC_SETTING.getKey(), true) + // set the default all search field + .put(IndexSettings.DEFAULT_FIELD_SETTING.getKey(), ElasticsearchMappings.ALL_FIELD_VALUES); } /** @@ -188,11 +187,15 @@ public class JobProvider { XContentBuilder modelStateMapping = ElasticsearchMappings.modelStateMapping(); XContentBuilder modelSnapshotMapping = ElasticsearchMappings.modelSnapshotMapping(); XContentBuilder dataCountsMapping = ElasticsearchMappings.dataCountsMapping(); + XContentBuilder usageMapping = ElasticsearchMappings.usageMapping(); + XContentBuilder auditMessageMapping = ElasticsearchMappings.auditMessageMapping(); + XContentBuilder auditActivityMapping = ElasticsearchMappings.auditActivityMapping(); String jobId = job.getId(); LOGGER.trace("ES API CALL: create index {}", job.getId()); CreateIndexRequest createIndexRequest = new CreateIndexRequest(JobResultsPersister.getJobIndexName(jobId)); - createIndexRequest.settings(prelertIndexSettings()); + Settings.Builder settingsBuilder = prelertIndexSettings(); + createIndexRequest.settings(settingsBuilder); createIndexRequest.mapping(Result.TYPE.getPreferredName(), resultsMapping); createIndexRequest.mapping(CategorizerState.TYPE, 
categorizerStateMapping); createIndexRequest.mapping(CategoryDefinition.TYPE.getPreferredName(), categoryDefinitionMapping); @@ -200,6 +203,11 @@ public class JobProvider { createIndexRequest.mapping(ModelState.TYPE.getPreferredName(), modelStateMapping); createIndexRequest.mapping(ModelSnapshot.TYPE.getPreferredName(), modelSnapshotMapping); createIndexRequest.mapping(DataCounts.TYPE.getPreferredName(), dataCountsMapping); + // NORELEASE These mappings shouldn't go in the results index once the index + // strategy has been reworked + createIndexRequest.mapping(Usage.TYPE, usageMapping); + createIndexRequest.mapping(AuditMessage.TYPE.getPreferredName(), auditMessageMapping); + createIndexRequest.mapping(AuditActivity.TYPE.getPreferredName(), auditActivityMapping); client.admin().indices().create(createIndexRequest, new ActionListener() { @Override diff --git a/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/results/ReservedFieldNames.java b/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/results/ReservedFieldNames.java index b25d603dd9e..ebfaa891485 100644 --- a/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/results/ReservedFieldNames.java +++ b/elasticsearch/src/main/java/org/elasticsearch/xpack/prelert/job/results/ReservedFieldNames.java @@ -9,6 +9,7 @@ import org.elasticsearch.xpack.prelert.job.DataCounts; import org.elasticsearch.xpack.prelert.job.Job; import org.elasticsearch.xpack.prelert.job.ModelSizeStats; import org.elasticsearch.xpack.prelert.job.ModelSnapshot; +import org.elasticsearch.xpack.prelert.job.persistence.ElasticsearchMappings; import org.elasticsearch.xpack.prelert.job.quantiles.Quantiles; import org.elasticsearch.xpack.prelert.job.usage.Usage; @@ -43,6 +44,8 @@ public final class ReservedFieldNames { * in a given index.) 
*/ private static final String[] RESERVED_FIELD_NAME_ARRAY = { + ElasticsearchMappings.ALL_FIELD_VALUES, + AnomalyCause.PROBABILITY.getPreferredName(), AnomalyCause.OVER_FIELD_NAME.getPreferredName(), AnomalyCause.OVER_FIELD_VALUE.getPreferredName(), diff --git a/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/ElasticsearchMappingsTests.java b/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/ElasticsearchMappingsTests.java index ebf198b9d87..de98662057d 100644 --- a/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/ElasticsearchMappingsTests.java +++ b/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/ElasticsearchMappingsTests.java @@ -18,7 +18,6 @@ import org.elasticsearch.xpack.prelert.job.audit.AuditMessage; import org.elasticsearch.xpack.prelert.job.metadata.Allocation; import org.elasticsearch.xpack.prelert.job.quantiles.Quantiles; import org.elasticsearch.xpack.prelert.job.results.CategoryDefinition; -import org.elasticsearch.xpack.prelert.job.results.ModelDebugOutput; import org.elasticsearch.xpack.prelert.job.results.ReservedFieldNames; import org.elasticsearch.xpack.prelert.job.results.Result; import org.elasticsearch.xpack.prelert.job.usage.Usage; @@ -73,10 +72,7 @@ public class ElasticsearchMappingsTests extends ESTestCase { overridden.add(ElasticsearchMappings.COPY_TO); overridden.add(ElasticsearchMappings.DYNAMIC); overridden.add(ElasticsearchMappings.ENABLED); - overridden.add(ElasticsearchMappings.INCLUDE_IN_ALL); - overridden.add(ElasticsearchMappings.INDEX); overridden.add(ElasticsearchMappings.NESTED); - overridden.add(ElasticsearchMappings.NO); overridden.add(ElasticsearchMappings.PROPERTIES); overridden.add(ElasticsearchMappings.TYPE); overridden.add(ElasticsearchMappings.WHITESPACE); diff --git a/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/JobProviderTests.java 
b/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/JobProviderTests.java index f8f25ee69d3..afa55942f4a 100644 --- a/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/JobProviderTests.java +++ b/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/JobProviderTests.java @@ -7,10 +7,12 @@ package org.elasticsearch.xpack.prelert.job.persistence; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; import org.elasticsearch.action.get.GetResponse; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.IndexNotFoundException; import org.elasticsearch.index.query.QueryBuilder; @@ -21,9 +23,12 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.prelert.action.DeleteJobAction; import org.elasticsearch.xpack.prelert.job.AnalysisLimits; import org.elasticsearch.xpack.prelert.job.CategorizerState; +import org.elasticsearch.xpack.prelert.job.DataCounts; import org.elasticsearch.xpack.prelert.job.Job; import org.elasticsearch.xpack.prelert.job.ModelSnapshot; import org.elasticsearch.xpack.prelert.job.ModelState; +import org.elasticsearch.xpack.prelert.job.audit.AuditActivity; +import org.elasticsearch.xpack.prelert.job.audit.AuditMessage; import org.elasticsearch.xpack.prelert.job.persistence.InfluencersQueryBuilder.InfluencersQuery; import org.elasticsearch.xpack.prelert.job.quantiles.Quantiles; import org.elasticsearch.xpack.prelert.job.results.AnomalyRecord; @@ -32,6 +37,7 @@ import org.elasticsearch.xpack.prelert.job.results.CategoryDefinition; import org.elasticsearch.xpack.prelert.job.results.Influencer; import 
org.elasticsearch.xpack.prelert.job.results.PerPartitionMaxProbabilities; import org.elasticsearch.xpack.prelert.job.results.Result; +import org.elasticsearch.xpack.prelert.job.usage.Usage; import org.mockito.ArgumentCaptor; import org.mockito.Captor; @@ -138,13 +144,61 @@ public class JobProviderTests extends ESTestCase { clientBuilder.verifyIndexCreated(JobProvider.PRELERT_USAGE_INDEX); } + public void testIndexSettings() { + MockClientBuilder clientBuilder = new MockClientBuilder(CLUSTER_NAME); + JobProvider provider = createProvider(clientBuilder.build()); + Settings settings = provider.prelertIndexSettings().build(); + + assertEquals("1", settings.get("index.number_of_shards")); + assertEquals("0", settings.get("index.number_of_replicas")); + assertEquals("async", settings.get("index.translog.durability")); + assertEquals("true", settings.get("index.mapper.dynamic")); + assertEquals("all_field_values", settings.get("index.query.default_field")); + } + + public void testCreateJobRelatedIndicies() { + MockClientBuilder clientBuilder = new MockClientBuilder(CLUSTER_NAME); + ArgumentCaptor captor = ArgumentCaptor.forClass(CreateIndexRequest.class); + clientBuilder.createIndexRequest(JobResultsPersister.getJobIndexName("foo"), captor); + + Job.Builder job = buildJobBuilder("foo"); + JobProvider provider = createProvider(clientBuilder.build()); + + provider.createJobRelatedIndices(job.build(), new ActionListener() { + @Override + public void onResponse(Boolean aBoolean) { + CreateIndexRequest request = captor.getValue(); + assertNotNull(request); + assertEquals(provider.prelertIndexSettings().build(), request.settings()); + assertTrue(request.mappings().containsKey(Result.TYPE.getPreferredName())); + assertTrue(request.mappings().containsKey(CategorizerState.TYPE)); + assertTrue(request.mappings().containsKey(CategoryDefinition.TYPE.getPreferredName())); + assertTrue(request.mappings().containsKey(Quantiles.TYPE.getPreferredName())); + 
assertTrue(request.mappings().containsKey(ModelState.TYPE.getPreferredName())); + assertTrue(request.mappings().containsKey(ModelSnapshot.TYPE.getPreferredName())); + assertTrue(request.mappings().containsKey(DataCounts.TYPE.getPreferredName())); + assertTrue(request.mappings().containsKey(Usage.TYPE)); + assertTrue(request.mappings().containsKey(AuditMessage.TYPE.getPreferredName())); + assertTrue(request.mappings().containsKey(AuditActivity.TYPE.getPreferredName())); + assertEquals(10, request.mappings().size()); + } + + @Override + public void onFailure(Exception e) { + fail(e.toString()); + } + }); + } + + public void testCreateJob() throws InterruptedException, ExecutionException { Job.Builder job = buildJobBuilder("marscapone"); job.setDescription("This is a very cheesy job"); AnalysisLimits limits = new AnalysisLimits(9878695309134L, null); job.setAnalysisLimits(limits); - MockClientBuilder clientBuilder = new MockClientBuilder(CLUSTER_NAME).createIndexRequest("prelertresults-" + job.getId()); + ArgumentCaptor captor = ArgumentCaptor.forClass(CreateIndexRequest.class); + MockClientBuilder clientBuilder = new MockClientBuilder(CLUSTER_NAME).createIndexRequest("prelertresults-" + job.getId(), captor); Client client = clientBuilder.build(); JobProvider provider = createProvider(client); diff --git a/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/MockClientBuilder.java b/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/MockClientBuilder.java index 0026de074e2..6cd46ea6c5e 100644 --- a/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/MockClientBuilder.java +++ b/elasticsearch/src/test/java/org/elasticsearch/xpack/prelert/job/persistence/MockClientBuilder.java @@ -217,17 +217,12 @@ public class MockClientBuilder { } @SuppressWarnings({ "rawtypes", "unchecked" }) - public MockClientBuilder createIndexRequest(String index) { - ArgumentMatcher argumentMatcher = new ArgumentMatcher() { 
- @Override - public boolean matches(Object o) { - return index.equals(((CreateIndexRequest) o).index()); - } - }; + public MockClientBuilder createIndexRequest(String index, ArgumentCaptor requestCapture) { + doAnswer(invocation -> { ((ActionListener) invocation.getArguments()[1]).onResponse(mock(CreateIndexResponse.class)); return null; - }).when(indicesAdminClient).create(argThat(argumentMatcher), any(ActionListener.class)); + }).when(indicesAdminClient).create(requestCapture.capture(), any(ActionListener.class)); return this; }