Custom all mapping for interesting result fields (elastic/elasticsearch#535)

* Check use of mappings

* Add unit tests for JobProvider.createJobRelatedIndices

* Remove ‘index: no’ from mappings as no longer required

The entire type mapping has ‘enabled: false’

* Restore “index.analysis.analyzer.default.type” setting

* Remove include_in_all from nested mappings

* Add audit and usage mappings to the job index

* Revert ‘Restore “index.analysis.analyzer.default.type” setting’

Original commit: elastic/x-pack-elasticsearch@c7d62e0c7e
This commit is contained in:
David Kyle 2016-12-14 16:32:36 +00:00 committed by GitHub
parent 0a45d846df
commit f085b935f6
6 changed files with 363 additions and 288 deletions

View File

@ -45,27 +45,32 @@ import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
* It is expected that indexes to which these mappings are applied have their
* default analyzer set to "keyword", which does not tokenise fields. The
* index-wide default analyzer cannot be set via these mappings, so needs to be
* set in the index settings during index creation. Then the _all field has its
* analyzer set to "whitespace" by these mappings, so that _all gets tokenised
* set in the index settings during index creation. For the results mapping the
* _all field is disabled and a custom all field is used in its place. The index
* settings must have {@code "index.query.default_field": "all_field_values" } set
* for the queries to use the custom all field. The custom all field has its
* analyzer set to "whitespace" by these mappings, so that it gets tokenised
* using whitespace.
*/
public class ElasticsearchMappings {
/**
* String constants used in mappings
*/
static final String INDEX = "index";
static final String NO = "false";
static final String ALL = "_all";
static final String ENABLED = "enabled";
static final String ANALYZER = "analyzer";
static final String WHITESPACE = "whitespace";
static final String INCLUDE_IN_ALL = "include_in_all";
static final String NESTED = "nested";
static final String COPY_TO = "copy_to";
static final String PROPERTIES = "properties";
static final String TYPE = "type";
static final String DYNAMIC = "dynamic";
/**
* Name of the custom 'all' field for results
*/
public static final String ALL_FIELD_VALUES = "all_field_values";
/**
* Name of the Elasticsearch field by which documents are sorted by default
*/
@ -88,10 +93,26 @@ public class ElasticsearchMappings {
/**
* Create the Elasticsearch mapping for results objects
* {@link Bucket}s, {@link AnomalyRecord}s, {@link Influencer},
* {@link BucketInfluencer} and {@link CategoryDefinition}
* {@link Bucket}s, {@link AnomalyRecord}s, {@link Influencer} and
* {@link BucketInfluencer}
*
* The '_all' field is disabled as the document isn't meant to be searched.
* The mapping has a custom all field containing the *_FIELD_VALUE fields
* e.g. BY_FIELD_VALUE, OVER_FIELD_VALUE, etc. The custom all field {@link #ALL_FIELD_VALUES}
* must be set as the default query field ({@code index.query.default_field}) in the index
* settings. A custom all field is preferred over the usual '_all' field because most fields
* do not belong in '_all'; disabling '_all' and using a custom all field simplifies the mapping.
*
* These fields are copied to the custom all field
* <ul>
* <li>by_field_value</li>
* <li>partition_field_value</li>
* <li>over_field_value</li>
* <li>AnomalyCause.correlated_by_field_value</li>
* <li>AnomalyCause.by_field_value</li>
* <li>AnomalyCause.partition_field_value</li>
* <li>AnomalyCause.over_field_value</li>
* <li>Influence.influencer_field_values</li>
* </ul>
*
* @param termFieldNames All the term fields (by, over, partition) and influencers
* included in the mapping
@ -105,17 +126,17 @@ public class ElasticsearchMappings {
.startObject(Result.TYPE.getPreferredName())
.startObject(ALL)
.field(ENABLED, false)
// analyzer must be specified even though _all is disabled
// because all types in the same index must have the same
// analyzer for a given field
.field(ANALYZER, WHITESPACE)
.endObject()
.startObject(PROPERTIES)
.startObject(ALL_FIELD_VALUES)
.field(TYPE, TEXT)
.field(ANALYZER, WHITESPACE)
.endObject()
.startObject(Result.RESULT_TYPE.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(Bucket.TIMESTAMP.getPreferredName())
.field(TYPE, DATE)
@ -148,7 +169,7 @@ public class ElasticsearchMappings {
.field(TYPE, NESTED)
.startObject(PROPERTIES)
.startObject(AnomalyRecord.PARTITION_FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyRecord.PARTITION_FIELD_VALUE.getPreferredName())
.field(TYPE, KEYWORD)
@ -169,7 +190,7 @@ public class ElasticsearchMappings {
.field(TYPE, NESTED)
.startObject(PROPERTIES)
.startObject(BucketInfluencer.INFLUENCER_FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(BucketInfluencer.RAW_ANOMALY_SCORE.getPreferredName())
.field(TYPE, DOUBLE)
@ -177,7 +198,7 @@ public class ElasticsearchMappings {
.endObject()
.endObject()
.startObject(BucketInfluencer.INFLUENCER_FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
// per-partition max probabilities mapping
@ -195,16 +216,16 @@ public class ElasticsearchMappings {
// Model Debug Output
.startObject(ModelDebugOutput.DEBUG_FEATURE.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(ModelDebugOutput.DEBUG_LOWER.getPreferredName())
.field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false)
.field(TYPE, DOUBLE)
.endObject()
.startObject(ModelDebugOutput.DEBUG_UPPER.getPreferredName())
.field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false)
.field(TYPE, DOUBLE)
.endObject()
.startObject(ModelDebugOutput.DEBUG_MEDIAN.getPreferredName())
.field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false)
.field(TYPE, DOUBLE)
.endObject();
addAnomalyRecordFieldsToMapping(builder);
@ -240,94 +261,101 @@ public class ElasticsearchMappings {
private static XContentBuilder addAnomalyRecordFieldsToMapping(XContentBuilder builder)
throws IOException {
builder.startObject(AnomalyRecord.DETECTOR_INDEX.getPreferredName())
.field(TYPE, INTEGER).field(INCLUDE_IN_ALL, false)
.field(TYPE, INTEGER)
.endObject()
.startObject(AnomalyRecord.SEQUENCE_NUM.getPreferredName())
.field(TYPE, INTEGER).field(INCLUDE_IN_ALL, false)
.field(TYPE, INTEGER)
.endObject()
.startObject(AnomalyRecord.ACTUAL.getPreferredName())
.field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false)
.field(TYPE, DOUBLE)
.endObject()
.startObject(AnomalyRecord.TYPICAL.getPreferredName())
.field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false)
.field(TYPE, DOUBLE)
.endObject()
.startObject(AnomalyRecord.PROBABILITY.getPreferredName())
.field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false)
.field(TYPE, DOUBLE)
.endObject()
.startObject(AnomalyRecord.FUNCTION.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyRecord.FUNCTION_DESCRIPTION.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyRecord.BY_FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyRecord.BY_FIELD_VALUE.getPreferredName())
.field(TYPE, KEYWORD)
.field(COPY_TO, ALL_FIELD_VALUES)
.endObject()
.startObject(AnomalyRecord.FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyRecord.PARTITION_FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyRecord.PARTITION_FIELD_VALUE.getPreferredName())
.field(TYPE, KEYWORD)
.field(COPY_TO, ALL_FIELD_VALUES)
.endObject()
.startObject(AnomalyRecord.OVER_FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyRecord.OVER_FIELD_VALUE.getPreferredName())
.field(TYPE, KEYWORD)
.field(COPY_TO, ALL_FIELD_VALUES)
.endObject()
.startObject(AnomalyRecord.NORMALIZED_PROBABILITY.getPreferredName())
.field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false)
.field(TYPE, DOUBLE)
.endObject()
.startObject(AnomalyRecord.INITIAL_NORMALIZED_PROBABILITY.getPreferredName())
.field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false)
.field(TYPE, DOUBLE)
.endObject()
.startObject(AnomalyRecord.CAUSES.getPreferredName())
.field(TYPE, NESTED)
.startObject(PROPERTIES)
.startObject(AnomalyCause.ACTUAL.getPreferredName())
.field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false)
.field(TYPE, DOUBLE)
.endObject()
.startObject(AnomalyCause.TYPICAL.getPreferredName())
.field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false)
.field(TYPE, DOUBLE)
.endObject()
.startObject(AnomalyCause.PROBABILITY.getPreferredName())
.field(TYPE, DOUBLE).field(INCLUDE_IN_ALL, false)
.field(TYPE, DOUBLE)
.endObject()
.startObject(AnomalyCause.FUNCTION.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyCause.FUNCTION_DESCRIPTION.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyCause.BY_FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyCause.BY_FIELD_VALUE.getPreferredName())
.field(TYPE, KEYWORD)
.field(COPY_TO, ALL_FIELD_VALUES)
.endObject()
.startObject(AnomalyCause.CORRELATED_BY_FIELD_VALUE.getPreferredName())
.field(TYPE, KEYWORD)
.field(COPY_TO, ALL_FIELD_VALUES)
.endObject()
.startObject(AnomalyCause.FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyCause.PARTITION_FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyCause.PARTITION_FIELD_VALUE.getPreferredName())
.field(TYPE, KEYWORD)
.field(COPY_TO, ALL_FIELD_VALUES)
.endObject()
.startObject(AnomalyCause.OVER_FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(AnomalyCause.OVER_FIELD_VALUE.getPreferredName())
.field(TYPE, KEYWORD)
.field(COPY_TO, ALL_FIELD_VALUES)
.endObject()
.endObject()
.endObject()
@ -336,10 +364,11 @@ public class ElasticsearchMappings {
.field(TYPE, NESTED)
.startObject(PROPERTIES)
.startObject(Influence.INFLUENCER_FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(Influence.INFLUENCER_FIELD_VALUES.getPreferredName())
.field(TYPE, KEYWORD)
.field(COPY_TO, ALL_FIELD_VALUES)
.endObject()
.endObject()
.endObject();
@ -349,146 +378,144 @@ public class ElasticsearchMappings {
private static XContentBuilder addInfluencerFieldsToMapping(XContentBuilder builder) throws IOException {
builder.startObject(Influencer.INFLUENCER_FIELD_NAME.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject()
.startObject(Influencer.INFLUENCER_FIELD_VALUE.getPreferredName())
.field(TYPE, KEYWORD).field(INCLUDE_IN_ALL, false)
.field(TYPE, KEYWORD)
.endObject();
return builder;
}
/**
* {@link DataCounts} mapping.
* The type is disabled so {@link DataCounts} aren't searchable and
* the '_all' field is disabled
*
* @return The builder
* @throws IOException On builder write error
*/
public static XContentBuilder dataCountsMapping() throws IOException {
return jsonBuilder()
.startObject()
.startObject(DataCounts.TYPE.getPreferredName())
.startObject(ALL)
.field(ENABLED, false)
// analyzer must be specified even though _all is disabled
// because all types in the same index must have the same
// analyzer for a given field
.field(ANALYZER, WHITESPACE)
.endObject()
.startObject(PROPERTIES)
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(DataCounts.PROCESSED_RECORD_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.PROCESSED_FIELD_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.INPUT_BYTES.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.INPUT_RECORD_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.INPUT_FIELD_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.INVALID_DATE_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.MISSING_FIELD_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.OUT_OF_ORDER_TIME_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.EARLIEST_RECORD_TIME.getPreferredName())
.field(TYPE, DATE)
.endObject()
.startObject(DataCounts.LATEST_RECORD_TIME.getPreferredName())
.field(TYPE, DATE)
.endObject()
.endObject()
.endObject()
.startObject(DataCounts.TYPE.getPreferredName())
.field(ENABLED, false)
.startObject(ALL)
.field(ENABLED, false)
.endObject()
.startObject(PROPERTIES)
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(DataCounts.PROCESSED_RECORD_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.PROCESSED_FIELD_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.INPUT_BYTES.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.INPUT_RECORD_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.INPUT_FIELD_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.INVALID_DATE_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.MISSING_FIELD_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.OUT_OF_ORDER_TIME_COUNT.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(DataCounts.EARLIEST_RECORD_TIME.getPreferredName())
.field(TYPE, DATE)
.endObject()
.startObject(DataCounts.LATEST_RECORD_TIME.getPreferredName())
.field(TYPE, DATE)
.endObject()
.endObject()
.endObject()
.endObject();
}
/**
* {@link CategorizerState} mapping.
* The type is disabled so {@link CategorizerState} is not searchable and
* the '_all' field is disabled
*
* @return The builder
* @throws IOException On builder write error
*/
public static XContentBuilder categorizerStateMapping() throws IOException {
return jsonBuilder()
.startObject()
.startObject(CategorizerState.TYPE)
.field(ENABLED, false)
.startObject(ALL)
.field(ENABLED, false)
// analyzer must be specified even though _all is disabled
// because all types in the same index must have the same
// analyzer for a given field
.field(ANALYZER, WHITESPACE)
.endObject()
.endObject()
.startObject(CategorizerState.TYPE)
.field(ENABLED, false)
.startObject(ALL)
.field(ENABLED, false)
.endObject()
.endObject()
.endObject();
}
/**
* Create the Elasticsearch mapping for {@linkplain Quantiles}.
* The '_all' field is disabled as the document isn't meant to be searched.
* Both the type and the '_all' field are disabled because the document isn't meant to be searched.
* <p>
* The quantile state string is not searchable (index = 'no') as it could be
* The quantile state string is not searchable (enabled = false) as it could be
* very large.
*/
public static XContentBuilder quantilesMapping() throws IOException {
return jsonBuilder()
.startObject()
.startObject(Quantiles.TYPE.getPreferredName())
.startObject(ALL)
.field(ENABLED, false)
// analyzer must be specified even though _all is disabled
// because all types in the same index must have the same
// analyzer for a given field
.field(ANALYZER, WHITESPACE)
.endObject()
.startObject(PROPERTIES)
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(Quantiles.TIMESTAMP.getPreferredName())
.field(TYPE, DATE)
.endObject()
.startObject(Quantiles.QUANTILE_STATE.getPreferredName())
.field(TYPE, TEXT).field(INDEX, NO)
.endObject()
.endObject()
.endObject()
.startObject(Quantiles.TYPE.getPreferredName())
.field(ENABLED, false)
.startObject(ALL)
.field(ENABLED, false)
.endObject()
.endObject()
.endObject();
}
/**
* Create the Elasticsearch mapping for {@linkplain CategoryDefinition}.
* The '_all' field is disabled as the document isn't meant to be searched.
*
* @return The builder
* @throws IOException On builder error
*/
public static XContentBuilder categoryDefinitionMapping() throws IOException {
return jsonBuilder()
.startObject()
.startObject(CategoryDefinition.TYPE.getPreferredName())
.startObject(ALL)
.field(ENABLED, false)
// analyzer must be specified even though _all is disabled
// because all types in the same index must have the same
// analyzer for a given field
.field(ANALYZER, WHITESPACE)
.endObject()
.startObject(PROPERTIES)
.startObject(CategoryDefinition.CATEGORY_ID.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(CategoryDefinition.TERMS.getPreferredName())
.field(TYPE, TEXT).field(INDEX, NO)
.endObject()
.startObject(CategoryDefinition.REGEX.getPreferredName())
.field(TYPE, TEXT).field(INDEX, NO)
.endObject()
.startObject(CategoryDefinition.MAX_MATCHING_LENGTH.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(CategoryDefinition.EXAMPLES.getPreferredName())
.field(TYPE, TEXT).field(INDEX, NO)
.endObject()
.endObject()
.endObject()
.startObject(CategoryDefinition.TYPE.getPreferredName())
.startObject(ALL)
.field(ENABLED, false)
.endObject()
.startObject(PROPERTIES)
.startObject(CategoryDefinition.CATEGORY_ID.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(CategoryDefinition.TERMS.getPreferredName())
.field(TYPE, TEXT)
.endObject()
.startObject(CategoryDefinition.REGEX.getPreferredName())
.field(TYPE, TEXT)
.endObject()
.startObject(CategoryDefinition.MAX_MATCHING_LENGTH.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(CategoryDefinition.EXAMPLES.getPreferredName())
.field(TYPE, TEXT)
.endObject()
.endObject()
.endObject()
.endObject();
}
@ -502,90 +529,79 @@ public class ElasticsearchMappings {
public static XContentBuilder modelStateMapping() throws IOException {
return jsonBuilder()
.startObject()
.startObject(ModelState.TYPE.getPreferredName())
.field(ENABLED, false)
.startObject(ALL)
.field(ENABLED, false)
// analyzer must be specified even though _all is disabled
// because all types in the same index must have the same
// analyzer for a given field
.field(ANALYZER, WHITESPACE)
.endObject()
.endObject()
.startObject(ModelState.TYPE.getPreferredName())
.field(ENABLED, false)
.startObject(ALL)
.field(ENABLED, false)
.endObject()
.endObject()
.endObject();
}
/**
* Create the Elasticsearch mapping for {@linkplain ModelState}.
* The model state could potentially be huge (over a gigabyte in size)
* so all analysis by Elasticsearch is disabled. The only way to
* retrieve the model state is by knowing the ID of a particular
* document or by searching for all documents of this type.
* Create the Elasticsearch mapping for {@linkplain ModelSnapshot}.
* The '_all' field is disabled but the type is searchable
*/
public static XContentBuilder modelSnapshotMapping() throws IOException {
XContentBuilder builder = jsonBuilder()
.startObject()
.startObject(ModelSnapshot.TYPE.getPreferredName())
.startObject(ALL)
.field(ENABLED, false)
// analyzer must be specified even though _all is disabled
// because all types in the same index must have the same
// analyzer for a given field
.field(ANALYZER, WHITESPACE)
.endObject()
.startObject(PROPERTIES)
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(ModelSnapshot.TIMESTAMP.getPreferredName())
.field(TYPE, DATE)
.endObject()
// "description" is analyzed so that it has the same
// mapping as a user field of the same name - this means
// it doesn't have to be a reserved field name
.startObject(ModelSnapshot.DESCRIPTION.getPreferredName())
.field(TYPE, TEXT)
.endObject()
.startObject(ModelSnapshot.RESTORE_PRIORITY.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(ModelSnapshot.SNAPSHOT_ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(ModelSnapshot.SNAPSHOT_DOC_COUNT.getPreferredName())
.field(TYPE, INTEGER)
.endObject()
.startObject(ModelSizeStats.RESULT_TYPE_FIELD.getPreferredName())
.startObject(PROPERTIES)
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject();
.startObject(ModelSnapshot.TYPE.getPreferredName())
.startObject(ALL)
.field(ENABLED, false)
.endObject()
.startObject(PROPERTIES)
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(ModelSnapshot.TIMESTAMP.getPreferredName())
.field(TYPE, DATE)
.endObject()
// "description" is analyzed so that it has the same
// mapping as a user field of the same name - this means
// it doesn't have to be a reserved field name
.startObject(ModelSnapshot.DESCRIPTION.getPreferredName())
.field(TYPE, TEXT)
.endObject()
.startObject(ModelSnapshot.RESTORE_PRIORITY.getPreferredName())
.field(TYPE, LONG)
.endObject()
.startObject(ModelSnapshot.SNAPSHOT_ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(ModelSnapshot.SNAPSHOT_DOC_COUNT.getPreferredName())
.field(TYPE, INTEGER)
.endObject()
.startObject(ModelSizeStats.RESULT_TYPE_FIELD.getPreferredName())
.startObject(PROPERTIES)
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject();
addModelSizeStatsFieldsToMapping(builder);
builder.endObject()
.endObject()
.startObject(Quantiles.TYPE.getPreferredName())
.startObject(PROPERTIES)
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(Quantiles.TIMESTAMP.getPreferredName())
.field(TYPE, DATE)
.endObject()
.startObject(Quantiles.QUANTILE_STATE.getPreferredName())
.field(TYPE, TEXT).field(INDEX, NO)
.endObject()
.endObject()
.endObject()
.startObject(ModelSnapshot.LATEST_RECORD_TIME.getPreferredName())
.field(TYPE, DATE)
.endObject()
.startObject(ModelSnapshot.LATEST_RESULT_TIME.getPreferredName())
.field(TYPE, DATE)
.endObject()
.endObject()
.endObject()
builder.endObject()
.endObject()
.startObject(Quantiles.TYPE.getPreferredName())
.startObject(PROPERTIES)
.startObject(Job.ID.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(Quantiles.TIMESTAMP.getPreferredName())
.field(TYPE, DATE)
.endObject()
.startObject(Quantiles.QUANTILE_STATE.getPreferredName())
.field(TYPE, TEXT)
.endObject()
.endObject()
.endObject()
.startObject(ModelSnapshot.LATEST_RECORD_TIME.getPreferredName())
.field(TYPE, DATE)
.endObject()
.startObject(ModelSnapshot.LATEST_RESULT_TIME.getPreferredName())
.field(TYPE, DATE)
.endObject()
.endObject()
.endObject()
.endObject();
return builder;
@ -616,68 +632,71 @@ public class ElasticsearchMappings {
.startObject(ModelSizeStats.MEMORY_STATUS_FIELD.getPreferredName())
.field(TYPE, KEYWORD)
.endObject()
.startObject(ModelSizeStats.LOG_TIME_FIELD.getPreferredName())
.field(TYPE, DATE)
.startObject(ModelSizeStats.LOG_TIME_FIELD.getPreferredName())
.field(TYPE, DATE)
.endObject();
return builder;
}
/**
* The Elasticsearch mappings for the usage documents
* The Elasticsearch mappings for the usage documents.
* The '_all' field is disabled but the type is searchable
*/
public static XContentBuilder usageMapping() throws IOException {
return jsonBuilder()
.startObject()
.startObject(Usage.TYPE)
.startObject(ALL)
.field(ENABLED, false)
// analyzer must be specified even though _all is disabled
// because all types in the same index must have the same
// analyzer for a given field
.field(ANALYZER, WHITESPACE)
.endObject()
.startObject(PROPERTIES)
.startObject(Usage.TIMESTAMP)
.field(TYPE, DATE)
.endObject()
.startObject(Usage.INPUT_BYTES)
.field(TYPE, LONG)
.endObject()
.startObject(Usage.INPUT_FIELD_COUNT)
.field(TYPE, LONG)
.endObject()
.startObject(Usage.INPUT_RECORD_COUNT)
.field(TYPE, LONG)
.endObject()
.endObject()
.endObject()
.startObject(Usage.TYPE)
.startObject(ALL)
.field(ENABLED, false)
.endObject()
.startObject(PROPERTIES)
.startObject(Usage.TIMESTAMP)
.field(TYPE, DATE)
.endObject()
.startObject(Usage.INPUT_BYTES)
.field(TYPE, LONG)
.endObject()
.startObject(Usage.INPUT_FIELD_COUNT)
.field(TYPE, LONG)
.endObject()
.startObject(Usage.INPUT_RECORD_COUNT)
.field(TYPE, LONG)
.endObject()
.endObject()
.endObject()
.endObject();
}
public static XContentBuilder auditMessageMapping() throws IOException {
return jsonBuilder()
.startObject()
.startObject(AuditMessage.TYPE.getPreferredName())
.startObject(PROPERTIES)
.startObject(AuditMessage.TIMESTAMP.getPreferredName())
.field(TYPE, DATE)
.endObject()
.endObject()
.endObject()
.startObject(AuditMessage.TYPE.getPreferredName())
.startObject(ALL)
.field(ENABLED, false)
.endObject()
.startObject(PROPERTIES)
.startObject(AuditMessage.TIMESTAMP.getPreferredName())
.field(TYPE, DATE)
.endObject()
.endObject()
.endObject()
.endObject();
}
public static XContentBuilder auditActivityMapping() throws IOException {
return jsonBuilder()
.startObject()
.startObject(AuditActivity.TYPE.getPreferredName())
.startObject(PROPERTIES)
.startObject(AuditActivity.TIMESTAMP.getPreferredName())
.field(TYPE, DATE)
.endObject()
.endObject()
.endObject()
.startObject(AuditActivity.TYPE.getPreferredName())
.startObject(ALL)
.field(ENABLED, false)
.endObject()
.startObject(PROPERTIES)
.startObject(AuditActivity.TIMESTAMP.getPreferredName())
.field(TYPE, DATE)
.endObject()
.endObject()
.endObject()
.endObject();
}
}

View File

@ -29,6 +29,8 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.ConstantScoreQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
@ -47,6 +49,8 @@ import org.elasticsearch.xpack.prelert.job.Job;
import org.elasticsearch.xpack.prelert.job.ModelSizeStats;
import org.elasticsearch.xpack.prelert.job.ModelSnapshot;
import org.elasticsearch.xpack.prelert.job.ModelState;
import org.elasticsearch.xpack.prelert.job.audit.AuditActivity;
import org.elasticsearch.xpack.prelert.job.audit.AuditMessage;
import org.elasticsearch.xpack.prelert.job.audit.Auditor;
import org.elasticsearch.xpack.prelert.job.persistence.BucketsQueryBuilder.BucketsQuery;
import org.elasticsearch.xpack.prelert.job.persistence.InfluencersQueryBuilder.InfluencersQuery;
@ -71,7 +75,6 @@ import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.function.BiConsumer;
@ -90,11 +93,7 @@ public class JobProvider {
*/
private static final String PRELERT_INFO_INDEX = "prelert-int";
private static final String SETTING_TRANSLOG_DURABILITY = "index.translog.durability";
private static final String ASYNC = "async";
private static final String SETTING_MAPPER_DYNAMIC = "index.mapper.dynamic";
private static final String SETTING_DEFAULT_ANALYZER_TYPE = "index.analysis.analyzer.default.type";
private static final String KEYWORD = "keyword";
private static final List<String> SECONDARY_SORT = Arrays.asList(
AnomalyRecord.ANOMALY_SCORE.getPreferredName(),
@ -157,7 +156,7 @@ public class JobProvider {
* @return An Elasticsearch builder initialised with the desired settings
* for Prelert indexes.
*/
private Settings.Builder prelertIndexSettings() {
Settings.Builder prelertIndexSettings() {
return Settings.builder()
// Our indexes are small and one shard puts the
// least possible burden on Elasticsearch
@ -166,12 +165,12 @@ public class JobProvider {
// Sacrifice durability for performance: in the event of power
// failure we can lose the last 5 seconds of changes, but it's
// much faster
.put(SETTING_TRANSLOG_DURABILITY, ASYNC)
.put(IndexSettings.INDEX_TRANSLOG_DURABILITY_SETTING.getKey(), ASYNC)
// We need to allow fields not mentioned in the mappings to
// pick up default mappings and be used in queries
.put(SETTING_MAPPER_DYNAMIC, true)
// By default "analyzed" fields won't be tokenised
.put(SETTING_DEFAULT_ANALYZER_TYPE, KEYWORD);
.put(MapperService.INDEX_MAPPER_DYNAMIC_SETTING.getKey(), true)
// set the default all search field
.put(IndexSettings.DEFAULT_FIELD_SETTING.getKey(), ElasticsearchMappings.ALL_FIELD_VALUES);
}
/**
@ -188,11 +187,15 @@ public class JobProvider {
XContentBuilder modelStateMapping = ElasticsearchMappings.modelStateMapping();
XContentBuilder modelSnapshotMapping = ElasticsearchMappings.modelSnapshotMapping();
XContentBuilder dataCountsMapping = ElasticsearchMappings.dataCountsMapping();
XContentBuilder usageMapping = ElasticsearchMappings.usageMapping();
XContentBuilder auditMessageMapping = ElasticsearchMappings.auditMessageMapping();
XContentBuilder auditActivityMapping = ElasticsearchMappings.auditActivityMapping();
String jobId = job.getId();
LOGGER.trace("ES API CALL: create index {}", job.getId());
CreateIndexRequest createIndexRequest = new CreateIndexRequest(JobResultsPersister.getJobIndexName(jobId));
createIndexRequest.settings(prelertIndexSettings());
Settings.Builder settingsBuilder = prelertIndexSettings();
createIndexRequest.settings(settingsBuilder);
createIndexRequest.mapping(Result.TYPE.getPreferredName(), resultsMapping);
createIndexRequest.mapping(CategorizerState.TYPE, categorizerStateMapping);
createIndexRequest.mapping(CategoryDefinition.TYPE.getPreferredName(), categoryDefinitionMapping);
@ -200,6 +203,11 @@ public class JobProvider {
createIndexRequest.mapping(ModelState.TYPE.getPreferredName(), modelStateMapping);
createIndexRequest.mapping(ModelSnapshot.TYPE.getPreferredName(), modelSnapshotMapping);
createIndexRequest.mapping(DataCounts.TYPE.getPreferredName(), dataCountsMapping);
// NORELEASE These mappings shouldn't go in the results index once the index
// strategy has been reworked
createIndexRequest.mapping(Usage.TYPE, usageMapping);
createIndexRequest.mapping(AuditMessage.TYPE.getPreferredName(), auditMessageMapping);
createIndexRequest.mapping(AuditActivity.TYPE.getPreferredName(), auditActivityMapping);
client.admin().indices().create(createIndexRequest, new ActionListener<CreateIndexResponse>() {
@Override

View File

@ -9,6 +9,7 @@ import org.elasticsearch.xpack.prelert.job.DataCounts;
import org.elasticsearch.xpack.prelert.job.Job;
import org.elasticsearch.xpack.prelert.job.ModelSizeStats;
import org.elasticsearch.xpack.prelert.job.ModelSnapshot;
import org.elasticsearch.xpack.prelert.job.persistence.ElasticsearchMappings;
import org.elasticsearch.xpack.prelert.job.quantiles.Quantiles;
import org.elasticsearch.xpack.prelert.job.usage.Usage;
@ -43,6 +44,8 @@ public final class ReservedFieldNames {
* in a given index.)
*/
private static final String[] RESERVED_FIELD_NAME_ARRAY = {
ElasticsearchMappings.ALL_FIELD_VALUES,
AnomalyCause.PROBABILITY.getPreferredName(),
AnomalyCause.OVER_FIELD_NAME.getPreferredName(),
AnomalyCause.OVER_FIELD_VALUE.getPreferredName(),

View File

@ -18,7 +18,6 @@ import org.elasticsearch.xpack.prelert.job.audit.AuditMessage;
import org.elasticsearch.xpack.prelert.job.metadata.Allocation;
import org.elasticsearch.xpack.prelert.job.quantiles.Quantiles;
import org.elasticsearch.xpack.prelert.job.results.CategoryDefinition;
import org.elasticsearch.xpack.prelert.job.results.ModelDebugOutput;
import org.elasticsearch.xpack.prelert.job.results.ReservedFieldNames;
import org.elasticsearch.xpack.prelert.job.results.Result;
import org.elasticsearch.xpack.prelert.job.usage.Usage;
@ -73,10 +72,7 @@ public class ElasticsearchMappingsTests extends ESTestCase {
overridden.add(ElasticsearchMappings.COPY_TO);
overridden.add(ElasticsearchMappings.DYNAMIC);
overridden.add(ElasticsearchMappings.ENABLED);
overridden.add(ElasticsearchMappings.INCLUDE_IN_ALL);
overridden.add(ElasticsearchMappings.INDEX);
overridden.add(ElasticsearchMappings.NESTED);
overridden.add(ElasticsearchMappings.NO);
overridden.add(ElasticsearchMappings.PROPERTIES);
overridden.add(ElasticsearchMappings.TYPE);
overridden.add(ElasticsearchMappings.WHITESPACE);

View File

@ -7,10 +7,12 @@ package org.elasticsearch.xpack.prelert.job.persistence;
import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
import org.elasticsearch.action.get.GetResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexNotFoundException;
import org.elasticsearch.index.query.QueryBuilder;
@ -21,9 +23,12 @@ import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.prelert.action.DeleteJobAction;
import org.elasticsearch.xpack.prelert.job.AnalysisLimits;
import org.elasticsearch.xpack.prelert.job.CategorizerState;
import org.elasticsearch.xpack.prelert.job.DataCounts;
import org.elasticsearch.xpack.prelert.job.Job;
import org.elasticsearch.xpack.prelert.job.ModelSnapshot;
import org.elasticsearch.xpack.prelert.job.ModelState;
import org.elasticsearch.xpack.prelert.job.audit.AuditActivity;
import org.elasticsearch.xpack.prelert.job.audit.AuditMessage;
import org.elasticsearch.xpack.prelert.job.persistence.InfluencersQueryBuilder.InfluencersQuery;
import org.elasticsearch.xpack.prelert.job.quantiles.Quantiles;
import org.elasticsearch.xpack.prelert.job.results.AnomalyRecord;
@ -32,6 +37,7 @@ import org.elasticsearch.xpack.prelert.job.results.CategoryDefinition;
import org.elasticsearch.xpack.prelert.job.results.Influencer;
import org.elasticsearch.xpack.prelert.job.results.PerPartitionMaxProbabilities;
import org.elasticsearch.xpack.prelert.job.results.Result;
import org.elasticsearch.xpack.prelert.job.usage.Usage;
import org.mockito.ArgumentCaptor;
import org.mockito.Captor;
@ -138,13 +144,61 @@ public class JobProviderTests extends ESTestCase {
clientBuilder.verifyIndexCreated(JobProvider.PRELERT_USAGE_INDEX);
}
/**
 * Checks the index settings built by {@code JobProvider.prelertIndexSettings()}:
 * shard and replica counts, translog durability, dynamic mapping, and the
 * default query field.
 */
public void testIndexSettings() {
    JobProvider jobProvider = createProvider(new MockClientBuilder(CLUSTER_NAME).build());
    Settings indexSettings = jobProvider.prelertIndexSettings().build();

    // Index layout and durability.
    assertEquals("1", indexSettings.get("index.number_of_shards"));
    assertEquals("0", indexSettings.get("index.number_of_replicas"));
    assertEquals("async", indexSettings.get("index.translog.durability"));
    assertEquals("true", indexSettings.get("index.mapper.dynamic"));

    // Queries that name no field must be routed to the custom all field.
    assertEquals("all_field_values", indexSettings.get("index.query.default_field"));
}
/**
 * Verifies that {@code JobProvider.createJobRelatedIndices} sends a create-index
 * request that carries the prelert index settings and exactly one mapping for each
 * of the ten expected document types.
 */
public void testCreateJobRelatedIndicies() {
    MockClientBuilder clientBuilder = new MockClientBuilder(CLUSTER_NAME);
    ArgumentCaptor<CreateIndexRequest> captor = ArgumentCaptor.forClass(CreateIndexRequest.class);
    clientBuilder.createIndexRequest(JobResultsPersister.getJobIndexName("foo"), captor);

    Job.Builder job = buildJobBuilder("foo");
    JobProvider provider = createProvider(clientBuilder.build());

    // All request assertions live in onResponse. Track that it actually ran so the
    // test cannot pass vacuously when the listener is never invoked.
    // NOTE(review): relies on the mocked client answering create() synchronously,
    // as MockClientBuilder.createIndexRequest does via doAnswer.
    final java.util.concurrent.atomic.AtomicBoolean responseReceived =
            new java.util.concurrent.atomic.AtomicBoolean(false);

    provider.createJobRelatedIndices(job.build(), new ActionListener<Boolean>() {
        @Override
        public void onResponse(Boolean aBoolean) {
            responseReceived.set(true);

            CreateIndexRequest request = captor.getValue();
            assertNotNull(request);
            assertEquals(provider.prelertIndexSettings().build(), request.settings());

            // One mapping per document type stored in the job index.
            assertTrue(request.mappings().containsKey(Result.TYPE.getPreferredName()));
            assertTrue(request.mappings().containsKey(CategorizerState.TYPE));
            assertTrue(request.mappings().containsKey(CategoryDefinition.TYPE.getPreferredName()));
            assertTrue(request.mappings().containsKey(Quantiles.TYPE.getPreferredName()));
            assertTrue(request.mappings().containsKey(ModelState.TYPE.getPreferredName()));
            assertTrue(request.mappings().containsKey(ModelSnapshot.TYPE.getPreferredName()));
            assertTrue(request.mappings().containsKey(DataCounts.TYPE.getPreferredName()));
            assertTrue(request.mappings().containsKey(Usage.TYPE));
            assertTrue(request.mappings().containsKey(AuditMessage.TYPE.getPreferredName()));
            assertTrue(request.mappings().containsKey(AuditActivity.TYPE.getPreferredName()));

            // Guards against unexpected extra mappings being added.
            assertEquals(10, request.mappings().size());
        }

        @Override
        public void onFailure(Exception e) {
            fail(e.toString());
        }
    });

    assertTrue("createJobRelatedIndices did not invoke the listener's onResponse", responseReceived.get());
}
public void testCreateJob() throws InterruptedException, ExecutionException {
Job.Builder job = buildJobBuilder("marscapone");
job.setDescription("This is a very cheesy job");
AnalysisLimits limits = new AnalysisLimits(9878695309134L, null);
job.setAnalysisLimits(limits);
MockClientBuilder clientBuilder = new MockClientBuilder(CLUSTER_NAME).createIndexRequest("prelertresults-" + job.getId());
ArgumentCaptor<CreateIndexRequest> captor = ArgumentCaptor.forClass(CreateIndexRequest.class);
MockClientBuilder clientBuilder = new MockClientBuilder(CLUSTER_NAME).createIndexRequest("prelertresults-" + job.getId(), captor);
Client client = clientBuilder.build();
JobProvider provider = createProvider(client);

View File

@ -217,17 +217,12 @@ public class MockClientBuilder {
}
/**
 * Stubs {@code indicesAdminClient.create(...)} so that the {@code ActionListener}
 * passed as its second argument is answered with a mocked
 * {@code CreateIndexResponse}, then returns this builder.
 */
// NOTE(review): this span is a rendered diff, so lines from two variants of the method
// appear together (one taking only `index`, one also taking an ArgumentCaptor).
// Presumably only the ArgumentCaptor variant exists in the resulting file; confirm
// against the real source before editing.
@SuppressWarnings({ "rawtypes", "unchecked" })
public MockClientBuilder createIndexRequest(String index) {
// Matcher variant: the stub applies only to requests whose index() equals `index`.
ArgumentMatcher<CreateIndexRequest> argumentMatcher = new ArgumentMatcher<CreateIndexRequest>() {
@Override
public boolean matches(Object o) {
return index.equals(((CreateIndexRequest) o).index());
}
};
// Captor variant: the request is recorded through requestCapture. None of the lines
// belonging to this variant reference `index` (TODO confirm whether that is intended).
public MockClientBuilder createIndexRequest(String index, ArgumentCaptor<CreateIndexRequest> requestCapture) {
doAnswer(invocation -> {
// Argument 1 of create(...) is the listener; respond to it directly with a mock response.
((ActionListener) invocation.getArguments()[1]).onResponse(mock(CreateIndexResponse.class));
return null;
}).when(indicesAdminClient).create(argThat(argumentMatcher), any(ActionListener.class));
}).when(indicesAdminClient).create(requestCapture.capture(), any(ActionListener.class));
return this;
}