Add delayed datacheck to the datafeed job runner (#35387)
* ML: Adding missing datacheck to datafeedjob
* Adding client side and docs
* Making adjustments to validations
* Making values default to on, having more sensible limits
* Intermittent commit, still need to figure out interval
* Adjusting delayed data check interval
* updating docs
* Making parameter Boolean, so it is nullable
* bumping bwc to 7 before backport
* changing to version current
* moving delayed data check config to its own object
* Separation of duties for delayed data detection
* fixing checkstyles
* fixing checkstyles
* Adjusting default behavior so that null windows are allowed
* Mentioning the default value
* Fixing comments, syncing up validations
Parent: c7a2c6d549
Commit: f7ada9b29b
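For orientation before the diff, here is a minimal usage sketch of the feature this commit adds, written against the high-level REST client API introduced below (the job, datafeed, and index names are hypothetical):

import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
import org.elasticsearch.client.ml.datafeed.DelayedDataCheckConfig;
import org.elasticsearch.common.unit.TimeValue;

import java.util.Collections;

public class DelayedDataCheckExample {
    public static void main(String[] args) {
        // Enable the delayed data check with an explicit two-hour check_window.
        DatafeedConfig datafeed = DatafeedConfig.builder("my-datafeed", "my-job")
            .setIndices(Collections.singletonList("my-index"))
            .setDelayedDataCheckConfig(
                DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(2)))
            .build();

        // A null window keeps the check enabled and lets an appropriate window
        // be calculated when the real-time datafeed runs (the default behavior).
        DelayedDataCheckConfig autoWindow = DelayedDataCheckConfig.enabledDelayedDataCheckConfig(null);

        // Disable the check entirely.
        DelayedDataCheckConfig disabled = DelayedDataCheckConfig.disabledDelayedDataCheckConfig();
    }
}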
@@ -62,6 +62,7 @@ public class DatafeedConfig implements ToXContentObject {
     public static final ParseField AGGREGATIONS = new ParseField("aggregations");
     public static final ParseField SCRIPT_FIELDS = new ParseField("script_fields");
     public static final ParseField CHUNKING_CONFIG = new ParseField("chunking_config");
+    public static final ParseField DELAYED_DATA_CHECK_CONFIG = new ParseField("delayed_data_check_config");
 
     public static final ConstructingObjectParser<Builder, Void> PARSER = new ConstructingObjectParser<>(
         "datafeed_config", true, a -> new Builder((String)a[0], (String)a[1]));
@@ -88,6 +89,7 @@ public class DatafeedConfig implements ToXContentObject {
         }, SCRIPT_FIELDS);
         PARSER.declareInt(Builder::setScrollSize, SCROLL_SIZE);
         PARSER.declareObject(Builder::setChunkingConfig, ChunkingConfig.PARSER, CHUNKING_CONFIG);
+        PARSER.declareObject(Builder::setDelayedDataCheckConfig, DelayedDataCheckConfig.PARSER, DELAYED_DATA_CHECK_CONFIG);
     }
 
     private static BytesReference parseBytes(XContentParser parser) throws IOException {
@@ -107,10 +109,12 @@ public class DatafeedConfig implements ToXContentObject {
     private final List<SearchSourceBuilder.ScriptField> scriptFields;
     private final Integer scrollSize;
     private final ChunkingConfig chunkingConfig;
+    private final DelayedDataCheckConfig delayedDataCheckConfig;
 
     private DatafeedConfig(String id, String jobId, TimeValue queryDelay, TimeValue frequency, List<String> indices, List<String> types,
                            BytesReference query, BytesReference aggregations, List<SearchSourceBuilder.ScriptField> scriptFields,
-                           Integer scrollSize, ChunkingConfig chunkingConfig) {
+                           Integer scrollSize, ChunkingConfig chunkingConfig, DelayedDataCheckConfig delayedDataCheckConfig) {
         this.id = id;
         this.jobId = jobId;
         this.queryDelay = queryDelay;
@@ -122,6 +126,7 @@ public class DatafeedConfig implements ToXContentObject {
         this.scriptFields = scriptFields == null ? null : Collections.unmodifiableList(scriptFields);
         this.scrollSize = scrollSize;
         this.chunkingConfig = chunkingConfig;
+        this.delayedDataCheckConfig = delayedDataCheckConfig;
     }
 
     public String getId() {
@@ -168,6 +173,10 @@ public class DatafeedConfig implements ToXContentObject {
         return chunkingConfig;
     }
 
+    public DelayedDataCheckConfig getDelayedDataCheckConfig() {
+        return delayedDataCheckConfig;
+    }
+
     @Override
     public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
         builder.startObject();
@@ -204,6 +213,9 @@ public class DatafeedConfig implements ToXContentObject {
         if (chunkingConfig != null) {
             builder.field(CHUNKING_CONFIG.getPreferredName(), chunkingConfig);
         }
+        if (delayedDataCheckConfig != null) {
+            builder.field(DELAYED_DATA_CHECK_CONFIG.getPreferredName(), delayedDataCheckConfig);
+        }
 
         builder.endObject();
         return builder;
@@ -244,7 +256,8 @@ public class DatafeedConfig implements ToXContentObject {
             && Objects.equals(this.scrollSize, that.scrollSize)
             && Objects.equals(asMap(this.aggregations), asMap(that.aggregations))
             && Objects.equals(this.scriptFields, that.scriptFields)
-            && Objects.equals(this.chunkingConfig, that.chunkingConfig);
+            && Objects.equals(this.chunkingConfig, that.chunkingConfig)
+            && Objects.equals(this.delayedDataCheckConfig, that.delayedDataCheckConfig);
     }
 
     /**
@@ -255,7 +268,7 @@ public class DatafeedConfig implements ToXContentObject {
     @Override
     public int hashCode() {
         return Objects.hash(id, jobId, frequency, queryDelay, indices, types, asMap(query), scrollSize, asMap(aggregations), scriptFields,
-            chunkingConfig);
+            chunkingConfig, delayedDataCheckConfig);
     }
 
     public static Builder builder(String id, String jobId) {
@@ -275,6 +288,7 @@ public class DatafeedConfig implements ToXContentObject {
         private List<SearchSourceBuilder.ScriptField> scriptFields;
         private Integer scrollSize;
         private ChunkingConfig chunkingConfig;
+        private DelayedDataCheckConfig delayedDataCheckConfig;
 
         public Builder(String id, String jobId) {
             this.id = Objects.requireNonNull(id, ID.getPreferredName());
@@ -293,6 +307,7 @@ public class DatafeedConfig implements ToXContentObject {
             this.scriptFields = config.scriptFields;
             this.scrollSize = config.scrollSize;
             this.chunkingConfig = config.chunkingConfig;
+            this.delayedDataCheckConfig = config.getDelayedDataCheckConfig();
         }
 
         public Builder setIndices(List<String> indices) {
@@ -366,9 +381,23 @@ public class DatafeedConfig implements ToXContentObject {
             return this;
         }
 
+        /**
+         * This sets the {@link DelayedDataCheckConfig} settings.
+         *
+         * See {@link DelayedDataCheckConfig} for more information.
+         *
+         * @param delayedDataCheckConfig the delayed data check configuration
+         *                               Default value is enabled, with `check_window` being null. This means the true window is
+         *                               calculated when the real-time Datafeed runs.
+         */
+        public Builder setDelayedDataCheckConfig(DelayedDataCheckConfig delayedDataCheckConfig) {
+            this.delayedDataCheckConfig = delayedDataCheckConfig;
+            return this;
+        }
+
         public DatafeedConfig build() {
             return new DatafeedConfig(id, jobId, queryDelay, frequency, indices, types, query, aggregations, scriptFields, scrollSize,
-                chunkingConfig);
+                chunkingConfig, delayedDataCheckConfig);
         }
 
         private static BytesReference xContentToBytes(ToXContentObject object) throws IOException {
@@ -77,6 +77,9 @@ public class DatafeedUpdate implements ToXContentObject {
         }, DatafeedConfig.SCRIPT_FIELDS);
         PARSER.declareInt(Builder::setScrollSize, DatafeedConfig.SCROLL_SIZE);
         PARSER.declareObject(Builder::setChunkingConfig, ChunkingConfig.PARSER, DatafeedConfig.CHUNKING_CONFIG);
+        PARSER.declareObject(Builder::setDelayedDataCheckConfig,
+            DelayedDataCheckConfig.PARSER,
+            DatafeedConfig.DELAYED_DATA_CHECK_CONFIG);
     }
 
     private static BytesReference parseBytes(XContentParser parser) throws IOException {
@@ -96,10 +99,11 @@ public class DatafeedUpdate implements ToXContentObject {
     private final List<SearchSourceBuilder.ScriptField> scriptFields;
     private final Integer scrollSize;
     private final ChunkingConfig chunkingConfig;
+    private final DelayedDataCheckConfig delayedDataCheckConfig;
 
     private DatafeedUpdate(String id, String jobId, TimeValue queryDelay, TimeValue frequency, List<String> indices, List<String> types,
                            BytesReference query, BytesReference aggregations, List<SearchSourceBuilder.ScriptField> scriptFields,
-                           Integer scrollSize, ChunkingConfig chunkingConfig) {
+                           Integer scrollSize, ChunkingConfig chunkingConfig, DelayedDataCheckConfig delayedDataCheckConfig) {
         this.id = id;
         this.jobId = jobId;
         this.queryDelay = queryDelay;
@@ -111,6 +115,7 @@ public class DatafeedUpdate implements ToXContentObject {
         this.scriptFields = scriptFields;
         this.scrollSize = scrollSize;
         this.chunkingConfig = chunkingConfig;
+        this.delayedDataCheckConfig = delayedDataCheckConfig;
     }
 
     /**
@@ -146,6 +151,9 @@ public class DatafeedUpdate implements ToXContentObject {
             }
             builder.endObject();
         }
+        if (delayedDataCheckConfig != null) {
+            builder.field(DatafeedConfig.DELAYED_DATA_CHECK_CONFIG.getPreferredName(), delayedDataCheckConfig);
+        }
         addOptionalField(builder, DatafeedConfig.SCROLL_SIZE, scrollSize);
         addOptionalField(builder, DatafeedConfig.CHUNKING_CONFIG, chunkingConfig);
         builder.endObject();
@@ -198,6 +206,10 @@ public class DatafeedUpdate implements ToXContentObject {
         return chunkingConfig;
     }
 
+    public DelayedDataCheckConfig getDelayedDataCheckConfig() {
+        return delayedDataCheckConfig;
+    }
+
     private static Map<String, Object> asMap(BytesReference bytesReference) {
         return bytesReference == null ? null : XContentHelper.convertToMap(bytesReference, true, XContentType.JSON).v2();
     }
@@ -232,6 +244,7 @@ public class DatafeedUpdate implements ToXContentObject {
             && Objects.equals(asMap(this.query), asMap(that.query))
             && Objects.equals(this.scrollSize, that.scrollSize)
             && Objects.equals(asMap(this.aggregations), asMap(that.aggregations))
+            && Objects.equals(this.delayedDataCheckConfig, that.delayedDataCheckConfig)
             && Objects.equals(this.scriptFields, that.scriptFields)
             && Objects.equals(this.chunkingConfig, that.chunkingConfig);
     }
@@ -244,7 +257,7 @@ public class DatafeedUpdate implements ToXContentObject {
     @Override
     public int hashCode() {
         return Objects.hash(id, jobId, frequency, queryDelay, indices, types, asMap(query), scrollSize, asMap(aggregations), scriptFields,
-            chunkingConfig);
+            chunkingConfig, delayedDataCheckConfig);
     }
 
     public static Builder builder(String id) {
@@ -264,6 +277,7 @@ public class DatafeedUpdate implements ToXContentObject {
         private List<SearchSourceBuilder.ScriptField> scriptFields;
         private Integer scrollSize;
         private ChunkingConfig chunkingConfig;
+        private DelayedDataCheckConfig delayedDataCheckConfig;
 
         public Builder(String id) {
             this.id = Objects.requireNonNull(id, DatafeedConfig.ID.getPreferredName());
@@ -281,6 +295,7 @@ public class DatafeedUpdate implements ToXContentObject {
             this.scriptFields = config.scriptFields;
             this.scrollSize = config.scrollSize;
            this.chunkingConfig = config.chunkingConfig;
+            this.delayedDataCheckConfig = config.delayedDataCheckConfig;
         }
 
         public Builder setJobId(String jobId) {
@@ -359,9 +374,14 @@ public class DatafeedUpdate implements ToXContentObject {
             return this;
         }
 
+        public Builder setDelayedDataCheckConfig(DelayedDataCheckConfig delayedDataCheckConfig) {
+            this.delayedDataCheckConfig = delayedDataCheckConfig;
+            return this;
+        }
+
         public DatafeedUpdate build() {
             return new DatafeedUpdate(id, jobId, queryDelay, frequency, indices, types, query, aggregations, scriptFields, scrollSize,
-                chunkingConfig);
+                chunkingConfig, delayedDataCheckConfig);
         }
 
         private static BytesReference xContentToBytes(ToXContentObject object) throws IOException {
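A sketch of the corresponding update path added above (the datafeed id is hypothetical): only the delayed data check setting is carried in the update, so every other field is left unset and therefore unchanged when the update is applied:

import org.elasticsearch.client.ml.datafeed.DatafeedUpdate;
import org.elasticsearch.client.ml.datafeed.DelayedDataCheckConfig;
import org.elasticsearch.common.unit.TimeValue;

public class UpdateDelayedDataCheckExample {
    // Widen the check_window of an existing datafeed to one hour,
    // leaving all other datafeed settings untouched.
    public static DatafeedUpdate widenCheckWindow() {
        return DatafeedUpdate.builder("my-datafeed")
            .setDelayedDataCheckConfig(
                DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(1)))
            .build();
    }
}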
@@ -0,0 +1,130 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.datafeed;
+
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+
+import java.io.IOException;
+import java.util.Objects;
+
+/**
+ * The configuration object containing the delayed data check settings.
+ *
+ * See {@link DelayedDataCheckConfig#enabledDelayedDataCheckConfig(TimeValue)} for creating a new
+ * enabled data check with the given check_window.
+ *
+ * See {@link DelayedDataCheckConfig#disabledDelayedDataCheckConfig()} for creating a config for disabling
+ * delayed data checking.
+ */
+public class DelayedDataCheckConfig implements ToXContentObject {
+
+    public static final ParseField ENABLED = new ParseField("enabled");
+    public static final ParseField CHECK_WINDOW = new ParseField("check_window");
+
+    // These parsers follow the pattern that metadata is parsed leniently (to allow for enhancements), whilst config is parsed strictly
+    public static final ConstructingObjectParser<DelayedDataCheckConfig, Void> PARSER = new ConstructingObjectParser<>(
+        "delayed_data_check_config", true, a -> new DelayedDataCheckConfig((Boolean) a[0], (TimeValue) a[1]));
+    static {
+        PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), ENABLED);
+        PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
+            if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
+                return TimeValue.parseTimeValue(p.text(), CHECK_WINDOW.getPreferredName());
+            }
+            throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
+        }, CHECK_WINDOW, ObjectParser.ValueType.STRING);
+    }
+
+    /**
+     * This creates a new DelayedDataCheckConfig that has a check_window of the passed `timeValue`.
+     *
+     * We query the index up to the latest finalized bucket from this TimeValue in the past, looking to see if any data has been indexed
+     * since the data was read with the Datafeed.
+     *
+     * The window must be larger than the {@link org.elasticsearch.client.ml.job.config.AnalysisConfig#bucketSpan}, less than
+     * 24 hours, and span fewer than 10,000 buckets.
+     *
+     * @param timeValue The time length in the past from the latest finalized bucket to look for latent data.
+     *                  If `null` is provided, the appropriate window is calculated when it is used.
+     **/
+    public static DelayedDataCheckConfig enabledDelayedDataCheckConfig(TimeValue timeValue) {
+        return new DelayedDataCheckConfig(true, timeValue);
+    }
+
+    /**
+     * This creates a new DelayedDataCheckConfig that disables the data check.
+     */
+    public static DelayedDataCheckConfig disabledDelayedDataCheckConfig() {
+        return new DelayedDataCheckConfig(false, null);
+    }
+
+    private final boolean enabled;
+    private final TimeValue checkWindow;
+
+    DelayedDataCheckConfig(Boolean enabled, TimeValue checkWindow) {
+        this.enabled = enabled;
+        this.checkWindow = checkWindow;
+    }
+
+    public boolean isEnabled() {
+        return enabled;
+    }
+
+    @Nullable
+    public TimeValue getCheckWindow() {
+        return checkWindow;
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        builder.field(ENABLED.getPreferredName(), enabled);
+        if (checkWindow != null) {
+            builder.field(CHECK_WINDOW.getPreferredName(), checkWindow.getStringRep());
+        }
+        builder.endObject();
+        return builder;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(enabled, checkWindow);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        if (obj == null || getClass() != obj.getClass()) {
+            return false;
+        }
+
+        DelayedDataCheckConfig other = (DelayedDataCheckConfig) obj;
+        return Objects.equals(this.enabled, other.enabled) && Objects.equals(this.checkWindow, other.checkWindow);
+    }
+
+}
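As a quick illustration of how the new class serializes, here is a sketch; it assumes org.elasticsearch.common.Strings.toString(ToXContent) is available on the client classpath, which is not part of this diff:

import org.elasticsearch.client.ml.datafeed.DelayedDataCheckConfig;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.unit.TimeValue;

public class DelayedDataCheckConfigJsonExample {
    public static void main(String[] args) {
        // Prints {"enabled":true,"check_window":"1h"}
        System.out.println(Strings.toString(
            DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(1))));

        // Prints {"enabled":false}; a null check_window is simply omitted
        System.out.println(Strings.toString(DelayedDataCheckConfig.disabledDelayedDataCheckConfig()));
    }
}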
@@ -95,6 +95,7 @@ import org.elasticsearch.client.ml.datafeed.ChunkingConfig;
 import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
 import org.elasticsearch.client.ml.datafeed.DatafeedStats;
 import org.elasticsearch.client.ml.datafeed.DatafeedUpdate;
+import org.elasticsearch.client.ml.datafeed.DelayedDataCheckConfig;
 import org.elasticsearch.client.ml.job.config.AnalysisConfig;
 import org.elasticsearch.client.ml.job.config.AnalysisLimits;
 import org.elasticsearch.client.ml.job.config.DataDescription;
@@ -583,6 +584,14 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
         datafeedBuilder.setQueryDelay(TimeValue.timeValueMinutes(1)); // <1>
         // end::put-datafeed-config-set-query-delay
 
+        // tag::put-datafeed-config-set-delayed-data-check-config
+        datafeedBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig
+            .enabledDelayedDataCheckConfig(TimeValue.timeValueHours(1))); // <1>
+        // end::put-datafeed-config-set-delayed-data-check-config
+
+        // no need to accidentally trip internal validations due to job bucket size
+        datafeedBuilder.setDelayedDataCheckConfig(null);
+
         List<SearchSourceBuilder.ScriptField> scriptFields = Collections.emptyList();
         // tag::put-datafeed-config-set-script-fields
         datafeedBuilder.setScriptFields(scriptFields); // <1>
@@ -103,6 +103,9 @@ public class DatafeedConfigTests extends AbstractXContentTestCase<DatafeedConfig
         if (randomBoolean()) {
             builder.setChunkingConfig(ChunkingConfigTests.createRandomizedChunk());
         }
+        if (randomBoolean()) {
+            builder.setDelayedDataCheckConfig(DelayedDataCheckConfigTests.createRandomizedConfig());
+        }
         return builder;
     }
@@ -83,6 +83,9 @@ public class DatafeedUpdateTests extends AbstractXContentTestCase<DatafeedUpdate
         if (randomBoolean()) {
             builder.setChunkingConfig(ChunkingConfigTests.createRandomizedChunk());
         }
+        if (randomBoolean()) {
+            builder.setDelayedDataCheckConfig(DelayedDataCheckConfigTests.createRandomizedConfig());
+        }
         return builder.build();
     }
@@ -0,0 +1,65 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.client.ml.datafeed;
+
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractXContentTestCase;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class DelayedDataCheckConfigTests extends AbstractXContentTestCase<DelayedDataCheckConfig> {
+
+    @Override
+    protected DelayedDataCheckConfig createTestInstance() {
+        return createRandomizedConfig();
+    }
+
+    @Override
+    protected DelayedDataCheckConfig doParseInstance(XContentParser parser) {
+        return DelayedDataCheckConfig.PARSER.apply(parser, null);
+    }
+
+    @Override
+    protected boolean supportsUnknownFields() {
+        return true;
+    }
+
+    public void testEnabledDelayedDataCheckConfig() {
+        DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(5));
+        assertThat(delayedDataCheckConfig.isEnabled(), equalTo(true));
+        assertThat(delayedDataCheckConfig.getCheckWindow(), equalTo(TimeValue.timeValueHours(5)));
+    }
+
+    public void testDisabledDelayedDataCheckConfig() {
+        DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.disabledDelayedDataCheckConfig();
+        assertThat(delayedDataCheckConfig.isEnabled(), equalTo(false));
+        assertThat(delayedDataCheckConfig.getCheckWindow(), equalTo(null));
+    }
+
+    public static DelayedDataCheckConfig createRandomizedConfig() {
+        boolean enabled = randomBoolean();
+        TimeValue timeWindow = null;
+        if (enabled || randomBoolean()) {
+            timeWindow = TimeValue.timeValueMillis(randomLongBetween(1, 1_000));
+        }
+        return new DelayedDataCheckConfig(enabled, timeWindow);
+    }
+}
@@ -63,6 +63,17 @@ include-tagged::{doc-tests-file}[{api}-config-set-query-delay]
 --------------------------------------------------
 <1> The time interval behind real time that data is queried.
 
+["source","java",subs="attributes,callouts,macros"]
+--------------------------------------------------
+include-tagged::{doc-tests-file}[{api}-config-set-delayed-data-check-config]
+--------------------------------------------------
+<1> Sets the delayed data check configuration.
+The window must be larger than the job's bucket span, but smaller than 24 hours,
+and span fewer than 10,000 buckets.
+Defaults to `null`, which causes an appropriate window span to be calculated when
+the datafeed runs.
+To explicitly disable, pass `DelayedDataCheckConfig.disabledDelayedDataCheckConfig()`.
+
 ["source","java",subs="attributes,callouts,macros"]
 --------------------------------------------------
 include-tagged::{doc-tests-file}[{api}-config-set-script-fields]
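A minimal sketch of the disabled form, assuming the `datafeedBuilder` variable from the surrounding documentation test (this is not an `include-tagged` snippet from the test suite):

["source","java"]
--------------------------------------------------
datafeedBuilder.setDelayedDataCheckConfig(
    DelayedDataCheckConfig.disabledDelayedDataCheckConfig()); // turn the check off
--------------------------------------------------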
@@ -64,6 +64,11 @@ A {dfeed} resource has the following properties:
   example: `[]`. This property is provided for backwards compatibility with
   releases earlier than 6.0.0. For more information, see <<removal-of-types>>.
 
+`delayed_data_check_config`::
+  (object) Specifies whether the {dfeed} checks for missing data and, if so, how
+  large a window it checks. See <<ml-datafeed-delayed-data-check-config>>.
+  For example: `{"enabled": true, "check_window": "1h"}`
+
 [[ml-datafeed-chunking-config]]
 ==== Chunking Configuration Objects
 
@@ -86,6 +91,27 @@ A chunking configuration object has the following properties:
   This setting is only applicable when the mode is set to `manual`.
   For example: `3h`.
 
+[[ml-datafeed-delayed-data-check-config]]
+==== Delayed Data Check Configuration Objects
+
+The {dfeed} can optionally search over indices that have already been read in
+an effort to find whether any data has since been added to the index. If missing
+data is found, it is a good indication that the `query_delay` option is set too
+low and the data is being indexed after the {dfeed} has passed that moment in
+time.
+
+This check runs only on real-time {dfeeds}.
+
+The configuration object has the following properties:
+
+`enabled`::
+  (boolean) Specifies whether the {dfeed} periodically checks for data being
+  indexed after it has been read. Defaults to `true`.
+
+`check_window`::
+  (time units) The window of time before the latest finalized bucket that should
+  be searched for late data. Defaults to `null`, which causes an appropriate
+  `check_window` to be calculated when the real-time {dfeed} runs.
+
 [float]
 [[ml-datafeed-counts]]
 ==== {dfeed-cap} Counts
@@ -78,6 +78,10 @@ You must create a job before you create a {dfeed}. You can associate only one
   For example: `[]`. This property is provided for backwards compatibility with
   releases earlier than 6.0.0. For more information, see <<removal-of-types>>.
 
+`delayed_data_check_config`::
+  (object) Specifies whether the {dfeed} checks for missing data and, if so, how
+  large a window it checks. See <<ml-datafeed-delayed-data-check-config>>.
+
 For more information about these properties,
 see <<ml-datafeed-resource>>.
@@ -84,6 +84,7 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
     public static final ParseField SOURCE = new ParseField("_source");
     public static final ParseField CHUNKING_CONFIG = new ParseField("chunking_config");
     public static final ParseField HEADERS = new ParseField("headers");
+    public static final ParseField DELAYED_DATA_CHECK_CONFIG = new ParseField("delayed_data_check_config");
 
     // These parsers follow the pattern that metadata is parsed leniently (to allow for enhancements), whilst config is parsed strictly
     public static final ObjectParser<Builder, Void> LENIENT_PARSER = createParser(true);
@@ -124,7 +125,9 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
             // (For config, headers are explicitly transferred from the auth headers by code in the put/update datafeed actions.)
             parser.declareObject(Builder::setHeaders, (p, c) -> p.mapStrings(), HEADERS);
         }
+        parser.declareObject(Builder::setDelayedDataCheckConfig,
+            ignoreUnknownFields ? DelayedDataCheckConfig.LENIENT_PARSER : DelayedDataCheckConfig.STRICT_PARSER,
+            DELAYED_DATA_CHECK_CONFIG);
         return parser;
     }
 
@@ -149,10 +152,12 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
     private final Integer scrollSize;
     private final ChunkingConfig chunkingConfig;
     private final Map<String, String> headers;
+    private final DelayedDataCheckConfig delayedDataCheckConfig;
 
     private DatafeedConfig(String id, String jobId, TimeValue queryDelay, TimeValue frequency, List<String> indices, List<String> types,
                            QueryBuilder query, AggregatorFactories.Builder aggregations, List<SearchSourceBuilder.ScriptField> scriptFields,
-                           Integer scrollSize, ChunkingConfig chunkingConfig, Map<String, String> headers) {
+                           Integer scrollSize, ChunkingConfig chunkingConfig, Map<String, String> headers,
+                           DelayedDataCheckConfig delayedDataCheckConfig) {
         this.id = id;
         this.jobId = jobId;
         this.queryDelay = queryDelay;
@@ -165,6 +170,7 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
         this.scrollSize = scrollSize;
         this.chunkingConfig = chunkingConfig;
         this.headers = Collections.unmodifiableMap(headers);
+        this.delayedDataCheckConfig = delayedDataCheckConfig;
     }
 
     public DatafeedConfig(StreamInput in) throws IOException {
@@ -196,6 +202,11 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
         } else {
             this.headers = Collections.emptyMap();
         }
+        if (in.getVersion().onOrAfter(Version.CURRENT)) {
+            delayedDataCheckConfig = in.readOptionalWriteable(DelayedDataCheckConfig::new);
+        } else {
+            delayedDataCheckConfig = DelayedDataCheckConfig.defaultDelayedDataCheckConfig();
+        }
     }
 
     public String getId() {
@@ -260,6 +271,10 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
         return headers;
     }
 
+    public DelayedDataCheckConfig getDelayedDataCheckConfig() {
+        return delayedDataCheckConfig;
+    }
+
     @Override
     public void writeTo(StreamOutput out) throws IOException {
         out.writeString(id);
@@ -291,6 +306,9 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
         if (out.getVersion().onOrAfter(Version.V_6_2_0)) {
             out.writeMap(headers, StreamOutput::writeString, StreamOutput::writeString);
         }
+        if (out.getVersion().onOrAfter(Version.CURRENT)) {
+            out.writeOptionalWriteable(delayedDataCheckConfig);
+        }
     }
 
     @Override
@@ -328,6 +346,9 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
         if (headers.isEmpty() == false && params.paramAsBoolean(ToXContentParams.FOR_CLUSTER_STATE, false) == true) {
             builder.field(HEADERS.getPreferredName(), headers);
         }
+        if (delayedDataCheckConfig != null) {
+            builder.field(DELAYED_DATA_CHECK_CONFIG.getPreferredName(), delayedDataCheckConfig);
+        }
         return builder;
     }
 
@@ -359,13 +380,14 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
             && Objects.equals(this.aggregations, that.aggregations)
             && Objects.equals(this.scriptFields, that.scriptFields)
             && Objects.equals(this.chunkingConfig, that.chunkingConfig)
-            && Objects.equals(this.headers, that.headers);
+            && Objects.equals(this.headers, that.headers)
+            && Objects.equals(this.delayedDataCheckConfig, that.delayedDataCheckConfig);
     }
 
     @Override
     public int hashCode() {
         return Objects.hash(id, jobId, frequency, queryDelay, indices, types, query, scrollSize, aggregations, scriptFields,
-            chunkingConfig, headers);
+            chunkingConfig, headers, delayedDataCheckConfig);
     }
 
     @Override
@@ -438,6 +460,7 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
         private Integer scrollSize = DEFAULT_SCROLL_SIZE;
         private ChunkingConfig chunkingConfig;
         private Map<String, String> headers = Collections.emptyMap();
+        private DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.defaultDelayedDataCheckConfig();
 
         public Builder() {
         }
@@ -461,6 +484,7 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
             this.scrollSize = config.scrollSize;
             this.chunkingConfig = config.chunkingConfig;
             this.headers = config.headers;
+            this.delayedDataCheckConfig = config.getDelayedDataCheckConfig();
         }
 
         public void setId(String datafeedId) {
@@ -523,6 +547,10 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
             this.chunkingConfig = chunkingConfig;
         }
 
+        public void setDelayedDataCheckConfig(DelayedDataCheckConfig delayedDataCheckConfig) {
+            this.delayedDataCheckConfig = delayedDataCheckConfig;
+        }
+
         public DatafeedConfig build() {
             ExceptionsHelper.requireNonNull(id, ID.getPreferredName());
             ExceptionsHelper.requireNonNull(jobId, Job.ID.getPreferredName());
@@ -535,11 +563,12 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
             if (types == null || types.contains(null) || types.contains("")) {
                 throw invalidOptionValue(TYPES.getPreferredName(), types);
             }
 
             validateAggregations();
             setDefaultChunkingConfig();
             setDefaultQueryDelay();
             return new DatafeedConfig(id, jobId, queryDelay, frequency, indices, types, query, aggregations, scriptFields, scrollSize,
-                chunkingConfig, headers);
+                chunkingConfig, headers, delayedDataCheckConfig);
         }
 
         void validateAggregations() {
@@ -31,6 +31,30 @@ public final class DatafeedJobValidator {
             checkValidHistogramInterval(datafeedConfig, analysisConfig);
             checkFrequencyIsMultipleOfHistogramInterval(datafeedConfig);
         }
+
+        DelayedDataCheckConfig delayedDataCheckConfig = datafeedConfig.getDelayedDataCheckConfig();
+        TimeValue bucketSpan = analysisConfig.getBucketSpan();
+        if (delayedDataCheckConfig.isEnabled()) {
+            checkValidDelayedDataCheckConfig(bucketSpan, delayedDataCheckConfig);
+        }
+    }
+
+    private static void checkValidDelayedDataCheckConfig(TimeValue bucketSpan, DelayedDataCheckConfig delayedDataCheckConfig) {
+        TimeValue delayedDataCheckWindow = delayedDataCheckConfig.getCheckWindow();
+        if (delayedDataCheckWindow != null) { // NULL implies we calculate on use and thus is always valid
+            if (delayedDataCheckWindow.compareTo(bucketSpan) < 0) {
+                throw ExceptionsHelper.badRequestException(
+                    Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL,
+                        delayedDataCheckWindow,
+                        bucketSpan));
+            }
+            if (delayedDataCheckWindow.millis() > bucketSpan.millis() * DelayedDataCheckConfig.MAX_NUMBER_SPANABLE_BUCKETS) {
+                throw ExceptionsHelper.badRequestException(
+                    Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS,
+                        delayedDataCheckWindow,
+                        bucketSpan));
+            }
+        }
     }
 
     private static void checkSummaryCountFieldNameIsSet(AnalysisConfig analysisConfig) {
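To make the validation bounds concrete, here is a small standalone sketch of the same two checks (the values are illustrative; MAX_NUMBER_SPANABLE_BUCKETS is 10,000 as defined in the core DelayedDataCheckConfig introduced below):

import org.elasticsearch.common.unit.TimeValue;

public class CheckWindowBoundsExample {
    public static void main(String[] args) {
        TimeValue bucketSpan = TimeValue.timeValueMinutes(15);
        TimeValue checkWindow = TimeValue.timeValueHours(2);

        // Too small: the window must cover at least one bucket span.
        boolean tooSmall = checkWindow.compareTo(bucketSpan) < 0; // false: 2h >= 15m

        // Too large: the window may span at most 10,000 buckets (here 10,000 * 15m).
        boolean tooLarge = checkWindow.millis() > bucketSpan.millis() * 10_000L; // false

        System.out.println("valid: " + (!tooSmall && !tooLarge)); // valid: true
    }
}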
@ -5,6 +5,7 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.xpack.core.ml.datafeed;
|
package org.elasticsearch.xpack.core.ml.datafeed;
|
||||||
|
|
||||||
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.common.ParseField;
|
import org.elasticsearch.common.ParseField;
|
||||||
import org.elasticsearch.common.Strings;
|
import org.elasticsearch.common.Strings;
|
||||||
import org.elasticsearch.common.io.stream.StreamInput;
|
import org.elasticsearch.common.io.stream.StreamInput;
|
||||||
|
@ -68,6 +69,9 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
|
||||||
}, DatafeedConfig.SCRIPT_FIELDS);
|
}, DatafeedConfig.SCRIPT_FIELDS);
|
||||||
PARSER.declareInt(Builder::setScrollSize, DatafeedConfig.SCROLL_SIZE);
|
PARSER.declareInt(Builder::setScrollSize, DatafeedConfig.SCROLL_SIZE);
|
||||||
PARSER.declareObject(Builder::setChunkingConfig, ChunkingConfig.STRICT_PARSER, DatafeedConfig.CHUNKING_CONFIG);
|
PARSER.declareObject(Builder::setChunkingConfig, ChunkingConfig.STRICT_PARSER, DatafeedConfig.CHUNKING_CONFIG);
|
||||||
|
PARSER.declareObject(Builder::setDelayedDataCheckConfig,
|
||||||
|
DelayedDataCheckConfig.STRICT_PARSER,
|
||||||
|
DatafeedConfig.DELAYED_DATA_CHECK_CONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
private final String id;
|
private final String id;
|
||||||
|
@ -81,10 +85,11 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
|
||||||
private final List<SearchSourceBuilder.ScriptField> scriptFields;
|
private final List<SearchSourceBuilder.ScriptField> scriptFields;
|
||||||
private final Integer scrollSize;
|
private final Integer scrollSize;
|
||||||
private final ChunkingConfig chunkingConfig;
|
private final ChunkingConfig chunkingConfig;
|
||||||
|
private final DelayedDataCheckConfig delayedDataCheckConfig;
|
||||||
|
|
||||||
private DatafeedUpdate(String id, String jobId, TimeValue queryDelay, TimeValue frequency, List<String> indices, List<String> types,
|
private DatafeedUpdate(String id, String jobId, TimeValue queryDelay, TimeValue frequency, List<String> indices, List<String> types,
|
||||||
QueryBuilder query, AggregatorFactories.Builder aggregations, List<SearchSourceBuilder.ScriptField> scriptFields,
|
QueryBuilder query, AggregatorFactories.Builder aggregations, List<SearchSourceBuilder.ScriptField> scriptFields,
|
||||||
Integer scrollSize, ChunkingConfig chunkingConfig) {
|
Integer scrollSize, ChunkingConfig chunkingConfig, DelayedDataCheckConfig delayedDataCheckConfig) {
|
||||||
this.id = id;
|
this.id = id;
|
||||||
this.jobId = jobId;
|
this.jobId = jobId;
|
||||||
this.queryDelay = queryDelay;
|
this.queryDelay = queryDelay;
|
||||||
|
@ -96,6 +101,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
|
||||||
this.scriptFields = scriptFields;
|
this.scriptFields = scriptFields;
|
||||||
this.scrollSize = scrollSize;
|
this.scrollSize = scrollSize;
|
||||||
this.chunkingConfig = chunkingConfig;
|
this.chunkingConfig = chunkingConfig;
|
||||||
|
this.delayedDataCheckConfig = delayedDataCheckConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
public DatafeedUpdate(StreamInput in) throws IOException {
|
public DatafeedUpdate(StreamInput in) throws IOException {
|
||||||
|
@ -122,6 +128,11 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
|
||||||
}
|
}
|
||||||
this.scrollSize = in.readOptionalVInt();
|
this.scrollSize = in.readOptionalVInt();
|
||||||
this.chunkingConfig = in.readOptionalWriteable(ChunkingConfig::new);
|
this.chunkingConfig = in.readOptionalWriteable(ChunkingConfig::new);
|
||||||
|
if (in.getVersion().onOrAfter(Version.CURRENT)) {
|
||||||
|
delayedDataCheckConfig = in.readOptionalWriteable(DelayedDataCheckConfig::new);
|
||||||
|
} else {
|
||||||
|
delayedDataCheckConfig = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -159,6 +170,9 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
|
||||||
}
|
}
|
||||||
out.writeOptionalVInt(scrollSize);
|
out.writeOptionalVInt(scrollSize);
|
||||||
out.writeOptionalWriteable(chunkingConfig);
|
out.writeOptionalWriteable(chunkingConfig);
|
||||||
|
if (out.getVersion().onOrAfter(Version.CURRENT)) {
|
||||||
|
out.writeOptionalWriteable(delayedDataCheckConfig);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -185,6 +199,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
|
||||||
}
|
}
|
||||||
addOptionalField(builder, DatafeedConfig.SCROLL_SIZE, scrollSize);
|
addOptionalField(builder, DatafeedConfig.SCROLL_SIZE, scrollSize);
|
||||||
addOptionalField(builder, DatafeedConfig.CHUNKING_CONFIG, chunkingConfig);
|
addOptionalField(builder, DatafeedConfig.CHUNKING_CONFIG, chunkingConfig);
|
||||||
|
addOptionalField(builder, DatafeedConfig.DELAYED_DATA_CHECK_CONFIG, delayedDataCheckConfig);
|
||||||
builder.endObject();
|
builder.endObject();
|
||||||
return builder;
|
return builder;
|
||||||
}
|
}
|
||||||
|
@ -250,6 +265,10 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
|
||||||
return chunkingConfig;
|
return chunkingConfig;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public DelayedDataCheckConfig getDelayedDataCheckConfig() {
|
||||||
|
return delayedDataCheckConfig;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Applies the update to the given {@link DatafeedConfig}
|
* Applies the update to the given {@link DatafeedConfig}
|
||||||
* @return a new {@link DatafeedConfig} that contains the update
|
* @return a new {@link DatafeedConfig} that contains the update
|
||||||
|
@ -290,6 +309,9 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
|
||||||
if (chunkingConfig != null) {
|
if (chunkingConfig != null) {
|
||||||
builder.setChunkingConfig(chunkingConfig);
|
builder.setChunkingConfig(chunkingConfig);
|
||||||
}
|
}
|
||||||
|
if (delayedDataCheckConfig != null) {
|
||||||
|
builder.setDelayedDataCheckConfig(delayedDataCheckConfig);
|
||||||
|
}
|
||||||
|
|
     if (headers.isEmpty() == false) {
         // Adjust the request, adding security headers from the current thread context
@@ -328,6 +350,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
                 && Objects.equals(this.query, that.query)
                 && Objects.equals(this.scrollSize, that.scrollSize)
                 && Objects.equals(this.aggregations, that.aggregations)
+                && Objects.equals(this.delayedDataCheckConfig, that.delayedDataCheckConfig)
                 && Objects.equals(this.scriptFields, that.scriptFields)
                 && Objects.equals(this.chunkingConfig, that.chunkingConfig);
     }
@@ -335,7 +358,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
     @Override
     public int hashCode() {
         return Objects.hash(id, jobId, frequency, queryDelay, indices, types, query, scrollSize, aggregations, scriptFields,
-                chunkingConfig);
+                chunkingConfig, delayedDataCheckConfig);
     }

     @Override
@@ -352,6 +375,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
                 && (scrollSize == null || Objects.equals(scrollSize, datafeed.getQueryDelay()))
                 && (aggregations == null || Objects.equals(aggregations, datafeed.getAggregations()))
                 && (scriptFields == null || Objects.equals(scriptFields, datafeed.getScriptFields()))
+                && (delayedDataCheckConfig == null || Objects.equals(delayedDataCheckConfig, datafeed.getDelayedDataCheckConfig()))
                 && (chunkingConfig == null || Objects.equals(chunkingConfig, datafeed.getChunkingConfig()));
     }

@@ -368,6 +392,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
         private List<SearchSourceBuilder.ScriptField> scriptFields;
         private Integer scrollSize;
         private ChunkingConfig chunkingConfig;
+        private DelayedDataCheckConfig delayedDataCheckConfig;

         public Builder() {
         }
@@ -388,6 +413,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
             this.scriptFields = config.scriptFields;
             this.scrollSize = config.scrollSize;
             this.chunkingConfig = config.chunkingConfig;
+            this.delayedDataCheckConfig = config.delayedDataCheckConfig;
         }

         public void setId(String datafeedId) {
@@ -428,6 +454,10 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
             this.scriptFields = sorted;
         }

+        public void setDelayedDataCheckConfig(DelayedDataCheckConfig delayedDataCheckConfig) {
+            this.delayedDataCheckConfig = delayedDataCheckConfig;
+        }
+
         public void setScrollSize(int scrollSize) {
             this.scrollSize = scrollSize;
         }
@@ -438,7 +468,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {

         public DatafeedUpdate build() {
             return new DatafeedUpdate(id, jobId, queryDelay, frequency, indices, types, query, aggregations, scriptFields, scrollSize,
-                    chunkingConfig);
+                    chunkingConfig, delayedDataCheckConfig);
         }
     }
 }
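For orientation, a minimal sketch of how caller code could use the builder methods added above to enable the check when updating a datafeed; the datafeed id and the two-hour window are illustrative, not taken from this commit:

    DatafeedUpdate.Builder update = new DatafeedUpdate.Builder();
    update.setId("my-datafeed");                     // hypothetical datafeed id
    update.setDelayedDataCheckConfig(
            DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(2)));
    DatafeedUpdate datafeedUpdate = update.build();  // delayedDataCheckConfig now flows into the built update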
@@ -0,0 +1,127 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.core.ml.datafeed;
+
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.common.io.stream.StreamOutput;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.ObjectParser;
+import org.elasticsearch.common.xcontent.ToXContent;
+import org.elasticsearch.common.xcontent.ToXContentObject;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.xpack.core.ml.utils.time.TimeUtils;
+
+import java.io.IOException;
+import java.util.Objects;
+
+public class DelayedDataCheckConfig implements ToXContentObject, Writeable {
+
+    public static final TimeValue MAX_DELAYED_DATA_WINDOW = TimeValue.timeValueHours(24);
+    public static final int MAX_NUMBER_SPANABLE_BUCKETS = 10_000;
+
+    public static final ParseField ENABLED = new ParseField("enabled");
+    public static final ParseField CHECK_WINDOW = new ParseField("check_window");
+
+    // These parsers follow the pattern that metadata is parsed leniently (to allow for enhancements), whilst config is parsed strictly
+    public static final ConstructingObjectParser<DelayedDataCheckConfig, Void> LENIENT_PARSER = createParser(true);
+    public static final ConstructingObjectParser<DelayedDataCheckConfig, Void> STRICT_PARSER = createParser(false);
+
+    private static ConstructingObjectParser<DelayedDataCheckConfig, Void> createParser(boolean ignoreUnknownFields) {
+        ConstructingObjectParser<DelayedDataCheckConfig, Void> parser = new ConstructingObjectParser<>(
+            "delayed_data_check_config", ignoreUnknownFields, a -> new DelayedDataCheckConfig((Boolean) a[0], (TimeValue) a[1]));
+
+        parser.declareBoolean(ConstructingObjectParser.constructorArg(), ENABLED);
+        parser.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
+            if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
+                return TimeValue.parseTimeValue(p.text(), CHECK_WINDOW.getPreferredName());
+            }
+            throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
+        }, CHECK_WINDOW, ObjectParser.ValueType.STRING);
+
+        return parser;
+    }
+
+    public static DelayedDataCheckConfig defaultDelayedDataCheckConfig() {
+        return new DelayedDataCheckConfig(true, null);
+    }
+
+    public static DelayedDataCheckConfig enabledDelayedDataCheckConfig(TimeValue timeValue) {
+        return new DelayedDataCheckConfig(true, timeValue);
+    }
+
+    public static DelayedDataCheckConfig disabledDelayedDataCheckConfig() {
+        return new DelayedDataCheckConfig(false, null);
+    }
+
+    private final boolean enabled;
+    private final TimeValue checkWindow;
+
+    DelayedDataCheckConfig(Boolean enabled, TimeValue checkWindow) {
+        this.enabled = enabled;
+        if (enabled && checkWindow != null) {
+            TimeUtils.checkPositive(checkWindow, CHECK_WINDOW);
+            if (checkWindow.compareTo(MAX_DELAYED_DATA_WINDOW) > 0) {
+                throw new IllegalArgumentException("check_window [" + checkWindow.getStringRep() + "] must be less than or equal to [24h]");
+            }
+        }
+        this.checkWindow = checkWindow;
+    }
+
+    public DelayedDataCheckConfig(StreamInput in) throws IOException {
+        enabled = in.readBoolean();
+        checkWindow = in.readOptionalTimeValue();
+    }
+
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        out.writeBoolean(enabled);
+        out.writeOptionalTimeValue(checkWindow);
+    }
+
+    public boolean isEnabled() {
+        return enabled;
+    }
+
+    @Nullable
+    public TimeValue getCheckWindow() {
+        return checkWindow;
+    }
+
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
+        builder.startObject();
+        builder.field(ENABLED.getPreferredName(), enabled);
+        if (checkWindow != null) {
+            builder.field(CHECK_WINDOW.getPreferredName(), checkWindow.getStringRep());
+        }
+        builder.endObject();
+        return builder;
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(enabled, checkWindow);
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        if (obj == null || getClass() != obj.getClass()) {
+            return false;
+        }
+
+        DelayedDataCheckConfig other = (DelayedDataCheckConfig) obj;
+        return Objects.equals(this.enabled, other.enabled) && Objects.equals(this.checkWindow, other.checkWindow);
+    }
+
+}
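To summarize the semantics of the new class, a short sketch of what each factory method above produces and how toXContent renders it (the 2h window is an arbitrary example value):

    DelayedDataCheckConfig byDefault = DelayedDataCheckConfig.defaultDelayedDataCheckConfig();
    // enabled == true, checkWindow == null   -> renders as {"enabled":true}
    DelayedDataCheckConfig explicit = DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(2));
    // enabled == true, checkWindow == 2h     -> renders as {"enabled":true,"check_window":"2h"}
    DelayedDataCheckConfig disabled = DelayedDataCheckConfig.disabledDelayedDataCheckConfig();
    // enabled == false, checkWindow == null  -> renders as {"enabled":false}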
@@ -22,6 +22,13 @@ public final class Messages {
     public static final String DATAFEED_CONFIG_CANNOT_USE_SCRIPT_FIELDS_WITH_AGGS =
             "script_fields cannot be used in combination with aggregations";
     public static final String DATAFEED_CONFIG_INVALID_OPTION_VALUE = "Invalid {0} value ''{1}'' in datafeed configuration";
+    public static final String DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL =
+            "delayed_data_check_window [{0}] must be greater than the bucket_span [{1}]";
+    public static final String DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_LARGE =
+            "delayed_data_check_window [{0}] must be less than or equal to [24h]";
+    public static final String DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS =
+            "delayed_data_check_window [{0}] must be less than 10,000x the bucket_span [{1}]";
+
     public static final String DATAFEED_DOES_NOT_SUPPORT_JOB_WITH_LATENCY = "A job configured with datafeed cannot support latency";
     public static final String DATAFEED_NOT_FOUND = "No datafeed with id [{0}] exists";
     public static final String DATAFEED_AGGREGATIONS_REQUIRES_DATE_HISTOGRAM =
@@ -63,6 +70,9 @@ public final class Messages {
     public static final String JOB_AUDIT_DATAFEED_LOOKBACK_COMPLETED = "Datafeed lookback completed";
     public static final String JOB_AUDIT_DATAFEED_LOOKBACK_NO_DATA = "Datafeed lookback retrieved no data";
     public static final String JOB_AUDIT_DATAFEED_NO_DATA = "Datafeed has been retrieving no data for a while";
+    public static final String JOB_AUDIT_DATAFEED_MISSING_DATA =
+            "Datafeed has missed {0} documents due to ingest latency, latest bucket with missing data is [{1}]." +
+            " Consider increasing query_delay";
     public static final String JOB_AUDIT_DATAFEED_RECOVERED = "Datafeed has recovered data extraction and analysis";
     public static final String JOB_AUDIT_DATAFEED_STARTED_FROM_TO = "Datafeed started (from: {0} to: {1}) with frequency [{2}]";
     public static final String JOB_AUDIT_DATAFEED_STARTED_REALTIME = "Datafeed started in real-time";
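The new constants use MessageFormat-style placeholders like the existing ones; assuming the existing Messages.getMessage(String, Object...) helper (it is invoked this way by DatafeedJob further down), the audit message renders roughly like this, with illustrative values:

    String audit = Messages.getMessage(Messages.JOB_AUDIT_DATAFEED_MISSING_DATA,
            42, "2018-11-15T12:00:00.000Z");
    // -> "Datafeed has missed 42 documents due to ingest latency, latest bucket with
    //     missing data is [2018-11-15T12:00:00.000Z]. Consider increasing query_delay"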
@@ -87,6 +87,22 @@ public final class TimeUtils {
         checkMultiple(timeValue, baseUnit, field);
     }

+    /**
+     * Checks that the given {@code timeValue} is positive.
+     *
+     * <ul>
+     *   <li>1s is valid</li>
+     *   <li>-1s is invalid</li>
+     * </ul>
+     */
+    public static void checkPositive(TimeValue timeValue, ParseField field) {
+        long nanos = timeValue.getNanos();
+        if (nanos <= 0) {
+            throw new IllegalArgumentException(field.getPreferredName() + " cannot be less or equal than 0. Value = "
+                    + timeValue.toString());
+        }
+    }
+
     private static void checkNonNegative(TimeValue timeValue, ParseField field) {
         long nanos = timeValue.getNanos();
         if (nanos < 0) {
@@ -94,13 +110,7 @@ public final class TimeUtils {
         }
     }

-    private static void checkPositive(TimeValue timeValue, ParseField field) {
-        long nanos = timeValue.getNanos();
-        if (nanos <= 0) {
-            throw new IllegalArgumentException(field.getPreferredName() + " cannot be less or equal than 0. Value = "
-                    + timeValue.toString());
-        }
-    }

     /**
      * Check the given {@code timeValue} is a multiple of the {@code baseUnit}
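A quick illustration of the boundary behaviour of the now-public checkPositive; pairing it with the CHECK_WINDOW field mirrors its use in the DelayedDataCheckConfig constructor above:

    TimeUtils.checkPositive(TimeValue.timeValueSeconds(1), DelayedDataCheckConfig.CHECK_WINDOW);  // passes
    TimeUtils.checkPositive(TimeValue.timeValueSeconds(0), DelayedDataCheckConfig.CHECK_WINDOW);  // throws IllegalArgumentException
    TimeUtils.checkPositive(TimeValue.MINUS_ONE, DelayedDataCheckConfig.CHECK_WINDOW);            // throws IllegalArgumentException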
@@ -109,6 +109,9 @@ public class DatafeedConfigTests extends AbstractSerializingTestCase<DatafeedCon
         if (randomBoolean()) {
             builder.setChunkingConfig(ChunkingConfigTests.createRandomizedChunk());
         }
+        if (randomBoolean()) {
+            builder.setDelayedDataCheckConfig(DelayedDataCheckConfigTests.createRandomizedConfig(bucketSpanMillis));
+        }
         return builder.build();
     }

@@ -89,6 +89,9 @@ public class DatafeedUpdateTests extends AbstractSerializingTestCase<DatafeedUpd
         if (randomBoolean()) {
             builder.setChunkingConfig(ChunkingConfigTests.createRandomizedChunk());
         }
+        if (randomBoolean()) {
+            builder.setDelayedDataCheckConfig(DelayedDataCheckConfigTests.createRandomizedConfig(randomLongBetween(300_001, 400_000)));
+        }
         return builder.build();
     }

@@ -155,6 +158,7 @@ public class DatafeedUpdateTests extends AbstractSerializingTestCase<DatafeedUpd
         update.setScriptFields(Collections.singletonList(new SearchSourceBuilder.ScriptField("a", mockScript("b"), false)));
         update.setScrollSize(8000);
         update.setChunkingConfig(ChunkingConfig.newManual(TimeValue.timeValueHours(1)));
+        update.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(1)));

         DatafeedConfig updatedDatafeed = update.build().apply(datafeed, Collections.emptyMap());

@@ -169,6 +173,8 @@ public class DatafeedUpdateTests extends AbstractSerializingTestCase<DatafeedUpd
                 equalTo(Collections.singletonList(new SearchSourceBuilder.ScriptField("a", mockScript("b"), false))));
         assertThat(updatedDatafeed.getScrollSize(), equalTo(8000));
         assertThat(updatedDatafeed.getChunkingConfig(), equalTo(ChunkingConfig.newManual(TimeValue.timeValueHours(1))));
+        assertThat(updatedDatafeed.getDelayedDataCheckConfig().isEnabled(), equalTo(true));
+        assertThat(updatedDatafeed.getDelayedDataCheckConfig().getCheckWindow(), equalTo(TimeValue.timeValueHours(1)));
     }

     public void testApply_givenAggregations() {
@@ -0,0 +1,95 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.core.ml.datafeed;
+
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.test.AbstractSerializingTestCase;
+
+import java.io.IOException;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.nullValue;
+import static org.hamcrest.core.Is.is;
+
+public class DelayedDataCheckConfigTests extends AbstractSerializingTestCase<DelayedDataCheckConfig> {
+
+    @Override
+    protected DelayedDataCheckConfig createTestInstance() {
+        return createRandomizedConfig(100);
+    }
+
+    @Override
+    protected Writeable.Reader<DelayedDataCheckConfig> instanceReader() {
+        return DelayedDataCheckConfig::new;
+    }
+
+    @Override
+    protected DelayedDataCheckConfig doParseInstance(XContentParser parser) {
+        return DelayedDataCheckConfig.STRICT_PARSER.apply(parser, null);
+    }
+
+    public void testConstructor() {
+        expectThrows(IllegalArgumentException.class, () -> new DelayedDataCheckConfig(true, TimeValue.MINUS_ONE));
+        expectThrows(IllegalArgumentException.class, () -> new DelayedDataCheckConfig(true, TimeValue.timeValueHours(25)));
+    }
+
+    public void testEnabledDelayedDataCheckConfig() {
+        DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(5));
+        assertThat(delayedDataCheckConfig.isEnabled(), equalTo(true));
+        assertThat(delayedDataCheckConfig.getCheckWindow(), equalTo(TimeValue.timeValueHours(5)));
+    }
+
+    public void testDisabledDelayedDataCheckConfig() {
+        DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.disabledDelayedDataCheckConfig();
+        assertThat(delayedDataCheckConfig.isEnabled(), equalTo(false));
+        assertThat(delayedDataCheckConfig.getCheckWindow(), equalTo(null));
+    }
+
+    public void testDefaultDelayedDataCheckConfig() {
+        DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.defaultDelayedDataCheckConfig();
+        assertThat(delayedDataCheckConfig.isEnabled(), equalTo(true));
+        assertThat(delayedDataCheckConfig.getCheckWindow(), is(nullValue()));
+    }
+
+    public static DelayedDataCheckConfig createRandomizedConfig(long bucketSpanMillis) {
+        boolean enabled = randomBoolean();
+        TimeValue timeWindow = null;
+        if (enabled || randomBoolean()) {
+            // the time span is required to be at least 1 millisecond, so we use a custom method to generate a time value here
+            timeWindow = new TimeValue(randomLongBetween(bucketSpanMillis, bucketSpanMillis * 2));
+        }
+        return new DelayedDataCheckConfig(enabled, timeWindow);
+    }
+
+    @Override
+    protected DelayedDataCheckConfig mutateInstance(DelayedDataCheckConfig instance) throws IOException {
+        boolean enabled = instance.isEnabled();
+        TimeValue timeWindow = instance.getCheckWindow();
+        switch (between(0, 1)) {
+        case 0:
+            enabled = !enabled;
+            if (randomBoolean()) {
+                timeWindow = TimeValue.timeValueMillis(randomLongBetween(1, 1000));
+            } else {
+                timeWindow = null;
+            }
+            break;
+        case 1:
+            if (timeWindow == null) {
+                timeWindow = TimeValue.timeValueMillis(randomLongBetween(1, 1000));
+            } else {
+                timeWindow = new TimeValue(timeWindow.getMillis() + between(10, 100));
+            }
+            enabled = true;
+            break;
+        default:
+            throw new AssertionError("Illegal randomisation branch");
+        }
+        return new DelayedDataCheckConfig(enabled, timeWindow);
+    }
+}
@@ -46,7 +46,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisCon

         TimeValue bucketSpan = AnalysisConfig.Builder.DEFAULT_BUCKET_SPAN;
         if (randomBoolean()) {
-            bucketSpan = TimeValue.timeValueSeconds(randomIntBetween(1, 1_000_000));
+            bucketSpan = TimeValue.timeValueSeconds(randomIntBetween(1, 1_000));
             builder.setBucketSpan(bucketSpan);
         }
         if (isCategorization) {
@@ -20,22 +20,24 @@ import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
 import org.elasticsearch.xpack.core.ml.action.GetBucketsAction;
 import org.elasticsearch.xpack.core.ml.action.util.PageParams;
 import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
+import org.elasticsearch.xpack.core.ml.datafeed.DelayedDataCheckConfig;
 import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
 import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
 import org.elasticsearch.xpack.core.ml.job.config.Detector;
 import org.elasticsearch.xpack.core.ml.job.config.Job;
 import org.elasticsearch.xpack.core.ml.job.results.Bucket;
 import org.elasticsearch.xpack.core.ml.job.results.Result;
-import org.elasticsearch.xpack.ml.datafeed.DelayedDataDetector;
-import org.elasticsearch.xpack.ml.datafeed.DelayedDataDetector.BucketWithMissingData;
+import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetector;
+import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory;
+import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory.BucketWithMissingData;
 import org.junit.After;
 import org.junit.Before;

 import java.util.Collections;
 import java.util.Date;
 import java.util.List;
+import java.util.stream.Collectors;

-import static org.elasticsearch.xpack.ml.support.BaseMlIntegTestCase.createDatafeed;
 import static org.elasticsearch.xpack.ml.support.BaseMlIntegTestCase.createDatafeedBuilder;
 import static org.hamcrest.Matchers.equalTo;

@@ -64,7 +66,10 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
         final String jobId = "delayed-data-detection-job";
         Job.Builder job = createJob(jobId, TimeValue.timeValueMinutes(5), "count", null);

-        DatafeedConfig datafeedConfig = createDatafeed(job.getId() + "-datafeed", job.getId(), Collections.singletonList(index));
+        DatafeedConfig.Builder datafeedConfigBuilder =
+            createDatafeedBuilder(job.getId() + "-datafeed", job.getId(), Collections.singletonList(index));
+        datafeedConfigBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(12)));
+        DatafeedConfig datafeedConfig = datafeedConfigBuilder.build();
         registerJob(job);
         putJob(job);
         openJob(job.getId());
@@ -77,26 +82,32 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
         // Get the latest finalized bucket
         Bucket lastBucket = getLatestFinalizedBucket(jobId);

-        DelayedDataDetector delayedDataDetector =
-            new DelayedDataDetector(job.build(new Date()), datafeedConfig, TimeValue.timeValueHours(12), client());
+        DelayedDataDetector delayedDataDetector = newDetector(job.build(new Date()), datafeedConfig);

         List<BucketWithMissingData> response = delayedDataDetector.detectMissingData(lastBucket.getEpoch()*1000);
         assertThat(response.stream().mapToLong(BucketWithMissingData::getMissingDocumentCount).sum(), equalTo(0L));

         long missingDocs = randomIntBetween(32, 128);
         // Simply adding data within the current delayed data detection, the choice of 43100000 is arbitrary and within the window
-        // for the DelayedDataDetector
+        // for the DatafeedDelayedDataDetector
         writeData(logger, index, missingDocs, now - 43100000, lastBucket.getEpoch()*1000);

         response = delayedDataDetector.detectMissingData(lastBucket.getEpoch()*1000);
         assertThat(response.stream().mapToLong(BucketWithMissingData::getMissingDocumentCount).sum(), equalTo(missingDocs));
+        // Assert that they are returned in order
+        List<Long> timeStamps = response.stream().map(BucketWithMissingData::getTimeStamp).collect(Collectors.toList());
+        assertEquals(timeStamps.stream().sorted().collect(Collectors.toList()), timeStamps);
     }

     public void testMissingDataDetectionInSpecificBucket() throws Exception {
         final String jobId = "delayed-data-detection-job-missing-test-specific-bucket";
         Job.Builder job = createJob(jobId, TimeValue.timeValueMinutes(5), "count", null);

-        DatafeedConfig datafeedConfig = createDatafeed(job.getId() + "-datafeed", job.getId(), Collections.singletonList(index));
+        DatafeedConfig.Builder datafeedConfigBuilder =
+            createDatafeedBuilder(job.getId() + "-datafeed", job.getId(), Collections.singletonList(index));
+        datafeedConfigBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(12)));
+        DatafeedConfig datafeedConfig = datafeedConfigBuilder.build();
+
         registerJob(job);
         putJob(job);
         openJob(job.getId());
@@ -110,8 +121,7 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
         // Get the latest finalized bucket
         Bucket lastBucket = getLatestFinalizedBucket(jobId);

-        DelayedDataDetector delayedDataDetector =
-            new DelayedDataDetector(job.build(new Date()), datafeedConfig, TimeValue.timeValueHours(12), client());
+        DelayedDataDetector delayedDataDetector = newDetector(job.build(new Date()), datafeedConfig);

         long missingDocs = randomIntBetween(1, 10);

@@ -127,6 +137,10 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
             }
         }
         assertThat(hasBucketWithMissing, equalTo(true));
+
+        // Assert that they are returned in order
+        List<Long> timeStamps = response.stream().map(BucketWithMissingData::getTimeStamp).collect(Collectors.toList());
+        assertEquals(timeStamps.stream().sorted().collect(Collectors.toList()), timeStamps);
     }

     public void testMissingDataDetectionWithAggregationsAndQuery() throws Exception {
@@ -147,6 +161,8 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
             .interval(TimeValue.timeValueMinutes(5).millis())));
         datafeedConfigBuilder.setQuery(new RangeQueryBuilder("value").gte(numDocs/2));
         datafeedConfigBuilder.setFrequency(TimeValue.timeValueMinutes(5));
+        datafeedConfigBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(12)));
+
         DatafeedConfig datafeedConfig = datafeedConfigBuilder.build();
         registerJob(job);
         putJob(job);
@@ -160,19 +176,21 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
         // Get the latest finalized bucket
         Bucket lastBucket = getLatestFinalizedBucket(jobId);

-        DelayedDataDetector delayedDataDetector =
-            new DelayedDataDetector(job.build(new Date()), datafeedConfig, TimeValue.timeValueHours(12), client());
+        DelayedDataDetector delayedDataDetector = newDetector(job.build(new Date()), datafeedConfig);

         List<BucketWithMissingData> response = delayedDataDetector.detectMissingData(lastBucket.getEpoch()*1000);
         assertThat(response.stream().mapToLong(BucketWithMissingData::getMissingDocumentCount).sum(), equalTo(0L));

         long missingDocs = numDocs;
         // Simply adding data within the current delayed data detection, the choice of 43100000 is arbitrary and within the window
-        // for the DelayedDataDetector
+        // for the DatafeedDelayedDataDetector
         writeData(logger, index, missingDocs, now - 43100000, lastBucket.getEpoch()*1000);

         response = delayedDataDetector.detectMissingData(lastBucket.getEpoch()*1000);
         assertThat(response.stream().mapToLong(BucketWithMissingData::getMissingDocumentCount).sum(), equalTo((missingDocs+1)/2));
+        // Assert that they are returned in order
+        List<Long> timeStamps = response.stream().map(BucketWithMissingData::getTimeStamp).collect(Collectors.toList());
+        assertEquals(timeStamps.stream().sorted().collect(Collectors.toList()), timeStamps);
     }

     private Job.Builder createJob(String id, TimeValue bucketSpan, String function, String field) {
@@ -231,4 +249,8 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
         getBucketsRequest.setPageParams(new PageParams(0, 1));
         return getBuckets(getBucketsRequest).get(0);
     }
+
+    private DelayedDataDetector newDetector(Job job, DatafeedConfig datafeedConfig) {
+        return DelayedDataDetectorFactory.buildDetector(job, datafeedConfig, client());
+    }
 }
@@ -12,6 +12,7 @@ import org.elasticsearch.client.Client;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.util.concurrent.ThreadContext;
+import org.elasticsearch.common.xcontent.XContentElasticsearchExtension;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.core.internal.io.Streams;
 import org.elasticsearch.index.mapper.DateFieldMapper;
@@ -23,12 +24,16 @@ import org.elasticsearch.xpack.core.ml.datafeed.extractor.DataExtractor;
 import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
 import org.elasticsearch.xpack.core.ml.job.messages.Messages;
 import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
+import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetector;
+import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory.BucketWithMissingData;
 import org.elasticsearch.xpack.ml.datafeed.extractor.DataExtractorFactory;
 import org.elasticsearch.xpack.ml.notifications.Auditor;

 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Date;
+import java.util.List;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -41,6 +46,7 @@ class DatafeedJob {

     private static final Logger LOGGER = LogManager.getLogger(DatafeedJob.class);
     private static final int NEXT_TASK_DELAY_MS = 100;
+    static final long MISSING_DATA_CHECK_INTERVAL_MS = 900_000; // 15 minutes in ms

     private final Auditor auditor;
     private final String jobId;
@@ -50,15 +56,19 @@ class DatafeedJob {
     private final Client client;
     private final DataExtractorFactory dataExtractorFactory;
     private final Supplier<Long> currentTimeSupplier;
+    private final DelayedDataDetector delayedDataDetector;

     private volatile long lookbackStartTimeMs;
+    private volatile long latestFinalBucketEndTimeMs;
+    private volatile long lastDataCheckTimeMs;
+    private volatile int lastDataCheckAudit;
     private volatile Long lastEndTimeMs;
     private AtomicBoolean running = new AtomicBoolean(true);
     private volatile boolean isIsolated;

     DatafeedJob(String jobId, DataDescription dataDescription, long frequencyMs, long queryDelayMs,
                 DataExtractorFactory dataExtractorFactory, Client client, Auditor auditor, Supplier<Long> currentTimeSupplier,
-                long latestFinalBucketEndTimeMs, long latestRecordTimeMs) {
+                DelayedDataDetector delayedDataDetector, long latestFinalBucketEndTimeMs, long latestRecordTimeMs) {
         this.jobId = jobId;
         this.dataDescription = Objects.requireNonNull(dataDescription);
         this.frequencyMs = frequencyMs;
@@ -67,7 +77,8 @@ class DatafeedJob {
         this.client = client;
         this.auditor = auditor;
         this.currentTimeSupplier = currentTimeSupplier;
+        this.delayedDataDetector = delayedDataDetector;
+        this.latestFinalBucketEndTimeMs = latestFinalBucketEndTimeMs;
         long lastEndTime = Math.max(latestFinalBucketEndTimeMs, latestRecordTimeMs);
         if (lastEndTime > 0) {
             lastEndTimeMs = lastEndTime;
@@ -151,9 +162,49 @@ class DatafeedJob {
         request.setCalcInterim(true);
         request.setAdvanceTime(String.valueOf(end));
         run(start, end, request);
+        checkForMissingDataIfNecessary();
         return nextRealtimeTimestamp();
     }

+    private void checkForMissingDataIfNecessary() {
+        if (isRunning() && !isIsolated && checkForMissingDataTriggered()) {
+
+            // Keep track of the last bucket time for which we did a missing data check
+            this.lastDataCheckTimeMs = this.currentTimeSupplier.get();
+            List<BucketWithMissingData> missingDataBuckets = delayedDataDetector.detectMissingData(latestFinalBucketEndTimeMs);
+            if (missingDataBuckets.isEmpty() == false) {
+
+                long totalRecordsMissing = missingDataBuckets.stream()
+                    .mapToLong(BucketWithMissingData::getMissingDocumentCount)
+                    .sum();
+                // The response is sorted by asc timestamp, so the last entry is the last bucket
+                Date lastBucketDate = missingDataBuckets.get(missingDataBuckets.size() - 1).getBucket().getTimestamp();
+                int newAudit = Objects.hash(totalRecordsMissing, lastBucketDate);
+                if (newAudit != lastDataCheckAudit) {
+                    auditor.warning(jobId,
+                        Messages.getMessage(Messages.JOB_AUDIT_DATAFEED_MISSING_DATA, totalRecordsMissing,
+                            XContentElasticsearchExtension.DEFAULT_DATE_PRINTER.print(lastBucketDate.getTime())));
+                    lastDataCheckAudit = newAudit;
+                }
+            }
+        }
+    }
+
+    /**
+     * We wait a static interval of 15 minutes until the next missing data check.
+     *
+     * However, if our delayed data window is smaller than that, we will probably want to check at every available window (if freq. allows).
+     * This helps us to miss as few buckets in the delayed data check as possible.
+     *
+     * If our frequency/query delay are longer than our default interval or window size, we will end up looking for missing data on
+     * every real-time trigger. This should be OK, as we are pulling from the index at such a slow pace that another query will
+     * probably not even be noticeable at such a large timescale.
+     */
+    private boolean checkForMissingDataTriggered() {
+        return this.currentTimeSupplier.get() > this.lastDataCheckTimeMs
+            + Math.min(MISSING_DATA_CHECK_INTERVAL_MS, delayedDataDetector.getWindow());
+    }
+
     /**
      * Stops the datafeed job
      *
@@ -260,7 +311,10 @@ class DatafeedJob {
         // we call flush the job is closed. Thus, we don't flush unless the
         // datafeed is still running.
         if (isRunning() && !isIsolated) {
-            flushJob(flushRequest);
+            Date lastFinalizedBucketEnd = flushJob(flushRequest).getLastFinalizedBucketEnd();
+            if (lastFinalizedBucketEnd != null) {
+                this.latestFinalBucketEndTimeMs = lastFinalizedBucketEnd.getTime();
+            }
         }

         if (recordCount == 0) {
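To make the trigger condition above concrete, a worked example under the logic just added (the 10-minute window is hypothetical):

    long defaultInterval = DatafeedJob.MISSING_DATA_CHECK_INTERVAL_MS;  // 900_000 ms (15 minutes)
    long windowMs = 600_000L;                                           // hypothetical 10-minute check window
    long effectiveWait = Math.min(defaultInterval, windowMs);           // 600_000 ms: the smaller value wins
    // A real-time run at time t only performs a new check when t > lastDataCheckTimeMs + effectiveWait.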
@@ -13,6 +13,8 @@ import org.elasticsearch.xpack.core.ml.action.util.QueryPage;
 import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
 import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
 import org.elasticsearch.xpack.core.ml.job.config.Job;
+import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetector;
+import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory;
 import org.elasticsearch.xpack.ml.job.persistence.BucketsQueryBuilder;
 import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
 import org.elasticsearch.xpack.core.ml.job.results.Bucket;
@@ -46,8 +48,9 @@ public class DatafeedJobBuilder {
         Consumer<Context> contextHanlder = context -> {
             TimeValue frequency = getFrequencyOrDefault(datafeed, job);
             TimeValue queryDelay = datafeed.getQueryDelay();
+            DelayedDataDetector delayedDataDetector = DelayedDataDetectorFactory.buildDetector(job, datafeed, client);
             DatafeedJob datafeedJob = new DatafeedJob(job.getId(), buildDataDescription(job), frequency.millis(), queryDelay.millis(),
-                    context.dataExtractorFactory, client, auditor, currentTimeSupplier,
+                    context.dataExtractorFactory, client, auditor, currentTimeSupplier, delayedDataDetector,
                     context.latestFinalBucketEndMs, context.latestRecordTimeMs);
             listener.onResponse(datafeedJob);
         };
@@ -3,26 +3,26 @@
  * or more contributor license agreements. Licensed under the Elastic License;
  * you may not use this file except in compliance with the Elastic License.
  */
-package org.elasticsearch.xpack.ml.datafeed;
+package org.elasticsearch.xpack.ml.datafeed.delayeddatacheck;

 import org.elasticsearch.action.search.SearchAction;
 import org.elasticsearch.action.search.SearchRequest;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.client.Client;
-import org.elasticsearch.common.unit.TimeValue;
 import org.elasticsearch.common.util.concurrent.ThreadContext;
+import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
 import org.elasticsearch.search.builder.SearchSourceBuilder;
 import org.elasticsearch.xpack.core.ml.action.GetBucketsAction;
 import org.elasticsearch.xpack.core.ml.action.util.PageParams;
-import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
 import org.elasticsearch.xpack.core.ml.datafeed.extractor.ExtractorUtils;
-import org.elasticsearch.xpack.core.ml.job.config.Job;
+import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory.BucketWithMissingData;
 import org.elasticsearch.xpack.core.ml.job.results.Bucket;
 import org.elasticsearch.xpack.core.ml.utils.Intervals;
 import org.joda.time.DateTime;

+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -35,32 +35,33 @@ import static org.elasticsearch.xpack.core.ClientHelper.stashWithOrigin;
 /**
  * This class will search the buckets and indices over a given window to determine if any data is missing
  */
-public class DelayedDataDetector {
+public class DatafeedDelayedDataDetector implements DelayedDataDetector {

     private static final String DATE_BUCKETS = "date_buckets";

     private final long bucketSpan;
     private final long window;
-    private final DatafeedConfig datafeedConfig;
     private final Client client;
-    private final Job job;
+    private final String timeField;
+    private final String jobId;
+    private final QueryBuilder datafeedQuery;
+    private final String[] datafeedIndices;

-    public DelayedDataDetector(Job job, DatafeedConfig datafeedConfig, TimeValue window, Client client) {
-        this.job = job;
-        this.bucketSpan = job.getAnalysisConfig().getBucketSpan().millis();
-        this.datafeedConfig = datafeedConfig;
-        long windowMillis = window.millis();
-        if (windowMillis < bucketSpan) {
-            throw new IllegalArgumentException("[window] must be greater or equal to the [bucket_span]");
-        }
-        if (Intervals.alignToFloor(windowMillis/bucketSpan, bucketSpan) >= 10000) {
-            throw new IllegalArgumentException("[window] must contain less than 10000 buckets at the current [bucket_span]");
-        }
-        this.window = windowMillis;
+    DatafeedDelayedDataDetector(long bucketSpan, long window, String jobId, String timeField, QueryBuilder datafeedQuery,
+                                String[] datafeedIndices, Client client) {
+        this.bucketSpan = bucketSpan;
+        this.window = window;
+        this.jobId = jobId;
+        this.timeField = timeField;
+        this.datafeedQuery = datafeedQuery;
+        this.datafeedIndices = datafeedIndices;
         this.client = client;
     }

     /**
-     * This method looks at the {@link DatafeedConfig} from {@code latestFinalizedBucket - window} to {@code latestFinalizedBucket}.
+     * This method looks at the {@link DatafeedDelayedDataDetector#datafeedIndices}
+     * from {@code latestFinalizedBucket - window} to {@code latestFinalizedBucket} and compares the document counts with the
+     * {@link DatafeedDelayedDataDetector#jobId}'s finalized buckets' event counts.
      *
      * It is done synchronously, and can block for a considerable amount of time, it should only be executed within the appropriate
      * thread pool.
@@ -68,9 +69,15 @@ public class DelayedDataDetector {
      * @param latestFinalizedBucketMs The latest finalized bucket timestamp in milliseconds, signifies the end of the time window check
      * @return A List of {@link BucketWithMissingData} objects that contain each bucket with the current number of missing docs
      */
+    @Override
     public List<BucketWithMissingData> detectMissingData(long latestFinalizedBucketMs) {
         final long end = Intervals.alignToFloor(latestFinalizedBucketMs, bucketSpan);
         final long start = Intervals.alignToFloor(latestFinalizedBucketMs - window, bucketSpan);
+
+        if (end <= start) {
+            return Collections.emptyList();
+        }
+
         List<Bucket> finalizedBuckets = checkBucketEvents(start, end);
         Map<Long, Long> indexedData = checkCurrentBucketEventCount(start, end);
         return finalizedBuckets.stream()
@@ -81,10 +88,17 @@ public class DelayedDataDetector {
             .collect(Collectors.toList());
     }

+    @Override
+    public long getWindow() {
+        return window;
+    }
+
     private List<Bucket> checkBucketEvents(long start, long end) {
-        GetBucketsAction.Request request = new GetBucketsAction.Request(job.getId());
+        GetBucketsAction.Request request = new GetBucketsAction.Request(jobId);
         request.setStart(Long.toString(start));
         request.setEnd(Long.toString(end));
+        request.setSort("timestamp");
+        request.setDescending(false);
         request.setExcludeInterim(true);
         request.setPageParams(new PageParams(0, (int)((end - start)/bucketSpan)));

@@ -95,13 +109,12 @@ public class DelayedDataDetector {
     }

     private Map<Long, Long> checkCurrentBucketEventCount(long start, long end) {
-        String timeField = job.getDataDescription().getTimeField();
         SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
             .size(0)
             .aggregation(new DateHistogramAggregationBuilder(DATE_BUCKETS).interval(bucketSpan).field(timeField))
-            .query(ExtractorUtils.wrapInTimeRangeQuery(datafeedConfig.getQuery(), timeField, start, end));
+            .query(ExtractorUtils.wrapInTimeRangeQuery(datafeedQuery, timeField, start, end));

-        SearchRequest searchRequest = new SearchRequest(datafeedConfig.getIndices().toArray(new String[0])).source(searchSourceBuilder);
+        SearchRequest searchRequest = new SearchRequest(datafeedIndices).source(searchSourceBuilder);
         try (ThreadContext.StoredContext ignore = stashWithOrigin(client.threadPool().getThreadContext(), ML_ORIGIN)) {
             SearchResponse response = client.execute(SearchAction.INSTANCE, searchRequest).actionGet();
             List<? extends Histogram.Bucket> buckets = ((Histogram)response.getAggregations().get(DATE_BUCKETS)).getBuckets();
@@ -132,27 +145,4 @@ public class DelayedDataDetector {
     private static long calculateMissing(Map<Long, Long> indexedData, Bucket bucket) {
         return indexedData.getOrDefault(bucket.getEpoch() * 1000, 0L) - bucket.getEventCount();
     }
-
-    public static class BucketWithMissingData {
-
-        private final long missingDocumentCount;
-        private final Bucket bucket;
-
-        static BucketWithMissingData fromMissingAndBucket(long missingDocumentCount, Bucket bucket) {
-            return new BucketWithMissingData(missingDocumentCount, bucket);
-        }
-
-        private BucketWithMissingData(long missingDocumentCount, Bucket bucket) {
-            this.missingDocumentCount = missingDocumentCount;
-            this.bucket = bucket;
-        }
-
-        public Bucket getBucket() {
-            return bucket;
-        }
-
-        public long getMissingDocumentCount() {
-            return missingDocumentCount;
-        }
-    }
 }
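The per-bucket arithmetic in calculateMissing above is plain subtraction; a worked example with made-up numbers:

    // If the index now holds 120 documents for the bucket starting at epoch second 1542280500,
    // but the finalized bucket only counted 100 events, then 20 documents arrived after finalization:
    Map<Long, Long> indexedData = new HashMap<>();
    indexedData.put(1_542_280_500_000L, 120L);  // keyed by bucket timestamp in ms, as in checkCurrentBucketEventCount
    long eventCount = 100L;                     // what bucket.getEventCount() would return
    long missing = indexedData.getOrDefault(1_542_280_500_000L, 0L) - eventCount;  // -> 20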
@@ -0,0 +1,14 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.datafeed.delayeddatacheck;
+
+import java.util.List;
+
+public interface DelayedDataDetector {
+    List<DelayedDataDetectorFactory.BucketWithMissingData> detectMissingData(long endingTimeStamp);
+
+    long getWindow();
+}
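For orientation, here is a minimal sketch of how a caller might drive this two-method interface. The class below is illustrative only: it is not the committed DatafeedJob code, and it assumes it lives in the same package as the types above; the field names and polling cadence are invented for the example:

// Hypothetical consumer of DelayedDataDetector (assumed names, not from this
// commit): run the check at most once per interval and report what is missing.
import java.util.List;

class DelayedDataCheckLoop {

    private final DelayedDataDetector detector; // real detector or the null object
    private long lastCheckTimeMs;

    DelayedDataCheckLoop(DelayedDataDetector detector) {
        this.detector = detector;
    }

    /** Runs the delayed-data check at most once per checkIntervalMs. */
    void maybeCheck(long nowMs, long checkIntervalMs, long latestFinalBucketEndMs) {
        if (nowMs - lastCheckTimeMs < checkIntervalMs) {
            return; // too soon since the last check
        }
        lastCheckTimeMs = nowMs;
        List<DelayedDataDetectorFactory.BucketWithMissingData> missing =
            detector.detectMissingData(latestFinalBucketEndMs);
        for (DelayedDataDetectorFactory.BucketWithMissingData b : missing) {
            System.out.println("bucket " + b.getTimeStamp() + " is missing "
                + b.getMissingDocumentCount() + " document(s)");
        }
    }
}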
@@ -0,0 +1,125 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.datafeed.delayeddatacheck;
+
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
+import org.elasticsearch.xpack.core.ml.datafeed.DelayedDataCheckConfig;
+import org.elasticsearch.xpack.core.ml.job.config.Job;
+import org.elasticsearch.xpack.core.ml.job.messages.Messages;
+import org.elasticsearch.xpack.core.ml.job.results.Bucket;
+
+import java.util.Objects;
+
+/**
+ * Builds the appropriate {@link DelayedDataDetector} implementation, with the appropriate settings, given the parameters.
+ */
+public class DelayedDataDetectorFactory {
+
+    // There are eight 15min buckets in a two hour span, so matching that number as the fallback for very long buckets
+    private static final int FALLBACK_NUMBER_OF_BUCKETS_TO_SPAN = 8;
+    private static final TimeValue DEFAULT_CHECK_WINDOW = TimeValue.timeValueHours(2);
+
+    /**
+     * This will build the appropriate detector given the parameters.
+     *
+     * If {@link DatafeedConfig#getDelayedDataCheckConfig()} is not `isEnabled()`, then a {@link NullDelayedDataDetector} is returned, which
+     * does not do any checks, and only supplies an empty collection.
+     *
+     * @param job The {@link Job} object for the given `datafeedConfig`
+     * @param datafeedConfig The {@link DatafeedConfig} for which to create the {@link DelayedDataDetector}
+     * @param client The {@link Client} capable of taking action against the ES Cluster.
+     * @return A new {@link DelayedDataDetector}
+     */
+    public static DelayedDataDetector buildDetector(Job job, DatafeedConfig datafeedConfig, Client client) {
+        if (datafeedConfig.getDelayedDataCheckConfig().isEnabled()) {
+            long window = validateAndCalculateWindowLength(job.getAnalysisConfig().getBucketSpan(),
+                datafeedConfig.getDelayedDataCheckConfig().getCheckWindow());
+            long bucketSpan = job.getAnalysisConfig().getBucketSpan() == null ? 0 : job.getAnalysisConfig().getBucketSpan().millis();
+            return new DatafeedDelayedDataDetector(bucketSpan,
+                window,
+                job.getId(),
+                job.getDataDescription().getTimeField(),
+                datafeedConfig.getQuery(),
+                datafeedConfig.getIndices().toArray(new String[0]),
+                client);
+        } else {
+            return new NullDelayedDataDetector();
+        }
+    }
+
+    private static long validateAndCalculateWindowLength(TimeValue bucketSpan, TimeValue currentWindow) {
+        if (bucketSpan == null) {
+            return 0;
+        }
+        if (currentWindow == null) { // we should provide a good default as the user did not specify a window
+            if(bucketSpan.compareTo(DEFAULT_CHECK_WINDOW) >= 0) {
+                return FALLBACK_NUMBER_OF_BUCKETS_TO_SPAN * bucketSpan.millis();
+            } else {
+                return DEFAULT_CHECK_WINDOW.millis();
+            }
+        }
+        if (currentWindow.compareTo(bucketSpan) < 0) {
+            throw new IllegalArgumentException(
+                Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL, currentWindow.getStringRep(),
+                    bucketSpan.getStringRep()));
+        } else if (currentWindow.millis() > bucketSpan.millis() * DelayedDataCheckConfig.MAX_NUMBER_SPANABLE_BUCKETS) {
+            throw new IllegalArgumentException(
+                Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS, currentWindow.getStringRep(),
+                    bucketSpan.getStringRep()));
+        }
+        return currentWindow.millis();
+    }
+
+    public static class BucketWithMissingData {
+
+        private final long missingDocumentCount;
+        private final Bucket bucket;
+
+        public static BucketWithMissingData fromMissingAndBucket(long missingDocumentCount, Bucket bucket) {
+            return new BucketWithMissingData(missingDocumentCount, bucket);
+        }
+
+        private BucketWithMissingData(long missingDocumentCount, Bucket bucket) {
+            this.missingDocumentCount = missingDocumentCount;
+            this.bucket = bucket;
+        }
+
+        public long getTimeStamp() {
+            return bucket.getEpoch();
+        }
+
+        public Bucket getBucket() {
+            return bucket;
+        }
+
+        public long getMissingDocumentCount() {
+            return missingDocumentCount;
+        }
+
+        @Override
+        public boolean equals(Object other) {
+            if (other == this) {
+                return true;
+            }
+
+            if (other == null || getClass() != other.getClass()) {
+                return false;
+            }
+
+            BucketWithMissingData that = (BucketWithMissingData) other;
+
+            return Objects.equals(that.bucket, bucket) && Objects.equals(that.missingDocumentCount, missingDocumentCount);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(bucket, missingDocumentCount);
+        }
+    }
+
+}
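The defaulting in validateAndCalculateWindowLength is worth tracing: when the user supplies no window, bucket spans shorter than two hours fall back to the fixed two-hour window, while spans of two hours or more get eight buckets' worth of time. A standalone restatement of just that arithmetic, in plain millis so it runs without the Elasticsearch classes on the classpath:

// Sketch of the factory's defaulting rule using plain long millis in place
// of TimeValue; the constants mirror FALLBACK_NUMBER_OF_BUCKETS_TO_SPAN and
// DEFAULT_CHECK_WINDOW above.
public class WindowDefaultingSketch {

    static final int FALLBACK_BUCKETS = 8;
    static final long DEFAULT_WINDOW_MS = 2 * 60 * 60 * 1000L; // two hours

    static long defaultWindowMs(long bucketSpanMs) {
        return bucketSpanMs >= DEFAULT_WINDOW_MS
            ? FALLBACK_BUCKETS * bucketSpanMs // very long buckets: span eight of them
            : DEFAULT_WINDOW_MS;              // otherwise: the fixed two-hour window
    }

    public static void main(String[] args) {
        System.out.println(defaultWindowMs(2_000L));              // 7200000  (2h, for a 2s bucket span)
        System.out.println(defaultWindowMs(3 * 60 * 60 * 1000L)); // 86400000 (24h = 8 * 3h bucket span)
    }
}

Both outputs line up with the window expectations asserted in DelayedDataDetectorFactoryTests further down.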
@@ -0,0 +1,35 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.datafeed.delayeddatacheck;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * This class will always return an {@link Collections#emptyList()}.
+ */
+public class NullDelayedDataDetector implements DelayedDataDetector {
+
+    /**
+     * Always returns an empty collection
+     * @param unusedTimeStamp unused Parameter
+     * @return {@link Collections#emptyList()}
+     */
+    @Override
+    public List<DelayedDataDetectorFactory.BucketWithMissingData> detectMissingData(long unusedTimeStamp) {
+        return Collections.emptyList();
+    }
+
+    /**
+     * Always returns 0
+     * @return a 0
+     */
+    @Override
+    public long getWindow() {
+        return 0L;
+    }
+
+}
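NullDelayedDataDetector is a textbook null object: the factory hands it out when the check is disabled, so callers never need to branch on enabled versus disabled. A tiny sketch of the effect, assuming it runs in the same package as the classes above:

// Demonstrates the null-object behavior: the call is always safe and simply
// reports nothing when the delayed-data check is disabled.
import java.util.List;

public class NullObjectSketch {
    public static void main(String[] args) {
        // What the factory returns for a disabled delayed_data_check_config.
        DelayedDataDetector detector = new NullDelayedDataDetector();

        List<DelayedDataDetectorFactory.BucketWithMissingData> missing = detector.detectMissingData(1_000L);
        System.out.println(missing.isEmpty());    // true
        System.out.println(detector.getWindow()); // 0
    }
}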
@@ -10,6 +10,7 @@ import org.elasticsearch.client.Client;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.util.concurrent.ThreadContext;
+import org.elasticsearch.common.xcontent.XContentElasticsearchExtension;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.mock.orig.Mockito;
 import org.elasticsearch.test.ESTestCase;

@@ -18,6 +19,10 @@ import org.elasticsearch.xpack.core.ml.action.FlushJobAction;
 import org.elasticsearch.xpack.core.ml.action.PersistJobAction;
 import org.elasticsearch.xpack.core.ml.action.PostDataAction;
 import org.elasticsearch.xpack.core.ml.datafeed.extractor.DataExtractor;
+import org.elasticsearch.xpack.core.ml.job.messages.Messages;
+import org.elasticsearch.xpack.core.ml.job.results.Bucket;
+import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetector;
+import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory.BucketWithMissingData;
 import org.elasticsearch.xpack.ml.datafeed.extractor.DataExtractorFactory;
 import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
 import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;

@@ -30,6 +35,7 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
+import java.util.Collections;
 import java.util.Date;
 import java.util.List;
 import java.util.Optional;

@@ -56,10 +62,12 @@ public class DatafeedJobTests extends ESTestCase {
     private DataExtractorFactory dataExtractorFactory;
     private DataExtractor dataExtractor;
     private Client client;
+    private DelayedDataDetector delayedDataDetector;
     private DataDescription.Builder dataDescription;
     ActionFuture<PostDataAction.Response> postDataFuture;
     private ActionFuture<FlushJobAction.Response> flushJobFuture;
     private ArgumentCaptor<FlushJobAction.Request> flushJobRequests;
+    private FlushJobAction.Response flushJobResponse;

     private long currentTime;
     private XContentType xContentType;

@@ -79,6 +87,9 @@ public class DatafeedJobTests extends ESTestCase {
         dataDescription.setFormat(DataDescription.DataFormat.XCONTENT);
         postDataFuture = mock(ActionFuture.class);
         flushJobFuture = mock(ActionFuture.class);
+        flushJobResponse = new FlushJobAction.Response();
+        delayedDataDetector = mock(DelayedDataDetector.class);
+        when(delayedDataDetector.getWindow()).thenReturn(DatafeedJob.MISSING_DATA_CHECK_INTERVAL_MS);
         currentTime = 0;
         xContentType = XContentType.JSON;

@@ -96,6 +107,7 @@ public class DatafeedJobTests extends ESTestCase {
         when(postDataFuture.actionGet()).thenReturn(new PostDataAction.Response(dataCounts));

         flushJobRequests = ArgumentCaptor.forClass(FlushJobAction.Request.class);
+        when(flushJobFuture.actionGet()).thenReturn(flushJobResponse);
         when(client.execute(same(FlushJobAction.INSTANCE), flushJobRequests.capture())).thenReturn(flushJobFuture);
     }

@@ -193,6 +205,13 @@ public class DatafeedJobTests extends ESTestCase {
     }

     public void testRealtimeRun() throws Exception {
+        flushJobResponse = new FlushJobAction.Response(true, new Date(2000));
+        Bucket bucket = mock(Bucket.class);
+        when(bucket.getTimestamp()).thenReturn(new Date(2000));
+        when(flushJobFuture.actionGet()).thenReturn(flushJobResponse);
+        when(client.execute(same(FlushJobAction.INSTANCE), flushJobRequests.capture())).thenReturn(flushJobFuture);
+        when(delayedDataDetector.detectMissingData(2000))
+            .thenReturn(Collections.singletonList(BucketWithMissingData.fromMissingAndBucket(10, bucket)));
         currentTime = 60000L;
         long frequencyMs = 100;
         long queryDelayMs = 1000;

@@ -206,6 +225,29 @@ public class DatafeedJobTests extends ESTestCase {
         flushRequest.setAdvanceTime("59000");
         verify(client).execute(same(FlushJobAction.INSTANCE), eq(flushRequest));
         verify(client, never()).execute(same(PersistJobAction.INSTANCE), any());
+
+        // Execute a second valid time, but do so in a smaller window than the interval
+        currentTime = 62000L;
+        byte[] contentBytes = "content".getBytes(StandardCharsets.UTF_8);
+        InputStream inputStream = new ByteArrayInputStream(contentBytes);
+        when(dataExtractor.hasNext()).thenReturn(true).thenReturn(false);
+        when(dataExtractor.next()).thenReturn(Optional.of(inputStream));
+        when(dataExtractorFactory.newExtractor(anyLong(), anyLong())).thenReturn(dataExtractor);
+        datafeedJob.runRealtime();
+
+        // Execute a third time, but this time make sure we exceed the data check interval, but keep the delayedDataDetector response
+        // the same
+        currentTime = 62000L + DatafeedJob.MISSING_DATA_CHECK_INTERVAL_MS + 1;
+        inputStream = new ByteArrayInputStream(contentBytes);
+        when(dataExtractor.hasNext()).thenReturn(true).thenReturn(false);
+        when(dataExtractor.next()).thenReturn(Optional.of(inputStream));
+        when(dataExtractorFactory.newExtractor(anyLong(), anyLong())).thenReturn(dataExtractor);
+        datafeedJob.runRealtime();
+
+        verify(auditor, times(1)).warning(jobId,
+            Messages.getMessage(Messages.JOB_AUDIT_DATAFEED_MISSING_DATA,
+                10,
+                XContentElasticsearchExtension.DEFAULT_DATE_PRINTER.print(2000)));
     }

     public void testEmptyDataCountGivenlookback() throws Exception {

@@ -321,6 +363,6 @@ public class DatafeedJobTests extends ESTestCase {
                                       long latestRecordTimeMs) {
         Supplier<Long> currentTimeSupplier = () -> currentTime;
         return new DatafeedJob(jobId, dataDescription.build(), frequencyMs, queryDelayMs, dataExtractorFactory, client, auditor,
-                currentTimeSupplier, latestFinalBucketEndTimeMs, latestRecordTimeMs);
+                currentTimeSupplier, delayedDataDetector, latestFinalBucketEndTimeMs, latestRecordTimeMs);
     }
 }
@@ -14,6 +14,7 @@ import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
 import org.elasticsearch.xpack.core.ml.datafeed.DatafeedJobValidator;
+import org.elasticsearch.xpack.core.ml.datafeed.DelayedDataCheckConfig;
 import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
 import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
 import org.elasticsearch.xpack.core.ml.job.config.Detector;

@@ -176,6 +177,30 @@ public class DatafeedJobValidatorTests extends ESTestCase {
         assertEquals("Datafeed frequency [1.5m] must be a multiple of the aggregation interval [60000ms]", e.getMessage());
     }

+    public void testVerify_BucketIntervalAndDataCheckWindowAreValid() {
+        Job.Builder builder = buildJobBuilder("foo");
+        AnalysisConfig.Builder ac = createAnalysisConfig();
+        ac.setSummaryCountFieldName("some_count");
+        ac.setBucketSpan(TimeValue.timeValueSeconds(2));
+        builder.setAnalysisConfig(ac);
+        Job job = builder.build(new Date());
+        DatafeedConfig.Builder datafeedBuilder = createValidDatafeedConfig();
+        datafeedBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueMinutes(10)));
+
+        DatafeedJobValidator.validate(datafeedBuilder.build(), job);
+
+        datafeedBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueSeconds(1)));
+        ElasticsearchStatusException e = ESTestCase.expectThrows(ElasticsearchStatusException.class,
+            () -> DatafeedJobValidator.validate(datafeedBuilder.build(), job));
+        assertEquals(Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL, "1s", "2s"), e.getMessage());
+
+        datafeedBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(24)));
+        e = ESTestCase.expectThrows(ElasticsearchStatusException.class,
+            () -> DatafeedJobValidator.validate(datafeedBuilder.build(), job));
+        assertEquals(Messages.getMessage(
+            Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS, "1d", "2s"), e.getMessage());
+    }
+
     private static Job.Builder buildJobBuilder(String id) {
         Job.Builder builder = new Job.Builder(id);
         AnalysisConfig.Builder ac = createAnalysisConfig();
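The boundary values in this validator test are not arbitrary: a 1s window is smaller than the 2s bucket span, and a 1d window spans 43,200 two-second buckets, far over the 10,000-bucket cap quoted by the deleted test below (assuming DelayedDataCheckConfig.MAX_NUMBER_SPANABLE_BUCKETS is that 10,000). A quick check of the arithmetic:

// Why a 1d window fails validation for a 2s bucket span; the 10000-bucket cap
// is taken from the "[window] must contain less than 10000 buckets" message
// in the deleted test that follows.
public class WindowBoundsSketch {
    public static void main(String[] args) {
        long bucketSpanMs = 2_000L;
        long windowMs = 24L * 60 * 60 * 1000;          // 1d
        long bucketsSpanned = windowMs / bucketSpanMs; // 43200
        System.out.println(bucketsSpanned);            // 43200
        System.out.println(bucketsSpanned > 10_000);   // true -> DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS
    }
}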
@@ -1,76 +0,0 @@
-/*
- * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
- * or more contributor license agreements. Licensed under the Elastic License;
- * you may not use this file except in compliance with the Elastic License.
- */
-package org.elasticsearch.xpack.ml.datafeed;
-
-import org.elasticsearch.client.Client;
-import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.test.ESTestCase;
-import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
-import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
-import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
-import org.elasticsearch.xpack.core.ml.job.config.Detector;
-import org.elasticsearch.xpack.core.ml.job.config.Job;
-
-import java.util.Collections;
-import java.util.Date;
-
-import static org.hamcrest.Matchers.equalTo;
-import static org.mockito.Mockito.mock;
-
-
-public class DelayedDataDetectorTests extends ESTestCase {
-
-
-    public void testConstructorWithValueValues() {
-        TimeValue window = TimeValue.timeValueSeconds(10);
-        Job job = createJob(TimeValue.timeValueSeconds(1));
-        DelayedDataDetector delayedDataDetector = new DelayedDataDetector(job, createDatafeed(), window, mock(Client.class));
-        assertNotNull(delayedDataDetector);
-    }
-
-    public void testConstructorWithInvalidValues() {
-        TimeValue shortWindow = TimeValue.timeValueMillis(500);
-        Job job = createJob(TimeValue.timeValueSeconds(1));
-
-        Exception exception = expectThrows(IllegalArgumentException.class,
-            ()-> new DelayedDataDetector(job, createDatafeed(), shortWindow, mock(Client.class)));
-        assertThat(exception.getMessage(), equalTo("[window] must be greater or equal to the [bucket_span]"));
-
-        TimeValue longWindow = TimeValue.timeValueSeconds(20000);
-
-        exception = expectThrows(IllegalArgumentException.class,
-            ()-> new DelayedDataDetector(job, createDatafeed(), longWindow, mock(Client.class)));
-        assertThat(exception.getMessage(), equalTo("[window] must contain less than 10000 buckets at the current [bucket_span]"));
-    }
-
-
-    private Job createJob(TimeValue bucketSpan) {
-        DataDescription.Builder dataDescription = new DataDescription.Builder();
-        dataDescription.setFormat(DataDescription.DataFormat.XCONTENT);
-        dataDescription.setTimeField("time");
-        dataDescription.setTimeFormat(DataDescription.EPOCH_MS);
-
-        Detector.Builder d = new Detector.Builder("count", null);
-        AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(Collections.singletonList(d.build()));
-        analysisConfig.setBucketSpan(bucketSpan);
-
-        Job.Builder builder = new Job.Builder();
-        builder.setId("test-job");
-        builder.setAnalysisConfig(analysisConfig);
-        builder.setDataDescription(dataDescription);
-        return builder.build(new Date());
-    }
-
-    private DatafeedConfig createDatafeed() {
-        DatafeedConfig.Builder builder = new DatafeedConfig.Builder("id", "jobId");
-        builder.setIndices(Collections.singletonList("index1"));
-        builder.setTypes(Collections.singletonList("doc"));
-        return builder.build();
-    }
-
-
-
-}
@@ -0,0 +1,103 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+package org.elasticsearch.xpack.ml.datafeed.delayeddatacheck;
+
+import org.elasticsearch.client.Client;
+import org.elasticsearch.common.unit.TimeValue;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
+import org.elasticsearch.xpack.core.ml.datafeed.DelayedDataCheckConfig;
+import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
+import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
+import org.elasticsearch.xpack.core.ml.job.config.Detector;
+import org.elasticsearch.xpack.core.ml.job.config.Job;
+import org.elasticsearch.xpack.core.ml.job.messages.Messages;
+
+import java.util.Collections;
+import java.util.Date;
+
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.instanceOf;
+import static org.mockito.Mockito.mock;
+
+
+public class DelayedDataDetectorFactoryTests extends ESTestCase {
+
+    public void testBuilder() {
+        Job job = createJob(TimeValue.timeValueSeconds(2));
+
+        DatafeedConfig datafeedConfig = createDatafeed(false, null);
+
+        // Should not throw
+        assertThat(DelayedDataDetectorFactory.buildDetector(job, datafeedConfig, mock(Client.class)),
+            instanceOf(NullDelayedDataDetector.class));
+
+        datafeedConfig = createDatafeed(true, TimeValue.timeValueMinutes(10));
+
+        // Should not throw
+        assertThat(DelayedDataDetectorFactory.buildDetector(job, datafeedConfig, mock(Client.class)),
+            instanceOf(DatafeedDelayedDataDetector.class));
+
+        DatafeedConfig tooSmallDatafeedConfig = createDatafeed(true, TimeValue.timeValueSeconds(1));
+        IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class,
+            () -> DelayedDataDetectorFactory.buildDetector(job, tooSmallDatafeedConfig, mock(Client.class)));
+        assertEquals(Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL, "1s", "2s"), e.getMessage());
+
+        DatafeedConfig tooBigDatafeedConfig = createDatafeed(true, TimeValue.timeValueHours(12));
+        e = ESTestCase.expectThrows(IllegalArgumentException.class,
+            () -> DelayedDataDetectorFactory.buildDetector(job, tooBigDatafeedConfig, mock(Client.class)));
+        assertEquals(Messages.getMessage(
+            Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS, "12h", "2s"), e.getMessage());
+
+        Job withBigBucketSpan = createJob(TimeValue.timeValueHours(3));
+        datafeedConfig = createDatafeed(true, null);
+
+        // Should not throw
+        DelayedDataDetector delayedDataDetector =
+            DelayedDataDetectorFactory.buildDetector(withBigBucketSpan, datafeedConfig, mock(Client.class));
+        assertThat(delayedDataDetector.getWindow(), equalTo(TimeValue.timeValueHours(3).millis() * 8));
+
+        datafeedConfig = createDatafeed(true, null);
+
+        // Should not throw
+        delayedDataDetector =
+            DelayedDataDetectorFactory.buildDetector(job, datafeedConfig, mock(Client.class));
+        assertThat(delayedDataDetector.getWindow(), equalTo(TimeValue.timeValueHours(2).millis()));
+
+    }
+
+    private Job createJob(TimeValue bucketSpan) {
+        DataDescription.Builder dataDescription = new DataDescription.Builder();
+        dataDescription.setFormat(DataDescription.DataFormat.XCONTENT);
+        dataDescription.setTimeField("time");
+        dataDescription.setTimeFormat(DataDescription.EPOCH_MS);
+
+        Detector.Builder d = new Detector.Builder("count", null);
+        AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(Collections.singletonList(d.build()));
+        analysisConfig.setBucketSpan(bucketSpan);
+
+        Job.Builder builder = new Job.Builder();
+        builder.setId("test-job");
+        builder.setAnalysisConfig(analysisConfig);
+        builder.setDataDescription(dataDescription);
+        return builder.build(new Date());
+    }
+
+    private DatafeedConfig createDatafeed(boolean shouldDetectDelayedData, TimeValue delayedDatacheckWindow) {
+        DatafeedConfig.Builder builder = new DatafeedConfig.Builder("id", "jobId");
+        builder.setIndices(Collections.singletonList("index1"));
+        builder.setTypes(Collections.singletonList("doc"));
+
+        if (shouldDetectDelayedData) {
+            builder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(delayedDatacheckWindow));
+        } else {
+            builder.setDelayedDataCheckConfig(DelayedDataCheckConfig.disabledDelayedDataCheckConfig());
+        }
+        return builder.build();
+    }
+
+}