Add delayed datacheck to the datafeed job runner (#35387)

* ML: Adding missing datacheck to datafeedjob

* Adding client side and docs

* Making adjustments to validations

* Making values default to on, having more sensible limits

* Intermediate commit, still need to figure out interval

* Adjusting delayed data check interval

* updating docs

* Making parameter Boolean, so it is nullable

* bumping bwc to 7 before backport

* changing to version current

* moving delayed data check config to its own object

* Separation of duties for delayed data detection

* fixing checkstyles

* fixing checkstyles

* Adjusting default behavior so that null windows are allowed

* Mentioning the default value

* Fixing comments, syncing up validations
Benjamin Trent 2018-11-15 13:32:45 -06:00 committed by GitHub
parent c7a2c6d549
commit f7ada9b29b
31 changed files with 1136 additions and 165 deletions
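To summarize the client-side API this change introduces, here is a minimal usage sketch (the types and factory methods are taken from the diff below; the datafeed and job ids are illustrative):

// Enable the delayed data check with an explicit one-hour check_window.
// The window must be larger than the job's bucket span, no more than 24h,
// and span fewer than 10,000 buckets.
DatafeedConfig.Builder datafeedBuilder = DatafeedConfig.builder("my-datafeed", "my-job");
datafeedBuilder.setDelayedDataCheckConfig(
    DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(1)));
// Passing null as the window keeps the check enabled and lets an appropriate
// window be calculated when the real-time datafeed runs:
datafeedBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(null));
// Or disable the check entirely:
datafeedBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.disabledDelayedDataCheckConfig());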


@@ -62,6 +62,7 @@ public class DatafeedConfig implements ToXContentObject {
public static final ParseField AGGREGATIONS = new ParseField("aggregations");
public static final ParseField SCRIPT_FIELDS = new ParseField("script_fields");
public static final ParseField CHUNKING_CONFIG = new ParseField("chunking_config");
public static final ParseField DELAYED_DATA_CHECK_CONFIG = new ParseField("delayed_data_check_config");
public static final ConstructingObjectParser<Builder, Void> PARSER = new ConstructingObjectParser<>(
"datafeed_config", true, a -> new Builder((String)a[0], (String)a[1]));
@@ -88,6 +89,7 @@ public class DatafeedConfig implements ToXContentObject {
}, SCRIPT_FIELDS);
PARSER.declareInt(Builder::setScrollSize, SCROLL_SIZE);
PARSER.declareObject(Builder::setChunkingConfig, ChunkingConfig.PARSER, CHUNKING_CONFIG);
PARSER.declareObject(Builder::setDelayedDataCheckConfig, DelayedDataCheckConfig.PARSER, DELAYED_DATA_CHECK_CONFIG);
}
private static BytesReference parseBytes(XContentParser parser) throws IOException {
@@ -107,10 +109,12 @@ public class DatafeedConfig implements ToXContentObject {
private final List<SearchSourceBuilder.ScriptField> scriptFields;
private final Integer scrollSize;
private final ChunkingConfig chunkingConfig;
private final DelayedDataCheckConfig delayedDataCheckConfig;
private DatafeedConfig(String id, String jobId, TimeValue queryDelay, TimeValue frequency, List<String> indices, List<String> types,
BytesReference query, BytesReference aggregations, List<SearchSourceBuilder.ScriptField> scriptFields,
Integer scrollSize, ChunkingConfig chunkingConfig, DelayedDataCheckConfig delayedDataCheckConfig) {
this.id = id;
this.jobId = jobId;
this.queryDelay = queryDelay;
@@ -122,6 +126,7 @@ public class DatafeedConfig implements ToXContentObject {
this.scriptFields = scriptFields == null ? null : Collections.unmodifiableList(scriptFields);
this.scrollSize = scrollSize;
this.chunkingConfig = chunkingConfig;
this.delayedDataCheckConfig = delayedDataCheckConfig;
}
public String getId() {
@@ -168,6 +173,10 @@ public class DatafeedConfig implements ToXContentObject {
return chunkingConfig;
}
public DelayedDataCheckConfig getDelayedDataCheckConfig() {
return delayedDataCheckConfig;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
@@ -204,6 +213,9 @@ public class DatafeedConfig implements ToXContentObject {
if (chunkingConfig != null) {
builder.field(CHUNKING_CONFIG.getPreferredName(), chunkingConfig);
}
if (delayedDataCheckConfig != null) {
builder.field(DELAYED_DATA_CHECK_CONFIG.getPreferredName(), delayedDataCheckConfig);
}
builder.endObject();
return builder;
@@ -244,7 +256,8 @@ public class DatafeedConfig implements ToXContentObject {
&& Objects.equals(this.scrollSize, that.scrollSize)
&& Objects.equals(asMap(this.aggregations), asMap(that.aggregations))
&& Objects.equals(this.scriptFields, that.scriptFields)
&& Objects.equals(this.chunkingConfig, that.chunkingConfig)
&& Objects.equals(this.delayedDataCheckConfig, that.delayedDataCheckConfig);
}
/**
@@ -255,7 +268,7 @@ public class DatafeedConfig implements ToXContentObject {
@Override
public int hashCode() {
return Objects.hash(id, jobId, frequency, queryDelay, indices, types, asMap(query), scrollSize, asMap(aggregations), scriptFields,
chunkingConfig, delayedDataCheckConfig);
}
public static Builder builder(String id, String jobId) {
@@ -275,6 +288,7 @@ public class DatafeedConfig implements ToXContentObject {
private List<SearchSourceBuilder.ScriptField> scriptFields;
private Integer scrollSize;
private ChunkingConfig chunkingConfig;
private DelayedDataCheckConfig delayedDataCheckConfig;
public Builder(String id, String jobId) {
this.id = Objects.requireNonNull(id, ID.getPreferredName());
@@ -293,6 +307,7 @@ public class DatafeedConfig implements ToXContentObject {
this.scriptFields = config.scriptFields;
this.scrollSize = config.scrollSize;
this.chunkingConfig = config.chunkingConfig;
this.delayedDataCheckConfig = config.getDelayedDataCheckConfig();
}
public Builder setIndices(List<String> indices) {
@@ -366,9 +381,23 @@ public class DatafeedConfig implements ToXContentObject {
return this;
}
/**
* Sets the {@link DelayedDataCheckConfig} for this datafeed.
*
* See {@link DelayedDataCheckConfig} for more information.
*
* @param delayedDataCheckConfig the delayed data check configuration.
* The default is an enabled check with a `null` `check_window`, meaning
* the effective window is calculated when the real-time datafeed runs.
*/
public Builder setDelayedDataCheckConfig(DelayedDataCheckConfig delayedDataCheckConfig) {
this.delayedDataCheckConfig = delayedDataCheckConfig;
return this;
}
public DatafeedConfig build() {
return new DatafeedConfig(id, jobId, queryDelay, frequency, indices, types, query, aggregations, scriptFields, scrollSize,
chunkingConfig, delayedDataCheckConfig);
}
private static BytesReference xContentToBytes(ToXContentObject object) throws IOException {


@@ -77,6 +77,9 @@ public class DatafeedUpdate implements ToXContentObject {
}, DatafeedConfig.SCRIPT_FIELDS);
PARSER.declareInt(Builder::setScrollSize, DatafeedConfig.SCROLL_SIZE);
PARSER.declareObject(Builder::setChunkingConfig, ChunkingConfig.PARSER, DatafeedConfig.CHUNKING_CONFIG);
PARSER.declareObject(Builder::setDelayedDataCheckConfig,
DelayedDataCheckConfig.PARSER,
DatafeedConfig.DELAYED_DATA_CHECK_CONFIG);
}
private static BytesReference parseBytes(XContentParser parser) throws IOException {
@@ -96,10 +99,11 @@ public class DatafeedUpdate implements ToXContentObject {
private final List<SearchSourceBuilder.ScriptField> scriptFields;
private final Integer scrollSize;
private final ChunkingConfig chunkingConfig;
private final DelayedDataCheckConfig delayedDataCheckConfig;
private DatafeedUpdate(String id, String jobId, TimeValue queryDelay, TimeValue frequency, List<String> indices, List<String> types,
BytesReference query, BytesReference aggregations, List<SearchSourceBuilder.ScriptField> scriptFields,
Integer scrollSize, ChunkingConfig chunkingConfig, DelayedDataCheckConfig delayedDataCheckConfig) {
this.id = id;
this.jobId = jobId;
this.queryDelay = queryDelay;
@@ -111,6 +115,7 @@ public class DatafeedUpdate implements ToXContentObject {
this.scriptFields = scriptFields;
this.scrollSize = scrollSize;
this.chunkingConfig = chunkingConfig;
this.delayedDataCheckConfig = delayedDataCheckConfig;
}
/**
@@ -146,6 +151,9 @@ public class DatafeedUpdate implements ToXContentObject {
}
builder.endObject();
}
if (delayedDataCheckConfig != null) {
builder.field(DatafeedConfig.DELAYED_DATA_CHECK_CONFIG.getPreferredName(), delayedDataCheckConfig);
}
addOptionalField(builder, DatafeedConfig.SCROLL_SIZE, scrollSize);
addOptionalField(builder, DatafeedConfig.CHUNKING_CONFIG, chunkingConfig);
builder.endObject();
@@ -198,6 +206,10 @@ public class DatafeedUpdate implements ToXContentObject {
return chunkingConfig;
}
public DelayedDataCheckConfig getDelayedDataCheckConfig() {
return delayedDataCheckConfig;
}
private static Map<String, Object> asMap(BytesReference bytesReference) {
return bytesReference == null ? null : XContentHelper.convertToMap(bytesReference, true, XContentType.JSON).v2();
}
@@ -232,6 +244,7 @@ public class DatafeedUpdate implements ToXContentObject {
&& Objects.equals(asMap(this.query), asMap(that.query))
&& Objects.equals(this.scrollSize, that.scrollSize)
&& Objects.equals(asMap(this.aggregations), asMap(that.aggregations))
&& Objects.equals(this.delayedDataCheckConfig, that.delayedDataCheckConfig)
&& Objects.equals(this.scriptFields, that.scriptFields)
&& Objects.equals(this.chunkingConfig, that.chunkingConfig);
}
@@ -244,7 +257,7 @@ public class DatafeedUpdate implements ToXContentObject {
@Override
public int hashCode() {
return Objects.hash(id, jobId, frequency, queryDelay, indices, types, asMap(query), scrollSize, asMap(aggregations), scriptFields,
chunkingConfig, delayedDataCheckConfig);
}
public static Builder builder(String id) {
@@ -264,6 +277,7 @@ public class DatafeedUpdate implements ToXContentObject {
private List<SearchSourceBuilder.ScriptField> scriptFields;
private Integer scrollSize;
private ChunkingConfig chunkingConfig;
private DelayedDataCheckConfig delayedDataCheckConfig;
public Builder(String id) {
this.id = Objects.requireNonNull(id, DatafeedConfig.ID.getPreferredName());
@@ -281,6 +295,7 @@ public class DatafeedUpdate implements ToXContentObject {
this.scriptFields = config.scriptFields;
this.scrollSize = config.scrollSize;
this.chunkingConfig = config.chunkingConfig;
this.delayedDataCheckConfig = config.delayedDataCheckConfig;
}
public Builder setJobId(String jobId) {
@@ -359,9 +374,14 @@ public class DatafeedUpdate implements ToXContentObject {
return this;
}
public Builder setDelayedDataCheckConfig(DelayedDataCheckConfig delayedDataCheckConfig) {
this.delayedDataCheckConfig = delayedDataCheckConfig;
return this;
}
public DatafeedUpdate build() {
return new DatafeedUpdate(id, jobId, queryDelay, frequency, indices, types, query, aggregations, scriptFields, scrollSize,
chunkingConfig, delayedDataCheckConfig);
}
private static BytesReference xContentToBytes(ToXContentObject object) throws IOException {


@@ -0,0 +1,130 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.datafeed;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import java.io.IOException;
import java.util.Objects;
/**
* The configuration object containing the delayed data check settings.
*
* See {@link DelayedDataCheckConfig#enabledDelayedDataCheckConfig(TimeValue)} for creating a new
* enabled data check with the given check_window.
*
* See {@link DelayedDataCheckConfig#disabledDelayedDataCheckConfig()} for creating a config for disabling
* delayed data checking.
*/
public class DelayedDataCheckConfig implements ToXContentObject {
public static final ParseField ENABLED = new ParseField("enabled");
public static final ParseField CHECK_WINDOW = new ParseField("check_window");
// These parsers follow the pattern that metadata is parsed leniently (to allow for enhancements), whilst config is parsed strictly
public static final ConstructingObjectParser<DelayedDataCheckConfig, Void> PARSER = new ConstructingObjectParser<>(
"delayed_data_check_config", true, a -> new DelayedDataCheckConfig((Boolean) a[0], (TimeValue) a[1]));
static {
PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), ENABLED);
PARSER.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
return TimeValue.parseTimeValue(p.text(), CHECK_WINDOW.getPreferredName());
}
throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
}, CHECK_WINDOW, ObjectParser.ValueType.STRING);
}
/**
* Creates a new DelayedDataCheckConfig with a check_window of the given `timeValue`.
*
* The check queries the indices over the window that ends at the latest finalized bucket and extends the given
* TimeValue into the past, looking for any data that was indexed after the datafeed had already read past it.
*
* The window must be larger than the {@link org.elasticsearch.client.ml.job.config.AnalysisConfig#bucketSpan}, no more than
* 24 hours, and span fewer than 10,000 buckets.
*
* @param timeValue The span of time in the past, measured from the latest finalized bucket, in which to look for latent data.
* If `null` is provided, an appropriate window is calculated when the check is used.
**/
public static DelayedDataCheckConfig enabledDelayedDataCheckConfig(TimeValue timeValue) {
return new DelayedDataCheckConfig(true, timeValue);
}
/**
* This creates a new DelayedDataCheckConfig that disables the data check.
*/
public static DelayedDataCheckConfig disabledDelayedDataCheckConfig() {
return new DelayedDataCheckConfig(false, null);
}
private final boolean enabled;
private final TimeValue checkWindow;
DelayedDataCheckConfig(Boolean enabled, TimeValue checkWindow) {
this.enabled = enabled;
this.checkWindow = checkWindow;
}
public boolean isEnabled() {
return enabled;
}
@Nullable
public TimeValue getCheckWindow() {
return checkWindow;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(ENABLED.getPreferredName(), enabled);
if (checkWindow != null) {
builder.field(CHECK_WINDOW.getPreferredName(), checkWindow.getStringRep());
}
builder.endObject();
return builder;
}
@Override
public int hashCode() {
return Objects.hash(enabled, checkWindow);
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null || getClass() != obj.getClass()) {
return false;
}
DelayedDataCheckConfig other = (DelayedDataCheckConfig) obj;
return Objects.equals(this.enabled, other.enabled) && Objects.equals(this.checkWindow, other.checkWindow);
}
}
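For reference, the JSON shape this class round-trips (via the PARSER and toXContent above; values are illustrative):

{
  "enabled": true,
  "check_window": "1h"
}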


@@ -95,6 +95,7 @@ import org.elasticsearch.client.ml.datafeed.ChunkingConfig;
import org.elasticsearch.client.ml.datafeed.DatafeedConfig;
import org.elasticsearch.client.ml.datafeed.DatafeedStats;
import org.elasticsearch.client.ml.datafeed.DatafeedUpdate;
import org.elasticsearch.client.ml.datafeed.DelayedDataCheckConfig;
import org.elasticsearch.client.ml.job.config.AnalysisConfig;
import org.elasticsearch.client.ml.job.config.AnalysisLimits;
import org.elasticsearch.client.ml.job.config.DataDescription;
@@ -583,6 +584,14 @@ public class MlClientDocumentationIT extends ESRestHighLevelClientTestCase {
datafeedBuilder.setQueryDelay(TimeValue.timeValueMinutes(1)); // <1>
// end::put-datafeed-config-set-query-delay
// tag::put-datafeed-config-set-delayed-data-check-config
datafeedBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig
.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(1))); // <1>
// end::put-datafeed-config-set-delayed-data-check-config
// unset the config so we do not accidentally trip internal validations tied to the job's bucket span
datafeedBuilder.setDelayedDataCheckConfig(null);
List<SearchSourceBuilder.ScriptField> scriptFields = Collections.emptyList();
// tag::put-datafeed-config-set-script-fields
datafeedBuilder.setScriptFields(scriptFields); // <1>


@@ -103,6 +103,9 @@ public class DatafeedConfigTests extends AbstractXContentTestCase<DatafeedConfig
if (randomBoolean()) {
builder.setChunkingConfig(ChunkingConfigTests.createRandomizedChunk());
}
if (randomBoolean()) {
builder.setDelayedDataCheckConfig(DelayedDataCheckConfigTests.createRandomizedConfig());
}
return builder;
}


@@ -83,6 +83,9 @@ public class DatafeedUpdateTests extends AbstractXContentTestCase<DatafeedUpdate
if (randomBoolean()) {
builder.setChunkingConfig(ChunkingConfigTests.createRandomizedChunk());
}
if (randomBoolean()) {
builder.setDelayedDataCheckConfig(DelayedDataCheckConfigTests.createRandomizedConfig());
}
return builder.build();
}


@@ -0,0 +1,65 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.client.ml.datafeed;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractXContentTestCase;
import static org.hamcrest.Matchers.equalTo;
public class DelayedDataCheckConfigTests extends AbstractXContentTestCase<DelayedDataCheckConfig> {
@Override
protected DelayedDataCheckConfig createTestInstance() {
return createRandomizedConfig();
}
@Override
protected DelayedDataCheckConfig doParseInstance(XContentParser parser) {
return DelayedDataCheckConfig.PARSER.apply(parser, null);
}
@Override
protected boolean supportsUnknownFields() {
return true;
}
public void testEnabledDelayedDataCheckConfig() {
DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(5));
assertThat(delayedDataCheckConfig.isEnabled(), equalTo(true));
assertThat(delayedDataCheckConfig.getCheckWindow(), equalTo(TimeValue.timeValueHours(5)));
}
public void testDisabledDelayedDataCheckConfig() {
DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.disabledDelayedDataCheckConfig();
assertThat(delayedDataCheckConfig.isEnabled(), equalTo(false));
assertThat(delayedDataCheckConfig.getCheckWindow(), equalTo(null));
}
public static DelayedDataCheckConfig createRandomizedConfig() {
boolean enabled = randomBoolean();
TimeValue timeWindow = null;
if (enabled || randomBoolean()) {
timeWindow = TimeValue.timeValueMillis(randomLongBetween(1, 1_000));
}
return new DelayedDataCheckConfig(enabled, timeWindow);
}
}


@@ -63,6 +63,17 @@ include-tagged::{doc-tests-file}[{api}-config-set-query-delay]
--------------------------------------------------
<1> The time interval behind real time that data is queried.
["source","java",subs="attributes,callouts,macros"]
--------------------------------------------------
include-tagged::{doc-tests-file}[{api}-config-set-delayed-data-check-config]
--------------------------------------------------
<1> Sets the delayed data check configuration.
The window must be larger than the job's bucket span, no more than 24 hours,
and span fewer than 10,000 buckets.
Defaults to `null`, which causes an appropriate window to be calculated when
the datafeed runs.
To disable the check explicitly, pass `DelayedDataCheckConfig.disabledDelayedDataCheckConfig()`.
["source","java",subs="attributes,callouts,macros"] ["source","java",subs="attributes,callouts,macros"]
-------------------------------------------------- --------------------------------------------------
include-tagged::{doc-tests-file}[{api}-config-set-script-fields] include-tagged::{doc-tests-file}[{api}-config-set-script-fields]


@@ -64,6 +64,11 @@ A {dfeed} resource has the following properties:
example: `[]`. This property is provided for backwards compatibility with
releases earlier than 6.0.0. For more information, see <<removal-of-types>>.
`delayed_data_check_config`::
(object) Specifies whether the {dfeed} checks for missing data and, if so, the
size of the window it checks. See <<ml-datafeed-delayed-data-check-config>>.
For example: `{"enabled": true, "check_window": "1h"}`.
[[ml-datafeed-chunking-config]]
==== Chunking Configuration Objects
@@ -86,6 +91,27 @@ A chunking configuration object has the following properties:
This setting is only applicable when the mode is set to `manual`.
For example: `3h`.
[[ml-datafeed-delayed-data-check-config]]
==== Delayed Data Check Configuration Objects
The {dfeed} can optionally search over indices that have already been read in
an effort to determine whether any data has since been added to the index. If
missing data is found, it is a good indication that the `query_delay` option is
set too low and data is being indexed after the {dfeed} has passed that moment
in time. This check runs only on real-time {dfeeds}.
The configuration object has the following properties:
`enabled`::
(boolean) Specifies whether the {dfeed} periodically checks for data that was
indexed after it was read. Defaults to `true`.
`check_window`::
(time units) The window of time before the latest finalized bucket that should be searched
for late data. Defaults to `null`, which causes an appropriate `check_window` to be
calculated when the real-time {dfeed} runs.
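For example, a {dfeed} that enables the check with an explicit one-hour window might be created as follows (the datafeed id, job id, and index name are illustrative):

PUT _xpack/ml/datafeeds/datafeed-example
{
  "job_id": "example-job",
  "indices": ["example-index"],
  "delayed_data_check_config": {
    "enabled": true,
    "check_window": "1h"
  }
}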
[float]
[[ml-datafeed-counts]]
==== {dfeed-cap} Counts


@@ -78,6 +78,10 @@ You must create a job before you create a {dfeed}. You can associate only one
For example: `[]`. This property is provided for backwards compatibility with
releases earlier than 6.0.0. For more information, see <<removal-of-types>>.
`delayed_data_check_config`::
(object) Specifies whether the {dfeed} checks for missing data and, if so, the
size of the window it checks. See <<ml-datafeed-delayed-data-check-config>>.
For more information about these properties,
see <<ml-datafeed-resource>>.


@@ -84,6 +84,7 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
public static final ParseField SOURCE = new ParseField("_source");
public static final ParseField CHUNKING_CONFIG = new ParseField("chunking_config");
public static final ParseField HEADERS = new ParseField("headers");
public static final ParseField DELAYED_DATA_CHECK_CONFIG = new ParseField("delayed_data_check_config");
// These parsers follow the pattern that metadata is parsed leniently (to allow for enhancements), whilst config is parsed strictly
public static final ObjectParser<Builder, Void> LENIENT_PARSER = createParser(true);
@@ -124,7 +125,9 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
// (For config, headers are explicitly transferred from the auth headers by code in the put/update datafeed actions.)
parser.declareObject(Builder::setHeaders, (p, c) -> p.mapStrings(), HEADERS);
}
parser.declareObject(Builder::setDelayedDataCheckConfig,
ignoreUnknownFields ? DelayedDataCheckConfig.LENIENT_PARSER : DelayedDataCheckConfig.STRICT_PARSER,
DELAYED_DATA_CHECK_CONFIG);
return parser;
}
@@ -149,10 +152,12 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
private final Integer scrollSize;
private final ChunkingConfig chunkingConfig;
private final Map<String, String> headers;
private final DelayedDataCheckConfig delayedDataCheckConfig;
private DatafeedConfig(String id, String jobId, TimeValue queryDelay, TimeValue frequency, List<String> indices, List<String> types,
QueryBuilder query, AggregatorFactories.Builder aggregations, List<SearchSourceBuilder.ScriptField> scriptFields,
Integer scrollSize, ChunkingConfig chunkingConfig, Map<String, String> headers,
DelayedDataCheckConfig delayedDataCheckConfig) {
this.id = id;
this.jobId = jobId;
this.queryDelay = queryDelay;
@@ -165,6 +170,7 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
this.scrollSize = scrollSize;
this.chunkingConfig = chunkingConfig;
this.headers = Collections.unmodifiableMap(headers);
this.delayedDataCheckConfig = delayedDataCheckConfig;
}
public DatafeedConfig(StreamInput in) throws IOException {
@@ -196,6 +202,11 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
} else {
this.headers = Collections.emptyMap();
}
if (in.getVersion().onOrAfter(Version.CURRENT)) {
delayedDataCheckConfig = in.readOptionalWriteable(DelayedDataCheckConfig::new);
} else {
delayedDataCheckConfig = DelayedDataCheckConfig.defaultDelayedDataCheckConfig();
}
}
public String getId() {
@@ -260,6 +271,10 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
return headers;
}
public DelayedDataCheckConfig getDelayedDataCheckConfig() {
return delayedDataCheckConfig;
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(id);
@@ -291,6 +306,9 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
if (out.getVersion().onOrAfter(Version.V_6_2_0)) {
out.writeMap(headers, StreamOutput::writeString, StreamOutput::writeString);
}
if (out.getVersion().onOrAfter(Version.CURRENT)) {
out.writeOptionalWriteable(delayedDataCheckConfig);
}
}
@Override
@@ -328,6 +346,9 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
if (headers.isEmpty() == false && params.paramAsBoolean(ToXContentParams.FOR_CLUSTER_STATE, false) == true) {
builder.field(HEADERS.getPreferredName(), headers);
}
if (delayedDataCheckConfig != null) {
builder.field(DELAYED_DATA_CHECK_CONFIG.getPreferredName(), delayedDataCheckConfig);
}
return builder;
}
@@ -359,13 +380,14 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
&& Objects.equals(this.aggregations, that.aggregations)
&& Objects.equals(this.scriptFields, that.scriptFields)
&& Objects.equals(this.chunkingConfig, that.chunkingConfig)
&& Objects.equals(this.headers, that.headers)
&& Objects.equals(this.delayedDataCheckConfig, that.delayedDataCheckConfig);
}
@Override
public int hashCode() {
return Objects.hash(id, jobId, frequency, queryDelay, indices, types, query, scrollSize, aggregations, scriptFields,
chunkingConfig, headers, delayedDataCheckConfig);
}
@Override
@@ -438,6 +460,7 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
private Integer scrollSize = DEFAULT_SCROLL_SIZE;
private ChunkingConfig chunkingConfig;
private Map<String, String> headers = Collections.emptyMap();
private DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.defaultDelayedDataCheckConfig();
public Builder() {
}
@@ -461,6 +484,7 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
this.scrollSize = config.scrollSize;
this.chunkingConfig = config.chunkingConfig;
this.headers = config.headers;
this.delayedDataCheckConfig = config.getDelayedDataCheckConfig();
}
public void setId(String datafeedId) {
@@ -523,6 +547,10 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
this.chunkingConfig = chunkingConfig;
}
public void setDelayedDataCheckConfig(DelayedDataCheckConfig delayedDataCheckConfig) {
this.delayedDataCheckConfig = delayedDataCheckConfig;
}
public DatafeedConfig build() {
ExceptionsHelper.requireNonNull(id, ID.getPreferredName());
ExceptionsHelper.requireNonNull(jobId, Job.ID.getPreferredName());
@@ -535,11 +563,12 @@ public class DatafeedConfig extends AbstractDiffable<DatafeedConfig> implements
if (types == null || types.contains(null) || types.contains("")) {
throw invalidOptionValue(TYPES.getPreferredName(), types);
}
validateAggregations();
setDefaultChunkingConfig();
setDefaultQueryDelay();
return new DatafeedConfig(id, jobId, queryDelay, frequency, indices, types, query, aggregations, scriptFields, scrollSize,
chunkingConfig, headers, delayedDataCheckConfig);
}
void validateAggregations() {


@@ -31,6 +31,30 @@ public final class DatafeedJobValidator {
checkValidHistogramInterval(datafeedConfig, analysisConfig);
checkFrequencyIsMultipleOfHistogramInterval(datafeedConfig);
}
DelayedDataCheckConfig delayedDataCheckConfig = datafeedConfig.getDelayedDataCheckConfig();
TimeValue bucketSpan = analysisConfig.getBucketSpan();
if (delayedDataCheckConfig.isEnabled()) {
checkValidDelayedDataCheckConfig(bucketSpan, delayedDataCheckConfig);
}
}
private static void checkValidDelayedDataCheckConfig(TimeValue bucketSpan, DelayedDataCheckConfig delayedDataCheckConfig) {
TimeValue delayedDataCheckWindow = delayedDataCheckConfig.getCheckWindow();
if (delayedDataCheckWindow != null) { // NULL implies we calculate on use and thus is always valid
if (delayedDataCheckWindow.compareTo(bucketSpan) < 0) {
throw ExceptionsHelper.badRequestException(
Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL,
delayedDataCheckWindow,
bucketSpan));
}
if (delayedDataCheckWindow.millis() > bucketSpan.millis() * DelayedDataCheckConfig.MAX_NUMBER_SPANABLE_BUCKETS) {
throw ExceptionsHelper.badRequestException(
Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS,
delayedDataCheckWindow,
bucketSpan));
}
}
}
private static void checkSummaryCountFieldNameIsSet(AnalysisConfig analysisConfig) {
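To make the bounds in checkValidDelayedDataCheckConfig concrete, assume a job with a bucket_span of 1h (an illustrative walkthrough, not part of the diff):

// check_window = "30m" -> rejected: DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL, smaller than the bucket span
// check_window = "2h"  -> accepted
// check_window = null  -> accepted: the effective window is calculated when the real-time datafeed runs
// a window of more than 10,000 bucket spans -> rejected: DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS
// (the 24h cap itself is enforced separately, in DelayedDataCheckConfig's constructor)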


@@ -5,6 +5,7 @@
*/
package org.elasticsearch.xpack.core.ml.datafeed;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
@@ -68,6 +69,9 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
}, DatafeedConfig.SCRIPT_FIELDS);
PARSER.declareInt(Builder::setScrollSize, DatafeedConfig.SCROLL_SIZE);
PARSER.declareObject(Builder::setChunkingConfig, ChunkingConfig.STRICT_PARSER, DatafeedConfig.CHUNKING_CONFIG);
PARSER.declareObject(Builder::setDelayedDataCheckConfig,
DelayedDataCheckConfig.STRICT_PARSER,
DatafeedConfig.DELAYED_DATA_CHECK_CONFIG);
}
private final String id;
@@ -81,10 +85,11 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
private final List<SearchSourceBuilder.ScriptField> scriptFields;
private final Integer scrollSize;
private final ChunkingConfig chunkingConfig;
private final DelayedDataCheckConfig delayedDataCheckConfig;
private DatafeedUpdate(String id, String jobId, TimeValue queryDelay, TimeValue frequency, List<String> indices, List<String> types,
QueryBuilder query, AggregatorFactories.Builder aggregations, List<SearchSourceBuilder.ScriptField> scriptFields,
Integer scrollSize, ChunkingConfig chunkingConfig, DelayedDataCheckConfig delayedDataCheckConfig) {
this.id = id;
this.jobId = jobId;
this.queryDelay = queryDelay;
@@ -96,6 +101,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
this.scriptFields = scriptFields;
this.scrollSize = scrollSize;
this.chunkingConfig = chunkingConfig;
this.delayedDataCheckConfig = delayedDataCheckConfig;
}
public DatafeedUpdate(StreamInput in) throws IOException {
@@ -122,6 +128,11 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
}
this.scrollSize = in.readOptionalVInt();
this.chunkingConfig = in.readOptionalWriteable(ChunkingConfig::new);
if (in.getVersion().onOrAfter(Version.CURRENT)) {
delayedDataCheckConfig = in.readOptionalWriteable(DelayedDataCheckConfig::new);
} else {
delayedDataCheckConfig = null;
}
}
/**
@@ -159,6 +170,9 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
}
out.writeOptionalVInt(scrollSize);
out.writeOptionalWriteable(chunkingConfig);
if (out.getVersion().onOrAfter(Version.CURRENT)) {
out.writeOptionalWriteable(delayedDataCheckConfig);
}
}
@Override
@@ -185,6 +199,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
}
addOptionalField(builder, DatafeedConfig.SCROLL_SIZE, scrollSize);
addOptionalField(builder, DatafeedConfig.CHUNKING_CONFIG, chunkingConfig);
addOptionalField(builder, DatafeedConfig.DELAYED_DATA_CHECK_CONFIG, delayedDataCheckConfig);
builder.endObject();
return builder;
}
@@ -250,6 +265,10 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
return chunkingConfig;
}
public DelayedDataCheckConfig getDelayedDataCheckConfig() {
return delayedDataCheckConfig;
}
/**
* Applies the update to the given {@link DatafeedConfig}
* @return a new {@link DatafeedConfig} that contains the update
@@ -290,6 +309,9 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
if (chunkingConfig != null) {
builder.setChunkingConfig(chunkingConfig);
}
if (delayedDataCheckConfig != null) {
builder.setDelayedDataCheckConfig(delayedDataCheckConfig);
}
if (headers.isEmpty() == false) {
// Adjust the request, adding security headers from the current thread context
@@ -328,6 +350,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
&& Objects.equals(this.query, that.query)
&& Objects.equals(this.scrollSize, that.scrollSize)
&& Objects.equals(this.aggregations, that.aggregations)
&& Objects.equals(this.delayedDataCheckConfig, that.delayedDataCheckConfig)
&& Objects.equals(this.scriptFields, that.scriptFields)
&& Objects.equals(this.chunkingConfig, that.chunkingConfig);
}
@@ -335,7 +358,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
@Override
public int hashCode() {
return Objects.hash(id, jobId, frequency, queryDelay, indices, types, query, scrollSize, aggregations, scriptFields,
chunkingConfig, delayedDataCheckConfig);
}
@Override
@@ -352,6 +375,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
&& (scrollSize == null || Objects.equals(scrollSize, datafeed.getQueryDelay()))
&& (aggregations == null || Objects.equals(aggregations, datafeed.getAggregations()))
&& (scriptFields == null || Objects.equals(scriptFields, datafeed.getScriptFields()))
&& (delayedDataCheckConfig == null || Objects.equals(delayedDataCheckConfig, datafeed.getDelayedDataCheckConfig()))
&& (chunkingConfig == null || Objects.equals(chunkingConfig, datafeed.getChunkingConfig()));
}
@@ -368,6 +392,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
private List<SearchSourceBuilder.ScriptField> scriptFields;
private Integer scrollSize;
private ChunkingConfig chunkingConfig;
private DelayedDataCheckConfig delayedDataCheckConfig;
public Builder() {
}
@@ -388,6 +413,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
this.scriptFields = config.scriptFields;
this.scrollSize = config.scrollSize;
this.chunkingConfig = config.chunkingConfig;
this.delayedDataCheckConfig = config.delayedDataCheckConfig;
}
public void setId(String datafeedId) {
@@ -428,6 +454,10 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
this.scriptFields = sorted;
}
public void setDelayedDataCheckConfig(DelayedDataCheckConfig delayedDataCheckConfig) {
this.delayedDataCheckConfig = delayedDataCheckConfig;
}
public void setScrollSize(int scrollSize) {
this.scrollSize = scrollSize;
}
@@ -438,7 +468,7 @@ public class DatafeedUpdate implements Writeable, ToXContentObject {
public DatafeedUpdate build() {
return new DatafeedUpdate(id, jobId, queryDelay, frequency, indices, types, query, aggregations, scriptFields, scrollSize,
chunkingConfig, delayedDataCheckConfig);
}
}
}


@@ -0,0 +1,127 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.datafeed;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ObjectParser;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.xpack.core.ml.utils.time.TimeUtils;
import java.io.IOException;
import java.util.Objects;
public class DelayedDataCheckConfig implements ToXContentObject, Writeable {
public static final TimeValue MAX_DELAYED_DATA_WINDOW = TimeValue.timeValueHours(24);
public static final int MAX_NUMBER_SPANABLE_BUCKETS = 10_000;
public static final ParseField ENABLED = new ParseField("enabled");
public static final ParseField CHECK_WINDOW = new ParseField("check_window");
// These parsers follow the pattern that metadata is parsed leniently (to allow for enhancements), whilst config is parsed strictly
public static final ConstructingObjectParser<DelayedDataCheckConfig, Void> LENIENT_PARSER = createParser(true);
public static final ConstructingObjectParser<DelayedDataCheckConfig, Void> STRICT_PARSER = createParser(false);
private static ConstructingObjectParser<DelayedDataCheckConfig, Void> createParser(boolean ignoreUnknownFields) {
ConstructingObjectParser<DelayedDataCheckConfig, Void> parser = new ConstructingObjectParser<>(
"delayed_data_check_config", ignoreUnknownFields, a -> new DelayedDataCheckConfig((Boolean) a[0], (TimeValue) a[1]));
parser.declareBoolean(ConstructingObjectParser.constructorArg(), ENABLED);
parser.declareField(ConstructingObjectParser.optionalConstructorArg(), p -> {
if (p.currentToken() == XContentParser.Token.VALUE_STRING) {
return TimeValue.parseTimeValue(p.text(), CHECK_WINDOW.getPreferredName());
}
throw new IllegalArgumentException("Unsupported token [" + p.currentToken() + "]");
}, CHECK_WINDOW, ObjectParser.ValueType.STRING);
return parser;
}
public static DelayedDataCheckConfig defaultDelayedDataCheckConfig() {
return new DelayedDataCheckConfig(true, null);
}
public static DelayedDataCheckConfig enabledDelayedDataCheckConfig(TimeValue timeValue) {
return new DelayedDataCheckConfig(true, timeValue);
}
public static DelayedDataCheckConfig disabledDelayedDataCheckConfig() {
return new DelayedDataCheckConfig(false, null);
}
private final boolean enabled;
private final TimeValue checkWindow;
DelayedDataCheckConfig(Boolean enabled, TimeValue checkWindow) {
this.enabled = enabled;
if (enabled && checkWindow != null) {
TimeUtils.checkPositive(checkWindow, CHECK_WINDOW);
if (checkWindow.compareTo(MAX_DELAYED_DATA_WINDOW) > 0) {
throw new IllegalArgumentException("check_window [" + checkWindow.getStringRep() + "] must be less than or equal to [24h]");
}
}
this.checkWindow = checkWindow;
}
public DelayedDataCheckConfig(StreamInput in) throws IOException {
enabled = in.readBoolean();
checkWindow = in.readOptionalTimeValue();
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeBoolean(enabled);
out.writeOptionalTimeValue(checkWindow);
}
public boolean isEnabled() {
return enabled;
}
@Nullable
public TimeValue getCheckWindow() {
return checkWindow;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, ToXContent.Params params) throws IOException {
builder.startObject();
builder.field(ENABLED.getPreferredName(), enabled);
if (checkWindow != null) {
builder.field(CHECK_WINDOW.getPreferredName(), checkWindow.getStringRep());
}
builder.endObject();
return builder;
}
@Override
public int hashCode() {
return Objects.hash(enabled, checkWindow);
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null || getClass() != obj.getClass()) {
return false;
}
DelayedDataCheckConfig other = (DelayedDataCheckConfig) obj;
return Objects.equals(this.enabled, other.enabled) && Objects.equals(this.checkWindow, other.checkWindow);
}
}


@@ -22,6 +22,13 @@ public final class Messages {
public static final String DATAFEED_CONFIG_CANNOT_USE_SCRIPT_FIELDS_WITH_AGGS =
"script_fields cannot be used in combination with aggregations";
public static final String DATAFEED_CONFIG_INVALID_OPTION_VALUE = "Invalid {0} value ''{1}'' in datafeed configuration";
public static final String DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL =
"delayed_data_check_window [{0}] must be greater than the bucket_span [{1}]";
public static final String DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_LARGE =
"delayed_data_check_window [{0}] must be less than or equal to [24h]";
public static final String DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS =
"delayed_data_check_window [{0}] must be less than 10,000x the bucket_span [{1}]";
public static final String DATAFEED_DOES_NOT_SUPPORT_JOB_WITH_LATENCY = "A job configured with datafeed cannot support latency";
public static final String DATAFEED_NOT_FOUND = "No datafeed with id [{0}] exists";
public static final String DATAFEED_AGGREGATIONS_REQUIRES_DATE_HISTOGRAM =
@@ -63,6 +70,9 @@ public final class Messages {
public static final String JOB_AUDIT_DATAFEED_LOOKBACK_COMPLETED = "Datafeed lookback completed";
public static final String JOB_AUDIT_DATAFEED_LOOKBACK_NO_DATA = "Datafeed lookback retrieved no data";
public static final String JOB_AUDIT_DATAFEED_NO_DATA = "Datafeed has been retrieving no data for a while";
public static final String JOB_AUDIT_DATAFEED_MISSING_DATA =
"Datafeed has missed {0} documents due to ingest latency, latest bucket with missing data is [{1}]." +
" Consider increasing query_delay";
public static final String JOB_AUDIT_DATAFEED_RECOVERED = "Datafeed has recovered data extraction and analysis";
public static final String JOB_AUDIT_DATAFEED_STARTED_FROM_TO = "Datafeed started (from: {0} to: {1}) with frequency [{2}]";
public static final String JOB_AUDIT_DATAFEED_STARTED_REALTIME = "Datafeed started in real-time";
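These templates are rendered through MessageFormat via Messages.getMessage, as in the validator above; for example (values illustrative):

Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL, "30m", "1h")
// -> "delayed_data_check_window [30m] must be greater than the bucket_span [1h]"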

View File

@@ -87,6 +87,22 @@ public final class TimeUtils {
checkMultiple(timeValue, baseUnit, field);
}
/**
* Checks that the given {@code timeValue} is positive.
*
* <ul>
* <li>1s is valid</li>
* <li>-1s is invalid</li>
* </ul>
*/
public static void checkPositive(TimeValue timeValue, ParseField field) {
long nanos = timeValue.getNanos();
if (nanos <= 0) {
throw new IllegalArgumentException(field.getPreferredName() + " cannot be less than or equal to 0. Value = "
+ timeValue.toString());
}
}
private static void checkNonNegative(TimeValue timeValue, ParseField field) {
long nanos = timeValue.getNanos();
if (nanos < 0) {
@@ -94,13 +110,7 @@
}
}
private static void checkPositive(TimeValue timeValue, ParseField field) {
long nanos = timeValue.getNanos();
if (nanos <= 0) {
throw new IllegalArgumentException(field.getPreferredName() + " cannot be less or equal than 0. Value = "
+ timeValue.toString());
}
}
/**
* Check the given {@code timeValue} is a multiple of the {@code baseUnit}
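A quick usage sketch of the now-public checkPositive helper; the ParseField name here is illustrative:

// Accepts any strictly positive TimeValue, throws IllegalArgumentException otherwise.
ParseField window = new ParseField("check_window");
TimeUtils.checkPositive(TimeValue.timeValueHours(1), window);  // passes
TimeUtils.checkPositive(TimeValue.timeValueNanos(0), window);  // throws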

View File

@@ -109,6 +109,9 @@ public class DatafeedConfigTests extends AbstractSerializingTestCase<DatafeedConfig> {
if (randomBoolean()) {
builder.setChunkingConfig(ChunkingConfigTests.createRandomizedChunk());
}
if (randomBoolean()) {
builder.setDelayedDataCheckConfig(DelayedDataCheckConfigTests.createRandomizedConfig(bucketSpanMillis));
}
return builder.build();
}

View File

@@ -89,6 +89,9 @@ public class DatafeedUpdateTests extends AbstractSerializingTestCase<DatafeedUpdate> {
if (randomBoolean()) {
builder.setChunkingConfig(ChunkingConfigTests.createRandomizedChunk());
}
if (randomBoolean()) {
builder.setDelayedDataCheckConfig(DelayedDataCheckConfigTests.createRandomizedConfig(randomLongBetween(300_001, 400_000)));
}
return builder.build();
}
@@ -155,6 +158,7 @@ public class DatafeedUpdateTests extends AbstractSerializingTestCase<DatafeedUpdate> {
update.setScriptFields(Collections.singletonList(new SearchSourceBuilder.ScriptField("a", mockScript("b"), false)));
update.setScrollSize(8000);
update.setChunkingConfig(ChunkingConfig.newManual(TimeValue.timeValueHours(1)));
update.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(1)));
DatafeedConfig updatedDatafeed = update.build().apply(datafeed, Collections.emptyMap());
@@ -169,6 +173,8 @@ public class DatafeedUpdateTests extends AbstractSerializingTestCase<DatafeedUpdate> {
equalTo(Collections.singletonList(new SearchSourceBuilder.ScriptField("a", mockScript("b"), false))));
assertThat(updatedDatafeed.getScrollSize(), equalTo(8000));
assertThat(updatedDatafeed.getChunkingConfig(), equalTo(ChunkingConfig.newManual(TimeValue.timeValueHours(1))));
assertThat(updatedDatafeed.getDelayedDataCheckConfig().isEnabled(), equalTo(true));
assertThat(updatedDatafeed.getDelayedDataCheckConfig().getCheckWindow(), equalTo(TimeValue.timeValueHours(1)));
}
public void testApply_givenAggregations() {

View File

@@ -0,0 +1,95 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.core.ml.datafeed;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.test.AbstractSerializingTestCase;
import java.io.IOException;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.nullValue;
import static org.hamcrest.core.Is.is;
public class DelayedDataCheckConfigTests extends AbstractSerializingTestCase<DelayedDataCheckConfig> {
@Override
protected DelayedDataCheckConfig createTestInstance() {
return createRandomizedConfig(100);
}
@Override
protected Writeable.Reader<DelayedDataCheckConfig> instanceReader() {
return DelayedDataCheckConfig::new;
}
@Override
protected DelayedDataCheckConfig doParseInstance(XContentParser parser) {
return DelayedDataCheckConfig.STRICT_PARSER.apply(parser, null);
}
public void testConstructor() {
expectThrows(IllegalArgumentException.class, () -> new DelayedDataCheckConfig(true, TimeValue.MINUS_ONE));
expectThrows(IllegalArgumentException.class, () -> new DelayedDataCheckConfig(true, TimeValue.timeValueHours(25)));
}
public void testEnabledDelayedDataCheckConfig() {
DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(5));
assertThat(delayedDataCheckConfig.isEnabled(), equalTo(true));
assertThat(delayedDataCheckConfig.getCheckWindow(), equalTo(TimeValue.timeValueHours(5)));
}
public void testDisabledDelayedDataCheckConfig() {
DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.disabledDelayedDataCheckConfig();
assertThat(delayedDataCheckConfig.isEnabled(), equalTo(false));
assertThat(delayedDataCheckConfig.getCheckWindow(), equalTo(null));
}
public void testDefaultDelayedDataCheckConfig() {
DelayedDataCheckConfig delayedDataCheckConfig = DelayedDataCheckConfig.defaultDelayedDataCheckConfig();
assertThat(delayedDataCheckConfig.isEnabled(), equalTo(true));
assertThat(delayedDataCheckConfig.getCheckWindow(), is(nullValue()));
}
public static DelayedDataCheckConfig createRandomizedConfig(long bucketSpanMillis) {
boolean enabled = randomBoolean();
TimeValue timeWindow = null;
if (enabled || randomBoolean()) {
// the time span is required to be at least 1 millisecond, so we use a custom method to generate a time value here
timeWindow = new TimeValue(randomLongBetween(bucketSpanMillis, bucketSpanMillis * 2));
}
return new DelayedDataCheckConfig(enabled, timeWindow);
}
@Override
protected DelayedDataCheckConfig mutateInstance(DelayedDataCheckConfig instance) throws IOException {
boolean enabled = instance.isEnabled();
TimeValue timeWindow = instance.getCheckWindow();
switch (between(0, 1)) {
case 0:
enabled = !enabled;
if (randomBoolean()) {
timeWindow = TimeValue.timeValueMillis(randomLongBetween(1, 1000));
} else {
timeWindow = null;
}
break;
case 1:
if (timeWindow == null) {
timeWindow = TimeValue.timeValueMillis(randomLongBetween(1, 1000));
} else {
timeWindow = new TimeValue(timeWindow.getMillis() + between(10, 100));
}
enabled = true;
break;
default:
throw new AssertionError("Illegal randomisation branch");
}
return new DelayedDataCheckConfig(enabled, timeWindow);
}
}
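Taken together, the tests above pin down the three construction paths for DelayedDataCheckConfig; as a quick reference sketch (the 5h window is an arbitrary example; a window must be positive and at most 24h):

DelayedDataCheckConfig on = DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(5)); // enabled, explicit window
DelayedDataCheckConfig off = DelayedDataCheckConfig.disabledDelayedDataCheckConfig(); // disabled, null window
DelayedDataCheckConfig def = DelayedDataCheckConfig.defaultDelayedDataCheckConfig(); // enabled, null window (derived downstream)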

View File

@@ -46,7 +46,7 @@ public class AnalysisConfigTests extends AbstractSerializingTestCase<AnalysisConfig> {
TimeValue bucketSpan = AnalysisConfig.Builder.DEFAULT_BUCKET_SPAN;
if (randomBoolean()) {
bucketSpan = TimeValue.timeValueSeconds(randomIntBetween(1, 1_000));
builder.setBucketSpan(bucketSpan);
}
if (isCategorization) {

View File

@@ -20,22 +20,24 @@ import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
import org.elasticsearch.xpack.core.ml.action.GetBucketsAction;
import org.elasticsearch.xpack.core.ml.action.util.PageParams;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
import org.elasticsearch.xpack.core.ml.datafeed.DelayedDataCheckConfig;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.config.Detector;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.ml.job.results.Bucket;
import org.elasticsearch.xpack.core.ml.job.results.Result;
import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetector;
import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory;
import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory.BucketWithMissingData;
import org.junit.After;
import org.junit.Before;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;
import static org.elasticsearch.xpack.ml.support.BaseMlIntegTestCase.createDatafeed;
import static org.elasticsearch.xpack.ml.support.BaseMlIntegTestCase.createDatafeedBuilder;
import static org.hamcrest.Matchers.equalTo;
@@ -64,7 +66,10 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
final String jobId = "delayed-data-detection-job";
Job.Builder job = createJob(jobId, TimeValue.timeValueMinutes(5), "count", null);
DatafeedConfig.Builder datafeedConfigBuilder =
createDatafeedBuilder(job.getId() + "-datafeed", job.getId(), Collections.singletonList(index));
datafeedConfigBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(12)));
DatafeedConfig datafeedConfig = datafeedConfigBuilder.build();
registerJob(job);
putJob(job);
openJob(job.getId());
@@ -77,26 +82,32 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
// Get the latest finalized bucket
Bucket lastBucket = getLatestFinalizedBucket(jobId);
DelayedDataDetector delayedDataDetector = newDetector(job.build(new Date()), datafeedConfig);
List<BucketWithMissingData> response = delayedDataDetector.detectMissingData(lastBucket.getEpoch()*1000);
assertThat(response.stream().mapToLong(BucketWithMissingData::getMissingDocumentCount).sum(), equalTo(0L));
long missingDocs = randomIntBetween(32, 128);
// Simply adding data within the current delayed data detection, the choice of 43100000 is arbitrary and within the window
// for the DatafeedDelayedDataDetector
writeData(logger, index, missingDocs, now - 43100000, lastBucket.getEpoch()*1000);
response = delayedDataDetector.detectMissingData(lastBucket.getEpoch()*1000);
assertThat(response.stream().mapToLong(BucketWithMissingData::getMissingDocumentCount).sum(), equalTo(missingDocs));
// Assert that the buckets are returned in order
List<Long> timeStamps = response.stream().map(BucketWithMissingData::getTimeStamp).collect(Collectors.toList());
assertEquals(timeStamps.stream().sorted().collect(Collectors.toList()), timeStamps);
}
public void testMissingDataDetectionInSpecificBucket() throws Exception {
final String jobId = "delayed-data-detection-job-missing-test-specific-bucket";
Job.Builder job = createJob(jobId, TimeValue.timeValueMinutes(5), "count", null);
DatafeedConfig.Builder datafeedConfigBuilder =
createDatafeedBuilder(job.getId() + "-datafeed", job.getId(), Collections.singletonList(index));
datafeedConfigBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(12)));
DatafeedConfig datafeedConfig = datafeedConfigBuilder.build();
registerJob(job);
putJob(job);
openJob(job.getId());
@@ -110,8 +121,7 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
// Get the latest finalized bucket
Bucket lastBucket = getLatestFinalizedBucket(jobId);
DelayedDataDetector delayedDataDetector = newDetector(job.build(new Date()), datafeedConfig);
long missingDocs = randomIntBetween(1, 10);
@@ -127,6 +137,10 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
}
}
assertThat(hasBucketWithMissing, equalTo(true));
// Assert that the buckets are returned in order
List<Long> timeStamps = response.stream().map(BucketWithMissingData::getTimeStamp).collect(Collectors.toList());
assertEquals(timeStamps.stream().sorted().collect(Collectors.toList()), timeStamps);
}
public void testMissingDataDetectionWithAggregationsAndQuery() throws Exception {
@@ -147,6 +161,8 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
.interval(TimeValue.timeValueMinutes(5).millis())));
datafeedConfigBuilder.setQuery(new RangeQueryBuilder("value").gte(numDocs/2));
datafeedConfigBuilder.setFrequency(TimeValue.timeValueMinutes(5));
datafeedConfigBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(12)));
DatafeedConfig datafeedConfig = datafeedConfigBuilder.build();
registerJob(job);
putJob(job);
@@ -160,19 +176,21 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
// Get the latest finalized bucket
Bucket lastBucket = getLatestFinalizedBucket(jobId);
DelayedDataDetector delayedDataDetector = newDetector(job.build(new Date()), datafeedConfig);
List<BucketWithMissingData> response = delayedDataDetector.detectMissingData(lastBucket.getEpoch()*1000);
assertThat(response.stream().mapToLong(BucketWithMissingData::getMissingDocumentCount).sum(), equalTo(0L));
long missingDocs = numDocs;
// Simply adding data within the current delayed data detection, the choice of 43100000 is arbitrary and within the window
// for the DatafeedDelayedDataDetector
writeData(logger, index, missingDocs, now - 43100000, lastBucket.getEpoch()*1000);
response = delayedDataDetector.detectMissingData(lastBucket.getEpoch()*1000);
assertThat(response.stream().mapToLong(BucketWithMissingData::getMissingDocumentCount).sum(), equalTo((missingDocs+1)/2));
// Assert that the buckets are returned in order
List<Long> timeStamps = response.stream().map(BucketWithMissingData::getTimeStamp).collect(Collectors.toList());
assertEquals(timeStamps.stream().sorted().collect(Collectors.toList()), timeStamps);
}
private Job.Builder createJob(String id, TimeValue bucketSpan, String function, String field) {
@@ -231,4 +249,8 @@ public class DelayedDataDetectorIT extends MlNativeAutodetectIntegTestCase {
getBucketsRequest.setPageParams(new PageParams(0, 1));
return getBuckets(getBucketsRequest).get(0);
}
private DelayedDataDetector newDetector(Job job, DatafeedConfig datafeedConfig) {
return DelayedDataDetectorFactory.buildDetector(job, datafeedConfig, client());
}
}

View File

@@ -12,6 +12,7 @@ import org.elasticsearch.client.Client;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.ThreadContext;
import org.elasticsearch.common.xcontent.XContentElasticsearchExtension;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.core.internal.io.Streams;
import org.elasticsearch.index.mapper.DateFieldMapper;
@@ -23,12 +24,16 @@ import org.elasticsearch.xpack.core.ml.datafeed.extractor.DataExtractor;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetector;
import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory.BucketWithMissingData;
import org.elasticsearch.xpack.ml.datafeed.extractor.DataExtractorFactory;
import org.elasticsearch.xpack.ml.notifications.Auditor;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
@@ -41,6 +46,7 @@ class DatafeedJob {
private static final Logger LOGGER = LogManager.getLogger(DatafeedJob.class);
private static final int NEXT_TASK_DELAY_MS = 100;
static final long MISSING_DATA_CHECK_INTERVAL_MS = 900_000; //15 minutes in ms
private final Auditor auditor;
private final String jobId;
@@ -50,15 +56,19 @@ class DatafeedJob {
private final Client client;
private final DataExtractorFactory dataExtractorFactory;
private final Supplier<Long> currentTimeSupplier;
private final DelayedDataDetector delayedDataDetector;
private volatile long lookbackStartTimeMs;
private volatile long latestFinalBucketEndTimeMs;
private volatile long lastDataCheckTimeMs;
private volatile int lastDataCheckAudit;
private volatile Long lastEndTimeMs;
private AtomicBoolean running = new AtomicBoolean(true);
private volatile boolean isIsolated;
DatafeedJob(String jobId, DataDescription dataDescription, long frequencyMs, long queryDelayMs,
DataExtractorFactory dataExtractorFactory, Client client, Auditor auditor, Supplier<Long> currentTimeSupplier,
DelayedDataDetector delayedDataDetector, long latestFinalBucketEndTimeMs, long latestRecordTimeMs) {
this.jobId = jobId;
this.dataDescription = Objects.requireNonNull(dataDescription);
this.frequencyMs = frequencyMs;
@@ -67,7 +77,8 @@ class DatafeedJob {
this.client = client;
this.auditor = auditor;
this.currentTimeSupplier = currentTimeSupplier;
this.delayedDataDetector = delayedDataDetector;
this.latestFinalBucketEndTimeMs = latestFinalBucketEndTimeMs;
long lastEndTime = Math.max(latestFinalBucketEndTimeMs, latestRecordTimeMs);
if (lastEndTime > 0) {
lastEndTimeMs = lastEndTime;
@@ -151,9 +162,49 @@ class DatafeedJob {
request.setCalcInterim(true);
request.setAdvanceTime(String.valueOf(end));
run(start, end, request);
checkForMissingDataIfNecessary();
return nextRealtimeTimestamp();
}
private void checkForMissingDataIfNecessary() {
if (isRunning() && !isIsolated && checkForMissingDataTriggered()) {
// Keep track of the last bucket time for which we did a missing data check
this.lastDataCheckTimeMs = this.currentTimeSupplier.get();
List<BucketWithMissingData> missingDataBuckets = delayedDataDetector.detectMissingData(latestFinalBucketEndTimeMs);
if (missingDataBuckets.isEmpty() == false) {
long totalRecordsMissing = missingDataBuckets.stream()
.mapToLong(BucketWithMissingData::getMissingDocumentCount)
.sum();
// The response is sorted by asc timestamp, so the last entry is the last bucket
Date lastBucketDate = missingDataBuckets.get(missingDataBuckets.size() - 1).getBucket().getTimestamp();
int newAudit = Objects.hash(totalRecordsMissing, lastBucketDate);
if (newAudit != lastDataCheckAudit) {
auditor.warning(jobId,
Messages.getMessage(Messages.JOB_AUDIT_DATAFEED_MISSING_DATA, totalRecordsMissing,
XContentElasticsearchExtension.DEFAULT_DATE_PRINTER.print(lastBucketDate.getTime())));
lastDataCheckAudit = newAudit;
}
}
}
}
/**
* We wait a static interval of 15 minutes until the next missing data check.
*
* However, if our delayed data window is smaller than that, we will want to check at every available window (if the frequency allows).
* This helps us miss as few buckets as possible in the delayed data check.
*
* If our frequency or query delay is longer than our default interval or window size, we will end up looking for missing data on
* every real-time trigger. This should be OK, as we are pulling from the index at such a slow pace that another query will
* probably not even be noticeable at such a large timescale.
*/
private boolean checkForMissingDataTriggered() {
return this.currentTimeSupplier.get() > this.lastDataCheckTimeMs
+ Math.min(MISSING_DATA_CHECK_INTERVAL_MS, delayedDataDetector.getWindow());
}
/**
* Stops the datafeed job
*
@@ -260,7 +311,10 @@
// we call flush the job is closed. Thus, we don't flush unless the
// datafeed is still running.
if (isRunning() && !isIsolated) {
Date lastFinalizedBucketEnd = flushJob(flushRequest).getLastFinalizedBucketEnd();
if (lastFinalizedBucketEnd != null) {
this.latestFinalBucketEndTimeMs = lastFinalizedBucketEnd.getTime();
}
}
if (recordCount == 0) {
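To make the trigger condition in checkForMissingDataTriggered concrete, here is a small worked sketch of the arithmetic (the 10-minute detector window is illustrative):

// Effective re-check interval is min(15 minutes, detector window) = 10 minutes here,
// so a real-time run at t = 11 minutes triggers the missing data check.
long lastDataCheckTimeMs = 0L;
long currentTimeMs = TimeValue.timeValueMinutes(11).millis();
long effectiveIntervalMs = Math.min(DatafeedJob.MISSING_DATA_CHECK_INTERVAL_MS, TimeValue.timeValueMinutes(10).millis());
boolean triggered = currentTimeMs > lastDataCheckTimeMs + effectiveIntervalMs; // true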

View File

@@ -13,6 +13,8 @@ import org.elasticsearch.xpack.core.ml.action.util.QueryPage;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetector;
import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory;
import org.elasticsearch.xpack.ml.job.persistence.BucketsQueryBuilder;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.core.ml.job.results.Bucket;
@@ -46,8 +48,9 @@ public class DatafeedJobBuilder {
Consumer<Context> contextHanlder = context -> {
TimeValue frequency = getFrequencyOrDefault(datafeed, job);
TimeValue queryDelay = datafeed.getQueryDelay();
DelayedDataDetector delayedDataDetector = DelayedDataDetectorFactory.buildDetector(job, datafeed, client);
DatafeedJob datafeedJob = new DatafeedJob(job.getId(), buildDataDescription(job), frequency.millis(), queryDelay.millis(),
context.dataExtractorFactory, client, auditor, currentTimeSupplier, delayedDataDetector,
context.latestFinalBucketEndMs, context.latestRecordTimeMs);
listener.onResponse(datafeedJob);
};

View File

@@ -3,26 +3,26 @@
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.datafeed.delayeddatacheck;
import org.elasticsearch.action.search.SearchAction;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.util.concurrent.ThreadContext;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.xpack.core.ml.action.GetBucketsAction;
import org.elasticsearch.xpack.core.ml.action.util.PageParams;
import org.elasticsearch.xpack.core.ml.datafeed.extractor.ExtractorUtils;
import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory.BucketWithMissingData;
import org.elasticsearch.xpack.core.ml.job.results.Bucket;
import org.elasticsearch.xpack.core.ml.utils.Intervals;
import org.joda.time.DateTime;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -35,32 +35,33 @@ import static org.elasticsearch.xpack.core.ClientHelper.stashWithOrigin;
/**
* This class will search the buckets and indices over a given window to determine if any data is missing
*/
public class DatafeedDelayedDataDetector implements DelayedDataDetector {
private static final String DATE_BUCKETS = "date_buckets";
private final long bucketSpan;
private final long window;
private final Client client;
private final String timeField;
private final String jobId;
private final QueryBuilder datafeedQuery;
private final String[] datafeedIndices;
DatafeedDelayedDataDetector(long bucketSpan, long window, String jobId, String timeField, QueryBuilder datafeedQuery,
String[] datafeedIndices, Client client) {
this.bucketSpan = bucketSpan;
this.window = window;
this.jobId = jobId;
this.timeField = timeField;
this.datafeedQuery = datafeedQuery;
this.datafeedIndices = datafeedIndices;
this.client = client;
}
/**
* This method looks at the {@link DatafeedDelayedDataDetector#datafeedIndices}
* from {@code latestFinalizedBucket - window} to {@code latestFinalizedBucket} and compares the document counts with the
* {@link DatafeedDelayedDataDetector#jobId}'s finalized buckets' event counts.
*
* It is done synchronously, and can block for a considerable amount of time; it should only be executed within the appropriate
* thread pool.
@@ -68,9 +69,15 @@
* @param latestFinalizedBucketMs The latest finalized bucket timestamp in milliseconds, signifies the end of the time window check
* @return A List of {@link BucketWithMissingData} objects that contain each bucket with the current number of missing docs
*/
@Override
public List<BucketWithMissingData> detectMissingData(long latestFinalizedBucketMs) {
final long end = Intervals.alignToFloor(latestFinalizedBucketMs, bucketSpan);
final long start = Intervals.alignToFloor(latestFinalizedBucketMs - window, bucketSpan);
if (end <= start) {
return Collections.emptyList();
}
List<Bucket> finalizedBuckets = checkBucketEvents(start, end);
Map<Long, Long> indexedData = checkCurrentBucketEventCount(start, end);
return finalizedBuckets.stream()
@@ -81,10 +88,17 @@
.collect(Collectors.toList());
}
@Override
public long getWindow() {
return window;
}
private List<Bucket> checkBucketEvents(long start, long end) {
GetBucketsAction.Request request = new GetBucketsAction.Request(jobId);
request.setStart(Long.toString(start));
request.setEnd(Long.toString(end));
request.setSort("timestamp");
request.setDescending(false);
request.setExcludeInterim(true);
request.setPageParams(new PageParams(0, (int)((end - start)/bucketSpan)));
@@ -95,13 +109,12 @@
}
private Map<Long, Long> checkCurrentBucketEventCount(long start, long end) {
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
.size(0)
.aggregation(new DateHistogramAggregationBuilder(DATE_BUCKETS).interval(bucketSpan).field(timeField))
.query(ExtractorUtils.wrapInTimeRangeQuery(datafeedQuery, timeField, start, end));
SearchRequest searchRequest = new SearchRequest(datafeedIndices).source(searchSourceBuilder);
try (ThreadContext.StoredContext ignore = stashWithOrigin(client.threadPool().getThreadContext(), ML_ORIGIN)) {
SearchResponse response = client.execute(SearchAction.INSTANCE, searchRequest).actionGet();
List<? extends Histogram.Bucket> buckets = ((Histogram)response.getAggregations().get(DATE_BUCKETS)).getBuckets();
@@ -132,27 +145,4 @@
private static long calculateMissing(Map<Long, Long> indexedData, Bucket bucket) {
return indexedData.getOrDefault(bucket.getEpoch() * 1000, 0L) - bucket.getEventCount();
}
public static class BucketWithMissingData {
private final long missingDocumentCount;
private final Bucket bucket;
static BucketWithMissingData fromMissingAndBucket(long missingDocumentCount, Bucket bucket) {
return new BucketWithMissingData(missingDocumentCount, bucket);
}
private BucketWithMissingData(long missingDocumentCount, Bucket bucket) {
this.missingDocumentCount = missingDocumentCount;
this.bucket = bucket;
}
public Bucket getBucket() {
return bucket;
}
public long getMissingDocumentCount() {
return missingDocumentCount;
}
}
}

View File

@@ -0,0 +1,14 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.datafeed.delayeddatacheck;
import java.util.List;
public interface DelayedDataDetector {
List<DelayedDataDetectorFactory.BucketWithMissingData> detectMissingData(long endingTimeStamp);
long getWindow();
}

View File

@@ -0,0 +1,125 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.datafeed.delayeddatacheck;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
import org.elasticsearch.xpack.core.ml.datafeed.DelayedDataCheckConfig;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import org.elasticsearch.xpack.core.ml.job.results.Bucket;
import java.util.Objects;
/**
* Builds the appropriate {@link DelayedDataDetector} implementation, with the appropriate settings, given the parameters.
*/
public class DelayedDataDetectorFactory {
// There are eight 15-minute buckets in a two-hour span, so we use that count as the fallback for very long bucket spans
private static final int FALLBACK_NUMBER_OF_BUCKETS_TO_SPAN = 8;
private static final TimeValue DEFAULT_CHECK_WINDOW = TimeValue.timeValueHours(2);
/**
* This will build the appropriate detector given the parameters.
*
* If the {@link DatafeedConfig#getDelayedDataCheckConfig()} is not enabled, a {@link NullDelayedDataDetector} is returned, which
* does not do any checks, and only supplies an empty collection.
*
* @param job The {@link Job} object for the given `datafeedConfig`
* @param datafeedConfig The {@link DatafeedConfig} for which to create the {@link DelayedDataDetector}
* @param client The {@link Client} capable of taking action against the ES Cluster.
* @return A new {@link DelayedDataDetector}
*/
public static DelayedDataDetector buildDetector(Job job, DatafeedConfig datafeedConfig, Client client) {
if (datafeedConfig.getDelayedDataCheckConfig().isEnabled()) {
long window = validateAndCalculateWindowLength(job.getAnalysisConfig().getBucketSpan(),
datafeedConfig.getDelayedDataCheckConfig().getCheckWindow());
long bucketSpan = job.getAnalysisConfig().getBucketSpan() == null ? 0 : job.getAnalysisConfig().getBucketSpan().millis();
return new DatafeedDelayedDataDetector(bucketSpan,
window,
job.getId(),
job.getDataDescription().getTimeField(),
datafeedConfig.getQuery(),
datafeedConfig.getIndices().toArray(new String[0]),
client);
} else {
return new NullDelayedDataDetector();
}
}
private static long validateAndCalculateWindowLength(TimeValue bucketSpan, TimeValue currentWindow) {
if (bucketSpan == null) {
return 0;
}
if (currentWindow == null) { // we should provide a good default as the user did not specify a window
if (bucketSpan.compareTo(DEFAULT_CHECK_WINDOW) >= 0) {
return FALLBACK_NUMBER_OF_BUCKETS_TO_SPAN * bucketSpan.millis();
} else {
return DEFAULT_CHECK_WINDOW.millis();
}
}
if (currentWindow.compareTo(bucketSpan) < 0) {
throw new IllegalArgumentException(
Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL, currentWindow.getStringRep(),
bucketSpan.getStringRep()));
} else if (currentWindow.millis() > bucketSpan.millis() * DelayedDataCheckConfig.MAX_NUMBER_SPANABLE_BUCKETS) {
throw new IllegalArgumentException(
Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS, currentWindow.getStringRep(),
bucketSpan.getStringRep()));
}
return currentWindow.millis();
}
public static class BucketWithMissingData {
private final long missingDocumentCount;
private final Bucket bucket;
public static BucketWithMissingData fromMissingAndBucket(long missingDocumentCount, Bucket bucket) {
return new BucketWithMissingData(missingDocumentCount, bucket);
}
private BucketWithMissingData(long missingDocumentCount, Bucket bucket) {
this.missingDocumentCount = missingDocumentCount;
this.bucket = bucket;
}
public long getTimeStamp() {
return bucket.getEpoch();
}
public Bucket getBucket() {
return bucket;
}
public long getMissingDocumentCount() {
return missingDocumentCount;
}
@Override
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other == null || getClass() != other.getClass()) {
return false;
}
BucketWithMissingData that = (BucketWithMissingData) other;
return Objects.equals(that.bucket, bucket) && Objects.equals(that.missingDocumentCount, missingDocumentCount);
}
@Override
public int hashCode() {
return Objects.hash(bucket, missingDocumentCount);
}
}
}
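A worked sketch of how validateAndCalculateWindowLength picks the default window when check_window is unset, using the constants defined above (the bucket span values are illustrative):

// bucket_span below the 2h default window: fall back to the full DEFAULT_CHECK_WINDOW.
long windowForSmallSpanMs = TimeValue.timeValueHours(2).millis(); // bucket_span = 5m -> window = 2h
// bucket_span at or above 2h: span FALLBACK_NUMBER_OF_BUCKETS_TO_SPAN buckets instead.
long windowForLargeSpanMs = 8 * TimeValue.timeValueHours(4).millis(); // bucket_span = 4h -> window = 32h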

View File

@@ -0,0 +1,35 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.datafeed.delayeddatacheck;
import java.util.Collections;
import java.util.List;
/**
* This class will always return an {@link Collections#emptyList()}.
*/
public class NullDelayedDataDetector implements DelayedDataDetector {
/**
* Always returns an empty collection
* @param unusedTimeStamp unused Parameter
* @return {@link Collections#emptyList()}
*/
@Override
public List<DelayedDataDetectorFactory.BucketWithMissingData> detectMissingData(long unusedTimeStamp) {
return Collections.emptyList();
}
/**
* Always returns 0
* @return 0
*/
@Override
public long getWindow() {
return 0L;
}
}

View File

@@ -10,6 +10,7 @@ import org.elasticsearch.client.Client;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.ThreadContext;
import org.elasticsearch.common.xcontent.XContentElasticsearchExtension;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.mock.orig.Mockito;
import org.elasticsearch.test.ESTestCase;
@@ -18,6 +19,10 @@ import org.elasticsearch.xpack.core.ml.action.FlushJobAction;
import org.elasticsearch.xpack.core.ml.action.PersistJobAction;
import org.elasticsearch.xpack.core.ml.action.PostDataAction;
import org.elasticsearch.xpack.core.ml.datafeed.extractor.DataExtractor;
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import org.elasticsearch.xpack.core.ml.job.results.Bucket;
import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetector;
import org.elasticsearch.xpack.ml.datafeed.delayeddatacheck.DelayedDataDetectorFactory.BucketWithMissingData;
import org.elasticsearch.xpack.ml.datafeed.extractor.DataExtractorFactory;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
@@ -30,6 +35,7 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Optional;
@@ -56,10 +62,12 @@ public class DatafeedJobTests extends ESTestCase {
private DataExtractorFactory dataExtractorFactory;
private DataExtractor dataExtractor;
private Client client;
private DelayedDataDetector delayedDataDetector;
private DataDescription.Builder dataDescription;
ActionFuture<PostDataAction.Response> postDataFuture;
private ActionFuture<FlushJobAction.Response> flushJobFuture;
private ArgumentCaptor<FlushJobAction.Request> flushJobRequests;
private FlushJobAction.Response flushJobResponse;
private long currentTime;
private XContentType xContentType;
@@ -79,6 +87,9 @@
dataDescription.setFormat(DataDescription.DataFormat.XCONTENT);
postDataFuture = mock(ActionFuture.class);
flushJobFuture = mock(ActionFuture.class);
flushJobResponse = new FlushJobAction.Response();
delayedDataDetector = mock(DelayedDataDetector.class);
when(delayedDataDetector.getWindow()).thenReturn(DatafeedJob.MISSING_DATA_CHECK_INTERVAL_MS);
currentTime = 0;
xContentType = XContentType.JSON;
@@ -96,6 +107,7 @@
when(postDataFuture.actionGet()).thenReturn(new PostDataAction.Response(dataCounts));
flushJobRequests = ArgumentCaptor.forClass(FlushJobAction.Request.class);
when(flushJobFuture.actionGet()).thenReturn(flushJobResponse);
when(client.execute(same(FlushJobAction.INSTANCE), flushJobRequests.capture())).thenReturn(flushJobFuture);
}
@@ -193,6 +205,13 @@
}
public void testRealtimeRun() throws Exception {
flushJobResponse = new FlushJobAction.Response(true, new Date(2000));
Bucket bucket = mock(Bucket.class);
when(bucket.getTimestamp()).thenReturn(new Date(2000));
when(flushJobFuture.actionGet()).thenReturn(flushJobResponse);
when(client.execute(same(FlushJobAction.INSTANCE), flushJobRequests.capture())).thenReturn(flushJobFuture);
when(delayedDataDetector.detectMissingData(2000))
.thenReturn(Collections.singletonList(BucketWithMissingData.fromMissingAndBucket(10, bucket)));
currentTime = 60000L;
long frequencyMs = 100;
long queryDelayMs = 1000;
@@ -206,6 +225,29 @@
flushRequest.setAdvanceTime("59000");
verify(client).execute(same(FlushJobAction.INSTANCE), eq(flushRequest));
verify(client, never()).execute(same(PersistJobAction.INSTANCE), any());
// Execute a second valid time, but do so in a smaller window than the interval
currentTime = 62000L;
byte[] contentBytes = "content".getBytes(StandardCharsets.UTF_8);
InputStream inputStream = new ByteArrayInputStream(contentBytes);
when(dataExtractor.hasNext()).thenReturn(true).thenReturn(false);
when(dataExtractor.next()).thenReturn(Optional.of(inputStream));
when(dataExtractorFactory.newExtractor(anyLong(), anyLong())).thenReturn(dataExtractor);
datafeedJob.runRealtime();
// Execute a third time, but this time make sure we exceed the data check interval, but keep the delayedDataDetector response
// the same
currentTime = 62000L + DatafeedJob.MISSING_DATA_CHECK_INTERVAL_MS + 1;
inputStream = new ByteArrayInputStream(contentBytes);
when(dataExtractor.hasNext()).thenReturn(true).thenReturn(false);
when(dataExtractor.next()).thenReturn(Optional.of(inputStream));
when(dataExtractorFactory.newExtractor(anyLong(), anyLong())).thenReturn(dataExtractor);
datafeedJob.runRealtime();
verify(auditor, times(1)).warning(jobId,
Messages.getMessage(Messages.JOB_AUDIT_DATAFEED_MISSING_DATA,
10,
XContentElasticsearchExtension.DEFAULT_DATE_PRINTER.print(2000)));
}
public void testEmptyDataCountGivenlookback() throws Exception {
@@ -321,6 +363,6 @@
long latestRecordTimeMs) {
Supplier<Long> currentTimeSupplier = () -> currentTime;
return new DatafeedJob(jobId, dataDescription.build(), frequencyMs, queryDelayMs, dataExtractorFactory, client, auditor,
currentTimeSupplier, delayedDataDetector, latestFinalBucketEndTimeMs, latestRecordTimeMs);
}
}

View File

@@ -14,6 +14,7 @@ import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedJobValidator;
import org.elasticsearch.xpack.core.ml.datafeed.DelayedDataCheckConfig;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.config.Detector;
@@ -176,6 +177,30 @@
assertEquals("Datafeed frequency [1.5m] must be a multiple of the aggregation interval [60000ms]", e.getMessage());
}
public void testVerify_BucketIntervalAndDataCheckWindowAreValid() {
Job.Builder builder = buildJobBuilder("foo");
AnalysisConfig.Builder ac = createAnalysisConfig();
ac.setSummaryCountFieldName("some_count");
ac.setBucketSpan(TimeValue.timeValueSeconds(2));
builder.setAnalysisConfig(ac);
Job job = builder.build(new Date());
DatafeedConfig.Builder datafeedBuilder = createValidDatafeedConfig();
datafeedBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueMinutes(10)));
DatafeedJobValidator.validate(datafeedBuilder.build(), job);
datafeedBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueSeconds(1)));
ElasticsearchStatusException e = ESTestCase.expectThrows(ElasticsearchStatusException.class,
() -> DatafeedJobValidator.validate(datafeedBuilder.build(), job));
assertEquals(Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL, "1s", "2s"), e.getMessage());
datafeedBuilder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(TimeValue.timeValueHours(24)));
e = ESTestCase.expectThrows(ElasticsearchStatusException.class,
() -> DatafeedJobValidator.validate(datafeedBuilder.build(), job));
assertEquals(Messages.getMessage(
Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS, "1d", "2s"), e.getMessage());
}
private static Job.Builder buildJobBuilder(String id) {
Job.Builder builder = new Job.Builder(id);
AnalysisConfig.Builder ac = createAnalysisConfig();

View File

@@ -1,76 +0,0 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.datafeed;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.config.Detector;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import java.util.Collections;
import java.util.Date;
import static org.hamcrest.Matchers.equalTo;
import static org.mockito.Mockito.mock;
public class DelayedDataDetectorTests extends ESTestCase {
public void testConstructorWithValidValues() {
TimeValue window = TimeValue.timeValueSeconds(10);
Job job = createJob(TimeValue.timeValueSeconds(1));
DelayedDataDetector delayedDataDetector = new DelayedDataDetector(job, createDatafeed(), window, mock(Client.class));
assertNotNull(delayedDataDetector);
}
public void testConstructorWithInvalidValues() {
TimeValue shortWindow = TimeValue.timeValueMillis(500);
Job job = createJob(TimeValue.timeValueSeconds(1));
Exception exception = expectThrows(IllegalArgumentException.class,
() -> new DelayedDataDetector(job, createDatafeed(), shortWindow, mock(Client.class)));
assertThat(exception.getMessage(), equalTo("[window] must be greater or equal to the [bucket_span]"));
TimeValue longWindow = TimeValue.timeValueSeconds(20000);
exception = expectThrows(IllegalArgumentException.class,
() -> new DelayedDataDetector(job, createDatafeed(), longWindow, mock(Client.class)));
assertThat(exception.getMessage(), equalTo("[window] must contain less than 10000 buckets at the current [bucket_span]"));
}
private Job createJob(TimeValue bucketSpan) {
DataDescription.Builder dataDescription = new DataDescription.Builder();
dataDescription.setFormat(DataDescription.DataFormat.XCONTENT);
dataDescription.setTimeField("time");
dataDescription.setTimeFormat(DataDescription.EPOCH_MS);
Detector.Builder d = new Detector.Builder("count", null);
AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(Collections.singletonList(d.build()));
analysisConfig.setBucketSpan(bucketSpan);
Job.Builder builder = new Job.Builder();
builder.setId("test-job");
builder.setAnalysisConfig(analysisConfig);
builder.setDataDescription(dataDescription);
return builder.build(new Date());
}
private DatafeedConfig createDatafeed() {
DatafeedConfig.Builder builder = new DatafeedConfig.Builder("id", "jobId");
builder.setIndices(Collections.singletonList("index1"));
builder.setTypes(Collections.singletonList("doc"));
return builder.build();
}
}

View File

@@ -0,0 +1,103 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.ml.datafeed.delayeddatacheck;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
import org.elasticsearch.xpack.core.ml.datafeed.DelayedDataCheckConfig;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.config.Detector;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import java.util.Collections;
import java.util.Date;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.mockito.Mockito.mock;
public class DelayedDataDetectorFactoryTests extends ESTestCase {
public void testBuilder() {
Job job = createJob(TimeValue.timeValueSeconds(2));
DatafeedConfig datafeedConfig = createDatafeed(false, null);
// Should not throw
assertThat(DelayedDataDetectorFactory.buildDetector(job, datafeedConfig, mock(Client.class)),
instanceOf(NullDelayedDataDetector.class));
datafeedConfig = createDatafeed(true, TimeValue.timeValueMinutes(10));
// Should not throw
assertThat(DelayedDataDetectorFactory.buildDetector(job, datafeedConfig, mock(Client.class)),
instanceOf(DatafeedDelayedDataDetector.class));
DatafeedConfig tooSmallDatafeedConfig = createDatafeed(true, TimeValue.timeValueSeconds(1));
IllegalArgumentException e = ESTestCase.expectThrows(IllegalArgumentException.class,
() -> DelayedDataDetectorFactory.buildDetector(job, tooSmallDatafeedConfig, mock(Client.class)));
assertEquals(Messages.getMessage(Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_TOO_SMALL, "1s", "2s"), e.getMessage());
DatafeedConfig tooBigDatafeedConfig = createDatafeed(true, TimeValue.timeValueHours(12));
e = ESTestCase.expectThrows(IllegalArgumentException.class,
() -> DelayedDataDetectorFactory.buildDetector(job, tooBigDatafeedConfig, mock(Client.class)));
assertEquals(Messages.getMessage(
Messages.DATAFEED_CONFIG_DELAYED_DATA_CHECK_SPANS_TOO_MANY_BUCKETS, "12h", "2s"), e.getMessage());
Job withBigBucketSpan = createJob(TimeValue.timeValueHours(3));
datafeedConfig = createDatafeed(true, null);
// Should not throw
DelayedDataDetector delayedDataDetector =
DelayedDataDetectorFactory.buildDetector(withBigBucketSpan, datafeedConfig, mock(Client.class));
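// With no window configured and a 3h bucket span, the default window is 8 * bucket_span.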
assertThat(delayedDataDetector.getWindow(), equalTo(TimeValue.timeValueHours(3).millis() * 8));
datafeedConfig = createDatafeed(true, null);
// Should not throw
delayedDataDetector =
DelayedDataDetectorFactory.buildDetector(job, datafeedConfig, mock(Client.class));
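// With the small 2s bucket span, the default window bottoms out at 2 hours.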
assertThat(delayedDataDetector.getWindow(), equalTo(TimeValue.timeValueHours(2).millis()));
}
private Job createJob(TimeValue bucketSpan) {
DataDescription.Builder dataDescription = new DataDescription.Builder();
dataDescription.setFormat(DataDescription.DataFormat.XCONTENT);
dataDescription.setTimeField("time");
dataDescription.setTimeFormat(DataDescription.EPOCH_MS);
Detector.Builder d = new Detector.Builder("count", null);
AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(Collections.singletonList(d.build()));
analysisConfig.setBucketSpan(bucketSpan);
Job.Builder builder = new Job.Builder();
builder.setId("test-job");
builder.setAnalysisConfig(analysisConfig);
builder.setDataDescription(dataDescription);
return builder.build(new Date());
}
private DatafeedConfig createDatafeed(boolean shouldDetectDelayedData, TimeValue delayedDatacheckWindow) {
DatafeedConfig.Builder builder = new DatafeedConfig.Builder("id", "jobId");
builder.setIndices(Collections.singletonList("index1"));
builder.setTypes(Collections.singletonList("doc"));
if (shouldDetectDelayedData) {
builder.setDelayedDataCheckConfig(DelayedDataCheckConfig.enabledDelayedDataCheckConfig(delayedDatacheckWindow));
} else {
builder.setDelayedDataCheckConfig(DelayedDataCheckConfig.disabledDelayedDataCheckConfig());
}
return builder.build();
}
}
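Taken together, the two default-window assertions above pin down the factory's fallback when no window is configured: max(2 hours, 8 * bucket_span). A hedged reconstruction of that rule follows; the defaultWindow helper is illustrative only, not part of this commit:

// Inferred from the assertions above, not copied from the implementation.
static TimeValue defaultWindow(TimeValue bucketSpan) {
    long eightBucketsMs = bucketSpan.millis() * 8;
    long twoHourFloorMs = TimeValue.timeValueHours(2).millis();
    return TimeValue.timeValueMillis(Math.max(twoHourFloorMs, eightBucketsMs));
}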